You return
from your function prior to doing anything with any element other than the first one returned from querySelectorAll
. In addition, replace
does not modify the string, it returns a new copy. Thus, the count you are returning is that of wordCountTemp[i].innerHTML.split(" ").length
.
Your original code: (with comments)
function() {
var wordCounts;
var wordCountTemp = document.querySelectorAll("#text-block-10, #text-block-12");
var i;
for(i = 0; i < wordCountTemp.length; i++){
wordCounts = wordCountTemp[i].innerHTML;
wordCounts.replace(/(^\s*)|(\s*$)/gi,""); //Has no effect
wordCounts.replace(/[ ]{2,}/gi," "); //Has no effect
wordCounts.replace(/\n /,"\n"); //Has no effect
//This next line returns your result in the first pass through the loop.
// Only the first element returned by querySelectorAll is acted upon.
// No other element is processed other than the first one.
return wordCounts.split(" ").length;
}
}
Note: I am changing innerHTML
to textContent
. I'm assuming that you only want to count the words which are text (i.e. not HTML code, scripts, etc.). I also changed the variable name wordCountTemp
to nodeList
as that is more descriptive of what it is (it is, in fact, a NodeList)
To use a similar structure to what you are already using:
function countWords() {
var wordCounts;
var totalWordCount=0;
var nodeList = document.querySelectorAll("#text-block-10, #text-block-12");
for(var i = 0; i < nodeList.length; i++){
wordCounts = nodeList[i].textContent;
wordCounts = wordCounts.replace(/(^\s*)|(\s*$)/gi,"");
wordCounts = wordCounts.replace(/[ ]{2,}/gi," ");
wordCounts = wordCounts.replace(/\n /,"\n");
totalWordCount += wordCounts.split(" ").length;
}
return totalWordCount; //return the total count after all passes through loop
}
Instead of assigning your result of each replace
to wordCounts
over and over again to progressively modify it, you could just directly act on the new string returned by replace
:
function countWords() {
var totalWordCount=0;
var nodeList = document.querySelectorAll("#text-block-10, #text-block-12");
for(var i = 0; i < nodeList.length; i++){
totalWordCount += nodeList[i].textContent
.replace(/(^\s*)|(\s*$)/gi,"")
.replace(/[ ]{2,}/gi," ")
.replace(/\n /,"\n")
.split(" ").length;
}
return totalWordCount; //return the total count after all passes through loop
}
Using regular expressions is, relatively, expensive. not really that much, but there is no reason not to optimize in this situation. Instead of performing all of the replace
functions for each element, it is more efficient to, within the loop, concatenate the strings returned by textContent
. Then, after you have one big long string with all the text from all the elements, you can perform the replace
and split
actions once.
function countWords() {
var allText='';
var nodeList = document.querySelectorAll("#text-block-10, #text-block-12");
for(var i = 0; i < nodeList.length; i++){
allText += ' ' + nodeList[i].textContent;
}
//return the total count after getting the textContent from all elements
return allText.replace(/(^\s*)|(\s*$)/gi,"")
.replace(/[ ]{2,}/gi," ")
.replace(/\n /,"\n")
.split(" ").length;
}
Note: All of the above assume that none of the elements returned by the querySelectorAll
are children of other elements which are returned. If they are, you will be counting the same text twice.