var fileAndWordList = new Array();\r
var txt_wordsnotfound = "";\r
\r
-\r
- /* expressionInput, search input is lower cased, plus replacement of special chars\r
- * nqu: expressionInput, la recherche est lower cased, plus remplacement des char speciaux\r
- * */\r
- searchFor = expressionInput.toLowerCase().replace(/<\//g, "_st_").replace(/\$_/g, "_di_").replace(/\.|%2C|%3B|%21|%3A|@|\/|\*/g, " ").replace(/(%20)+/g, " ").replace(/_st_/g, "</").replace(/_di_/g, "%24_");\r
+
+ // --------------------------------------
+ // Begin Thu's patch \r
+ /*nqu: expressionInput, the search input is lower cased, plus replacement of special characters*/\r
+ //The original replacement expression is: \r
+ //searchFor = expressionInput.toLowerCase().replace(/<\//g, "_st_").replace(/\$_/g, "_di_").replace(/\.|%2C|%3B|%21|%3A|@|\/|\*/g, " ").replace(/(%20)+/g, " ").replace(/_st_/g, "</").replace(/_di_/g, "%24_");\r
+ //The above expression was error prone because it did not correctly handle words that contain a . as part of the word, for example, document.txt\r
+ \r
+ //Do not automatically replace a . with a space\r
+ searchFor = expressionInput.toLowerCase().replace(/<\//g, "_st_").replace(/\$_/g, "_di_").replace(/%2C|%3B|%21|%3A|@|\/|\*/g, " ").replace(/(%20)+/g, " ").replace(/_st_/g, "</").replace(/_di_/g, "%24_");\r
+ \r
+ //If it ends with a period, remove that trailing period\r
+ searchFor = searchFor.replace(/[.]$/,"");
+ // End Thu's Patch
+ // ------------------------------------------\r
\r
searchFor = searchFor.replace(/ +/g, " ");\r
searchFor = searchFor.replace(/ $/, "").replace(/^ /, "");\r
* Compare with the indexed words (in the w[] array), and push words that are in it to tempTab.\r
*/\r
var tempTab = new Array();\r
- \r
- var splitedValues = expressionInput.split(" ");\r
- finalWordsList = finalWordsList.concat(splitedValues);\r
- finalArray = finalWordsList;\r
- finalArray = removeDuplicate(finalArray);\r
- // OXYGEN PATCH START.\r
- var wordsArray = '';\r
- // OXYGEN PATCH END.\r
- for (var t in finalWordsList) { \r
- // OXYGEN PATCH START.\r
- if (doStem){\r
- // OXYGEN PATCH END.\r
- if (w[finalWordsList[t].toString()] == undefined) {\r
- txt_wordsnotfound += finalWordsList[t] + " ";\r
- } else {\r
- tempTab.push(finalWordsList[t]);\r
+
+ // ---------------------------------------
+ // Thu's patch\r
+ //Do not iterate over the array with for...in, for example:\r
+ //for(var t in finalWordsList)\r
+ //it causes errors when finalWordsList contains \r
+ //stemmed words such as: kei (the stem of the word: key)\r
+ for(var t=0;t<finalWordsList.length;++t){\r
+ var aWord=finalWordsList[t];\r
+ //w is a map-like object; use the current word from finalWordsList as the key\r
+ if(w[aWord] == undefined){\r
+ txt_wordsnotfound += aWord + " ";\r
}\r
- // OXYGEN PATCH START.\r
- } else {\r
- var searchedValue = finalWordsList[t].toString();\r
- if (wordsStartsWith(searchedValue) != undefined){\r
- wordsArray+=wordsStartsWith(searchedValue);\r
+ else{\r
+ tempTab.push(aWord);\r
}\r
}\r
- // OXYGEN PATCH END.\r
- }\r
- // OXYGEN PATCH START.\r
- wordsArray = wordsArray.substr(0, wordsArray.length - 1); \r
- if (!doStem){ \r
- finalWordsList = wordsArray.split(",");\r
- } else {\r
finalWordsList = tempTab; \r
- }\r
- // OXYGEN PATCH END.\r
+ //Check all the inputs to see if the root words are in the finalWordsList, if not add them there\r
+ var inputs = expressionInput.split(' ');
+ // Thu's Patch
+ // -------------------------------------------\r
\r
//-------------------------OXYGEN PATCH START-----------------------\r
txt_wordsnotfound = expressionInput;\r
\r
function tokenize(wordsList){\r
var stemmedWordsList = new Array(); // Array with the words to look for after removing spaces\r
- var cleanwordsList = new Array(); // Array with the words to look for\r
- for(var j in wordsList){\r
+ var cleanwordsList = new Array(); // Array with the words to look for
+ // -------------------------------------------------
+ // Thu's patch
+ for(var j=0;j<wordsList.length;++j){\r
var word = wordsList[j];\r
+ var originalWord=word;\r
if(typeof stemmer != "undefined" ){\r
+ var stemmedWord=stemmer(word);\r
+ if(w[stemmedWord]!=undefined){\r
stemQueryMap[stemmer(word)] = word;\r
+ }\r
+ else{\r
+ stemQueryMap[originalWord]=originalWord;\r
+ }\r
} else {\r
+ if(w[word]!=undefined){\r
stemQueryMap[word] = word;\r
}\r
+ else{\r
+ stemQueryMap[originalWord]=originalWord;\r
+ }\r
+ }\r
} \r
//stemmedWordsList is the stemmed list of words separated by spaces.\r
- for (var t in wordsList) {\r
+ for (var t=0;t<wordsList.length;++t) {\r
wordsList[t] = wordsList[t].replace(/(%22)|^-/g, "");\r
if (wordsList[t] != "%20") {\r
scriptLetterTab.add(wordsList[t].charAt(0));\r
//Do the stemming using Porter's stemming algorithm\r
for (var i = 0; i < cleanwordsList.length; i++) { \r
var stemWord = stemmer(cleanwordsList[i]); \r
+ if(w[stemWord]!=undefined){\r
stemmedWordsList.push(stemWord);\r
}\r
+ else{\r
+ stemmedWordsList.push(cleanwordsList[i]); \r
+ }\r
+ }
+ // End Thu's patch
+ // -------------------------------------------\r
} else {\r
stemmedWordsList = cleanwordsList;\r
}\r