Webhelp: fixed bug where words like key, day, and nucleus, were not found due to...

author David Cramer <david@thingbag.net>

Mon, 2 Jan 2012 15:46:43 +0000 (15:46 +0000)

committer David Cramer <david@thingbag.net>

Mon, 2 Jan 2012 15:46:43 +0000 (15:46 +0000)
author David Cramer <david@thingbag.net>
Mon, 2 Jan 2012 15:46:43 +0000 (15:46 +0000)
committer David Cramer <david@thingbag.net>
Mon, 2 Jan 2012 15:46:43 +0000 (15:46 +0000)
diff --git a/xsl/webhelp/template/content/search/nwSearchFnt.js b/xsl/webhelp/template/content/search/nwSearchFnt.js

index c00a0305656e333d92e62cf725ece8699b0b2d77..ae740df67ce7ded377560e0bb169d1ea99da73c6 100644 (file)
--- a/xsl/webhelp/template/content/search/nwSearchFnt.js
+++ b/xsl/webhelp/template/content/search/nwSearchFnt.js
@@ -128,11 +128,21 @@ function Effectuer_recherche(expressionInput) {
      var fileAndWordList = new Array();\r
      var txt_wordsnotfound = "";\r
  \r
-\r
-    /* expressionInput, search input is lower cased, plus replacement of special chars\r
-    * nqu: expressionInput, la recherche est lower cased, plus remplacement des char speciaux\r
-    * */\r
-    searchFor = expressionInput.toLowerCase().replace(/<\//g, "_st_").replace(/\$_/g, "_di_").replace(/\.|%2C|%3B|%21|%3A|@|\/|\*/g, " ").replace(/(%20)+/g, " ").replace(/_st_/g, "</").replace(/_di_/g, "%24_");\r
+
+    // --------------------------------------
+    // Begin Thu's patch \r
+    /*nqu: expressionInput, la recherche est lower cased, plus remplacement des char speciaux*/\r
+    //The original replacement expression is: \r
+    //searchFor = expressionInput.toLowerCase().replace(/<\//g, "_st_").replace(/\$_/g, "_di_").replace(/\.|%2C|%3B|%21|%3A|@|\/|\*/g, " ").replace(/(%20)+/g, " ").replace(/_st_/g, "</").replace(/_di_/g, "%24_");\r
+    //The above expression was error prone because it did not correctly handle words that contain a . as part of the word, for example, document.txt\r
+    \r
+    //Do not automatically replace a . with a space\r
+    searchFor = expressionInput.toLowerCase().replace(/<\//g, "_st_").replace(/\$_/g, "_di_").replace(/%2C|%3B|%21|%3A|@|\/|\*/g, " ").replace(/(%20)+/g, " ").replace(/_st_/g, "</").replace(/_di_/g, "%24_");\r
+    \r
+    //If it ends with a period, remove the trailing period\r
+    searchFor = searchFor.replace(/[.]$/,"");
+    // End Thu's Patch
+    // ------------------------------------------\r
  \r
      searchFor = searchFor.replace(/  +/g, " ");\r
      searchFor = searchFor.replace(/ $/, "").replace(/^ /, "");\r
@@ -169,40 +179,28 @@ function Effectuer_recherche(expressionInput) {
       * Compare with the indexed words (in the w[] array), and push words that are in it to tempTab.\r
       */\r
      var tempTab = new Array();\r
-       \r
-       var splitedValues = expressionInput.split(" ");\r
-       finalWordsList = finalWordsList.concat(splitedValues);\r
-            finalArray = finalWordsList;\r
-       finalArray = removeDuplicate(finalArray);\r
-    // OXYGEN PATCH START.\r
-    var wordsArray = '';\r
-    // OXYGEN PATCH END.\r
-    for (var t in finalWordsList) {    \r
-    // OXYGEN PATCH START.\r
-               if (doStem){\r
-    // OXYGEN PATCH END.\r
-               if (w[finalWordsList[t].toString()] == undefined) {\r
-                   txt_wordsnotfound += finalWordsList[t] + " ";\r
-               } else {\r
-                   tempTab.push(finalWordsList[t]);\r
+       
+    // ---------------------------------------
+    // Thu's patch\r
+    //Do not iterate over the array with a for...in loop, for example:\r
+    //for(var t in finalWordsList)\r
+    //It causes errors when finalWordsList contains \r
+    //stemmed words such as kei (the stem of key)\r
+    for(var t=0;t<finalWordsList.length;++t){\r
+        var aWord=finalWordsList[t];\r
+        //w is a Map-like object; use the current word in finalWordsList as the key\r
+        if(w[aWord] == undefined){\r
+            txt_wordsnotfound += aWord + " ";\r
                 }\r
-    // OXYGEN PATCH START.\r
-       } else {\r
-               var searchedValue = finalWordsList[t].toString();\r
-               if (wordsStartsWith(searchedValue) != undefined){\r
-                       wordsArray+=wordsStartsWith(searchedValue);\r
+        else{\r
+            tempTab.push(aWord);\r
                 }\r
         }\r
-    // OXYGEN PATCH END.\r
-    }\r
-    // OXYGEN PATCH START.\r
-    wordsArray = wordsArray.substr(0, wordsArray.length - 1);    \r
-       if (!doStem){           \r
-               finalWordsList = wordsArray.split(",");\r
-       } else {\r
         finalWordsList = tempTab;               \r
-       }\r
-    // OXYGEN PATCH END.\r
+    //Check all the inputs to see if the root words are in the finalWordsList, if not add them there\r
+    var inputs = expressionInput.split(' ');
+    // Thu's Patch 
+    // -------------------------------------------\r
  \r
      //-------------------------OXYGEN PATCH START-----------------------\r
      txt_wordsnotfound = expressionInput;\r
@@ -380,17 +378,31 @@ function wordsStartsWith(searchedValue){
  \r
  function tokenize(wordsList){\r
      var stemmedWordsList = new Array(); // Array with the words to look for after removing spaces\r
-    var cleanwordsList = new Array(); // Array with the words to look for\r
-    for(var j in wordsList){\r
+    var cleanwordsList = new Array(); // Array with the words to look for
+    // -------------------------------------------------
+    // Thu's patch
+    for(var j=0;j<wordsList.length;++j){\r
          var word = wordsList[j];\r
+        var originalWord=word;\r
          if(typeof stemmer != "undefined" ){\r
+            var stemmedWord=stemmer(word);\r
+            if(w[stemmedWord]!=undefined){\r
              stemQueryMap[stemmer(word)] = word;\r
+            }\r
+            else{\r
+                stemQueryMap[originalWord]=originalWord;\r
+            }\r
          } else {\r
+            if(w[word]!=undefined){\r
              stemQueryMap[word] = word;\r
          }\r
+            else{\r
+                stemQueryMap[originalWord]=originalWord;\r
+            }\r
+        }\r
      } \r
       //stemmedWordsList is the stemmed list of words separated by spaces.\r
-    for (var t in wordsList) {\r
+    for (var t=0;t<wordsList.length;++t) {\r
          wordsList[t] = wordsList[t].replace(/(%22)|^-/g, "");\r
          if (wordsList[t] != "%20") {\r
              scriptLetterTab.add(wordsList[t].charAt(0));\r
@@ -402,8 +414,15 @@ function tokenize(wordsList){
          //Do the stemming using Porter's stemming algorithm\r
          for (var i = 0; i < cleanwordsList.length; i++) {                      \r
              var stemWord = stemmer(cleanwordsList[i]);                 \r
+            if(w[stemWord]!=undefined){\r
              stemmedWordsList.push(stemWord);\r
          }\r
+            else{\r
+                stemmedWordsList.push(cleanwordsList[i]);               \r
+            }\r
+        }
+    // End Thu's patch
+    // -------------------------------------------\r
      } else {\r
          stemmedWordsList = cleanwordsList;\r
      }\r
diff --git a/xsl/webhelp/template/content/search/stemmers/en_stemmer.js b/xsl/webhelp/template/content/search/stemmers/en_stemmer.js

index f58012f2baf437d36944b52f0c3da1a1026abe3e..439b1696ffad223713865b0d1bee9d63ccad8653 100644 (file)
--- a/xsl/webhelp/template/content/search/stemmers/en_stemmer.js
+++ b/xsl/webhelp/template/content/search/stemmers/en_stemmer.js
@@ -105,7 +105,7 @@ var stemmer = (function(){
                 }
  
                 // Step 1c
-               re = /^(.+?)y$/;
+               re = /^(....+?)y$/;
                 if (re.test(w)) {
                         var fp = re.exec(w);
                         stem = fp[1];
author	David Cramer <david@thingbag.net>
	Mon, 2 Jan 2012 15:46:43 +0000 (15:46 +0000)
committer	David Cramer <david@thingbag.net>
	Mon, 2 Jan 2012 15:46:43 +0000 (15:46 +0000)
xsl/webhelp/template/content/search/nwSearchFnt.js		patch \| blob \| history
xsl/webhelp/template/content/search/stemmers/en_stemmer.js		patch \| blob \| history