From: Kasun Gajasinghe Date: Sun, 8 Aug 2010 16:43:13 +0000 (+0000) Subject: Added client-side support for cjk searching. By default, for cjk, 2-gram tokenizing... X-Git-Tag: release/1.79.1~6^2~831^2~40 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=41638a573b235380306772e2a522ef491add800d;p=docbook-dsssl Added client-side support for cjk searching. By default, for cjk, 2-gram tokenizing is used in both client run time and indexer build time. For example, クに接続 will be tokenized to "クに", "に接", "接続". Better tokenizing suggestions are IKAnalyzer and Paoding Analyzer. These are dictionary-based, so their results are more accurate. --- diff --git a/xsl/webhelp/indexer/lib/nw-cms.jar b/xsl/webhelp/indexer/lib/nw-cms.jar index 886f2eb30..95bedc392 100755 Binary files a/xsl/webhelp/indexer/lib/nw-cms.jar and b/xsl/webhelp/indexer/lib/nw-cms.jar differ diff --git a/xsl/webhelp/indexer/src/com/nexwave/nquindexer/IndexerTask.java b/xsl/webhelp/indexer/src/com/nexwave/nquindexer/IndexerTask.java index 2a347d3f5..cfde68342 100755 --- a/xsl/webhelp/indexer/src/com/nexwave/nquindexer/IndexerTask.java +++ b/xsl/webhelp/indexer/src/com/nexwave/nquindexer/IndexerTask.java @@ -51,7 +51,7 @@ public class IndexerTask{ // ANT parameters private String htmldir=null; - private String indexerLanguage="en"; + public static String indexerLanguage="en"; //supported languages: add new additions to this. don't include country codes to the end such as en_US or en_UK, // as stemmers doesn't find a difference between them. @@ -90,7 +90,7 @@ public class IndexerTask{ int i=0; for (;i= 0; i--) { + for (var i = cpt - 1; i >= 0; i--) { if (fileAndWordList[i] != undefined) { - linkTab.push("

" + txt_results_for + " " + "" + fileAndWordList[i][0].motslisteDisplay + "" + "

"); linkTab.push(""); } } } - var results=""; - if (linkTab.length > 0) { - + var results = ""; + if (linkTab.length > 0) { /*writeln ("

" + txt_results_for + " " + "" + cleanwordsList + "" + "
"+"

");*/ results = "

"; //write("