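Minor changes to the indexer: normalize line endings in the build properties, set the jar manifest and main class, and tidy up the usage/error messages and string building.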
--- /dev/null
+Manifest-Version: 1.0
--- /dev/null
+compile.on.save=false
+do.depend=false
+do.jar=true
+javac.debug=true
+javadoc.preview=true
-annotation.processing.enabled=true\r
-annotation.processing.enabled.in.editor=false\r
-annotation.processing.run.all.processors=true\r
-application.args=\r
-application.title=webhelpindexer\r
-application.vendor=docbook\r
-build.classes.dir=${build.dir}/classes\r
-build.classes.excludes=**/*.java,**/*.form\r
-build.dir=.\r
-build.generated.dir=${build.dir}/generated\r
-build.generated.sources.dir=${build.dir}/generated-sources\r
-# Only compile against the classpath explicitly listed here:\r
-build.sysclasspath=ignore\r
-build.test.classes.dir=${build.dir}/test/classes\r
-build.test.results.dir=${build.dir}/test/results\r
-debug.classpath=\\r
- ${run.classpath}\r
-debug.test.classpath=\\r
- ${run.test.classpath}\r
-dist.dir=.\r
-# This jar file and javadoc are removed when the project is cleaned.\r
-dist.jar=${dist.dir}/webhelpindexer.jar\r
-dist.javadoc.dir=${dist.dir}/javadoc\r
-endorsed.classpath=\r
-excludes=\r
-file.reference.lucene-analyzers-3.0.0.jar=lib/lucene-analyzers-3.0.0.jar\r
-file.reference.lucene-core-3.0.0.jar=lib/lucene-core-3.0.0.jar\r
-includes=**\r
-jar.compress=false\r
-javac.classpath=\\r
- ${file.reference.lucene-analyzers-3.0.0.jar}:\\r
- ${file.reference.lucene-core-3.0.0.jar}:\\r
- ${ant.home}/lib/ant.jar\r
-# Space-separated list of extra javac options\r
-javac.compilerargs=\r
-javac.deprecation=false\r
-javac.processorpath=\\r
- ${javac.classpath}\r
-javac.source=1.5\r
-javac.target=1.5\r
-javac.test.classpath=\\r
- ${javac.classpath}:\\r
- ${build.classes.dir}:\\r
- ${libs.junit.classpath}\r
-javadoc.additionalparam=\r
-javadoc.author=false\r
-javadoc.encoding=\r
-javadoc.noindex=false\r
-javadoc.nonavbar=false\r
-javadoc.notree=false\r
-javadoc.private=false\r
-javadoc.splitindex=true\r
-javadoc.use=true\r
-javadoc.version=false\r
-javadoc.windowtitle=\r
-meta.inf.dir=${src.dir}/META-INF\r
-platform.active=default_platform\r
-run.classpath=\\r
- ${javac.classpath}:\\r
- ${build.classes.dir}\r
-# Space-separated list of JVM arguments used when running the project\r
-# (you may also define separate properties like run-sys-prop.name=value instead of -Dname=value\r
-# or test-sys-prop.name=value to set system properties for unit tests):\r
-run.jvmargs=\r
-run.test.classpath=\\r
- ${javac.test.classpath}:\\r
- ${build.test.classes.dir}\r
-src.dir=src\r
-test.src.dir=test\r
+annotation.processing.enabled=true
+annotation.processing.enabled.in.editor=false
+annotation.processing.run.all.processors=true
+application.args=
+application.title=webhelpindexer
+application.vendor=docbook
+build.classes.dir=${build.dir}/classes
+build.classes.excludes=**/*.java,**/*.form
+build.dir=.
+build.generated.dir=${build.dir}/generated
+build.generated.sources.dir=${build.dir}/generated-sources
+# Only compile against the classpath explicitly listed here:
+build.sysclasspath=ignore
+build.test.classes.dir=${build.dir}/test/classes
+build.test.results.dir=${build.dir}/test/results
+debug.classpath=\
+ ${run.classpath}
+debug.test.classpath=\
+ ${run.test.classpath}
+dist.dir=.
+# This jar file and javadoc are removed when the project is cleaned.
+dist.jar=${dist.dir}/webhelpindexer.jar
+dist.javadoc.dir=${dist.dir}/javadoc
+endorsed.classpath=
+excludes=
+file.reference.lucene-analyzers-3.0.0.jar=lib/lucene-analyzers-3.0.0.jar
+file.reference.lucene-core-3.0.0.jar=lib/lucene-core-3.0.0.jar
+includes=**
+jar.compress=false
+javac.classpath=\
+ ${file.reference.lucene-analyzers-3.0.0.jar}:\
+ ${file.reference.lucene-core-3.0.0.jar}:\
+ ${ant.home}/lib/ant.jar
+# Space-separated list of extra javac options
+javac.compilerargs=
+javac.deprecation=false
+javac.processorpath=\
+ ${javac.classpath}
+javac.source=1.5
+javac.target=1.5
+javac.test.classpath=\
+ ${javac.classpath}:\
+ ${build.classes.dir}:\
+ ${libs.junit.classpath}
+javadoc.additionalparam=
+javadoc.author=false
+javadoc.encoding=
+javadoc.noindex=false
+javadoc.nonavbar=false
+javadoc.notree=false
+javadoc.private=false
+javadoc.splitindex=true
+javadoc.use=true
+javadoc.version=false
+manifest.file=manifest.mf
+manifest.available=true
+main.class=com.nexwave.nquindexer.IndexerMain
+javadoc.windowtitle=
+meta.inf.dir=${src.dir}/META-INF
+platform.active=default_platform
+run.classpath=\
+ ${javac.classpath}:\
+ ${build.classes.dir}
+run.test.classpath=\
+ ${javac.test.classpath}:\
+ ${build.test.classes.dir}
+src.dir=src
+test.src.dir=test
package com.nexwave.nquindexer;\r
+\r
/**\r
* Constants used for the indexer.\r
* \r
*/\r
public abstract class IndexerConstants {\r
// European punctuation\r
- public static final String EUPUNCTUATION1 = "[$|%,;.':()\\/*\"{}=!&+<>#\\?]|\\[|\\]|[-][-]+";\r
- public static final String EUPUNCTUATION2 = "[$,;.':()\\/*\"{}=!&+<>\\\\]"; \r
- // Japanese punctuation\r
- public static final String JPPUNCTUATION1 = "\\u3000|\\u3001|\\u3002|\\u3003|\\u3008|\\u3009|\\u300C|\\u300D";\r
- public static final String JPPUNCTUATION2 = "\\u3013|\\u3014|\\u3015|\\u301C|\\u301D|\\u301E|\\u301F";\r
- public static final String JPPUNCTUATION3 = "\\u3013|\\u300C|\\u300D";\r
+ // TODO: Make sure the European punctuation removal process doesn't affect text that contains file locations, etc.\r
+\r
+ public static final String EUPUNCTUATION1 = "[$|%,;.':()\\/*\"{}=!&+<>#\\?]|\\[|\\]|[-][-]+";\r
+ public static final String EUPUNCTUATION2 = "[$,;.':()\\/*\"{}=!&+<>\\\\]";\r
+ // Japanese punctuation\r
+ public static final String JPPUNCTUATION1 = "\\u3000|\\u3001|\\u3002|\\u3003|\\u3008|\\u3009|\\u300C|\\u300D";\r
+ public static final String JPPUNCTUATION2 = "\\u3013|\\u3014|\\u3015|\\u301C|\\u301D|\\u301E|\\u301F";\r
+ public static final String JPPUNCTUATION3 = "\\u3013|\\u300C|\\u300D";\r
}\r
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
package com.nexwave.nquindexer;
import com.nexwave.nsidita.DirList;
import java.util.*;
/**
+ * Main class of the stand-alone version of WebHelpIndexer.
* User: Kasun Gajasinghe, University of Moratuwa, http://kasunbg.blogspot.com
* Date: Feb 10, 2011
+ * @author Kasun Gajasinghe
*/
public class IndexerMain {
} else if (args.length >= 2) {
indexer = new IndexerMain(args[0], args[1]);
- } else {
- throw new ArrayIndexOutOfBoundsException("Please specify the parameters htmlDirectory and (optional) " +
- "indexerLanguage");
+ } else {
+ throw new RuntimeException("Please specify the parameters htmlDirectory and " +
+ "indexerLanguage (optional). \n "+
+ "ex: java -jar webhelpindexer.jar docs/content en \n" +
+ "The program will exit now."
+ );
}
indexer.execute();
*/
private static void DisplayHelp() {
String lSep = System.getProperty("line.separator");
- StringBuffer msg = new StringBuffer();
- msg.append("USAGE:" + lSep);
- msg.append(" java -classpath TesterIndexer inputDir outputDir projectDir" + lSep);
- msg.append("with:" + lSep);
- msg.append(" inputDir (mandatory) : specify the html files ' directory to index" + lSep);
- msg.append(" outputDir (optional) : specify where to output the index files" + lSep);
- msg.append(" projectDir (optional) : specify the root of the documentation directory" + lSep);
- msg.append("Example:" + lSep);
- msg.append(" java -classpath TesterIndexer /home/$USER/DITA/doc" + lSep);
- msg.append("Example 2:" + lSep);
- msg.append(" java -classpath TesterIndexer /home/$USER/DITA/doc/customer/concepts /home/$USER/temp/search /home/$USER/DITA/doc/" + lSep);
+ StringBuilder msg = new StringBuilder();
+ msg.append("USAGE:").append(lSep);
+ msg.append(" java -classpath TesterIndexer inputDir outputDir projectDir").append(lSep);
+ msg.append("with:").append(lSep);
+ msg.append(" inputDir (mandatory) : specify the html files ' directory to index").append(lSep);
+ msg.append(" outputDir (optional) : specify where to output the index files").append(lSep);
+ msg.append(" projectDir (optional) : specify the root of the documentation directory").append(lSep);
+ msg.append("Example:").append(lSep);
+ msg.append(" java -classpath TesterIndexer /home/$USER/DITA/doc").append(lSep);
+ msg.append("Example 2:").append(lSep);
+ msg.append(" java -classpath TesterIndexer /home/$USER/webhelp/docs/content /home/$USER/docs/content/search /home/$USER/webhelp/docs").append(lSep);
System.out.println(msg.toString());
}
addHeaderInfo = true;\r
String attrName = attributes.getValue("name");\r
if(attrName != null && (attrName.equalsIgnoreCase("keywords") || attrName.equalsIgnoreCase("description"))){\r
- strbf.append(" " + attributes.getValue("content") + " ");\r
+ strbf.append(" ").append(attributes.getValue("content")).append(" ");\r
}\r
// dwc: adding this to make the docbook <abstract> element\r
// (which becomes <meta name="description".../> in html)\r
tempStrBuf.append("\\ba\\b");\r
Iterator it = cleanUpList.iterator();\r
while (it.hasNext()){\r
- tempStrBuf.append("|\\b"+it.next()+"\\b");\r
+ tempStrBuf.append("|\\b").append(it.next()).append("\\b");\r
}\r
}\r
if ((cleanUpPunctuation != null) && (!cleanUpPunctuation.isEmpty())){\r
tempCharBuf.append("\\u3002");\r
Iterator it = cleanUpPunctuation.iterator();\r
while (it.hasNext()){\r
- tempCharBuf.append("|"+it.next());\r
+ tempCharBuf.append("|").append(it.next());\r
}\r
}\r
\r