<pathelement location="${extensions.dir}/webhelpindexer.jar"/>
<pathelement location="${extensions.dir}/lucene-analyzers-3.0.0.jar"/>
<pathelement location="${extensions.dir}/lucene-core-3.0.0.jar"/>
+ <pathelement location="${extensions.dir}/tagsoup-1.2.1.jar"/>
</path>
<condition property="perform-validation-dtd">
<echo>Indexing html files in ${output-dir}/content</echo>
- <java classname="com.nexwave.nquindexer.IndexerMain">
+ <java classname="com.nexwave.nquindexer.IndexerMain" fork="true">
<sysproperty key="htmlDir" value="${output-dir}/content"/>
<sysproperty key="indexerLanguage" value="${webhelp.indexer.language}"/>
<sysproperty key="htmlExtension" value="${html.extension}"/>
<sysproperty key="doStem" value="${enable.stemming}"/>
<sysproperty key="tocFile" value="${toc.file}"/>
+ <!-- Use the TagSoup SAX parser so that even malformed HTML content can be parsed and indexed. See
+ http://sourceforge.net/tracker/?func=detail&aid=3401185&group_id=21935&atid=373750 -->
+ <sysproperty key="org.xml.sax.driver" value="org.ccil.cowan.tagsoup.Parser"/>
+ <sysproperty key="javax.xml.parsers.SAXParserFactory" value="org.ccil.cowan.tagsoup.jaxp.SAXFactoryImpl"/>
+
+ <!-- Uncomment the following if you prefer Xerces as the SAX parser. Note that indexing will fail with Xerces
+ if the HTML files are not well-formed XML. -->
+ <!--sysproperty key="org.xml.sax.driver" value="org.apache.xerces.parsers.SAXParser"/>
+ <sysproperty key="javax.xml.parsers.SAXParserFactory" value="org.apache.xerces.jaxp.SAXParserFactoryImpl"/-->
+
+ <!-- Uncomment to remote-debug the indexer: the forked JVM suspends until a debugger attaches on port 5005. -->
+ <!--jvmarg line="-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5005"/-->
<classpath>
<path refid="classpath"/>
<pathelement location="/usr/share/java/xercesImpl.jar"/>
</classpath>
</java>
- <!--indexertask htmldir="${output-dir}/content" indexerLanguage="${webhelp.indexer.language}"/-->
-
<delete>
<fileset dir="${output-dir}/content/search" includes="*.props"/>
</delete>