granicus.if.org Git - docbook-dsssl/commitdiff
Webhelpindexer changes - HTML transformation support for WebHelp - Uses Tagsoup for...
author Kasun Gajasinghe <kasunbg@gmail.com>
Mon, 3 Oct 2011 19:07:29 +0000 (19:07 +0000)
committer Kasun Gajasinghe <kasunbg@gmail.com>
Mon, 3 Oct 2011 19:07:29 +0000 (19:07 +0000)
Tracker -
http://sourceforge.net/tracker/?func=detail&aid=3401185&group_id=21935&atid=373750

xsl-webhelpindexer/lib/tagsoup-1.2.1.jar [new file with mode: 0755]
xsl-webhelpindexer/nbproject/project.properties
xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerMain.java
xsl-webhelpindexer/src/com/nexwave/nquindexer/SaxDocFileParser.java
xsl-webhelpindexer/src/com/nexwave/nquindexer/WriteJSFiles.java

diff --git a/xsl-webhelpindexer/lib/tagsoup-1.2.1.jar b/xsl-webhelpindexer/lib/tagsoup-1.2.1.jar
new file mode 100755 (executable)
index 0000000..2751601
Binary files /dev/null and b/xsl-webhelpindexer/lib/tagsoup-1.2.1.jar differ
index be92a30d9da07f5df44296191d090b716b7e937f..3b8886e75aba44aad2858bd565933416c8dae866 100755 (executable)
@@ -25,9 +25,11 @@ endorsed.classpath=
 excludes=
 file.reference.lucene-analyzers-3.0.0.jar=lib/lucene-analyzers-3.0.0.jar
 file.reference.lucene-core-3.0.0.jar=lib/lucene-core-3.0.0.jar
+file.reference.tagsoup-1.2.1.jar=lib/tagsoup-1.2.1.jar
 includes=**
 jar.compress=false
 javac.classpath=\
+    ${file.reference.tagsoup-1.2.1.jar}:\
     ${file.reference.lucene-analyzers-3.0.0.jar}:\
     ${file.reference.lucene-core-3.0.0.jar}:\
     ${ant.home}/lib/ant.jar
index 10acbbd0472585966aefc5f47bd20c7ac79a3687..d22e7213855a08f584b161e46756fd55bf3ea879 100644 (file)
@@ -11,10 +11,10 @@ import java.util.*;
 /**
  * Main class of Stand-alone version of WebHelpIndexer
  *
- * User: Kasun Gajasinghe, University of Moratuwa, http://kasunbg.blogspot.com
+ * User: Kasun Gajasinghe, University of Moratuwa, http://kasunbg.org
  * Date: Feb 10, 2011
  *
- * @author Kasun Gajasinghe
+ * @author Kasun Gajasinghe, University of Moratuwa, http://kasunbg.org
  */
 
 public class IndexerMain {
@@ -37,7 +37,7 @@ public class IndexerMain {
     private String outputDir = null;
     private String projectDir = null;
 
-    // ANT parameters
+    // two of the input parameters
     public String htmlDir = null;
     public String indexerLanguage = "en";
 
@@ -93,7 +93,7 @@ public class IndexerMain {
         setHtmlDir(htmlDir);
         setIndexerLanguage(indexerLanguage);
     }
-    
+
     /**
      * The content language defaults to English "en"
      *
@@ -175,7 +175,7 @@ public class IndexerMain {
                     System.getProperty("tocFile")
             );
         } else {
-            throw new RuntimeException("Specify at least the the directory containing html files (htmlDir)\n " +
+            throw new RuntimeException("Specify at least the directory containing html files (htmlDir)\n " +
                     "ex: java -jar webhelpindexer.jar -DhtmlDir=docs/content -DindexerLanguage=en \n" +
                     "The program will exit now."
             );
@@ -186,13 +186,24 @@ public class IndexerMain {
     }
 
     /**
-     * Implementation of the execute function (Task interface)
+     * The main execution happens here.
      */
     public void execute() {
+
+/*
+        //These system properties are set via command-line/ant-script now. See xsl/webhelp/build.xml#index target for
+        details.
         try {
+            //TagSoup SAX HTML Parser which supports parsing even the bad non-xml-conformed HTML
+            System.setProperty("org.xml.sax.driver", "org.ccil.cowan.tagsoup.Parser");
+                                            //org.ccil.cowan.tagsoup.jaxp.SAXParserImpl
+            System.setProperty("javax.xml.parsers.SAXParserFactory", "org.ccil.cowan.tagsoup.jaxp.SAXFactoryImpl");
+
             //Use Xerces as the parser. Does not support Saxon6.5.5 parser
-            System.setProperty("org.xml.sax.driver", "org.apache.xerces.parsers.SAXParser");
-            System.setProperty("javax.xml.parsers.SAXParserFactory", "org.apache.xerces.jaxp.SAXParserFactoryImpl");
+//            System.setProperty("org.xml.sax.driver", "org.apache.xerces.parsers.SAXParser");
+//            System.setProperty("javax.xml.parsers.SAXParserFactory", "org.apache.xerces.jaxp.SAXParserFactoryImpl");
+
+            //saxon
 //           System.setProperty("org.xml.sax.driver", "com.icl.saxon.aelfred.SAXDriver");
 //           System.setProperty("javax.xml.parsers.SAXParserFactory", "com.icl.saxon.aelfred.SAXParserFactoryImpl");
         } catch (SecurityException se) {
@@ -202,6 +213,7 @@ public class IndexerMain {
             System.out.println("[WARNING] Default parser is not set to Xerces. Make sure Saxon6.5.5 " +
                     "is not in your CLASSPATH");
         }
+        */
 
         ArrayList<DocFileInfo> filesDescription = null; // list of information about the topic files
         ArrayList<File> htmlFiles = null; // topic files listed in the given directory
index a415d268e9113a5fd337501b24fda8ac548c2f4e..9ebec1016ee522624fb9a2cfb709959b422410bb 100755 (executable)
@@ -80,7 +80,8 @@ public class SaxDocFileParser extends org.xml.sax.helpers.DefaultHandler {
             javax.xml.parsers.SAXParser sp = spf.newSAXParser();\r
             // deactivate the validation\r
             sp.getXMLReader().setFeature("http://xml.org/sax/features/external-general-entities", false);\r
-            sp.getXMLReader().setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);\r
+//            sp.getXMLReader().setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);\r
+//          this feature isn't supported in TagSoup  \r
 \r
             //parse the file and also register this class for call backs\r
             //System.out.println("Parsing: " + file);\r
index c34d4b8195e28102c781d6801e7ff23667ef094d..859fe6b76bd2641ab2a983f6e00e25a4b5a75b4e 100755 (executable)
@@ -1,12 +1,6 @@
 package com.nexwave.nquindexer;
 
-import java.io.BufferedOutputStream;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.io.OutputStreamWriter;
-import java.io.UnsupportedEncodingException;
+import java.io.*;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.Map;
@@ -31,7 +25,7 @@ public class WriteJSFiles {
     private static String txt_indices_location = "The created index files are located in ";
 
     /**
-     * Create a javascript array listing the html files with their paths relative to the project root
+     * Create a JavaScript array listing the html files with their paths relative to the project root
      *
      * @param fileO  path and name of the file in which to output the list of html files
      * @param list   of the html files, relative to the doc root directory
@@ -161,7 +155,7 @@ public class WriteJSFiles {
     }
 
     /**
-     * Create javascript index files alphabetically.
+     * Create JavaScript index files alphabetically.
      *
      * @param fileOutStr      contains the path and the suffix of the index files to create.
      *                        The first letter of the key is added to the given suffix. For example: e.g. a.js, b.js etc...
@@ -207,9 +201,11 @@ public class WriteJSFiles {
                       The value is the numbers of the files in which the word exists.
                       Example: w["key"]="file1,file2,file3";*/
                     int count = 0;
-                    if (i == 1)
+                    if (i == 1) {
                         out.write("var indexerLanguage=\"" + indexerLanguage + "\";\n");
-                    out.write("//Auto generated index for searching.\n");
+                    }
+                    out.write("//Auto generated index for searching by xsl-webhelpindexer for DocBook Webhelp." +
+                            "# Kasun Gajasinghe, University of Moratuwa\n");
                     while (keyIt.hasNext()) {        //&& (tempLetter == tstr.charAt(0))
                         out.write("w[\"" + tstr + "\"]" + "=\"" + indexMap.get(tstr) + "\";\n");
                         tstr = (String) keyIt.next();