Removed the WebHelpIndexer's ANT dependency. It's possible to use it as a standalone...

author Kasun Gajasinghe <kasunbg@gmail.com>

Sat, 26 Mar 2011 09:52:27 +0000 (09:52 +0000)

committer Kasun Gajasinghe <kasunbg@gmail.com>

Sat, 26 Mar 2011 09:52:27 +0000 (09:52 +0000)
author Kasun Gajasinghe <kasunbg@gmail.com>
Sat, 26 Mar 2011 09:52:27 +0000 (09:52 +0000)
committer Kasun Gajasinghe <kasunbg@gmail.com>
Sat, 26 Mar 2011 09:52:27 +0000 (09:52 +0000)
diff --git a/xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerMain.java b/xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerMain.java

new file mode 100644 (file)

index 0000000..04887c5
--- /dev/null
+++ b/xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerMain.java
@@ -0,0 +1,404 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package com.nexwave.nquindexer;
+
+import com.nexwave.nsidita.DirList;
+import com.nexwave.nsidita.DocFileInfo;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.*;
+
+/**
+ * User: Kasun Gajasinghe, University of Moratuwa, http://kasunbg.blogspot.com
+ * Date: Feb 10, 2011
+ */
+
+public class IndexerMain {
+
+    // Messages printed to System.out
+    private String txt_no_inputdir = "Input directory not found:";
+    private String txt_cannot_create_outputdir = "Cannot create output search directory.";
+    private String txt_no_files_found = "No html files found.";
+    private String txt_wrong_dita_basedir = "ERROR: Parser initialization failed. Wrong dita base dir";
+    private String txt_no_relative_files_found = "No relative html files calculated.";
+    private String txt_no_words_gathered = "No words have been indexed in";
+    private String txt_no_html_files = "No HTML Files found in";
+    private String txt_no_args = "No argument given: you must provide an htmlDir to the IndexerMain";
+    // The next message is printed by the static main(), hence it is static itself
+    private static String txt_no_lang_specified ="Language of the content is not specified. Defaults to English.";
+
+    // Working directories; outputDir and projectDir are derived from the input directory in execute()
+    private String searchdir = "search";
+    private File inputDir = null;
+    private String outputDir = null;
+    private String projectDir = null;
+
+    // Indexer parameters (the former ANT task attributes)
+    public String htmlDir = null;
+    public String indexerLanguage = "en";
+
+    // Supported languages: add new entries here. Do not append country codes such as en_US or en_GB,
+    // as the stemmers do not differentiate between them.
+    private String[] supportedLanguages = {"en", "de", "fr", "zh", "ja", "ko"}; // extended support is currently available for
+    // English, German, French and CJK (Chinese [zh], Japanese [ja], Korean [ko]) only.
+
+    // Indexing features: words and characters to remove (filled by RetrieveCleanUpProps)
+    private ArrayList<String> cleanUpStrings = null;
+    private ArrayList<String> cleanUpChars = null;
+
+    // Extension of the html files, kept without a leading "."
+    private String htmlExtension = "html";
+
+    // Constructors
+    public IndexerMain(String htmlDir, String indexerLanguage) {
+        // super() is implicit; the language is normalised by its setter.
+        this.setHtmlDir(htmlDir);
+        this.setIndexerLanguage(indexerLanguage);
+    }
+
+    /**
+     * Creates an indexer whose content language is English ("en").
+     *
+     * @param htmlDir the directory in which the html files reside
+     */
+    public IndexerMain(String htmlDir) {
+        // Same effect as setHtmlDir(htmlDir) followed by setIndexerLanguage("en")
+        this(htmlDir, "en");
+    }
+
+    /**
+     * Sets the directory whose html files are indexed (the "htmlDir" parameter).
+     *
+     * @param htmlDir path of the directory that contains the html files
+     */
+    public void setHtmlDir(String htmlDir) {
+        this.htmlDir = htmlDir;
+    }
+
+    /**
+     * Sets the extension of the generated html files; a leading "." is dropped.
+     *
+     * @param htmlExtension the extension, with or without the leading "." (".html" or "html")
+     */
+    public void setHtmlextension(String htmlExtension) {
+        this.htmlExtension = htmlExtension;
+        final String given = this.htmlExtension;
+        if (given.startsWith(".")) { // the stored form never carries the leading dot
+            this.htmlExtension = given.substring(1);
+        }
+    }
+
+    /**
+     * Sets the language of the content to index.
+     * <p>
+     * A country suffix is removed ("en_US" becomes "en"). A code listed in
+     * {@code supportedLanguages} is stored as that entry; any other non-empty
+     * code is stored as given. A null or empty value stores the "@@" marker.
+     *
+     * @param indexerLanguage language for the search indexer. Used to differentiate which stemmer to be used.
+     */
+    public void setIndexerLanguage(String indexerLanguage) {
+        if (indexerLanguage == null || "".equals(indexerLanguage)) {
+            this.indexerLanguage = "@@"; // fail-safe marker; callers are expected to pass a language
+            return;
+        }
+
+        // Keep only the part in front of the first '_'
+        final int underscore = indexerLanguage.indexOf('_');
+        final String code = (underscore == -1) ? indexerLanguage : indexerLanguage.substring(0, underscore);
+
+        // Unknown codes are accepted too; they fall through with 'chosen' unchanged
+        String chosen = code;
+        for (String supported : supportedLanguages) {
+            if (code.equals(supported)) {
+                chosen = supported;
+                break;
+            }
+        }
+        this.indexerLanguage = chosen;
+    }
+
+    /**
+     * Command-line entry point of com.nexwave.nquindexer.IndexerMain, the
+     * indexer without Ant dependencies. This can be used as a standalone jar.
+     *
+     * @param args {@code htmlDirectory [indexerLanguage]}. With only the
+     *             directory given, a notice is printed and the language
+     *             defaults to English; arguments after the second are ignored.
+     * @throws ArrayIndexOutOfBoundsException if no argument is given
+     */
+    public static void main(String[] args) {
+        // No arguments at all: nothing to index
+        if (args.length == 0) {
+            throw new ArrayIndexOutOfBoundsException("Please specify the parameters htmlDirectory and (optional) " +
+                    "indexerLanguage");
+        }
+
+        final IndexerMain indexer;
+        if (args.length == 1) {
+            System.out.println(txt_no_lang_specified);
+            indexer = new IndexerMain(args[0]);
+        } else {
+            indexer = new IndexerMain(args[0], args[1]);
+        }
+        indexer.execute();
+    }
+
+
+    /**
+     * Indexes the html files of htmlDir and writes the index files to {htmlDir}/search; problems are printed.
+     */
+    public void execute() {
+        try {
+            //Use Xerces as the parser. Does not support Saxon6.5.5 parser
+            System.setProperty("org.xml.sax.driver", "org.apache.xerces.parsers.SAXParser");
+            System.setProperty("javax.xml.parsers.SAXParserFactory", "org.apache.xerces.jaxp.SAXParserFactoryImpl");
+            // The Saxon alternatives (com.icl.saxon.aelfred.SAXDriver and
+            // com.icl.saxon.aelfred.SAXParserFactoryImpl) are deliberately not set.
+        } catch (SecurityException se) {
+            System.out.println("[WARNING] Default parser is not set to Xerces. Make sure Saxon6.5.5 " +
+                    "is not in your CLASSPATH.");
+        } catch (Exception e) {
+            System.out.println("[WARNING] Default parser is not set to Xerces. Make sure Saxon6.5.5 " +
+                    "is not in your CLASSPATH");
+        }
+
+        ArrayList<DocFileInfo> filesDescription = null; // list of information about the topic files
+        ArrayList<File> htmlFiles = null; // topic files listed in the given directory
+        ArrayList<String> htmlFilesPathRel = null; // the same files, as paths relative to projectDir
+        Map<String, String> tempDico = new HashMap<String, String>(); // handed to SaxHTMLIndex.init(); checked for words after parsing
+        Iterator it;
+
+        //File name initialization
+        String htmlList = "htmlFileList.js";
+        String htmlInfoList = "htmlFileInfoList.js";
+        String indexName = ".js";
+
+        //timing
+        Date dateStart = new Date();
+
+        if (htmlDir == null) {
+            System.out.println(txt_no_args + ".");
+            return;
+        }
+        // Init input directory
+        inputDir = new File(htmlDir);
+
+        // Begin of init
+        // NOTE(review): "new File(...)" never yields null, so this branch cannot run
+        if (inputDir == null) {
+            DisplayHelp();
+            return;
+        }
+
+        // check if inputdir exists
+        if (!inputDir.exists()) {
+            System.out.println(txt_no_inputdir + " " + inputDir + ".");
+            return;
+        }
+
+        // check if outputdir defined
+        if (outputDir == null) {
+            //set the output directory: path= {inputDir}/search
+            outputDir = inputDir.getPath().concat(File.separator).concat(searchdir);
+        }
+
+        // check if outputdir exists, create it otherwise
+        File tempfile = new File(outputDir);
+        if (!tempfile.exists()) {
+            boolean b = (new File(outputDir)).mkdir();
+            if (!b) {
+                System.out.println(txt_cannot_create_outputdir + " " + outputDir + ".");
+                return;
+            }
+        }
+
+        // check if projdir is defined; it defaults to the input directory
+        if (projectDir == null) {
+            projectDir = inputDir.getPath();
+        }
+        //end of init
+
+
+        // Get the list of all html files; the trailing "?" makes the extension's last letter optional (htm/html)
+        DirList nsiDoc = new DirList(inputDir, "^.*\\." + htmlExtension + "?$", 1);
+        htmlFiles = nsiDoc.getListFiles();
+        // Check if found html files
+        if (htmlFiles.isEmpty()) {
+            System.out.println(txt_no_html_files + " " + inputDir + ".");
+            return;
+        }
+        // Get the list of all html files with relative paths
+        htmlFilesPathRel = nsiDoc.getListFilesRelTo(projectDir);
+
+        if (htmlFiles == null) { // NOTE(review): too late - isEmpty() above has already dereferenced htmlFiles
+            System.out.println(txt_no_files_found);
+            return;
+        } else if (htmlFilesPathRel == null) {
+            System.out.println(txt_no_relative_files_found);
+            return;
+        }
+
+        // Create the list of the existing html files (index starts at 0)
+        WriteJSFiles.WriteHTMLList(outputDir.concat(File.separator).concat(htmlList), htmlFilesPathRel);
+
+        // Parse each html file to retrieve the words:
+        // ------------------------------------------
+
+        // Retrieve the clean-up properties for indexing
+        RetrieveCleanUpProps(); // the return code (0 ok, 1 I/O error) is not checked here
+        // (fills cleanUpStrings and cleanUpChars)
+
+        //create a default handler; both clean-up lists are handed over
+        //(earlier variants used no clean-up list, or only cleanUpStrings:
+        // new SaxHTMLIndex() / new SaxHTMLIndex(cleanUpStrings))
+        SaxHTMLIndex spe = new SaxHTMLIndex(cleanUpStrings, cleanUpChars); // use clean-up props files
+
+        if (spe.init(tempDico) == 0) {
+
+            //create a html file description list
+            filesDescription = new ArrayList<DocFileInfo>();
+
+            it = htmlFiles.iterator();
+
+            // parse each html files
+            while (it.hasNext()) {
+                File ftemp = (File) it.next();
+                // (one DocFileInfo is built per html file)
+                //The HTML file information are added in the list of FileInfoObject
+                DocFileInfo docFileInfoTemp = new DocFileInfo(spe.runExtractData(ftemp, this.indexerLanguage));
+
+                ftemp = docFileInfoTemp.getFullpath();
+                String stemp = ftemp.toString();
+                int i = stemp.indexOf(projectDir);
+                if (i != 0) {
+                    System.out.println("the documentation root does not match with the documentation input!");
+                    return;
+                }
+                int ad = 1;
+                if (stemp.equals(projectDir)) ad = 0;
+                stemp = stemp.substring(i + projectDir.length() + ad);  //i is redundant (i==0 always)
+                ftemp = new File(stemp);
+                docFileInfoTemp.setFullpath(ftemp);
+
+                filesDescription.add(docFileInfoTemp);
+            }
+            /*remove empty strings from the map*/
+            if (tempDico.containsKey("")) {
+                tempDico.remove("");
+            }
+            // write the index files, unless no word was gathered
+            if (tempDico.isEmpty()) {
+                System.out.println(txt_no_words_gathered + " " + inputDir + ".");
+                return;
+            }
+
+            // the first argument is {outputDir}/ followed by the ".js" suffix held in indexName
+            WriteJSFiles.WriteIndex(outputDir.concat(File.separator).concat(indexName), tempDico, indexerLanguage);
+
+            // write the html list file with title and shortdesc
+            //create the list of the existing html files (index starts at 0)
+            WriteJSFiles.WriteHTMLInfoList(outputDir.concat(File.separator).concat(htmlInfoList), filesDescription);
+
+            //perf measurement: elapsed time since dateStart
+            Date dateEnd = new Date();
+            long diff = dateEnd.getTime() - dateStart.getTime();
+            if (diff < 1000)
+                System.out.println("Delay = " + diff + " milliseconds");
+            else
+                System.out.println("Delay = " + diff / 1000 + " seconds");
+        } else {
+            System.out.println(txt_wrong_dita_basedir);
+            return;
+        }
+    }
+
+    /**
+     * Prints the usage information for this class to <code>System.out</code>.
+     */
+    private static void DisplayHelp() {
+        // The text mirrors what main() accepts: htmlDir plus an optional language
+        final String lSep = System.getProperty("line.separator");
+        final StringBuilder msg = new StringBuilder();
+        msg.append("USAGE:").append(lSep);
+        msg.append("   java com.nexwave.nquindexer.IndexerMain htmlDir [indexerLanguage]").append(lSep);
+        msg.append("with:").append(lSep);
+        msg.append("   htmlDir (mandatory) : specify the html files' directory to index").append(lSep);
+        msg.append("   indexerLanguage (optional) : language of the content, defaults to en").append(lSep);
+        msg.append("Example:").append(lSep);
+        msg.append("   java com.nexwave.nquindexer.IndexerMain /home/$USER/DITA/doc").append(lSep);
+        msg.append("Example 2:").append(lSep);
+        msg.append("   java com.nexwave.nquindexer.IndexerMain /home/$USER/DITA/doc de").append(lSep);
+        System.out.println(msg.toString());
+    }
+
+    /**
+     * Loads the clean-up lists used while indexing: cleanUpStrings gets the
+     * values of every props file DirList returns for the input directory
+     * (the pattern is meant to skip the punctuation file), cleanUpChars gets
+     * the values of {inputDir}/search/punctuation.props if that file exists.
+     *
+     * @return 0 on success, 1 if reading a props file failed
+     */
+    private int RetrieveCleanUpProps() {
+        // Files for punctuation (only one for now)
+        final String[] punctuationFiles = new String[]{"punctuation.props"};
+        final String propsDir = inputDir.getPath().concat(File.separator).concat(searchdir);
+        // Get the list of the props file containing the words to remove (not the punctuation)
+        final DirList props = new DirList(inputDir, "^(?!(punctuation)).*\\.props$", 1);
+        final ArrayList<File> wordsList = props.getListFiles();
+        // Init the lists which will contain the words and chars to remove
+        cleanUpStrings = new ArrayList<String>();
+        cleanUpChars = new ArrayList<String>();
+        try {
+            // Retrieve words to remove (TODO: all files share a single list - is that ok?)
+            for (File wordsFile : wordsList) {
+                addPropertyValues(wordsFile, cleanUpStrings);
+            }
+            // Retrieve char to remove (punctuation for ex.)
+            for (String punctuationFile : punctuationFiles) {
+                addPropertyValues(new File(propsDir, punctuationFile), cleanUpChars);
+            }
+        } catch (IOException e) {
+            e.printStackTrace();
+            return 1;
+        }
+        return 0;
+    }
+
+    /**
+     * Appends the values of the properties file {@code propsFile} to
+     * {@code target}; a file that does not exist is skipped.
+     */
+    private static void addPropertyValues(File propsFile, ArrayList<String> target) throws IOException {
+        if (!propsFile.exists()) {
+            return;
+        }
+        final Properties loaded = new Properties();
+        final FileInputStream input = new FileInputStream(propsFile);
+        try {
+            loaded.load(input);
+        } finally {
+            input.close(); // also runs when load() fails, so the stream cannot leak
+        }
+        for (Object value : loaded.values()) {
+            target.add((String) value);
+        }
+    }
+
+}
diff --git a/xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerTask.java b/xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerTask.java

index d07eece72d5790e5af54de5c341b9df3bfd6bf44..373e89d01d958e85fc47295850abafdd1c5401f4 100755 (executable)
--- a/xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerTask.java
+++ b/xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerTask.java
@@ -1,3 +1,4 @@
+/*\r
  package com.nexwave.nquindexer;\r
  \r
  import java.io.File;\r
@@ -18,6 +19,7 @@ import org.apache.tools.ant.Task;
  import com.nexwave.nsidita.DirList;\r
  import com.nexwave.nsidita.DocFileInfo;\r
  \r
+*/\r
  /**\r
   * Indexer ant task.\r
   * \r
@@ -25,7 +27,8 @@ import com.nexwave.nsidita.DocFileInfo;
   * \r
   * @author N. Quaine\r
   * @author Kasun Gajasinghe <http://kasunbg.blogspot.com>\r
- */\r
+ *//*\r
+\r
  public class IndexerTask extends Task {\r
  \r
         // messages\r
@@ -36,7 +39,7 @@ public class IndexerTask extends Task {
         private String txt_no_relative_files_found= "No relative html files calculated.";\r
         private String txt_no_words_gathered= "No words have been indexed in";\r
         private String txt_no_html_files="No HTML Files found in";\r
-       private String txt_no_args="No argument given: you must provide an htmldir to the IndexerTask";\r
+       private String txt_no_args="No argument given: you must provide an htmlDir to the IndexerTask";\r
         \r
         //working directories\r
         private String searchdir = "search";\r
@@ -45,7 +48,7 @@ public class IndexerTask extends Task {
         private String projectDir = null;\r
  \r
         // ANT parameters\r
-       private String htmldir=null;\r
+       private String htmlDir=null;\r
      public static String indexerLanguage="en";\r
  \r
      //supported languages: add new additions to this. don't include country codes to the end such as en_US or en_UK,\r
@@ -64,18 +67,22 @@ public class IndexerTask extends Task {
         public IndexerTask() {\r
                 super();\r
         }\r
-       /** The setter for the "htmldir" attribute (parameter of the task)\r
+       */\r
+/** The setter for the "htmlDir" attribute (parameter of the task)\r
          * @param htmldir\r
          * @throws InterruptedException \r
-        */\r
-    public void setHtmldir(String htmldir) {\r
-        this.htmldir = htmldir;\r
+        *//*\r
+\r
+    public void setHtmlDir(String htmlDir) {\r
+        this.htmlDir = htmlDir;\r
      }\r
  \r
-     /**\r
+     */\r
+/**\r
       * Set the extension in which html files are generated\r
       * @param htmlExtension The extension in wich html files are generated\r
-     */\r
+     *//*\r
+\r
      public void setHtmlextension(String htmlExtension) {\r
                 this.htmlExtension = htmlExtension;\r
                 //Trim the starting "."\r
@@ -84,11 +91,13 @@ public class IndexerTask extends Task {
                 }\r
         }\r
  \r
-    /**\r
+    */\r
+/**\r
       * setter for "indexerLanguage" attribute from ANT\r
       * @param indexerLanguage language for the search indexer. Used to differerentiate which stemmer to be used.\r
       * @throws InterruptedException for ant\r
-     */\r
+     *//*\r
+\r
      public void setIndexerLanguage(String indexerLanguage){\r
          if(indexerLanguage !=null && !"".equals(indexerLanguage)) {\r
              int temp = indexerLanguage.indexOf('_');\r
@@ -114,9 +123,11 @@ public class IndexerTask extends Task {
          } \r
      }\r
         \r
-       /**\r
+       */\r
+/**\r
          * Implementation of the execute function (Task interface)\r
-        */\r
+        *//*\r
+\r
         public void execute() throws BuildException {\r
          try{\r
              //Use Xerces as the parser. Does not support Saxon6.5.5 parser \r
@@ -146,12 +157,12 @@ public class IndexerTask extends Task {
                 //timing\r
                 Date dateStart = new Date();\r
                 \r
-               if (htmldir == null) {\r
+               if (htmlDir == null) {\r
                         System.out.println(txt_no_args + ".");\r
                         return;\r
                 }\r
                 // Init input directory\r
-               inputDir = new File(htmldir);\r
+               inputDir = new File(htmlDir);\r
  \r
                 // Begin of init\r
                 // check if inputdir initialized\r
@@ -252,7 +263,9 @@ public class IndexerTask extends Task {
                                 \r
                                 filesDescription.add(docFileInfoTemp);\r
                         }\r
-                       /*remove empty strings from the map*/\r
+                       */\r
+/*remove empty strings from the map*//*\r
+\r
                         if (tempDico.containsKey("")) {\r
                                 tempDico.remove("");\r
                         }\r
@@ -281,9 +294,11 @@ public class IndexerTask extends Task {
                 }\r
         }\r
         \r
-       /**\r
+       */\r
+/**\r
       * Prints the usage information for this class to <code>System.out</code>.\r
-     */\r
+     *//*\r
+\r
      private static void DisplayHelp() {\r
         String lSep = System.getProperty("line.separator");\r
          StringBuffer msg = new StringBuffer();\r
@@ -354,3 +369,4 @@ public class IndexerTask extends Task {
      }\r
  \r
  }\r
+*/\r
diff --git a/xsl-webhelpindexer/src/com/nexwave/nquindexer/TesterIndexer.java b/xsl-webhelpindexer/src/com/nexwave/nquindexer/TesterIndexer.java

index 607a2457e9fc80e25ebfb4662cb903f0a48b3ae6..1aff3e93329450fbccaebc3596904fbc78ca71df 100755 (executable)
--- a/xsl-webhelpindexer/src/com/nexwave/nquindexer/TesterIndexer.java
+++ b/xsl-webhelpindexer/src/com/nexwave/nquindexer/TesterIndexer.java
@@ -1,5 +1,7 @@
+/*\r
  package com.nexwave.nquindexer;\r
  \r
+*/\r
  /**\r
   * For running tests with the indexertask.\r
   * \r
@@ -7,17 +9,20 @@ package com.nexwave.nquindexer;
   * \r
   * @author N. Quaine\r
   * @author Kasun Gajasinghe\r
- */\r
+ *//*\r
+\r
      public class TesterIndexer {\r
         public static IndexerTask IT = null; \r
-       /**\r
+       */\r
+/**\r
          * @param args\r
          * @throws InterruptedException \r
-        */\r
+        *//*\r
+\r
         public static void main(String[] args) throws InterruptedException {\r
          if (args.length != 0) {\r
              IT = new IndexerTask();\r
-            IT.setHtmldir(args[0]);\r
+            IT.setHtmlDir(args[0]);\r
              IT.setIndexerLanguage(args[1]);\r
              IT.execute();\r
          } else {\r
@@ -27,7 +32,7 @@ package com.nexwave.nquindexer;
              String dir = "../doc/content";\r
              String lang = "en";\r
              IT = new IndexerTask();\r
-            IT.setHtmldir(dir);\r
+            IT.setHtmlDir(dir);\r
              IT.setIndexerLanguage(lang);\r
              IT.execute();\r
          }\r
@@ -36,3 +41,4 @@ package com.nexwave.nquindexer;
         \r
  }\r
  \r
+*/\r
diff --git a/xsl-webhelpindexer/src/com/nexwave/nquindexer/WriteJSFiles.java b/xsl-webhelpindexer/src/com/nexwave/nquindexer/WriteJSFiles.java

index fce30bb5f9c1557564eeddac3443df379fac9857..9f83a577e1a4733f49a11b25ecda432b567082a7 100755 (executable)
--- a/xsl-webhelpindexer/src/com/nexwave/nquindexer/WriteJSFiles.java
+++ b/xsl-webhelpindexer/src/com/nexwave/nquindexer/WriteJSFiles.java
@@ -13,163 +13,247 @@ import java.util.Map;
  import java.util.TreeSet;\r
  \r
  import com.nexwave.nsidita.DocFileInfo;\r
+\r
  /**\r
   * Outputs the js files with:\r
   * - the list of html files and their description\r
   * - the words retrieved from the html files and their location\r
- * \r
- * @version 2.0 2010-08-13\r
- * \r
+ *\r
   * @author N. Quaine\r
   * @author Kasun Gajasinghe\r
+ * @version 2.0 2010-08-13\r
   */\r
  public class WriteJSFiles {\r
-       \r
-       private static String txt_VM_encoding_not_supported = "This VM does not support the specified encoding.";\r
-       private static String txt_indices_location = "The created index files are located in ";\r
-       \r
-       /** Create a javascript array listing the html files with their paths relative to the project root\r
-        * @param fileO path and name of the file in which to output the list of html files  \r
-        * @param list of the html files, relative to the doc root directory  \r
-        */\r
-       public static void WriteHTMLList (String fileO,ArrayList<String> list) {\r
-               int i = 0;\r
-               Iterator it;\r
-               \r
-               if (list == null) {\r
-                       return;\r
-               }\r
-               if (fileO == null) {\r
-                       return;\r
-               }\r
-               it = list.iterator ( ) ;\r
-               \r
-               try {\r
-                       // open a outputstream, here a file\r
-                       OutputStream fOut= new FileOutputStream(fileO);\r
-                       OutputStream bout= new BufferedOutputStream(fOut);\r
-               OutputStreamWriter out  = new OutputStreamWriter(bout, "UTF-8");\r
-               \r
-               /*fl : file list*/\r
-               out.write("//List of files which are indexed.\n");\r
-               out.write("fl = new Array();\n");\r
-               String temp;\r
-               while ( it.hasNext ( ) ) {\r
-                       temp = (String)it.next();\r
-                       //System.out.println("temp : "+File.separatorChar+" "+temp.replace(File.separatorChar, '/'));\r
-                          out.write("fl[\""+i+"\"]"+"= \""+temp.replace(File.separatorChar, '/')+"\";\n");\r
-                          i++;\r
-                       }\r
-               \r
-               out.flush();  // Don't forget to flush!\r
-               out.close();\r
+\r
+    private static String txt_VM_encoding_not_supported = "This VM does not support the specified encoding.";\r
+    private static String txt_indices_location = "The created index files are located in ";\r
+\r
+    /**\r
+     * Create a javascript array listing the html files with their paths relative to the project root\r
+     *\r
+     * @param fileO path and name of the file in which to output the list of html files\r
+     * @param list  of the html files, relative to the doc root directory\r
+     */\r
+    public static void WriteHTMLList(String fileO, ArrayList<String> list) {\r
+        int i = 0;\r
+        Iterator it;\r
+\r
+        if (list == null) {\r
+            return;\r
+        }\r
+        if (fileO == null) {\r
+            return;\r
+        }\r
+        it = list.iterator();\r
+\r
+        try {\r
+            // open a outputstream, here a file\r
+            OutputStream fOut = new FileOutputStream(fileO);\r
+            OutputStream bout = new BufferedOutputStream(fOut);\r
+            OutputStreamWriter out = new OutputStreamWriter(bout, "UTF-8");\r
+\r
+            /*fl : file list*/\r
+            out.write("//List of files which are indexed.\n");\r
+            out.write("fl = new Array();\n");\r
+            String temp;\r
+            while (it.hasNext()) {\r
+                temp = (String) it.next();\r
+                //System.out.println("temp : "+File.separatorChar+" "+temp.replace(File.separatorChar, '/'));\r
+                out.write("fl[\"" + i + "\"]" + "= \"" + temp.replace(File.separatorChar, '/') + "\";\n");\r
+                i++;\r
+            }\r
+\r
+            out.flush();  // Don't forget to flush!\r
+            out.close();\r
  //             System.out.println("the array of html is in " + fileO);\r
  \r
-               }\r
-           catch (UnsupportedEncodingException e) {\r
-                 System.out.println(txt_VM_encoding_not_supported);\r
-               }\r
-               catch (IOException e) {\r
-                 System.out.println(e.getMessage());        \r
-           }\r
-                               \r
-       }\r
-\r
-       /** Create a javascript array listing the html files with \r
-        * their paths relative to project root, their titles and shortdescs\r
-        * @param fileO path and name of the file in which to output the list of html files  \r
-        * @param list of the html files, relative to the doc root directory  \r
-        */\r
-       public static void WriteHTMLInfoList (String fileO,ArrayList<DocFileInfo> list) {\r
-               int i = 0;\r
-               Iterator it = null;\r
-               \r
-               if (list == null) {\r
-                       return;\r
-               }\r
-               if (fileO == null) {\r
-                       return;\r
-               }\r
-               it = list.iterator ( ) ;\r
-               try {\r
-                       // open a outputstream, here a file\r
-                       OutputStream fOut= new FileOutputStream(fileO);\r
-                       // open a buffer output stream\r
-                       OutputStream bout= new BufferedOutputStream(fOut);\r
-               OutputStreamWriter out \r
-                = new OutputStreamWriter(bout, "UTF-8");\r
-               \r
-               /*fil : file list*/\r
-               out.write("fil = new Array();\n");\r
-               \r
-               DocFileInfo tempInfo;\r
-               String tempPath;\r
-               String tempTitle;\r
-               String tempShortdesc;\r
-               while ( it.hasNext ( ) ) {\r
-                       // Retrieve file information: path, title and shortdesc.\r
-                       tempInfo = (DocFileInfo)it.next();\r
-                       tempPath = tempInfo.getFullpath().toString().replace(File.separatorChar, '/');\r
-                       tempTitle = tempInfo.getTitle();\r
-                       tempShortdesc = tempInfo.getShortdesc();\r
-                       //Remove unwanted white char\r
-                       if (tempTitle != null ) {\r
-                                       tempTitle = tempTitle.replaceAll("\\s+", " ");\r
-                               tempTitle = tempTitle.replaceAll("['�\"]", " ");\r
-                               }\r
-                       if (tempShortdesc != null ) {\r
-                               tempShortdesc = tempShortdesc.replaceAll("\\s+", " ");\r
-                               tempShortdesc = tempShortdesc.replaceAll("['�\"]", " ");\r
-                       }\r
-                       //System.out.println("temp : "+File.separatorChar+" "+tempShortdesc);\r
-                          out.write("fil[\""+i+"\"]"+"= \""+tempPath+"@@@"+tempTitle+"@@@"+tempShortdesc+"\";\n");\r
-                          i++;\r
-                       }\r
-               \r
-               out.flush();  // Don't forget to flush!\r
-               out.close();\r
-\r
-               }\r
-           catch (UnsupportedEncodingException e) {\r
-                 System.out.println(txt_VM_encoding_not_supported);\r
-               }\r
-               catch (IOException e) {\r
-                 System.out.println(e.getMessage());        \r
-           }\r
-                               \r
-       }\r
-\r
-       /** Create javascript index files alphabetically.\r
-        * @param fileOutStr contains the path and the suffix of the index files to create. \r
-        * The first letter of the key is added to the given suffix. For example: e.g. a.js, b.js etc...  \r
-        * @param indexMap its keys are the indexed words and\r
-        *  its values are the list of the files which contain the word.  \r
-        */\r
-       public static void WriteIndex (String fileOutStr, Map<String, ?> indexMap) {\r
-               OutputStreamWriter out;\r
-               OutputStream bout;\r
-               OutputStream fOut;\r
-               String tstr;            \r
-               \r
-               // check arguments\r
-               if (indexMap == null || fileOutStr ==null) {\r
-                       return;\r
-               }\r
-\r
-               // Collect the key of the index map\r
-               TreeSet<String> sortedKeys = new TreeSet<String>();\r
-               sortedKeys.addAll(indexMap.keySet());\r
-               Iterator keyIt = sortedKeys.iterator();\r
-               tstr = (String)keyIt.next();\r
-               \r
-               File fileOut= new File(fileOutStr);\r
+        }\r
+        catch (UnsupportedEncodingException e) {\r
+            System.out.println(txt_VM_encoding_not_supported);\r
+        }\r
+        catch (IOException e) {\r
+            System.out.println(e.getMessage());\r
+        }\r
+\r
+    }\r
+\r
+    /**\r
+     * Create a javascript array listing the html files with\r
+     * their paths relative to project root, their titles and shortdescs\r
+     *\r
+     * @param fileO path and name of the file in which to output the list of html files\r
+     * @param list  of the html files, relative to the doc root directory\r
+     */\r
+    public static void WriteHTMLInfoList(String fileO, ArrayList<DocFileInfo> list) {\r
+        int i = 0;\r
+        Iterator it = null;\r
+\r
+        if (list == null) {\r
+            return;\r
+        }\r
+        if (fileO == null) {\r
+            return;\r
+        }\r
+        it = list.iterator();\r
+        try {\r
+            // open a outputstream, here a file\r
+            OutputStream fOut = new FileOutputStream(fileO);\r
+            // open a buffer output stream\r
+            OutputStream bout = new BufferedOutputStream(fOut);\r
+            OutputStreamWriter out\r
+                    = new OutputStreamWriter(bout, "UTF-8");\r
+\r
+            /*fil : file list*/\r
+            out.write("fil = new Array();\n");\r
+\r
+            DocFileInfo tempInfo;\r
+            String tempPath;\r
+            String tempTitle;\r
+            String tempShortdesc;\r
+            while (it.hasNext()) {\r
+                // Retrieve file information: path, title and shortdesc.\r
+                tempInfo = (DocFileInfo) it.next();\r
+                tempPath = tempInfo.getFullpath().toString().replace(File.separatorChar, '/');\r
+                tempTitle = tempInfo.getTitle();\r
+                tempShortdesc = tempInfo.getShortdesc();\r
+                //Remove unwanted white char\r
+                if (tempTitle != null) {\r
+                    tempTitle = tempTitle.replaceAll("\\s+", " ");\r
+                    tempTitle = tempTitle.replaceAll("['�\"]", " ");\r
+                }\r
+                if (tempShortdesc != null) {\r
+                    tempShortdesc = tempShortdesc.replaceAll("\\s+", " ");\r
+                    tempShortdesc = tempShortdesc.replaceAll("['�\"]", " ");\r
+                }\r
+                //System.out.println("temp : "+File.separatorChar+" "+tempShortdesc);\r
+                out.write("fil[\"" + i + "\"]" + "= \"" + tempPath + "@@@" + tempTitle + "@@@" + tempShortdesc + "\";\n");\r
+                i++;\r
+            }\r
+\r
+            out.flush();  // Don't forget to flush!\r
+            out.close();\r
+\r
+        }\r
+        catch (UnsupportedEncodingException e) {\r
+            System.out.println(txt_VM_encoding_not_supported);\r
+        }\r
+        catch (IOException e) {\r
+            System.out.println(e.getMessage());\r
+        }\r
+\r
+    }\r
+\r
+    /**\r
+     * Writes the search index, sorted by key, into three javascript files.\r
+     *\r
+     * @param fileOutStr      path whose parent directory receives the files and whose file name is used as suffix:\r
+     *                        file i (1 to 3) is named "index-" + i + suffix, e.g. index-1.js for a ".js" suffix.\r
+     * @param indexMap        its keys are the indexed words and\r
+     *                        its values are the list of the files which contain the word.\r
+     * @param indexerLanguage The language of the content that gets indexed; written to the first file only\r
+     */\r
+    public static void WriteIndex(String fileOutStr, Map<String, ?> indexMap, String indexerLanguage) {\r
+        OutputStreamWriter out;\r
+        OutputStream bout;\r
+        OutputStream fOut;\r
+        String tstr;\r
+\r
+        // check arguments\r
+        if (indexMap == null || fileOutStr == null) {\r
+            return;\r
+        }\r
+\r
+        // Collect the key of the index map\r
+        TreeSet<String> sortedKeys = new TreeSet<String>();\r
+        sortedKeys.addAll(indexMap.keySet());\r
+        Iterator keyIt = sortedKeys.iterator();\r
+        tstr = (String) keyIt.next();\r
+\r
+        File fileOut = new File(fileOutStr);\r
+\r
+        /* Writes the index to Three JS files, namely: index-1.js, index-2.js, index-3.js\r
+                * Index will be distributed evenly in these three files.\r
+                * tstr is the current key\r
+                * keyIt is the iterator of the key set\r
+                * */\r
+        int indexSize = sortedKeys.size();\r
+        for (int i = 1; i <= 3; i++) {\r
+            try {\r
+                // open a outputstream, here a file\r
+                fOut = new FileOutputStream(fileOut.getParent() + File.separator + "index-" + i + fileOut.getName());\r
+                bout = new BufferedOutputStream(fOut);\r
+                out = new OutputStreamWriter(bout, "UTF-8");\r
+\r
+                try {\r
+                    /* Populate a javascript hashmap:\r
+                      The key is a word to look for in the index,\r
+                      The value is the numbers of the files in which the word exists.\r
+                      Example: w["key"]="file1,file2,file3";*/\r
+                    int count = 0;\r
+                    if (i == 1)\r
+                        out.write("var indexerLanguage=\"" + indexerLanguage + "\";\n");\r
+                    out.write("//Auto generated index for searching.\n");\r
+                    while (keyIt.hasNext()) {        //&& (tempLetter == tstr.charAt(0))\r
+                        out.write("w[\"" + tstr + "\"]" + "=\"" + indexMap.get(tstr) + "\";\n");\r
+                        tstr = (String) keyIt.next();\r
+                        count++;\r
+                        if (indexSize / count < 3) {\r
+                            break;\r
+                        }\r
+                    }\r
+                    out.write("\n");\r
+                    out.flush();  // Don't forget to flush!\r
+                    out.close();\r
+                }\r
+                catch (UnsupportedEncodingException e) {\r
+                    System.out.println(txt_VM_encoding_not_supported);\r
+                }\r
+            }\r
+            catch (IOException e) {\r
+                System.out.println(e.getMessage());\r
+            }\r
+        }\r
+        System.out.println(txt_indices_location + fileOutStr);\r
+    }\r
+\r
+\r
+    /**\r
+     * Writes the search index into three javascript files, without declaring the indexer language.\r
+     *\r
+     * @deprecated replaced by {@link #WriteIndex(String, Map, String)}\r
+     *\r
+     * @param fileOutStr contains the path and the suffix of the index files to create.\r
+     *                   The index is split over three files (index-1, index-2, index-3).\r
+     * @param indexMap   its keys are the indexed words and\r
+     *                   its values are the list of the files which contain the word.\r
+     */\r
+\r
+\r
+    public static void WriteIndex(String fileOutStr, Map<String, ?> indexMap) {\r
+        OutputStreamWriter out;\r
+        OutputStream bout;\r
+        OutputStream fOut;\r
+        String tstr;\r
+\r
+        // check arguments\r
+        if (indexMap == null || fileOutStr == null) {\r
+            return;\r
+        }\r
+\r
+        // Collect the key of the index map\r
+        TreeSet<String> sortedKeys = new TreeSet<String>();\r
+        sortedKeys.addAll(indexMap.keySet());\r
+        Iterator keyIt = sortedKeys.iterator();\r
+        tstr = (String) keyIt.next();\r
+\r
+        File fileOut = new File(fileOutStr);\r
  \r
          /* Writes the index to Three JS files, namely: index-1.js, index-2.js, index-3.js\r
                  * Index will be distributed evenly in these three files. \r
                  * tstr is the current key\r
                  * keyIt is the iterator of the key set\r
                  * */\r
-        int indexSize = sortedKeys.size(); \r
+        int indexSize = sortedKeys.size();\r
          for (int i = 1; i <= 3; i++) {\r
              try {\r
                  // open a outputstream, here a file\r
@@ -183,17 +267,17 @@ public class WriteJSFiles {
                        The value is the numbers of the files in which the word exists.\r
                        Example: w["key"]="file1,file2,file3";*/\r
                      int count = 0;\r
-                    if(i==1)\r
-                        out.write("var indexerLanguage=\""+IndexerTask.indexerLanguage+"\";\n");\r
+//                    if (i == 1)\r
+//                        out.write("var indexerLanguage=\"" + IndexerTask.indexerLanguage + "\";\n");\r
                      out.write("//Auto generated index for searching.\n");\r
                      while (keyIt.hasNext()) {        //&& (tempLetter == tstr.charAt(0)) \r
                          out.write("w[\"" + tstr + "\"]" + "=\"" + indexMap.get(tstr) + "\";\n");\r
                          tstr = (String) keyIt.next();\r
                          count++;\r
-                        if (indexSize / count < 3){\r
+                        if (indexSize / count < 3) {\r
                              break;\r
                          }\r
-                    } \r
+                    }\r
                      out.write("\n");\r
                      out.flush();  // Don't forget to flush!\r
                      out.close();\r
@@ -205,7 +289,7 @@ public class WriteJSFiles {
              catch (IOException e) {\r
                  System.out.println(e.getMessage());\r
              }\r
-        } \r
-           System.out.println(txt_indices_location + fileOutStr);\r
-       }\r
+        }\r
+        System.out.println(txt_indices_location + fileOutStr);\r
+    }\r
  }\r
author	Kasun Gajasinghe <kasunbg@gmail.com>
	Sat, 26 Mar 2011 09:52:27 +0000 (09:52 +0000)
committer	Kasun Gajasinghe <kasunbg@gmail.com>
	Sat, 26 Mar 2011 09:52:27 +0000 (09:52 +0000)
xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerMain.java	[new file with mode: 0644]	patch \| blob
xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerTask.java		patch \| blob \| history
xsl-webhelpindexer/src/com/nexwave/nquindexer/TesterIndexer.java		patch \| blob \| history
xsl-webhelpindexer/src/com/nexwave/nquindexer/WriteJSFiles.java		patch \| blob \| history