From: Kasun Gajasinghe Date: Sat, 26 Mar 2011 09:52:27 +0000 (+0000) Subject: Removed the WebHelpIndexer's ANT dependency. It's possible to use it as a standalone... X-Git-Tag: release/1.79.1~6^2~769 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=96902680f5079606d1635eb39df28ee362e17216;p=docbook-dsssl Removed the WebHelpIndexer's ANT dependency. It's possible to use it as a standalone version now. ex: java -cp webhelpindexer.jar:lib/lucene-core-3.0.0.jar:lib/lucene-analyzers-3.0.0.jar:/usr/share/java/xercesImpl.jar com.nexwave.nquindexer.IndexerMain ../x$ discussion: http://lists.oasis-open.org/archives/docbook-apps/201102/msg00079.html --- diff --git a/xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerMain.java b/xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerMain.java new file mode 100644 index 000000000..04887c516 --- /dev/null +++ b/xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerMain.java @@ -0,0 +1,404 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package com.nexwave.nquindexer; + +import com.nexwave.nsidita.DirList; +import com.nexwave.nsidita.DocFileInfo; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.*; + +/** + * User: Kasun Gajasinghe, University of Moratuwa, http://kasunbg.blogspot.com + * Date: Feb 10, 2011 + */ + +public class IndexerMain { + + // messages + private String txt_no_inputdir = "Input directory not found:"; + private String txt_cannot_create_outputdir = "Cannot create output search directory."; + private String txt_no_files_found = "No html files found."; + private String txt_wrong_dita_basedir = "ERROR: Parser initialization failed. Wrong dita base dir"; + private String txt_no_relative_files_found = "No relative html files calculated."; + private String txt_no_words_gathered = "No words have been indexed in"; + private String txt_no_html_files = "No HTML Files found in"; + private String txt_no_args = "No argument given: you must provide an htmlDir to the IndexerMain"; + + private static String txt_no_lang_specified ="Language of the content is not specified. Defaults to English."; + + //working directories + private String searchdir = "search"; + private File inputDir = null; + private String outputDir = null; + private String projectDir = null; + + // ANT parameters + public String htmlDir = null; + public String indexerLanguage = "en"; + + //supported languages: add new additions to this. don't include country codes to the end such as en_US or en_UK, + // as stemmers doesn't find a difference between them. + private String[] supportedLanguages = {"en", "de", "fr", "zh", "ja", "ko"}; //currently extended support available for + // English, German, French and CJK (Chinese [zh], Japanese [ja], Korean [ko]) languages only. + + // Indexing features: words to remove + private ArrayList cleanUpStrings = null; + private ArrayList cleanUpChars = null; + + //Html extension + private String htmlExtension = "html"; + + // Constructors + public IndexerMain(String htmlDir, String indexerLanguage) { + super(); + setHtmlDir(htmlDir); + setIndexerLanguage(indexerLanguage); + } + + /** + * The content language defaults to English "en" + * @param htmlDir The directory where html files resides. + */ + public IndexerMain(String htmlDir) { + super(); + setHtmlDir(htmlDir); + setIndexerLanguage("en"); + } + + /** + * The setter for the "htmlDir" attribute (parameter of the task) + * + * @param htmlDir + */ + public void setHtmlDir(String htmlDir) { + this.htmlDir = htmlDir; + } + + /** + * Set the extension in which html files are generated + * + * @param htmlExtension The extension in which html files are generated + */ + public void setHtmlextension(String htmlExtension) { + this.htmlExtension = htmlExtension; + //Trim the starting "." + if (this.htmlExtension.startsWith(".")) { + this.htmlExtension = this.htmlExtension.substring(1); + } + } + + /** + * setter for "indexerLanguage" attribute from ANT + * + * @param indexerLanguage language for the search indexer. Used to differentiate which stemmer to be used. + */ + public void setIndexerLanguage(String indexerLanguage) { + if (indexerLanguage != null && !"".equals(indexerLanguage)) { + int temp = indexerLanguage.indexOf('_'); + if (temp != -1) { + indexerLanguage = indexerLanguage.substring(0, temp); + } + int i = 0; + for (; i < supportedLanguages.length; i++) { + if (indexerLanguage.equals(supportedLanguages[i])) { + this.indexerLanguage = supportedLanguages[i]; + break; + } + } + + //if not in supported language list, + if (i >= supportedLanguages.length) { +// System.out.println("The given language, \""+indexerLanguage+"\", does not have extensive support for " + +// "searching. Check documentation for details. "); + this.indexerLanguage = indexerLanguage; + } + } else { + this.indexerLanguage = "@@"; //fail-safe mechanism, This vm should not reach this point. + } + } + + /** + * com.nexwave.nquindexer.IndexerMain + * The main class without Ant dependencies. + * This can be used as a standalone jar. + * + * @param args need two parameters for this array. htmlDirectory indexerLanguage + * If only one parameter is there (htmlDir), indexerLanguage defaults to english + */ + public static void main(String[] args) { + + IndexerMain indexer; + if (args.length == 1) { + System.out.println(txt_no_lang_specified); + indexer = new IndexerMain(args[0]); + } else if (args.length >= 2) { + + indexer = new IndexerMain(args[0], args[1]); + } else { + throw new ArrayIndexOutOfBoundsException("Please specify the parameters htmlDirectory and (optional) " + + "indexerLanguage"); + } + + indexer.execute(); + + } + + + /** + * Implementation of the execute function (Task interface) + */ + public void execute() { + try { + //Use Xerces as the parser. Does not support Saxon6.5.5 parser + System.setProperty("org.xml.sax.driver", "org.apache.xerces.parsers.SAXParser"); + System.setProperty("javax.xml.parsers.SAXParserFactory", "org.apache.xerces.jaxp.SAXParserFactoryImpl"); +// System.setProperty("org.xml.sax.driver", "com.icl.saxon.aelfred.SAXDriver"); +// System.setProperty("javax.xml.parsers.SAXParserFactory", "com.icl.saxon.aelfred.SAXParserFactoryImpl"); + } catch (SecurityException se) { + System.out.println("[WARNING] Default parser is not set to Xerces. Make sure Saxon6.5.5 " + + "is not in your CLASSPATH."); + } catch (Exception e) { + System.out.println("[WARNING] Default parser is not set to Xerces. Make sure Saxon6.5.5 " + + "is not in your CLASSPATH"); + } + + ArrayList filesDescription = null; // list of information about the topic files + ArrayList htmlFiles = null; // topic files listed in the given directory + ArrayList htmlFilesPathRel = null; + Map tempDico = new HashMap(); + Iterator it; + + //File name initialization + String htmlList = "htmlFileList.js"; + String htmlInfoList = "htmlFileInfoList.js"; + String indexName = ".js"; + + //timing + Date dateStart = new Date(); + + if (htmlDir == null) { + System.out.println(txt_no_args + "."); + return; + } + // Init input directory + inputDir = new File(htmlDir); + + // Begin of init + // check if inputdir initialized + if (inputDir == null) { + DisplayHelp(); + return; + } + + // check if inputdir exists + if (!inputDir.exists()) { + System.out.println(txt_no_inputdir + " " + inputDir + "."); + return; + } + + // check if outputdir defined + if (outputDir == null) { + //set the output directory: path= {inputDir}/search + outputDir = inputDir.getPath().concat(File.separator).concat(searchdir); + } + + // check if outputdir exists + File tempfile = new File(outputDir); + if (!tempfile.exists()) { + boolean b = (new File(outputDir)).mkdir(); + if (!b) { + System.out.println(txt_cannot_create_outputdir + " " + outputDir + "."); + return; + } + } + + // check if projdir is defined + if (projectDir == null) { + projectDir = inputDir.getPath(); + } + //end of init + + + // Get the list of all html files but the tocs, covers and indexes + DirList nsiDoc = new DirList(inputDir, "^.*\\." + htmlExtension + "?$", 1); + htmlFiles = nsiDoc.getListFiles(); + // Check if found html files + if (htmlFiles.isEmpty()) { + System.out.println(txt_no_html_files + " " + inputDir + "."); + return; + } + // Get the list of all html files with relative paths + htmlFilesPathRel = nsiDoc.getListFilesRelTo(projectDir); + + if (htmlFiles == null) { + System.out.println(txt_no_files_found); + return; + } else if (htmlFilesPathRel == null) { + System.out.println(txt_no_relative_files_found); + return; + } + + // Create the list of the existing html files (index starts at 0) + WriteJSFiles.WriteHTMLList(outputDir.concat(File.separator).concat(htmlList), htmlFilesPathRel); + + // Parse each html file to retrieve the words: + // ------------------------------------------ + + // Retrieve the clean-up properties for indexing + RetrieveCleanUpProps(); + // System.out.print("clean"+" " +cleanUpStrings); + + //create a default handler + //SaxHTMLIndex spe = new SaxHTMLIndex (); // do not use clean-up props files + //SaxHTMLIndex spe = new SaxHTMLIndex (cleanUpStrings); // use clean-up props files + SaxHTMLIndex spe = new SaxHTMLIndex(cleanUpStrings, cleanUpChars); // use clean-up props files + + if (spe.init(tempDico) == 0) { + + //create a html file description list + filesDescription = new ArrayList(); + + it = htmlFiles.iterator(); + + // parse each html files + while (it.hasNext()) { + File ftemp = (File) it.next(); + //tempMap.put(key, value); + //The HTML file information are added in the list of FileInfoObject + DocFileInfo docFileInfoTemp = new DocFileInfo(spe.runExtractData(ftemp, this.indexerLanguage)); + + ftemp = docFileInfoTemp.getFullpath(); + String stemp = ftemp.toString(); + int i = stemp.indexOf(projectDir); + if (i != 0) { + System.out.println("the documentation root does not match with the documentation input!"); + return; + } + int ad = 1; + if (stemp.equals(projectDir)) ad = 0; + stemp = stemp.substring(i + projectDir.length() + ad); //i is redundant (i==0 always) + ftemp = new File(stemp); + docFileInfoTemp.setFullpath(ftemp); + + filesDescription.add(docFileInfoTemp); + } + /*remove empty strings from the map*/ + if (tempDico.containsKey("")) { + tempDico.remove(""); + } + // write the index files + if (tempDico.isEmpty()) { + System.out.println(txt_no_words_gathered + " " + inputDir + "."); + return; + } + +// WriteJSFiles.WriteIndex(outputDir.concat(File.separator).concat(indexName), tempDico); + WriteJSFiles.WriteIndex(outputDir.concat(File.separator).concat(indexName), tempDico, indexerLanguage); + + // write the html list file with title and shortdesc + //create the list of the existing html files (index starts at 0) + WriteJSFiles.WriteHTMLInfoList(outputDir.concat(File.separator).concat(htmlInfoList), filesDescription); + + //perf measurement + Date dateEnd = new Date(); + long diff = dateEnd.getTime() - dateStart.getTime(); + if (diff < 1000) + System.out.println("Delay = " + diff + " milliseconds"); + else + System.out.println("Delay = " + diff / 1000 + " seconds"); + } else { + System.out.println(txt_wrong_dita_basedir); + return; + } + } + + /** + * Prints the usage information for this class to System.out. + */ + private static void DisplayHelp() { + String lSep = System.getProperty("line.separator"); + StringBuffer msg = new StringBuffer(); + msg.append("USAGE:" + lSep); + msg.append(" java -classpath TesterIndexer inputDir outputDir projectDir" + lSep); + msg.append("with:" + lSep); + msg.append(" inputDir (mandatory) : specify the html files ' directory to index" + lSep); + msg.append(" outputDir (optional) : specify where to output the index files" + lSep); + msg.append(" projectDir (optional) : specify the root of the documentation directory" + lSep); + msg.append("Example:" + lSep); + msg.append(" java -classpath TesterIndexer /home/$USER/DITA/doc" + lSep); + msg.append("Example 2:" + lSep); + msg.append(" java -classpath TesterIndexer /home/$USER/DITA/doc/customer/concepts /home/$USER/temp/search /home/$USER/DITA/doc/" + lSep); + System.out.println(msg.toString()); + } + + private int RetrieveCleanUpProps() { + + // Files for punctuation (only one for now) + String[] punctuationFiles = new String[]{"punctuation.props"}; + FileInputStream input; + String tempStr; + File ftemp; + Collection c = new ArrayList(); + + // Get the list of the props file containing the words to remove (not the punctuation) + DirList props = new DirList(inputDir, "^(?!(punctuation)).*\\.props$", 1); + ArrayList wordsList = props.getListFiles(); +// System.out.println("props files:"+wordsList); + //TODO all properties are taken to a single arraylist. does it ok?. + Properties enProps = new Properties(); + String propsDir = inputDir.getPath().concat(File.separator).concat(searchdir); + + // Init the lists which will contain the words and chars to remove + cleanUpStrings = new ArrayList(); + cleanUpChars = new ArrayList(); + + try { + // Retrieve words to remove + for (File aWordsList : wordsList) { + ftemp = aWordsList; + if (ftemp.exists()) { + enProps.load(input = new FileInputStream(ftemp.getAbsolutePath())); + input.close(); + c = enProps.values(); + cleanUpStrings.addAll(c); + enProps.clear(); + } + } + + // Retrieve char to remove (punctuation for ex.) + for (String punctuationFile : punctuationFiles) { + tempStr = propsDir.concat(File.separator).concat(punctuationFile); + ftemp = new File(tempStr); + if (ftemp.exists()) { + enProps.load(input = new FileInputStream(tempStr)); + input.close(); + c = enProps.values(); + cleanUpChars.addAll(c); + enProps.clear(); + } + } + } + catch (IOException e) { + e.printStackTrace(); + return 1; + } + return 0; + } + +} diff --git a/xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerTask.java b/xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerTask.java index d07eece72..373e89d01 100755 --- a/xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerTask.java +++ b/xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerTask.java @@ -1,3 +1,4 @@ +/* package com.nexwave.nquindexer; import java.io.File; @@ -18,6 +19,7 @@ import org.apache.tools.ant.Task; import com.nexwave.nsidita.DirList; import com.nexwave.nsidita.DocFileInfo; +*/ /** * Indexer ant task. * @@ -25,7 +27,8 @@ import com.nexwave.nsidita.DocFileInfo; * * @author N. Quaine * @author Kasun Gajasinghe - */ + *//* + public class IndexerTask extends Task { // messages @@ -36,7 +39,7 @@ public class IndexerTask extends Task { private String txt_no_relative_files_found= "No relative html files calculated."; private String txt_no_words_gathered= "No words have been indexed in"; private String txt_no_html_files="No HTML Files found in"; - private String txt_no_args="No argument given: you must provide an htmldir to the IndexerTask"; + private String txt_no_args="No argument given: you must provide an htmlDir to the IndexerTask"; //working directories private String searchdir = "search"; @@ -45,7 +48,7 @@ public class IndexerTask extends Task { private String projectDir = null; // ANT parameters - private String htmldir=null; + private String htmlDir=null; public static String indexerLanguage="en"; //supported languages: add new additions to this. don't include country codes to the end such as en_US or en_UK, @@ -64,18 +67,22 @@ public class IndexerTask extends Task { public IndexerTask() { super(); } - /** The setter for the "htmldir" attribute (parameter of the task) + */ +/** The setter for the "htmlDir" attribute (parameter of the task) * @param htmldir * @throws InterruptedException - */ - public void setHtmldir(String htmldir) { - this.htmldir = htmldir; + *//* + + public void setHtmlDir(String htmlDir) { + this.htmlDir = htmlDir; } - /** + */ +/** * Set the extension in which html files are generated * @param htmlExtension The extension in wich html files are generated - */ + *//* + public void setHtmlextension(String htmlExtension) { this.htmlExtension = htmlExtension; //Trim the starting "." @@ -84,11 +91,13 @@ public class IndexerTask extends Task { } } - /** + */ +/** * setter for "indexerLanguage" attribute from ANT * @param indexerLanguage language for the search indexer. Used to differerentiate which stemmer to be used. * @throws InterruptedException for ant - */ + *//* + public void setIndexerLanguage(String indexerLanguage){ if(indexerLanguage !=null && !"".equals(indexerLanguage)) { int temp = indexerLanguage.indexOf('_'); @@ -114,9 +123,11 @@ public class IndexerTask extends Task { } } - /** + */ +/** * Implementation of the execute function (Task interface) - */ + *//* + public void execute() throws BuildException { try{ //Use Xerces as the parser. Does not support Saxon6.5.5 parser @@ -146,12 +157,12 @@ public class IndexerTask extends Task { //timing Date dateStart = new Date(); - if (htmldir == null) { + if (htmlDir == null) { System.out.println(txt_no_args + "."); return; } // Init input directory - inputDir = new File(htmldir); + inputDir = new File(htmlDir); // Begin of init // check if inputdir initialized @@ -252,7 +263,9 @@ public class IndexerTask extends Task { filesDescription.add(docFileInfoTemp); } - /*remove empty strings from the map*/ + */ +/*remove empty strings from the map*//* + if (tempDico.containsKey("")) { tempDico.remove(""); } @@ -281,9 +294,11 @@ public class IndexerTask extends Task { } } - /** + */ +/** * Prints the usage information for this class to System.out. - */ + *//* + private static void DisplayHelp() { String lSep = System.getProperty("line.separator"); StringBuffer msg = new StringBuffer(); @@ -354,3 +369,4 @@ public class IndexerTask extends Task { } } +*/ diff --git a/xsl-webhelpindexer/src/com/nexwave/nquindexer/TesterIndexer.java b/xsl-webhelpindexer/src/com/nexwave/nquindexer/TesterIndexer.java index 607a2457e..1aff3e933 100755 --- a/xsl-webhelpindexer/src/com/nexwave/nquindexer/TesterIndexer.java +++ b/xsl-webhelpindexer/src/com/nexwave/nquindexer/TesterIndexer.java @@ -1,5 +1,7 @@ +/* package com.nexwave.nquindexer; +*/ /** * For running tests with the indexertask. * @@ -7,17 +9,20 @@ package com.nexwave.nquindexer; * * @author N. Quaine * @author Kasun Gajasinghe - */ + *//* + public class TesterIndexer { public static IndexerTask IT = null; - /** + */ +/** * @param args * @throws InterruptedException - */ + *//* + public static void main(String[] args) throws InterruptedException { if (args.length != 0) { IT = new IndexerTask(); - IT.setHtmldir(args[0]); + IT.setHtmlDir(args[0]); IT.setIndexerLanguage(args[1]); IT.execute(); } else { @@ -27,7 +32,7 @@ package com.nexwave.nquindexer; String dir = "../doc/content"; String lang = "en"; IT = new IndexerTask(); - IT.setHtmldir(dir); + IT.setHtmlDir(dir); IT.setIndexerLanguage(lang); IT.execute(); } @@ -36,3 +41,4 @@ package com.nexwave.nquindexer; } +*/ diff --git a/xsl-webhelpindexer/src/com/nexwave/nquindexer/WriteJSFiles.java b/xsl-webhelpindexer/src/com/nexwave/nquindexer/WriteJSFiles.java index fce30bb5f..9f83a577e 100755 --- a/xsl-webhelpindexer/src/com/nexwave/nquindexer/WriteJSFiles.java +++ b/xsl-webhelpindexer/src/com/nexwave/nquindexer/WriteJSFiles.java @@ -13,163 +13,247 @@ import java.util.Map; import java.util.TreeSet; import com.nexwave.nsidita.DocFileInfo; + /** * Outputs the js files with: * - the list of html files and their description * - the words retrieved from the html files and their location - * - * @version 2.0 2010-08-13 - * + * * @author N. Quaine * @author Kasun Gajasinghe + * @version 2.0 2010-08-13 */ public class WriteJSFiles { - - private static String txt_VM_encoding_not_supported = "This VM does not support the specified encoding."; - private static String txt_indices_location = "The created index files are located in "; - - /** Create a javascript array listing the html files with their paths relative to the project root - * @param fileO path and name of the file in which to output the list of html files - * @param list of the html files, relative to the doc root directory - */ - public static void WriteHTMLList (String fileO,ArrayList list) { - int i = 0; - Iterator it; - - if (list == null) { - return; - } - if (fileO == null) { - return; - } - it = list.iterator ( ) ; - - try { - // open a outputstream, here a file - OutputStream fOut= new FileOutputStream(fileO); - OutputStream bout= new BufferedOutputStream(fOut); - OutputStreamWriter out = new OutputStreamWriter(bout, "UTF-8"); - - /*fl : file list*/ - out.write("//List of files which are indexed.\n"); - out.write("fl = new Array();\n"); - String temp; - while ( it.hasNext ( ) ) { - temp = (String)it.next(); - //System.out.println("temp : "+File.separatorChar+" "+temp.replace(File.separatorChar, '/')); - out.write("fl[\""+i+"\"]"+"= \""+temp.replace(File.separatorChar, '/')+"\";\n"); - i++; - } - - out.flush(); // Don't forget to flush! - out.close(); + + private static String txt_VM_encoding_not_supported = "This VM does not support the specified encoding."; + private static String txt_indices_location = "The created index files are located in "; + + /** + * Create a javascript array listing the html files with their paths relative to the project root + * + * @param fileO path and name of the file in which to output the list of html files + * @param list of the html files, relative to the doc root directory + */ + public static void WriteHTMLList(String fileO, ArrayList list) { + int i = 0; + Iterator it; + + if (list == null) { + return; + } + if (fileO == null) { + return; + } + it = list.iterator(); + + try { + // open a outputstream, here a file + OutputStream fOut = new FileOutputStream(fileO); + OutputStream bout = new BufferedOutputStream(fOut); + OutputStreamWriter out = new OutputStreamWriter(bout, "UTF-8"); + + /*fl : file list*/ + out.write("//List of files which are indexed.\n"); + out.write("fl = new Array();\n"); + String temp; + while (it.hasNext()) { + temp = (String) it.next(); + //System.out.println("temp : "+File.separatorChar+" "+temp.replace(File.separatorChar, '/')); + out.write("fl[\"" + i + "\"]" + "= \"" + temp.replace(File.separatorChar, '/') + "\";\n"); + i++; + } + + out.flush(); // Don't forget to flush! + out.close(); // System.out.println("the array of html is in " + fileO); - } - catch (UnsupportedEncodingException e) { - System.out.println(txt_VM_encoding_not_supported); - } - catch (IOException e) { - System.out.println(e.getMessage()); - } - - } - - /** Create a javascript array listing the html files with - * their paths relative to project root, their titles and shortdescs - * @param fileO path and name of the file in which to output the list of html files - * @param list of the html files, relative to the doc root directory - */ - public static void WriteHTMLInfoList (String fileO,ArrayList list) { - int i = 0; - Iterator it = null; - - if (list == null) { - return; - } - if (fileO == null) { - return; - } - it = list.iterator ( ) ; - try { - // open a outputstream, here a file - OutputStream fOut= new FileOutputStream(fileO); - // open a buffer output stream - OutputStream bout= new BufferedOutputStream(fOut); - OutputStreamWriter out - = new OutputStreamWriter(bout, "UTF-8"); - - /*fil : file list*/ - out.write("fil = new Array();\n"); - - DocFileInfo tempInfo; - String tempPath; - String tempTitle; - String tempShortdesc; - while ( it.hasNext ( ) ) { - // Retrieve file information: path, title and shortdesc. - tempInfo = (DocFileInfo)it.next(); - tempPath = tempInfo.getFullpath().toString().replace(File.separatorChar, '/'); - tempTitle = tempInfo.getTitle(); - tempShortdesc = tempInfo.getShortdesc(); - //Remove unwanted white char - if (tempTitle != null ) { - tempTitle = tempTitle.replaceAll("\\s+", " "); - tempTitle = tempTitle.replaceAll("['�\"]", " "); - } - if (tempShortdesc != null ) { - tempShortdesc = tempShortdesc.replaceAll("\\s+", " "); - tempShortdesc = tempShortdesc.replaceAll("['�\"]", " "); - } - //System.out.println("temp : "+File.separatorChar+" "+tempShortdesc); - out.write("fil[\""+i+"\"]"+"= \""+tempPath+"@@@"+tempTitle+"@@@"+tempShortdesc+"\";\n"); - i++; - } - - out.flush(); // Don't forget to flush! - out.close(); - - } - catch (UnsupportedEncodingException e) { - System.out.println(txt_VM_encoding_not_supported); - } - catch (IOException e) { - System.out.println(e.getMessage()); - } - - } - - /** Create javascript index files alphabetically. - * @param fileOutStr contains the path and the suffix of the index files to create. - * The first letter of the key is added to the given suffix. For example: e.g. a.js, b.js etc... - * @param indexMap its keys are the indexed words and - * its values are the list of the files which contain the word. - */ - public static void WriteIndex (String fileOutStr, Map indexMap) { - OutputStreamWriter out; - OutputStream bout; - OutputStream fOut; - String tstr; - - // check arguments - if (indexMap == null || fileOutStr ==null) { - return; - } - - // Collect the key of the index map - TreeSet sortedKeys = new TreeSet(); - sortedKeys.addAll(indexMap.keySet()); - Iterator keyIt = sortedKeys.iterator(); - tstr = (String)keyIt.next(); - - File fileOut= new File(fileOutStr); + } + catch (UnsupportedEncodingException e) { + System.out.println(txt_VM_encoding_not_supported); + } + catch (IOException e) { + System.out.println(e.getMessage()); + } + + } + + /** + * Create a javascript array listing the html files with + * their paths relative to project root, their titles and shortdescs + * + * @param fileO path and name of the file in which to output the list of html files + * @param list of the html files, relative to the doc root directory + */ + public static void WriteHTMLInfoList(String fileO, ArrayList list) { + int i = 0; + Iterator it = null; + + if (list == null) { + return; + } + if (fileO == null) { + return; + } + it = list.iterator(); + try { + // open a outputstream, here a file + OutputStream fOut = new FileOutputStream(fileO); + // open a buffer output stream + OutputStream bout = new BufferedOutputStream(fOut); + OutputStreamWriter out + = new OutputStreamWriter(bout, "UTF-8"); + + /*fil : file list*/ + out.write("fil = new Array();\n"); + + DocFileInfo tempInfo; + String tempPath; + String tempTitle; + String tempShortdesc; + while (it.hasNext()) { + // Retrieve file information: path, title and shortdesc. + tempInfo = (DocFileInfo) it.next(); + tempPath = tempInfo.getFullpath().toString().replace(File.separatorChar, '/'); + tempTitle = tempInfo.getTitle(); + tempShortdesc = tempInfo.getShortdesc(); + //Remove unwanted white char + if (tempTitle != null) { + tempTitle = tempTitle.replaceAll("\\s+", " "); + tempTitle = tempTitle.replaceAll("['�\"]", " "); + } + if (tempShortdesc != null) { + tempShortdesc = tempShortdesc.replaceAll("\\s+", " "); + tempShortdesc = tempShortdesc.replaceAll("['�\"]", " "); + } + //System.out.println("temp : "+File.separatorChar+" "+tempShortdesc); + out.write("fil[\"" + i + "\"]" + "= \"" + tempPath + "@@@" + tempTitle + "@@@" + tempShortdesc + "\";\n"); + i++; + } + + out.flush(); // Don't forget to flush! + out.close(); + + } + catch (UnsupportedEncodingException e) { + System.out.println(txt_VM_encoding_not_supported); + } + catch (IOException e) { + System.out.println(e.getMessage()); + } + + } + + /** + * Create javascript index files alphabetically. + * + * @param fileOutStr contains the path and the suffix of the index files to create. + * The first letter of the key is added to the given suffix. For example: e.g. a.js, b.js etc... + * @param indexMap its keys are the indexed words and + * its values are the list of the files which contain the word. + * @param indexerLanguage The language of the content that gets indexed + */ + public static void WriteIndex(String fileOutStr, Map indexMap, String indexerLanguage) { + OutputStreamWriter out; + OutputStream bout; + OutputStream fOut; + String tstr; + + // check arguments + if (indexMap == null || fileOutStr == null) { + return; + } + + // Collect the key of the index map + TreeSet sortedKeys = new TreeSet(); + sortedKeys.addAll(indexMap.keySet()); + Iterator keyIt = sortedKeys.iterator(); + tstr = (String) keyIt.next(); + + File fileOut = new File(fileOutStr); + + /* Writes the index to Three JS files, namely: index-1.js, index-2.js, index-3.js + * Index will be distributed evenly in these three files. + * tstr is the current key + * keyIt is the iterator of the key set + * */ + int indexSize = sortedKeys.size(); + for (int i = 1; i <= 3; i++) { + try { + // open a outputstream, here a file + fOut = new FileOutputStream(fileOut.getParent() + File.separator + "index-" + i + fileOut.getName()); + bout = new BufferedOutputStream(fOut); + out = new OutputStreamWriter(bout, "UTF-8"); + + try { + /* Populate a javascript hashmap: + The key is a word to look for in the index, + The value is the numbers of the files in which the word exists. + Example: w["key"]="file1,file2,file3";*/ + int count = 0; + if (i == 1) + out.write("var indexerLanguage=\"" + indexerLanguage + "\";\n"); + out.write("//Auto generated index for searching.\n"); + while (keyIt.hasNext()) { //&& (tempLetter == tstr.charAt(0)) + out.write("w[\"" + tstr + "\"]" + "=\"" + indexMap.get(tstr) + "\";\n"); + tstr = (String) keyIt.next(); + count++; + if (indexSize / count < 3) { + break; + } + } + out.write("\n"); + out.flush(); // Don't forget to flush! + out.close(); + } + catch (UnsupportedEncodingException e) { + System.out.println(txt_VM_encoding_not_supported); + } + } + catch (IOException e) { + System.out.println(e.getMessage()); + } + } + System.out.println(txt_indices_location + fileOutStr); + } + + + /** + * Create javascript index files alphabetically. + * + * @deprecated replaced by WriteIndex(String fileOutStr, Map indexMap, String indexerLanguage) { + * + * @param fileOutStr contains the path and the suffix of the index files to create. + * The first letter of the key is added to the given suffix. For example: e.g. a.js, b.js etc... + * @param indexMap its keys are the indexed words and + * its values are the list of the files which contain the word. + */ + + + public static void WriteIndex(String fileOutStr, Map indexMap) { + OutputStreamWriter out; + OutputStream bout; + OutputStream fOut; + String tstr; + + // check arguments + if (indexMap == null || fileOutStr == null) { + return; + } + + // Collect the key of the index map + TreeSet sortedKeys = new TreeSet(); + sortedKeys.addAll(indexMap.keySet()); + Iterator keyIt = sortedKeys.iterator(); + tstr = (String) keyIt.next(); + + File fileOut = new File(fileOutStr); /* Writes the index to Three JS files, namely: index-1.js, index-2.js, index-3.js * Index will be distributed evenly in these three files. * tstr is the current key * keyIt is the iterator of the key set * */ - int indexSize = sortedKeys.size(); + int indexSize = sortedKeys.size(); for (int i = 1; i <= 3; i++) { try { // open a outputstream, here a file @@ -183,17 +267,17 @@ public class WriteJSFiles { The value is the numbers of the files in which the word exists. Example: w["key"]="file1,file2,file3";*/ int count = 0; - if(i==1) - out.write("var indexerLanguage=\""+IndexerTask.indexerLanguage+"\";\n"); +// if (i == 1) +// out.write("var indexerLanguage=\"" + IndexerTask.indexerLanguage + "\";\n"); out.write("//Auto generated index for searching.\n"); while (keyIt.hasNext()) { //&& (tempLetter == tstr.charAt(0)) out.write("w[\"" + tstr + "\"]" + "=\"" + indexMap.get(tstr) + "\";\n"); tstr = (String) keyIt.next(); count++; - if (indexSize / count < 3){ + if (indexSize / count < 3) { break; } - } + } out.write("\n"); out.flush(); // Don't forget to flush! out.close(); @@ -205,7 +289,7 @@ public class WriteJSFiles { catch (IOException e) { System.out.println(e.getMessage()); } - } - System.out.println(txt_indices_location + fileOutStr); - } + } + System.out.println(txt_indices_location + fileOutStr); + } }