*/\r
public abstract class IndexerConstants {\r
// European punctuation\r
- //TODO: Make sure European punctuation removal process doesn't affect the text with file locations etc.\r
-\r
- public static final String EUPUNCTUATION1 = "[$|%,;.':()\\/*\"{}=!&+<>#\\?]|\\[|\\]|[-][-]+";\r
- public static final String EUPUNCTUATION2 = "[$,;.':()\\/*\"{}=!&+<>\\\\]";\r
+ public static final String EUPUNCTUATION1 = "[$|%,;'()\\/*\"{}=!&+<>#\\?]|\\[|\\]|[-][-]+";
+ public static final String EUPUNCTUATION2 = "[$,;'()\\/*\"{}=!&+<>\\\\]";
// Japanese punctuation\r
public static final String JPPUNCTUATION1 = "\\u3000|\\u3001|\\u3002|\\u3003|\\u3008|\\u3009|\\u300C|\\u300D";\r
public static final String JPPUNCTUATION2 = "\\u3013|\\u3014|\\u3015|\\u301C|\\u301D|\\u301E|\\u301F";\r
//Html extension
private String htmlExtension = "html";
+ // OXYGEN PATCH START
+ //Table of contents file name
+ private String tocfile;
+ private boolean stem;
+ // OXYGEN PATCH END
+
// Constructors
public IndexerMain(String htmlDir, String indexerLanguage) {
super();
// Get the list of all html files with relative paths
htmlFilesPathRel = nsiDoc.getListFilesRelTo(projectDir);
+ // OXYGEN PATCH START.
+ // Remove the table of contents file
+ Iterator<String> iterator = htmlFilesPathRel.iterator();
+ while (iterator.hasNext()) {
+ if (iterator.next().endsWith(tocfile + "." + htmlExtension)) {
+ iterator.remove();
+ }
+ }
+ // OXYGEN PATCH END
if (htmlFiles == null) {
System.out.println(txt_no_files_found);
return;
}
// Create the list of the existing html files (index starts at 0)
- WriteJSFiles.WriteHTMLList(outputDir.concat(File.separator).concat(htmlList), htmlFilesPathRel);
+ WriteJSFiles.WriteHTMLList(outputDir.concat(File.separator).concat(htmlList), htmlFilesPathRel, stem);
// Parse each html file to retrieve the words:
// ------------------------------------------
// parse each html files
while (it.hasNext()) {
File ftemp = (File) it.next();
+ // OXYGEN PATCH START. Remove table of contents file
+ if (!ftemp.getAbsolutePath().endsWith(tocfile + "." + htmlExtension)) {
+ // OXYGEN PATCH END
//tempMap.put(key, value);
//The HTML file information are added in the list of FileInfoObject
- DocFileInfo docFileInfoTemp = new DocFileInfo(spe.runExtractData(ftemp, this.indexerLanguage));
+ DocFileInfo docFileInfoTemp = new DocFileInfo(spe.runExtractData(ftemp,indexerLanguage, stem));
ftemp = docFileInfoTemp.getFullpath();
String stemp = ftemp.toString();
docFileInfoTemp.setFullpath(ftemp);
filesDescription.add(docFileInfoTemp);
+ // OXYGEN PATCH START
+ // Remove the table of contents file
+ } else {
+ it.remove();
}
+ // OXYGEN PATCH END
+ }
/*remove empty strings from the map*/
if (tempDico.containsKey("")) {
tempDico.remove("");
return 0;
}
+ // OXYGEN PATCH START
+ // Set the table of contents file name
+ public void setTocfile(String tocfile) {
+ this.tocfile = tocfile;
+ }
+ // If true then generate js files with stemming words
+ public void setStem(boolean stem) {
+ this.stem = stem;
+ }
+ // OXYGEN PATCH END
}
\r
\r
import java.io.*;\r
+import java.util.Stack;\r
+import java.util.regex.Matcher;\r
+import java.util.regex.Pattern;\r
\r
import com.nexwave.nsidita.BlankRemover;\r
import com.nexwave.nsidita.DocFileInfo;\r
private boolean shortdescBool = false;\r
private int shortTagCpt = 0;\r
\r
+ // OXYGEN PATCH. Keep the stack of elements\r
+ Stack<String> stack = new Stack<String>();\r
//methods\r
/**\r
* Constructor\r
sp.getXMLReader().setFeature( "http://apache.org/xml/features/nonvalidating/load-external-dtd", false);\r
\r
//parse the file and also register this class for call backs\r
- System.out.println("Parsing: " + file);\r
+ //System.out.println("Parsing: " + file);\r
\r
long start = System.currentTimeMillis();\r
//System.out.println("about to parse " + file.getName() + " >>> " + start);\r
public void startElement(String uri, String localName, String qName, org.xml.sax.Attributes attributes) throws org.xml.sax.SAXException {\r
\r
//dwc: capture current element name\r
+ // START OXYGEN PATCH, add current element in stack\r
+ stack.add(qName);\r
+ // END OXYGEN PATCH\r
currentElName = qName;\r
\r
// dwc: Adding contents of some meta tags to the index\r
if((qName.equalsIgnoreCase("meta")) ) {\r
addHeaderInfo = true;\r
String attrName = attributes.getValue("name");\r
- if(attrName != null && (attrName.equalsIgnoreCase("keywords") || attrName.equalsIgnoreCase("description"))){\r
- strbf.append(" ").append(attributes.getValue("content")).append(" ");\r
- }\r
 // OXYGEN PATCH START EXM-20576 - add scoring for keywords and indexterms
+ if(attrName != null && (attrName.equalsIgnoreCase("keywords")
+ || attrName.equalsIgnoreCase("description")
+ || attrName.equalsIgnoreCase("indexterms")
+ )){
+ if (attrName.equalsIgnoreCase("keywords")) {
+ String[] keywords = attributes.getValue("content").split(", ");
+ for (int i = 0; i < keywords.length; i++) {
+ strbf.append(" " + keywords[i] + "@@@elem_meta_keywords@@@ ");
+ }
+ } else if (attrName.equalsIgnoreCase("indexterms")) {
+ String[] indexterms = attributes.getValue("content").split(", ");
+ for (int i = 0; i < indexterms.length; i++) {
+ strbf.append(" " + indexterms[i] + "@@@elem_meta_indexterms@@@ ");
+ }
+ } else {
+ strbf.append(" " + attributes.getValue("content") + " ");
+ }
+ }
 // OXYGEN PATCH END EXM-20576 - add scoring for keywords and indexterms
// dwc: adding this to make the docbook <abstract> element\r
// (which becomes <meta name="description".../> in html)\r
// into the brief description that shows up in search\r
// index certain elements. E.g. Use this to implement a\r
// "titles only" index,\r
\r
- if((addContent || addHeaderInfo) && !doNotIndex && !currentElName.equalsIgnoreCase("script")){\r
+ //OXYGEN PATCH, gather more keywords.\r
+ if(\r
+// (addContent || addHeaderInfo) && \r
+ !doNotIndex && !currentElName.equalsIgnoreCase("script")){\r
String text = new String(ch,start,length);\r
+ // START OXYGEN PATCH, append a marker after each word\r
+ // The marker is used to compute the scoring\r
+ // Create the marker\r
+ String originalText = text.replaceAll("\\s+"," ");\r
+ text = text.trim();\r
+ // Do a minimal clean\r
+ text = minimalClean(text, null, null);\r
+ text = text.replaceAll("\\s+"," ");\r
+ String marker = "@@@elem_" + stack.peek() + "@@@ ";
+ Matcher m = Pattern.compile("(\\w|-|:)+").matcher(text);
+ if (text.trim().length() > 0 && m.find()) {\r
+ String copyText = new String(originalText);
+ text = duplicateWords(copyText, text, "-");
+ copyText = new String(originalText);
+ text = duplicateWords(copyText, text, ":");
+ copyText = new String(originalText);
+ text = duplicateWords(copyText, text, ".");
+ // Replace whitespace with the marker\r
+ text = text.replace(" ", marker);\r
+ text = text + marker;\r
+ }\r
+ // END OXYGEN PATCH\r
strbf.append(text);\r
- if (tempVal != null) { tempVal.append(text);}\r
+// System.out.println("=== marked text: " + text);
+ // START OXYGEN PATCH, append the original text\r
+ if (tempVal != null) { tempVal.append(originalText);}\r
+ // END OXYGEN PATCH\r
}\r
}\r
\r
+ // START OXYGEN PATCH EXM-20414
+ private String duplicateWords(String sourceText, String acumulator, String separator) {
+// System.out.println("sourceText: " + sourceText + " separator: " + separator);
+ int index = sourceText.indexOf(separator);
+ while (index >= 0) {
+ int indexSpaceAfter = sourceText.indexOf(" ", index);
+ String substring = null;
+ if (indexSpaceAfter >= 0) {
+ substring = sourceText.substring(0, indexSpaceAfter);
+ sourceText = sourceText.substring(indexSpaceAfter);
+ } else {
+ substring = sourceText;
+ sourceText = "";
+ }
+
+ int indexSpaceBefore = substring.lastIndexOf(" ");
+ if (indexSpaceBefore >= 0) {
+ substring = substring.substring(indexSpaceBefore + 1);
+ }
+ if (separator.indexOf(".") >= 0) {
+ separator = separator.replaceAll("\\.", "\\\\.");
+// System.out.println("++++++++++ separator: " + separator);
+ }
+ String[] tokens = substring.split(separator);
+
+ for (int i = 0; i < tokens.length; i++) {
+ acumulator = acumulator + " " + tokens[i];
+// System.out.println("added token: " + tokens[i] + " new text: " + acumulator);
+ }
+
+ index = sourceText.indexOf(separator);
+ }
+
+ return acumulator;
+ }
+ // END OXYGEN PATCH EXM-20414
public void endElement(String uri, String localName, String qName) throws org.xml.sax.SAXException {\r
+ // START OXYGEN PATCH, remove element from stack\r
+ stack.pop();\r
+ // END OXYGEN PATCH\r
if(qName.equalsIgnoreCase("title")) {\r
//add it to the list\r
//myEmpls.add(tempEmp);\r
else if (shortdescBool) {\r
shortTagCpt --;\r
if (shortTagCpt == 0) {\r
- fileDesc.setShortdesc(BlankRemover.rmWhiteSpace(tempVal.toString().replace('\n', ' ')));\r
+ String shortdesc = tempVal.toString().replace('\n', ' ');
+ if(shortdesc.trim().length() > 0) {
+ fileDesc.setShortdesc(BlankRemover.rmWhiteSpace(shortdesc));
+ }
tempVal = null;\r
shortdescBool = false;\r
}\r
\r
}\r
\r
+ // START OXYGEN PATCH, moved from subclass\r
+ protected String minimalClean(String str, StringBuffer tempStrBuf, StringBuffer tempCharBuf) {\r
+ String tempPunctuation = null;\r
+ if (tempCharBuf!= null) {\r
+ tempPunctuation = new String(tempCharBuf);\r
+ }\r
+\r
+ str = str.replaceAll("\\s+", " ");\r
+ str = str.replaceAll("->", " ");\r
+ str = str.replaceAll(IndexerConstants.EUPUNCTUATION1, " ");\r
+ str = str.replaceAll(IndexerConstants.EUPUNCTUATION2, " ");\r
+ str = str.replaceAll(IndexerConstants.JPPUNCTUATION1, " ");\r
+ str = str.replaceAll(IndexerConstants.JPPUNCTUATION2, " ");\r
+ str = str.replaceAll(IndexerConstants.JPPUNCTUATION3, " ");\r
+ if (tempPunctuation != null && tempPunctuation.length() > 0)\r
+ {\r
+ str = str.replaceAll(tempPunctuation, " ");\r
+ }\r
+\r
+ if (tempStrBuf != null) {\r
+ //remove useless words\r
+ str = str.replaceAll(tempStrBuf.toString(), " ");\r
+ }\r
+\r
+ // Redo punctuation after removing some words: (TODO: useful?)\r
+ str = str.replaceAll(IndexerConstants.EUPUNCTUATION1, " ");\r
+ str = str.replaceAll(IndexerConstants.EUPUNCTUATION2, " ");\r
+ str = str.replaceAll(IndexerConstants.JPPUNCTUATION1, " ");\r
+ str = str.replaceAll(IndexerConstants.JPPUNCTUATION2, " ");\r
+ str = str.replaceAll(IndexerConstants.JPPUNCTUATION3, " ");\r
+ if (tempPunctuation != null && tempPunctuation.length() > 0)\r
+ {\r
+ str = str.replaceAll(tempPunctuation, " ");\r
+ } return str;\r
+ }\r
+ // END OXYGEN PATCH\r
}\r
private ArrayList <String> cleanUpList = null;\r
private ArrayList <String> cleanUpPunctuation = null;\r
\r
+ // START OXYGEN PATCH, scoring for HTML elements\r
+ private int SCORING_FOR_H1 = 50;\r
+ private int SCORING_FOR_H2 = 45;\r
+ private int SCORING_FOR_H3 = 40;\r
+ private int SCORING_FOR_H4 = 35;\r
+ private int SCORING_FOR_H5 = 30;\r
+ private int SCORING_FOR_H6 = 25;\r
+ private int SCORING_FOR_BOLD = 5;\r
+ private int SCORING_FOR_ITALIC = 3;\r
+ private int SCORING_FOR_NORMAL_TEXT = 1;\r
+ private int SCORING_FOR_KEYWORD = 100;
+ private int SCORING_FOR_INDEXTERM = 75;
+ \r
+ /**\r
+ * The list with the word and scoring object\r
+ */\r
+ private List<WordAndScoring> wsList = null;\r
+\r
+ /**\r
+ * Used for Oxygen TestCases\r
+ * @return the wsList\r
+ */\r
+ public List<WordAndScoring> getWsList() {\r
+ return wsList;\r
+ }\r
+ // END OXYGEN PATCH\r
//methods\r
/**\r
* Constructor\r
* some data characterizing the file.\r
* @param file contains the fullpath of the document to parse\r
* @param indexerLanguage this will be used to tell the program which stemmer to be used.\r
+ * @param stem if true then generate js files with words stemmed\r
* @return a DitaFileInfo object filled with data describing the file\r
*/\r
- public DocFileInfo runExtractData(File file, String indexerLanguage) {\r
+ public DocFileInfo runExtractData(File file, String indexerLanguage, boolean stem) {\r
//initialization\r
fileDesc = new DocFileInfo(file);\r
strbf = new StringBuffer("");\r
String str = cleanBuffer(strbf);\r
str = str.replaceAll("\\s+"," "); //there's still redundant spaces in the middle\r
// System.out.println(file.toString()+" "+ str +"\n");\r
- String[] items = str.split("\\s"); //contains all the words in the array\r
+ // START OXYGEN PATCH\r
+// String[] items = str.split("\\s"); //contains all the words in the array\r
+ // END OXYGEN PATCH\r
\r
//get items one-by-one, tunnel through the stemmer, and get the stem.\r
//Then, add them to tempSet\r
//Do Stemming for words in items\r
//TODO currently, stemming support is for english and german only. Add support for other languages as well.\r
\r
- String[] tokenizedItems;\r
+ // START OXYGEN PATCH
+ wsList = new ArrayList<WordAndScoring>();
+ // START OXYGEN PATCH, create the words and scoring list\r
+// String[] tokenizedItems;\r
+ // END OXYGEN PATCH\r
if(indexerLanguage.equalsIgnoreCase("ja") || indexerLanguage.equalsIgnoreCase("zh")\r
|| indexerLanguage.equalsIgnoreCase("ko")){\r
LinkedList<String> tokens = new LinkedList<String>();\r
try{\r
+ //EXM-21501 Oxygen patch, replace the extra "@@@"s.
+ str = str.replaceAll("@@@([^\\s]*)@@@", "");
CJKAnalyzer analyzer = new CJKAnalyzer(org.apache.lucene.util.Version.LUCENE_30);\r
Reader reader = new StringReader(str);\r
TokenStream stream = analyzer.tokenStream("", reader);\r
while (stream.incrementToken()) {\r
String term = termAtt.term();\r
tokens.add(term);\r
-// System.out.println(term + " " + offAtt.startOffset() + " " + offAtt.endOffset());\r
+ WordAndScoring ws = new WordAndScoring(term, term, 1);
+ boolean found = false;
+ for (int i = 0; i < wsList.size(); i++) {
+ // If the stem of the current word is already in list,
+ // do not add the word in the list, just recompute scoring
+ if (wsList.get(i).getStem().equals(ws.getStem())) {
+ found = true;
+ int scoring = wsList.get(i).getScoring();
+ wsList.get(i).setScoring(scoring + ws.getScoring());
+ break;
}\r
\r
- tokenizedItems = tokens.toArray(new String[tokens.size()]);\r
+ }
+ if (!found) {
+ wsList.add(ws);
+ }
+ }
+ // START OXYGEN PATCH\r
+ //tokenizedItems = tokens.toArray(new String[tokens.size()]);\r
+ // END OXYGEN PATCH\r
\r
}catch (IOException ex){\r
- tokenizedItems = items;\r
+ // START OXYGEN PATCH\r
+// tokenizedItems = items;\r
+ // END OXYGEN PATCH\r
System.out.println("Error tokenizing content using CJK Analyzer. IOException");\r
ex.printStackTrace();\r
}\r
} else {\r
stemmer = null;//Languages which stemming is not yet supproted.So, No stemmers will be used.\r
}\r
- if(stemmer != null) //If a stemmer available\r
- tokenizedItems = stemmer.doStem(items);\r
- else //if no stemmer available for the particular language\r
- tokenizedItems = items;\r
+ // START OXYGEN PATCH\r
+ wsList = new ArrayList<WordAndScoring>();\r
+ StringTokenizer st = new StringTokenizer(str, " ");\r
+ // Tokenize the string and populate the words and scoring list\r
+ while (st.hasMoreTokens()) {\r
+ String token = st.nextToken();\r
+ WordAndScoring ws = getWordAndScoring(token, stemmer, stem);\r
+ if (ws != null) {\r
+ boolean found = false;\r
+ for (int i = 0; i < wsList.size(); i++) { \r
+ // If the stem of the current word is already in list, \r
+ // do not add the word in the list, just recompute scoring\r
+ if (wsList.get(i).getStem().equals(ws.getStem())) {\r
+ found = true;\r
+ int scoring = wsList.get(i).getScoring();\r
+ wsList.get(i).setScoring(scoring + ws.getScoring());\r
+ break;\r
+ }\r
+ }\r
+ if (!found) {\r
+ wsList.add(ws);\r
+ }\r
+ } \r
+ } \r
+// if(stemmer != null) //If a stemmer available\r
+// tokenizedItems = stemmer.doStem(items.toArray(new String[0]));\r
+// else //if no stemmer available for the particular language\r
+// tokenizedItems = items.toArray(new String[0]);\r
+ // END OXYGEN PATCH\r
\r
}\r
\r
System.out.print(stemmedItem+"| ");\r
}*/\r
\r
- //items: remove the duplicated strings first\r
- HashSet <String> tempSet = new HashSet<String>();\r
- tempSet.addAll(Arrays.asList(tokenizedItems));\r
- Iterator it = tempSet.iterator();\r
- String s;\r
+ // START OXYGEN PATCH\r
+// //items: remove the duplicated strings first\r
+// HashSet <String> tempSet = new HashSet<String>();\r
+// tempSet.addAll(Arrays.asList(tokenizedItems));\r
+// Iterator it = tempSet.iterator();\r
+ // Iterate over the words and scoring list\r
+ Iterator<WordAndScoring> it = wsList.iterator();\r
+ WordAndScoring s;\r
while (it.hasNext()) {\r
- s = (String)it.next();\r
- if (tempDico.containsKey(s)) {\r
- String temp = tempDico.get(s);\r
- temp = temp.concat(",").concat(Integer.toString(i));\r
+ s = it.next();\r
+ // Do not add results from 'toc.html'\r
+ if (s != null && tempDico.containsKey(s.getStem())) {\r
+ String temp = tempDico.get(s.getStem());\r
+ temp = temp.concat(",").concat(Integer.toString(i))\r
+ // Concat also the scoring for the stem\r
+ .concat("*").concat(Integer.toString(s.getScoring()))\r
+ ;\r
//System.out.println("temp="+s+"="+temp);\r
- tempDico.put(s, temp);\r
+ tempDico.put(s.getStem(), temp);\r
}else {\r
- tempDico.put(s, Integer.toString(i));\r
+ String temp = Integer.toString(i).concat("*").concat(Integer.toString(s.getScoring()));\r
+ tempDico.put(s.getStem(), temp);\r
}\r
+ // END OXYGEN PATCH\r
}\r
\r
i++;\r
return fileDesc;\r
}\r
\r
+ // START OXYGEN PATCH\r
+ /**\r
+ * Get the word, stem and scoring for the given token.\r
+ * @param token The token to parse.\r
+ * @param stemmer The stemmer.\r
+ * @param doStemming If true then generate js files with words stemmed.\r
+ * @return the word, stem and scoring for the given token.\r
+ */\r
+ private WordAndScoring getWordAndScoring(String token, SnowballStemmer stemmer, boolean doStemming) {\r
+ WordAndScoring wordScoring = null;\r
+ if (token.indexOf("@@@") != -1 && token.indexOf("@@@") != token.lastIndexOf("@@@")) {\r
+ // Extract the word from token\r
+ String word = token.substring(0, token.indexOf("@@@"));\r
+ if (word.length() > 0) {\r
+ // Extract the element name from token\r
+ String elementName = token.substring(token.indexOf("@@@elem_") + "@@@elem_".length(), token.lastIndexOf("@@@"));\r
+ // Compute scoring\r
+ int scoring = SCORING_FOR_NORMAL_TEXT;\r
+ if ("h1".equalsIgnoreCase(elementName)) {\r
+ scoring = SCORING_FOR_H1;\r
+ } else if ("h2".equalsIgnoreCase(elementName)) {\r
+ scoring = SCORING_FOR_H2;\r
+ } else if ("h3".equalsIgnoreCase(elementName)) {\r
+ scoring = SCORING_FOR_H3;\r
+ } else if ("h4".equalsIgnoreCase(elementName)) {\r
+ scoring = SCORING_FOR_H4;\r
+ } else if ("h5".equalsIgnoreCase(elementName)) {\r
+ scoring = SCORING_FOR_H5;\r
+ } else if ("h6".equalsIgnoreCase(elementName)) {\r
+ scoring = SCORING_FOR_H6;\r
+ } else if ("em".equalsIgnoreCase(elementName)) {\r
+ scoring = SCORING_FOR_ITALIC;\r
+ } else if ("strong".equalsIgnoreCase(elementName)) {\r
+ scoring = SCORING_FOR_BOLD;\r
+ } else if ("meta_keywords".equalsIgnoreCase(elementName)) {
+ scoring = SCORING_FOR_KEYWORD;
+ } else if ("meta_indexterms".equalsIgnoreCase(elementName)) {
+ scoring = SCORING_FOR_INDEXTERM;
+ }\r
+ // Get the stemmed word\r
+ String stemWord = word;\r
+ if (stemmer != null && doStemming) {\r
+ stemWord = stemmer.doStem(word);\r
+ }\r
+ wordScoring = new WordAndScoring(word, stemWord, scoring);\r
+ }\r
+ } else {\r
+ // The token contains only the word\r
+ String stemWord = token;\r
+ // Stem the word\r
+ if (stemmer != null && doStemming) {\r
+ stemWord = stemmer.doStem(token);\r
+ }\r
+ wordScoring = new WordAndScoring(token, stemWord, SCORING_FOR_NORMAL_TEXT);\r
+ }\r
+ return wordScoring;\r
+ }\r
+ // END OXYGEN PATCH\r
+\r
/**\r
* Cleans the string buffer containing all the text retrieved from\r
* the html file: remove punctuation, clean white spaces, remove the words\r
tempCharBuf.append("\\u3002");\r
Iterator it = cleanUpPunctuation.iterator();\r
while (it.hasNext()){\r
- tempCharBuf.append("|").append(it.next());\r
+ tempCharBuf.append("|"+it.next());
}\r
}\r
\r
return str;\r
}\r
\r
- private String minimalClean(String str, StringBuffer tempStrBuf, StringBuffer tempCharBuf) {\r
- String tempPunctuation = new String(tempCharBuf);\r
-\r
- str = str.replaceAll("\\s+", " ");\r
- str = str.replaceAll("->", " ");\r
- str = str.replaceAll(IndexerConstants.EUPUNCTUATION1, " ");\r
- str = str.replaceAll(IndexerConstants.EUPUNCTUATION2, " ");\r
- str = str.replaceAll(IndexerConstants.JPPUNCTUATION1, " ");\r
- str = str.replaceAll(IndexerConstants.JPPUNCTUATION2, " ");\r
- str = str.replaceAll(IndexerConstants.JPPUNCTUATION3, " ");\r
- if (tempPunctuation.length() > 0)\r
- {\r
- str = str.replaceAll(tempPunctuation, " ");\r
- }\r
-\r
- //remove useless words\r
- str = str.replaceAll(tempStrBuf.toString(), " ");\r
-\r
- // Redo punctuation after removing some words: (TODO: useful?)\r
- str = str.replaceAll(IndexerConstants.EUPUNCTUATION1, " ");\r
- str = str.replaceAll(IndexerConstants.EUPUNCTUATION2, " ");\r
- str = str.replaceAll(IndexerConstants.JPPUNCTUATION1, " ");\r
- str = str.replaceAll(IndexerConstants.JPPUNCTUATION2, " ");\r
- str = str.replaceAll(IndexerConstants.JPPUNCTUATION3, " ");\r
- if (tempPunctuation.length() > 0)\r
- {\r
- str = str.replaceAll(tempPunctuation, " ");\r
- } return str;\r
- }\r
+ // OXYGEN PATCH, moved method in superclass\r
+// private String minimalClean(String str, StringBuffer tempStrBuf, StringBuffer tempCharBuf) {\r
+// String tempPunctuation = new String(tempCharBuf);\r
+//\r
+// str = str.replaceAll("\\s+", " ");\r
+// str = str.replaceAll("->", " ");\r
+// str = str.replaceAll(IndexerConstants.EUPUNCTUATION1, " ");\r
+// str = str.replaceAll(IndexerConstants.EUPUNCTUATION2, " ");\r
+// str = str.replaceAll(IndexerConstants.JPPUNCTUATION1, " ");\r
+// str = str.replaceAll(IndexerConstants.JPPUNCTUATION2, " ");\r
+// str = str.replaceAll(IndexerConstants.JPPUNCTUATION3, " ");\r
+// if (tempPunctuation.length() > 0)\r
+// {\r
+// str = str.replaceAll(tempPunctuation, " ");\r
+// }\r
+//\r
+// //remove useless words\r
+// str = str.replaceAll(tempStrBuf.toString(), " ");\r
+//\r
+// // Redo punctuation after removing some words: (TODO: useful?)\r
+// str = str.replaceAll(IndexerConstants.EUPUNCTUATION1, " ");\r
+// str = str.replaceAll(IndexerConstants.EUPUNCTUATION2, " ");\r
+// str = str.replaceAll(IndexerConstants.JPPUNCTUATION1, " ");\r
+// str = str.replaceAll(IndexerConstants.JPPUNCTUATION2, " ");\r
+// str = str.replaceAll(IndexerConstants.JPPUNCTUATION3, " ");\r
+// if (tempPunctuation.length() > 0)\r
+// {\r
+// str = str.replaceAll(tempPunctuation, " ");\r
+// } return str;\r
+// }\r
+ // END OXYGEN PATCH\r
\r
}\r
--- /dev/null
+package com.nexwave.nquindexer;\r
+\r
+// OXYGEN PATCH, holder for a word, its stemmed form, and its search scoring
+public class WordAndScoring {\r
+ \r
+ /**\r
+ * The original word\r
+ */\r
+ private String word;\r
+ /**\r
+ * Scoring for given word\r
+ */\r
+ private int scoring;\r
+ /**\r
+ * Stemmed word\r
+ */\r
+ private String stem;\r
+ \r
+ /**\r
+ * Constructor\r
+ * @param word Original word\r
+ * @param stem Stemmed word\r
+ * @param scoring Scoring of word \r
+ */\r
+ public WordAndScoring(String word, String stem, int scoring) {\r
+ this.word = word;\r
+ this.stem = stem;\r
+ this.scoring = scoring;\r
+ }\r
+ /**\r
+ * @return the word\r
+ */\r
+ public String getWord() {\r
+ return word;\r
+ }\r
+ \r
+ /**\r
+ * @return the scoring\r
+ */\r
+ public int getScoring() {\r
+ return scoring;\r
+ }\r
+ /**\r
+ * @param scoring the scoring to set\r
+ */\r
+ public void setScoring(int scoring) {\r
+ this.scoring = scoring;\r
+ }\r
+\r
+ @Override\r
+ public String toString() {\r
+ StringBuilder sb = new StringBuilder();\r
+ sb.append("Word: ").append(word).append(" Score: ").append(scoring).append(" Stem: ").append(stem);\r
+ return sb.toString();\r
+ }\r
+ \r
+ /**\r
+ * \r
+ * @return stemmed word\r
+ */\r
+ public String getStem() {\r
+ return stem;\r
+ }\r
+}\r
-package com.nexwave.nquindexer;\r
-\r
-import java.io.BufferedOutputStream;\r
-import java.io.File;\r
-import java.io.FileOutputStream;\r
-import java.io.IOException;\r
-import java.io.OutputStream;\r
-import java.io.OutputStreamWriter;\r
-import java.io.UnsupportedEncodingException;\r
-import java.util.ArrayList;\r
-import java.util.Iterator;\r
-import java.util.Map;\r
-import java.util.TreeSet;\r
-\r
-import com.nexwave.nsidita.DocFileInfo;\r
-\r
-/**\r
- * Outputs the js files with:\r
- * - the list of html files and their description\r
- * - the words retrieved from the html files and their location\r
- *\r
- * @author N. Quaine\r
- * @author Kasun Gajasinghe\r
- * @version 2.0 2010-08-13\r
- */\r
-public class WriteJSFiles {\r
-\r
- private static String txt_VM_encoding_not_supported = "This VM does not support the specified encoding.";\r
- private static String txt_indices_location = "The created index files are located in ";\r
-\r
- /**\r
- * Create a javascript array listing the html files with their paths relative to the project root\r
- *\r
- * @param fileO path and name of the file in which to output the list of html files\r
- * @param list of the html files, relative to the doc root directory\r
- */\r
- public static void WriteHTMLList(String fileO, ArrayList<String> list) {\r
- int i = 0;\r
- Iterator it;\r
-\r
- if (list == null) {\r
- return;\r
- }\r
- if (fileO == null) {\r
- return;\r
- }\r
- it = list.iterator();\r
-\r
- try {\r
- // open a outputstream, here a file\r
- OutputStream fOut = new FileOutputStream(fileO);\r
- OutputStream bout = new BufferedOutputStream(fOut);\r
- OutputStreamWriter out = new OutputStreamWriter(bout, "UTF-8");\r
-\r
- /*fl : file list*/\r
- out.write("//List of files which are indexed.\n");\r
- out.write("fl = new Array();\n");\r
- String temp;\r
- while (it.hasNext()) {\r
- temp = (String) it.next();\r
- //System.out.println("temp : "+File.separatorChar+" "+temp.replace(File.separatorChar, '/'));\r
- out.write("fl[\"" + i + "\"]" + "= \"" + temp.replace(File.separatorChar, '/') + "\";\n");\r
- i++;\r
- }\r
-\r
- out.flush(); // Don't forget to flush!\r
- out.close();\r
-// System.out.println("the array of html is in " + fileO);\r
-\r
- }\r
- catch (UnsupportedEncodingException e) {\r
- System.out.println(txt_VM_encoding_not_supported);\r
- }\r
- catch (IOException e) {\r
- System.out.println(e.getMessage());\r
- }\r
-\r
- }\r
-\r
- /**\r
- * Create a javascript array listing the html files with\r
- * their paths relative to project root, their titles and shortdescs\r
- *\r
- * @param fileO path and name of the file in which to output the list of html files\r
- * @param list of the html files, relative to the doc root directory\r
- */\r
- public static void WriteHTMLInfoList(String fileO, ArrayList<DocFileInfo> list) {\r
- int i = 0;\r
- Iterator it = null;\r
-\r
- if (list == null) {\r
- return;\r
- }\r
- if (fileO == null) {\r
- return;\r
- }\r
- it = list.iterator();\r
- try {\r
- // open a outputstream, here a file\r
- OutputStream fOut = new FileOutputStream(fileO);\r
- // open a buffer output stream\r
- OutputStream bout = new BufferedOutputStream(fOut);\r
- OutputStreamWriter out\r
- = new OutputStreamWriter(bout, "UTF-8");\r
-\r
- /*fil : file list*/\r
- out.write("fil = new Array();\n");\r
-\r
- DocFileInfo tempInfo;\r
- String tempPath;\r
- String tempTitle;\r
- String tempShortdesc;\r
- while (it.hasNext()) {\r
- // Retrieve file information: path, title and shortdesc.\r
- tempInfo = (DocFileInfo) it.next();\r
- tempPath = tempInfo.getFullpath().toString().replace(File.separatorChar, '/');\r
- tempTitle = tempInfo.getTitle();\r
- tempShortdesc = tempInfo.getShortdesc();\r
- //Remove unwanted white char\r
- if (tempTitle != null) {\r
- tempTitle = tempTitle.replaceAll("\\s+", " ");\r
- tempTitle = tempTitle.replaceAll("['�\"]", " ");\r
- }\r
- if (tempShortdesc != null) {\r
- tempShortdesc = tempShortdesc.replaceAll("\\s+", " ");\r
- tempShortdesc = tempShortdesc.replaceAll("['�\"]", " ");\r
- }\r
- //System.out.println("temp : "+File.separatorChar+" "+tempShortdesc);\r
- out.write("fil[\"" + i + "\"]" + "= \"" + tempPath + "@@@" + tempTitle + "@@@" + tempShortdesc + "\";\n");\r
- i++;\r
- }\r
-\r
- out.flush(); // Don't forget to flush!\r
- out.close();\r
-\r
- }\r
- catch (UnsupportedEncodingException e) {\r
- System.out.println(txt_VM_encoding_not_supported);\r
- }\r
- catch (IOException e) {\r
- System.out.println(e.getMessage());\r
- }\r
-\r
- }\r
-\r
- /**\r
- * Create javascript index files alphabetically.\r
- *\r
- * @param fileOutStr contains the path and the suffix of the index files to create.\r
- * The first letter of the key is added to the given suffix. For example: e.g. a.js, b.js etc...\r
- * @param indexMap its keys are the indexed words and\r
- * its values are the list of the files which contain the word.\r
- * @param indexerLanguage The language of the content that gets indexed\r
- */\r
- public static void WriteIndex(String fileOutStr, Map<String, ?> indexMap, String indexerLanguage) {\r
- OutputStreamWriter out;\r
- OutputStream bout;\r
- OutputStream fOut;\r
- String tstr;\r
-\r
- // check arguments\r
- if (indexMap == null || fileOutStr == null) {\r
- return;\r
- }\r
-\r
- // Collect the key of the index map\r
- TreeSet<String> sortedKeys = new TreeSet<String>();\r
- sortedKeys.addAll(indexMap.keySet());\r
- Iterator keyIt = sortedKeys.iterator();\r
- tstr = (String) keyIt.next();\r
-\r
- File fileOut = new File(fileOutStr);\r
-\r
- /* Writes the index to Three JS files, namely: index-1.js, index-2.js, index-3.js\r
- * Index will be distributed evenly in these three files.\r
- * tstr is the current key\r
- * keyIt is the iterator of the key set\r
- * */\r
- int indexSize = sortedKeys.size();\r
- for (int i = 1; i <= 3; i++) {\r
- try {\r
- // open a outputstream, here a file\r
- fOut = new FileOutputStream(fileOut.getParent() + File.separator + "index-" + i + fileOut.getName());\r
- bout = new BufferedOutputStream(fOut);\r
- out = new OutputStreamWriter(bout, "UTF-8");\r
-\r
- try {\r
- /* Populate a javascript hashmap:\r
- The key is a word to look for in the index,\r
- The value is the numbers of the files in which the word exists.\r
- Example: w["key"]="file1,file2,file3";*/\r
- int count = 0;\r
- if (i == 1)\r
- out.write("var indexerLanguage=\"" + indexerLanguage + "\";\n");\r
- out.write("//Auto generated index for searching.\n");\r
- while (keyIt.hasNext()) { //&& (tempLetter == tstr.charAt(0))\r
- out.write("w[\"" + tstr + "\"]" + "=\"" + indexMap.get(tstr) + "\";\n");\r
- tstr = (String) keyIt.next();\r
- count++;\r
- if (indexSize / count < 3) {\r
- break;\r
- }\r
- }\r
- out.write("\n");\r
- out.flush(); // Don't forget to flush!\r
- out.close();\r
- }\r
- catch (UnsupportedEncodingException e) {\r
- System.out.println(txt_VM_encoding_not_supported);\r
- }\r
- }\r
- catch (IOException e) {\r
- System.out.println(e.getMessage());\r
- }\r
- }\r
- System.out.println(txt_indices_location + fileOutStr);\r
- }\r
-\r
-\r
- /**\r
- * Create javascript index files alphabetically.\r
- *\r
- * @deprecated replaced by WriteIndex(String fileOutStr, Map<String, ?> indexMap, String indexerLanguage) { \r
- *\r
- * @param fileOutStr contains the path and the suffix of the index files to create.\r
- * The first letter of the key is added to the given suffix. For example: e.g. a.js, b.js etc...\r
- * @param indexMap its keys are the indexed words and\r
- * its values are the list of the files which contain the word.\r
- */\r
-\r
-\r
- public static void WriteIndex(String fileOutStr, Map<String, ?> indexMap) {\r
- OutputStreamWriter out;\r
- OutputStream bout;\r
- OutputStream fOut;\r
- String tstr;\r
-\r
- // check arguments\r
- if (indexMap == null || fileOutStr == null) {\r
- return;\r
- }\r
-\r
- // Collect the key of the index map\r
- TreeSet<String> sortedKeys = new TreeSet<String>();\r
- sortedKeys.addAll(indexMap.keySet());\r
- Iterator keyIt = sortedKeys.iterator();\r
- tstr = (String) keyIt.next();\r
-\r
- File fileOut = new File(fileOutStr);\r
-\r
- /* Writes the index to Three JS files, namely: index-1.js, index-2.js, index-3.js\r
- * Index will be distributed evenly in these three files. \r
- * tstr is the current key\r
- * keyIt is the iterator of the key set\r
- * */\r
- int indexSize = sortedKeys.size();\r
- for (int i = 1; i <= 3; i++) {\r
- try {\r
- // open a outputstream, here a file\r
- fOut = new FileOutputStream(fileOut.getParent() + File.separator + "index-" + i + fileOut.getName());\r
- bout = new BufferedOutputStream(fOut);\r
- out = new OutputStreamWriter(bout, "UTF-8");\r
-\r
- try {\r
- /* Populate a javascript hashmap:\r
- The key is a word to look for in the index,\r
- The value is the numbers of the files in which the word exists.\r
- Example: w["key"]="file1,file2,file3";*/\r
- int count = 0;\r
-// if (i == 1)\r
-// out.write("var indexerLanguage=\"" + IndexerTask.indexerLanguage + "\";\n");\r
- out.write("//Auto generated index for searching.\n");\r
- while (keyIt.hasNext()) { //&& (tempLetter == tstr.charAt(0)) \r
- out.write("w[\"" + tstr + "\"]" + "=\"" + indexMap.get(tstr) + "\";\n");\r
- tstr = (String) keyIt.next();\r
- count++;\r
- if (indexSize / count < 3) {\r
- break;\r
- }\r
- }\r
- out.write("\n");\r
- out.flush(); // Don't forget to flush!\r
- out.close();\r
- }\r
- catch (UnsupportedEncodingException e) {\r
- System.out.println(txt_VM_encoding_not_supported);\r
- }\r
- }\r
- catch (IOException e) {\r
- System.out.println(e.getMessage());\r
- }\r
- }\r
- System.out.println(txt_indices_location + fileOutStr);\r
- }\r
-}\r
+package com.nexwave.nquindexer;
+
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.TreeSet;
+
+import com.nexwave.nsidita.DocFileInfo;
+
+/**
+ * Outputs the js files with:
+ * - the list of html files and their description
+ * - the words retrieved from the html files and their location
+ *
+ * 20110803: Adding improvements from Radu/Oxygen.
+ *
+ * @author N. Quaine
+ * @author Kasun Gajasinghe
+ * @version 2.0 2010-08-13
+ */
+public class WriteJSFiles {
+
+ private static String txt_VM_encoding_not_supported = "This VM does not support the specified encoding.";
+ private static String txt_indices_location = "The created index files are located in ";
+
+ /** Create a javascript array listing the html files with their paths relative to the project root
+ * @param fileO path and name of the file in which to output the list of html files
+ * @param list of the html files, relative to the doc root directory
+ * @param doStem If true then js files will generate words stemmed
+ */
+ public static void WriteHTMLList (String fileO,ArrayList<String> list, boolean doStem) {
+ int i = 0;
+ Iterator it;
+
+ if (list == null) {
+ return;
+ }
+ if (fileO == null) {
+ return;
+ }
+ it = list.iterator();
+
+ try {
+ // open a outputstream, here a file
+ OutputStream fOut = new FileOutputStream(fileO);
+ OutputStream bout = new BufferedOutputStream(fOut);
+ OutputStreamWriter out = new OutputStreamWriter(bout, "UTF-8");
+
+ /*fl : file list*/
+ out.write("//List of files which are indexed.\n");
+ out.write("fl = new Array();\n");
+ String temp;
+ while (it.hasNext()) {
+ temp = (String) it.next();
+ //System.out.println("temp : "+File.separatorChar+" "+temp.replace(File.separatorChar, '/'));
+ out.write("fl[\"" + i + "\"]" + "= \"" + temp.replace(File.separatorChar, '/') + "\";\n");
+ i++;
+ }
+
+ out.write("var doStem = " + doStem + ";\n"); // properly terminate the generated JS statement
+ out.flush(); // Don't forget to flush!
+ out.close();
+// System.out.println("the array of html is in " + fileO);
+
+ }
+ catch (UnsupportedEncodingException e) {
+ System.out.println(txt_VM_encoding_not_supported);
+ }
+ catch (IOException e) {
+ System.out.println(e.getMessage());
+ }
+
+ }
+
+ /** Create a javascript array listing the html files with
+ * their paths relative to project root, their titles and shortdescs
+ * @param fileO path and name of the file in which to output the list of html files
+ * @param list of the html files, relative to the doc root directory
+ */
+ public static void WriteHTMLInfoList(String fileO, ArrayList<DocFileInfo> list) {
+ int i = 0;
+ Iterator it = null;
+
+ if (list == null) {
+ return;
+ }
+ if (fileO == null) {
+ return;
+ }
+ it = list.iterator();
+ try {
+ // open a outputstream, here a file
+ OutputStream fOut = new FileOutputStream(fileO);
+ // open a buffer output stream
+ OutputStream bout = new BufferedOutputStream(fOut);
+ OutputStreamWriter out
+ = new OutputStreamWriter(bout, "UTF-8");
+
+ /*fil : file list*/
+ out.write("fil = new Array();\n");
+
+ DocFileInfo tempInfo;
+ String tempPath;
+ String tempTitle;
+ String tempShortdesc;
+ while (it.hasNext()) {
+ // Retrieve file information: path, title and shortdesc.
+ tempInfo = (DocFileInfo) it.next();
+ tempPath = tempInfo.getFullpath().toString().replace(File.separatorChar, '/');
+ tempTitle = tempInfo.getTitle();
+ tempShortdesc = tempInfo.getShortdesc();
+ //Remove unwanted white char
+ if (tempTitle != null) {
+ tempTitle = tempTitle.replaceAll("\\s+", " ");
+ tempTitle = tempTitle.replaceAll("['�\"]", " ");
+ //EXM-21239 Escape "\"
+ tempTitle = tempTitle.replaceAll("\\\\", "\\\\\\\\");
+ }
+ if (tempShortdesc != null) {
+ tempShortdesc = tempShortdesc.replaceAll("\\s+", " ");
+ tempShortdesc = tempShortdesc.replaceAll("['�\"]", " ");
+ //EXM-21239 Escape "\"
+ tempShortdesc = tempShortdesc.replaceAll("\\\\", "\\\\\\\\");
+ }
+ if (tempShortdesc != null) {
+ String stripNonAlphabeticalChars = stripNonAlphabeticalChars(tempShortdesc);
+ //stripNonAlphabeticalChars = stripWords(stripNonAlphabeticalChars);
+ stripNonAlphabeticalChars = stripNonAlphabeticalChars + "...";
+ out.write("fil[\""+i+"\"]"+"= \""+tempPath+"@@@"+tempTitle+"@@@"+stripNonAlphabeticalChars+"\";\n");
+ i++;
+ }else{
+ out.write("fil[\""+i+"\"]"+"= \""+tempPath+"@@@"+tempTitle+"@@@null"+"\";\n");
+ i++;
+
+
+ }
+ }
+
+ out.flush(); // Don't forget to flush!
+ out.close();
+
+ }
+ catch (UnsupportedEncodingException e) {
+ System.out.println(txt_VM_encoding_not_supported);
+ }
+ catch (IOException e) {
+ System.out.println(e.getMessage());
+ }
+
+ }
+
+ /** Create javascript index files alphabetically.
+ * @param fileOutStr contains the path and the suffix of the index files to create.
+ * The first letter of the key is added to the given suffix. For example: e.g. a.js, b.js etc...
+ * @param indexMap its keys are the indexed words and
+ * its values are the list of the files which contain the word.
+ * @param indexerLanguage The language of the content that gets indexed
+ */
+ public static void WriteIndex(String fileOutStr, Map<String, ?> indexMap, String indexerLanguage) {
+ OutputStreamWriter out;
+ OutputStream bout;
+ OutputStream fOut;
+ String tstr;
+
+ // check arguments; reject an empty map too, otherwise keyIt.next() below throws NoSuchElementException
+ if (indexMap == null || indexMap.isEmpty() || fileOutStr == null) {
+ return;
+ }
+
+ // Collect the keys of the index map, sorted alphabetically
+ TreeSet<String> sortedKeys = new TreeSet<String>();
+ sortedKeys.addAll(indexMap.keySet());
+ Iterator<String> keyIt = sortedKeys.iterator();
+ tstr = keyIt.next();
+
+ File fileOut = new File(fileOutStr);
+
+ /* Writes the index to Three JS files, namely: index-1.js, index-2.js, index-3.js
+ * Index will be distributed evenly in these three files.
+ * tstr is the current key
+ * keyIt is the iterator of the key set
+ * */
+ int indexSize = sortedKeys.size();
+ for (int i = 1; i <= 3; i++) {
+ try {
+ // open a outputstream, here a file
+ fOut = new FileOutputStream(fileOut.getParent() + File.separator + "index-" + i + fileOut.getName());
+ bout = new BufferedOutputStream(fOut);
+ out = new OutputStreamWriter(bout, "UTF-8");
+
+ try {
+ /* Populate a javascript hashmap:
+ The key is a word to look for in the index,
+ The value is the numbers of the files in which the word exists.
+ Example: w["key"]="file1,file2,file3";*/
+ int count = 0;
+ if (i == 1)
+ out.write("var indexerLanguage=\"" + indexerLanguage + "\";\n");
+ out.write("//Auto generated index for searching.\n");
+ while (keyIt.hasNext()) { //&& (tempLetter == tstr.charAt(0))
+ out.write("w[\"" + tstr + "\"]" + "=\"" + indexMap.get(tstr) + "\";\n");
+ tstr = (String) keyIt.next();
+ count++;
+ if (indexSize / count < 3) {
+ break;
+ }
+ }
+ out.write("\n");
+ out.flush(); // Don't forget to flush!
+ out.close();
+ }
+ catch (UnsupportedEncodingException e) {
+ System.out.println(txt_VM_encoding_not_supported);
+ }
+ }
+ catch (IOException e) {
+ System.out.println(e.getMessage());
+ }
+ }
+ System.out.println(txt_indices_location + fileOutStr);
+ }
+
+
+ /**
+ * Remove all non alphanumeric characters (outside A-Z, a-z, 0-9) from the end of a text.
+ * @param input The text that will be stripped.
+ * @return The stripped text.
+ */
+ private static String stripNonAlphabeticalChars(String input) {
+ String output = input;
+ for (int i = input.length() - 1; i >= 0 ; i--) {
+ char charAt = input.charAt(i);
+ int k = (int)charAt;
+ if ((k >= 65 && k <= 90) || (k >= 97 && k <= 122) || (k >= 48 && k <= 57)) {
+ return output;
+ } else {
+ output = output.substring(0, output.length() - 1);
+ }
+ }
+ return output;
+ }
+
+ private static String stripWords(String input) {
+ int idx = input.lastIndexOf(" ");
+ if (idx != -1) {
+ return input.substring(0, idx);
+ } else {
+ return input;
+ }
+ }
+}
\ No newline at end of file
-package com.nexwave.nsidita;\r
-\r
-import java.io.File;\r
-import java.io.FilenameFilter;\r
-import java.util.ArrayList;\r
-import java.util.Iterator;\r
-import java.util.regex.*;\r
-\r
-public class DirList {\r
- \r
- ArrayList<File> listFiles = null;\r
- ArrayList<String> listFilesRelTo = null;\r
- String [] topicFiles = null;\r
- public static final int MAX_DEPTH = 10;\r
- \r
- public DirList(File inputdir, String regex, int depth) {\r
- try {\r
- \r
- listFiles = new ArrayList<File> ();\r
- \r
- // not yet implemented \r
- if(regex == null) {\r
- for (File f: inputdir.listFiles()) {\r
- if (!f.isDirectory()){\r
- listFiles.add(f);\r
- }else {\r
- if (depth < MAX_DEPTH ) {\r
- DirList nsiDoc = new DirList(f,regex,depth+1);\r
- listFiles.addAll(new ArrayList<File>(nsiDoc.getListFiles()));\r
- }\r
- }\r
- }\r
- }\r
- else {\r
- for (File f: inputdir.listFiles(new DirFilter(regex))) {\r
- listFiles.add(f);\r
- }\r
- for (File f: inputdir.listFiles(new DirFilter("^[^\\.].*$"))) {\r
- if (f.isDirectory()){\r
- if (depth < MAX_DEPTH ) {\r
- DirList nsiDoc = new DirList(f,regex, depth+1);\r
- listFiles.addAll(new ArrayList<File>(nsiDoc.getListFiles()));\r
- }\r
- }\r
- }\r
- }\r
- } \r
- catch(Exception e) {\r
- // TODO gerer exception\r
- e.printStackTrace();\r
- }\r
- }\r
- \r
- public ArrayList<File> getListFiles() {\r
- return this.listFiles;\r
- }\r
- /**\r
- * Calculate the path of the files already listed relative to projectDir\r
- * @param projectDir Root from where to calculate the relative path\r
- * @return The list of files with their relative path\r
- */ \r
- public ArrayList<String> getListFilesRelTo(String projectDir) {\r
- Iterator it;\r
- \r
- if (this.listFiles == null) return null;\r
- \r
- listFilesRelTo = new ArrayList<String>();\r
- it = this.listFiles.iterator ( ) ;\r
- while ( it.hasNext ( ) ) {\r
- File ftemp = (File) it.next();\r
- String stemp = ftemp.getPath();\r
- int i = stemp.indexOf(projectDir);\r
- if ( i != 0 ) {\r
- System.out.println("the documentation root does not match with the documentation input!");\r
- return null;\r
- }\r
- int ad = 1;\r
- if (stemp.equals(projectDir)) ad = 0; \r
- stemp = stemp.substring(i+projectDir.length()+ad);\r
- listFilesRelTo.add(stemp);\r
- }\r
- return this.listFilesRelTo;\r
- }\r
-\r
-}\r
-\r
-class DirFilter implements FilenameFilter {\r
- private Pattern pattern;\r
- public DirFilter(String regex) {\r
- pattern = Pattern.compile(regex);\r
- }\r
- public boolean accept(File dir, String name) {\r
- String thisname = new File(name).getName();\r
- //System.out.println("Testing: "+ thisname);\r
- if(thisname.equals("index.html") || thisname.equals("ix01.html")){\r
- return false;\r
- }else{\r
- // Strip path information, search for regex:\r
- return pattern.matcher(new File(name).getName()).matches();\r
- }\r
- }\r
-} \r
+package com.nexwave.nsidita;
+
+import java.io.File;
+import java.io.FilenameFilter;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.regex.*;
+
+public class DirList {
+
+ ArrayList<File> listFiles = null;
+ ArrayList<String> listFilesRelTo = null;
+ String [] topicFiles = null;
+ public static final int MAX_DEPTH = 10;
+
+ public DirList(File inputdir, String regex, int depth) {
+ try {
+
+ listFiles = new ArrayList<File> ();
+
+ // not yet implemented
+ if(regex == null) {
+ for (File f: inputdir.listFiles()) {
+ if (!f.isDirectory()){
+ listFiles.add(f);
+ }else {
+ if (depth < MAX_DEPTH ) {
+ DirList nsiDoc = new DirList(f,regex,depth+1);
+ listFiles.addAll(new ArrayList<File>(nsiDoc.getListFiles()));
+ }
+ }
+ }
+ }
+ else {
+ for (File f: inputdir.listFiles(new DirFilter(regex))) {
+ listFiles.add(f);
+ }
+// Patch from Oxygen to address problem where directories
+// containing . were not traversed.
+ for (File f: inputdir.listFiles(new DirFilter(".*"))) {
+ if (f.isDirectory()){
+ if (depth < MAX_DEPTH ) {
+ DirList nsiDoc = new DirList(f,regex, depth+1);
+ listFiles.addAll(new ArrayList<File>(nsiDoc.getListFiles()));
+ }
+ }
+ }
+ }
+ }
+ catch(Exception e) {
+ // TODO: handle this exception properly instead of only printing the stack trace
+ e.printStackTrace();
+ }
+ }
+
+ public ArrayList<File> getListFiles() {
+ return this.listFiles;
+ }
+ /**
+ * Calculate the path of the files already listed relative to projectDir
+ * @param projectDir Root from where to calculate the relative path
+ * @return The list of files with their relative path
+ */
+ public ArrayList<String> getListFilesRelTo(String projectDir) {
+ Iterator it;
+
+ if (this.listFiles == null) return null;
+
+ listFilesRelTo = new ArrayList<String>();
+ it = this.listFiles.iterator ( ) ;
+ while ( it.hasNext ( ) ) {
+ File ftemp = (File) it.next();
+ String stemp = ftemp.getPath();
+ int i = stemp.indexOf(projectDir);
+ if ( i != 0 ) {
+ System.out.println("the documentation root does not match with the documentation input!");
+ return null;
+ }
+ int ad = 1;
+ if (stemp.equals(projectDir)) ad = 0;
+ stemp = stemp.substring(i+projectDir.length()+ad);
+ listFilesRelTo.add(stemp);
+ }
+ return this.listFilesRelTo;
+ }
+
+}
+
+class DirFilter implements FilenameFilter {
+ private Pattern pattern;
+ public DirFilter(String regex) {
+ pattern = Pattern.compile(regex);
+ }
+ public boolean accept(File dir, String name) {
+ String thisname = new File(name).getName();
+ //System.out.println("Testing: "+ thisname);
+ if(thisname.equals("index.html") || thisname.equals("ix01.html")){
+ return false;
+ }else{
+ // Strip path information, search for regex:
+ return pattern.matcher(new File(name).getName()).matches();
+ }
+ }
+}
}
return stemmedWords;
}
+ //OXYGEN PATCH START, Stem only one word
+ /**
+ * Do stemming of a given String and returns the stemmed word.
+ * @param word Word to be stemmed
+ * @return stemmed word
+ */
+ public String doStem(String word){
+ word = word.trim().toLowerCase();
+
+ //Do the stemming of the given word.
+ setCurrent(word); //set the word to be stemmed
+ stem(); //tell stemmer to stem
+
+ String stemmedWord = getCurrent(); //Get the stemmed word.
+ return stemmedWord;
+ }
+ //OXYGEN PATCH END
}
--- /dev/null
+var BrowserDetect = {\r
+ init: function () {\r
+ this.browser = this.searchString(this.dataBrowser) || "An unknown browser";\r
+ this.version = this.searchVersion(navigator.userAgent)\r
+ || this.searchVersion(navigator.appVersion)\r
+ || "an unknown version";\r
+ this.OS = this.searchString(this.dataOS) || "an unknown OS";\r
+ },\r
+ searchString: function (data) {\r
+ for (var i=0;i<data.length;i++) {\r
+ var dataString = data[i].string;\r
+ var dataProp = data[i].prop;\r
+ this.versionSearchString = data[i].versionSearch || data[i].identity;\r
+ if (dataString) {\r
+ if (dataString.indexOf(data[i].subString) != -1)\r
+ return data[i].identity;\r
+ }\r
+ else if (dataProp)\r
+ return data[i].identity;\r
+ }\r
+ },\r
+ searchVersion: function (dataString) {\r
+ var index = dataString.indexOf(this.versionSearchString);\r
+ if (index == -1) return;\r
+ return parseFloat(dataString.substring(index+this.versionSearchString.length+1));\r
+ },\r
+ dataBrowser: [\r
+ {\r
+ string: navigator.userAgent,\r
+ subString: "Chrome",\r
+ identity: "Chrome"\r
+ },\r
+ { string: navigator.userAgent,\r
+ subString: "OmniWeb",\r
+ versionSearch: "OmniWeb/",\r
+ identity: "OmniWeb"\r
+ },\r
+ {\r
+ string: navigator.vendor,\r
+ subString: "Apple",\r
+ identity: "Safari",\r
+ versionSearch: "Version"\r
+ },\r
+ {\r
+ prop: window.opera,\r
+ identity: "Opera"\r
+ },\r
+ {\r
+ string: navigator.vendor,\r
+ subString: "iCab",\r
+ identity: "iCab"\r
+ },\r
+ {\r
+ string: navigator.vendor,\r
+ subString: "KDE",\r
+ identity: "Konqueror"\r
+ },\r
+ {\r
+ string: navigator.userAgent,\r
+ subString: "Firefox",\r
+ identity: "Firefox"\r
+ },\r
+ {\r
+ string: navigator.vendor,\r
+ subString: "Camino",\r
+ identity: "Camino"\r
+ },\r
+ { // for newer Netscapes (6+)\r
+ string: navigator.userAgent,\r
+ subString: "Netscape",\r
+ identity: "Netscape"\r
+ },\r
+ {\r
+ string: navigator.userAgent,\r
+ subString: "MSIE",\r
+ identity: "Explorer",\r
+ versionSearch: "MSIE"\r
+ },\r
+ {\r
+ string: navigator.userAgent,\r
+ subString: "Gecko",\r
+ identity: "Mozilla",\r
+ versionSearch: "rv"\r
+ },\r
+ { // for older Netscapes (4-)\r
+ string: navigator.userAgent,\r
+ subString: "Mozilla",\r
+ identity: "Netscape",\r
+ versionSearch: "Mozilla"\r
+ }\r
+ ],\r
+ dataOS : [\r
+ {\r
+ string: navigator.platform,\r
+ subString: "Win",\r
+ identity: "Windows"\r
+ },\r
+ {\r
+ string: navigator.platform,\r
+ subString: "Mac",\r
+ identity: "Mac"\r
+ },\r
+ {\r
+ string: navigator.userAgent,\r
+ subString: "iPhone",\r
+ identity: "iPhone/iPod"\r
+ },\r
+ {\r
+ string: navigator.platform,\r
+ subString: "Linux",\r
+ identity: "Linux"\r
+ }\r
+ ]\r
+\r
+};\r
+BrowserDetect.init();
\ No newline at end of file
.title, div.toc>p{
font-weight: bold;
- }
\ No newline at end of file
+ }
+
+/* OXYGEN ADDITION FOR SEARCH RESULT RATING */
+
+#star ul.star {
+ LIST-STYLE: none;
+ MARGIN: 0;
+ PADDING: 0;
+ WIDTH: 85px;
+ /* was 100 */
+ HEIGHT: 20px;
+ LEFT: 1px;
+ TOP: -5px;
+ POSITION: relative;
+ FLOAT: right;
+ BACKGROUND: url('../images/starsSmall.png') repeat-x 0 -25px;
+}
+#star li {
+ PADDING: 0;
+ MARGIN: 0;
+ FLOAT: right;
+ DISPLAY: block;
+ WIDTH: 85px;
+ /* was 100 */
+ HEIGHT: 20px;
+ TEXT-DECORATION: none;
+ text-indent: -9000px;
+ Z-INDEX: 20;
+ POSITION: absolute;
+ PADDING: 0;
+}
+#star li.curr {
+ BACKGROUND: url('../images/starsSmall.png') left 25px;
+ FONT-SIZE: 1px;
+}
+
\ No newline at end of file
if ($.cookie('ui-tabs-1') === '1') { //search tab is visible
if ($.cookie('textToSearch') != undefined && $.cookie('textToSearch').length > 0) {
document.getElementById('textToSearch').value = $.cookie('textToSearch');
- Verifie('diaSearch_Form');
+ Verifie('searchForm');
searchHighlight($.cookie('textToSearch'));
$("#showHideHighlight").css("display", "block");
}
var htmlfileinfoList = "htmlFileInfoList.js";\r
var useCJKTokenizing = false;\r
\r
+//-------------------------OXYGEN PATCH START-------------------------\r
+var w = new Object();\r
+var scoring = new Object();\r
+\r
+var searchTextField = '';\r
+var no = 0;\r
+var noWords = 0;\r
+var partialSearch = "<font class=\"highlightText\">There is no page containing all the search terms.<br>Partial results:</font>";\r
+var warningMsg = '<div style="padding: 5px;margin-right:5px;;background-color:#FFFF00;">';\r
+warningMsg+='<b>Please note that due to security settings, Google Chrome does not highlight';\r
+warningMsg+=' the search results in the right frame.</b><br>';\r
+warningMsg+='This happens only when the WebHelp files are loaded from the local file system.<br>';\r
+warningMsg+='Workarounds:';\r
+warningMsg+='<ul>';\r
+warningMsg+='<li>Try using another web browser.</li>';\r
+warningMsg+='<li>Deploy the WebHelp files on a web server.</li>';\r
+warningMsg+='</div>';\r
+txt_filesfound = 'Results';\r
+txt_enter_at_least_1_char = "You must enter at least one character.";\r
+txt_enter_more_than_10_words = "Only first 10 words will be processed.";\r
+txt_browser_not_supported = "Your browser is not supported. Use of Mozilla Firefox is recommended.";\r
+txt_please_wait = "Please wait. Search in progress...";\r
+txt_results_for = "Results for: ";\r
+//-------------------------OXYGEN PATCH END-------------------------\r
+\r
/* Cette fonction verifie la validite de la recherche entrre par l utilisateur */\r
function Verifie(ditaSearch_Form) {\r
\r
return;\r
}\r
\r
+ //-------------------------OXYGEN PATCH START-------------------------\r
+ /*\r
+ var expressionInput = document.ditaSearch_Form.textToSearch.value\r
+ */\r
+ searchTextField = trim(document.searchForm.textToSearch.value);\r
+ var expressionInput = searchTextField; \r
+\r
\r
- var expressionInput = document.ditaSearch_Form.textToSearch.value;\r
- //Set a cookie to store the searched keywords\r
$.cookie('textToSearch', expressionInput);\r
\r
+ //-------------------------OXYGEN PATCH END-------------------------\r
+\r
\r
if (expressionInput.length < 1) {\r
\r
// expression is invalid\r
alert(txt_enter_at_least_1_char);\r
// reactive la fenetre de search (utile car cadres)\r
+\r
+ //-------------------------OXYGEN PATCH START-------------------------\r
+ /*\r
document.ditaSearch_Form.textToSearch.focus();\r
+ */\r
+ document.searchForm.textToSearch.focus();\r
+ //-------------------------OXYGEN PATCH END-------------------------\r
}\r
else {\r
-\r
- // Effectuer la recherche\r
- Effectuer_recherche(expressionInput);\r
-\r
- // reactive la fenetre de search (utile car cadres)\r
- document.ditaSearch_Form.textToSearch.focus();\r
+ //-------------------------OXYGEN PATCH START-------------------------\r
+ // OXYGEN PATCH START - EXM-20996 - split by " ", ".", ":", "-"\r
+ var splitSpace = searchTextField.split(" ");\r
+ var splitWords = [];\r
+ for (var i = 0 ; i < splitSpace.length ; i++) {\r
+ var splitDot = splitSpace[i].split(".");\r
+ for (var i1 = 0; i1 < splitDot.length; i1++) {\r
+ var splitColon = splitDot[i1].split(":");\r
+ for (var i2 = 0; i2 < splitColon.length; i2++) {\r
+ var splitDash = splitColon[i2].split("-");\r
+ for (var i3 = 0; i3 < splitDash.length; i3++) {\r
+ if (splitDash[i3].split("").length > 0) {\r
+ splitWords.push(splitDash[i3]);\r
+ }\r
+ }\r
+ }\r
+ }\r
+ }\r
+ noWords = splitWords;\r
+ // OXYGEN PATCH END - EXM-20996 - split by " ", ".", ":", "-"\r
+ if (noWords.length > 9){\r
+ // Allow to search maximum 10 words\r
+ alert(txt_enter_more_than_10_words);\r
+ expressionInput = '';\r
+ for (var x = 0 ; x < 10 ; x++){\r
+ expressionInput = expressionInput + " " + noWords[x]; \r
+ } \r
+ Effectuer_recherche(expressionInput);\r
+ document.searchForm.textToSearch.focus();\r
+ } else {\r
+ // Effectuer la recherche\r
+ // OXYGEN PATCH START - EXM-20996\r
+ expressionInput = '';\r
+ for (var x = 0 ; x < noWords.length ; x++) {\r
+ expressionInput = expressionInput + " " + noWords[x]; \r
+ }\r
+ // OXYGEN PATCH END - EXM-20996\r
+ Effectuer_recherche(expressionInput);\r
+ // reactive la fenetre de search (utile car cadres)\r
+ /*\r
+ document.ditaSearch_Form.textToSearch.focus();\r
+ */\r
+ document.searchForm.textToSearch.focus(); \r
+ //-------------------------OXYGEN PATCH END-------------------------\r
+ }\r
}\r
}\r
\r
useCJKTokenizing=false;\r
}\r
//If Lucene CJKTokenizer was used as the indexer, then useCJKTokenizing will be true. Else, do normal tokenizing.\r
- // 2-gram tokenizinghappens in CJKTokenizing, \r
- if(useCJKTokenizing){\r
- finalWordsList = cjkTokenize(wordsList);\r
- } else { \r
- finalWordsList = tokenize(wordsList);\r
- }\r
+ // 2-gram tokenizinghappens in CJKTokenizing, \r
+ // OXYGEN PATCH START. If doStem then make tokenize with Stemmer\r
+ var finalArray;\r
+ if (doStem){\r
+ // OXYGEN PATCH END.\r
+ if(useCJKTokenizing){\r
+ finalWordsList = cjkTokenize(wordsList);\r
+ finalArray = finalWordsList;\r
+ } else { \r
+ finalWordsList = tokenize(wordsList);\r
+ finalArray = finalWordsList;\r
+ }\r
+ } else if(useCJKTokenizing){\r
+ finalWordsList = cjkTokenize(wordsList);\r
+ finalArray = finalWordsList;\r
+ } else{\r
\r
//load the scripts with the indices: the following lines do not work on the server. To be corrected\r
/*if (IEBrowser) {\r
* Compare with the indexed words (in the w[] array), and push words that are in it to tempTab.\r
*/\r
var tempTab = new Array();\r
- for (var t in finalWordsList) {\r
- if (w[finalWordsList[t].toString()] == undefined) {\r
- txt_wordsnotfound += finalWordsList[t] + " ";\r
- } else {\r
- tempTab.push(finalWordsList[t]);\r
- }\r
+ \r
+ var splitedValues = expressionInput.split(" ");\r
+ finalWordsList = finalWordsList.concat(splitedValues);\r
+ finalArray = finalWordsList;\r
+ finalArray = removeDuplicate(finalArray);\r
+ // OXYGEN PATCH START.\r
+ var wordsArray = '';\r
+ // OXYGEN PATCH END.\r
+ for (var t in finalWordsList) { \r
+ // OXYGEN PATCH START.\r
+ if (doStem){\r
+ // OXYGEN PATCH END.\r
+ if (w[finalWordsList[t].toString()] == undefined) {\r
+ txt_wordsnotfound += finalWordsList[t] + " ";\r
+ } else {\r
+ tempTab.push(finalWordsList[t]);\r
+ }\r
+ // OXYGEN PATCH START.\r
+ } else {\r
+ var searchedValue = finalWordsList[t].toString();\r
+ if (wordsStartsWith(searchedValue) != undefined){\r
+ wordsArray+=wordsStartsWith(searchedValue);\r
+ }\r
+ }\r
+ // OXYGEN PATCH END.\r
}\r
- finalWordsList = tempTab;\r
+ // OXYGEN PATCH START.\r
+ wordsArray = wordsArray.substr(0, wordsArray.length - 1); \r
+ if (!doStem){ \r
+ finalWordsList = wordsArray.split(",");\r
+ } else {\r
+ finalWordsList = tempTab; \r
+ }\r
+ // OXYGEN PATCH END.\r
\r
+ //-------------------------OXYGEN PATCH START-----------------------\r
+ txt_wordsnotfound = expressionInput;\r
+ finalWordsList = removeDuplicate(finalWordsList);\r
+ //-------------------------OXYGEN PATCH END-------------------------\r
+ }\r
if (finalWordsList.length) {\r
-\r
- //search 'and' and 'or' one time\r
- fileAndWordList = SortResults(finalWordsList);\r
-\r
- var cpt = fileAndWordList.length;\r
- for (var i = cpt - 1; i >= 0; i--) {\r
+ //search 'and' and 'or' one time\r
+ fileAndWordList = SortResults(finalWordsList);\r
+ //-------------------------OXYGEN PATCH START-----------------------\r
+ if (fileAndWordList == undefined){\r
+ var cpt = 0;\r
+ } else {\r
+ var cpt = fileAndWordList.length;\r
+ var maxNumberOfWords = fileAndWordList[0][0].motsnb;\r
+ }\r
+ if (cpt > 0){\r
+ var searchedWords = noWords.length;\r
+ var foundedWords = fileAndWordList[0][0].motslisteDisplay.split(",").length;\r
+ //console.info("search : " + noWords.length + " found : " + fileAndWordList[0][0].motslisteDisplay.split(",").length);\r
+ if (searchedWords != foundedWords){\r
+ linkTab.push(partialSearch);\r
+ }\r
+ }\r
+ \r
+ //-------------------------OXYGEN PATCH END-----------------------\r
+ for (var i = 0; i < cpt; i++) {\r
+ //-------------------------OXYGEN PATCH START-----------------------\r
+ var hundredProcent = fileAndWordList[i][0].scoring + 100 * fileAndWordList[i][0].motsnb;\r
+ var ttScore_first = fileAndWordList[i][0].scoring;\r
+ var numberOfWords = fileAndWordList[i][0].motsnb;\r
+ //-------------------------OXYGEN PATCH END-----------------------\r
if (fileAndWordList[i] != undefined) {\r
linkTab.push("<p>" + txt_results_for + " " + "<span class=\"searchExpression\">" + fileAndWordList[i][0].motslisteDisplay + "</span>" + "</p>");\r
\r
linkTab.push("<ul class='searchresult'>");\r
for (t in fileAndWordList[i]) {\r
- //DEBUG: alert(": "+ fileAndWordList[i][t].filenb+" " +fileAndWordList[i][t].motsliste);\r
//linkTab.push("<li><a href=\"../"+fl[fileAndWordList[i][t].filenb]+"\">"+fl[fileAndWordList[i][t].filenb]+"</a></li>");\r
- var tempInfo = fil[fileAndWordList[i][t].filenb];\r
+ //-------------------------OXYGEN PATCH START----------------------- \r
+ var ttInfo = fileAndWordList[i][t].filenb;\r
+ // Get scoring\r
+ var ttScore = fileAndWordList[i][t].scoring;\r
+ var tempInfo = fil[ttInfo];\r
+ //-------------------------OXYGEN PATCH END-----------------------\r
var pos1 = tempInfo.indexOf("@@@");\r
var pos2 = tempInfo.lastIndexOf("@@@");\r
var tempPath = tempInfo.substring(0, pos1);\r
var tempTitle = tempInfo.substring(pos1 + 3, pos2);\r
var tempShortdesc = tempInfo.substring(pos2 + 3, tempInfo.length);\r
\r
+ //-------------------------OXYGEN PATCH START-------------------------\r
+ // toc.html will not be displayed on search result\r
+ if (tempPath == 'toc.html'){\r
+ continue;\r
+ }\r
+ /*\r
//file:///home/kasun/docbook/WEBHELP/webhelp-draft-output-format-idea/src/main/resources/web/webhelp/installation.html\r
var linkString = "<li><a href=" + tempPath + ">" + tempTitle + "</a>";\r
// var linkString = "<li><a href=\"installation.html\">" + tempTitle + "</a>";\r
- if ((tempShortdesc != "null")) {\r
+ */\r
+ var split = fileAndWordList[i][t].motsliste.split(",");\r
+ // var splitedValues = expressionInput.split(" ");\r
+ // var finalArray = split.concat(splitedValues); \r
+ \r
+ arrayString = 'Array(';\r
+ for(var x in finalArray){\r
+ if (finalArray[x].length > 2 || useCJKTokenizing){\r
+ arrayString+= "'" + finalArray[x] + "',";\r
+ } \r
+ }\r
+ arrayString = arrayString.substring(0,arrayString.length - 1) + ")";\r
+ var idLink = 'foundLink' + no;\r
+ var link = 'openAndHighlight(\'' + tempPath + '\', ' + arrayString + ', \'' + idLink + '\')';\r
+ var linkString = '<li><a id="' + idLink + '" href="' + tempPath + '" class="foundResult" onclick="'+link+'">' + tempTitle + '</a>';\r
+ var starWidth = (ttScore * 100/ hundredProcent)/(ttScore_first/hundredProcent) * (numberOfWords/maxNumberOfWords);\r
+ starWidth = starWidth < 10 ? (starWidth + 5) : starWidth;\r
+ // Keep the 5 stars format\r
+ if (starWidth > 85){\r
+ starWidth = 85;\r
+ }\r
+ /*\r
+ var noFullStars = Math.ceil(starWidth/17);\r
+ var fullStar = "curr";\r
+ var emptyStar = "";\r
+ if (starWidth % 17 == 0){\r
+ // am stea plina\r
+ \r
+ } else {\r
+ \r
+ }\r
+ console.info(noFullStars);\r
+ */\r
+ // Also check if we have a valid description\r
+ if ((tempShortdesc != "null" && tempShortdesc != '...')) {\r
+ //-------------------------OXYGEN PATCH END-------------------------\r
linkString += "\n<div class=\"shortdesclink\">" + tempShortdesc + "</div>";\r
}\r
linkString += "</li>";\r
+ //-------------------------OXYGEN PATCH START-------------------------\r
+ // Add rating values for scoring at the list of matches \r
+ linkString += "<div id=\"rightDiv\">";\r
+ linkString += "<div id=\"star\">";\r
+ //linkString += "<div style=\"color: rgb(136, 136, 136);\" id=\"starUser0\" class=\"user\">" \r
+ // + ((ttScore * 100/ hundredProcent)/(ttScore_first/hundredProcent)) * 1 + "</div>";\r
+ linkString += "<ul id=\"star0\" class=\"star\">";\r
+ linkString += "<li id=\"starCur0\" class=\"curr\" style=\"width: " + starWidth + "px;\"></li>";\r
+ linkString += "</ul>";\r
+ \r
+ linkString += "<br style=\"clear: both;\">";\r
+ linkString += "</div>";\r
+ linkString += "</div>";\r
+ //linkString += '<b>Rating: ' + ttScore + '</b>';\r
+ //-------------------------OXYGEN PATCH END------------------------- \r
linkTab.push(linkString);\r
+ no++;\r
}\r
linkTab.push("</ul>");\r
}\r
}\r
results += "</p>";\r
} else {\r
- results = "<p>" + localeresource.search_no_results + "</p>";\r
+ results = "<p>" + localeresource.search_no_results + " <span class=\"searchExpression\">" + txt_wordsnotfound + "</span>" + "</p>";\r
}\r
- //alert(results);\r
- document.getElementById('searchResults').innerHTML = results; \r
+ \r
+ //-------------------------OXYGEN PATCH START-------------------------\r
+ // Verify if the browser is Google Chrome and the WebHelp is used on a local machine\r
+ // If browser is Google Chrome and WebHelp is used on a local machine a warning message will appear\r
+ // Highlighting will not work in this conditions. There is 2 workarounds\r
+ if (verifyBrowser()){\r
+ document.getElementById('searchResults').innerHTML = results;\r
+ } else {\r
+ document.getElementById('searchResults').innerHTML = warningMsg + results;\r
+ }\r
+ //-------------------------OXYGEN PATCH END-------------------------\r
+}\r
+\r
+//-------------------------OXYGEN PATCH START-------------------------\r
+// Verify if the stemmed word is aproximately the same as the searched word\r
+function verifyWord(word, arr){\r
+ for (var i = 0 ; i < arr.length ; i++){\r
+ if (word[0] == arr[i][0] \r
+ && word[1] == arr[i][1] \r
+ //&& word[2] == arr[i][2]\r
+ ){\r
+ return true;\r
+ }\r
+ }\r
+ return false;\r
+}\r
+\r
+// Look for elements that start with searchedValue.\r
+function wordsStartsWith(searchedValue){\r
+ var toReturn = '';\r
+ for (var sv in w){\r
+ if (searchedValue.length < 3){\r
+ continue;\r
+ } else {\r
+ if (sv.toLowerCase().indexOf(searchedValue.toLowerCase()) == 0){\r
+ toReturn+=sv + ","; \r
+ }\r
+ }\r
+ }\r
+ return toReturn.length > 0 ? toReturn : undefined;\r
}\r
+//-------------------------OXYGEN PATCH END-------------------------\r
\r
function tokenize(wordsList){\r
var stemmedWordsList = new Array(); // Array with the words to look for after removing spaces\r
\r
if(typeof stemmer != "undefined" ){\r
//Do the stemming using Porter's stemming algorithm\r
- for (var i = 0; i < cleanwordsList.length; i++) {\r
- var stemWord = stemmer(cleanwordsList[i]);\r
+ for (var i = 0; i < cleanwordsList.length; i++) { \r
+ var stemWord = stemmer(cleanwordsList[i]); \r
stemmedWordsList.push(stemWord);\r
}\r
} else {\r
a[0] = tab[0];\r
}\r
else {\r
- return -1\r
+ return -1;\r
}\r
\r
for (i = 1; i < l; i++) {\r
function SortResults(mots) {\r
\r
var fileAndWordList = new Object();\r
- if (mots.length == 0) {\r
+ if (mots.length == 0 || mots[0].length == 0) {\r
return null;\r
}\r
-\r
+ \r
+ //-------------------------OXYGEN PATCH START-------------------------\r
+ // In the generated js file, a scoring value is appended to each word.\r
+ // Example: word1*scoringForWord1,word2*scoringForWord2 and so on.\r
+ // Split on '*' to obtain the separate values.\r
+ var scoringArr = Array();\r
for (var t in mots) {\r
// get the list of the indices of the files.\r
- var listNumerosDesFicStr = w[mots[t].toString()];\r
+ var listNumerosDesFicStr = w[mots[t].toString()]; \r
//alert ("listNumerosDesFicStr "+listNumerosDesFicStr);\r
var tab = listNumerosDesFicStr.split(",");\r
-\r
//for each file (file's index):\r
for (var t2 in tab) {\r
+ var tmp = '';\r
+ var idx = '';\r
var temp = tab[t2].toString();\r
+ if (temp.indexOf('*') != -1){\r
+ idx = temp.indexOf('*');\r
+ tmp = temp.substring(idx + 3, temp.length);\r
+ temp = temp.substring(0,idx);\r
+ }\r
+ scoringArr.push(tmp);\r
if (fileAndWordList[temp] == undefined) {\r
-\r
fileAndWordList[temp] = "" + mots[t];\r
} else {\r
-\r
fileAndWordList[temp] += "," + mots[t];\r
}\r
+ //console.info("fileAndWordList[" + temp + "]=" + fileAndWordList[temp] + " : " + tmp);\r
}\r
}\r
-\r
var fileAndWordListValuesOnly = new Array();\r
-\r
// sort results according to values\r
var temptab = new Array();\r
- for (t in fileAndWordList) {\r
- tab = fileAndWordList[t].split(',');\r
-\r
+ finalObj = new Array();\r
+ for (t in fileAndWordList) { \r
+ finalObj.push(new newObj(t,fileAndWordList[t]));\r
+ } \r
+ finalObj = removeDerivates(finalObj);\r
+ for (t in finalObj) {\r
+ tab = finalObj[t].wordList.split(',');\r
var tempDisplay = new Array();\r
- for (var x in tab) {\r
- if(stemQueryMap[tab[x]] != undefined){\r
- tempDisplay.push(stemQueryMap[tab[x]]); //get the original word from the stem word.\r
+ for (var x in tab) { \r
+ if(stemQueryMap[tab[x]] != undefined && doStem){\r
+ tempDisplay.push(stemQueryMap[tab[x]]); //get the original word from the stem word. \r
} else {\r
tempDisplay.push(tab[x]); //no stem is available. (probably a CJK language)\r
}\r
}\r
var tempDispString = tempDisplay.join(", ");\r
-\r
- temptab.push(new resultPerFile(t, fileAndWordList[t], tab.length, tempDispString));\r
- fileAndWordListValuesOnly.push(fileAndWordList[t]);\r
+ var index;\r
+ for (x in fileAndWordList) {\r
+ if (x === finalObj[t].filesNo) {\r
+ index = x;\r
+ break;\r
+ }\r
+ }\r
+ var scoring = findRating(fileAndWordList[index], index); \r
+ temptab.push(new resultPerFile(finalObj[t].filesNo, finalObj[t].wordList, tab.length, tempDispString, scoring));\r
+ fileAndWordListValuesOnly.push(finalObj[t].wordList); \r
}\r
-\r
-\r
- //alert("t"+fileAndWordListValuesOnly.toString());\r
-\r
fileAndWordListValuesOnly = unique(fileAndWordListValuesOnly);\r
fileAndWordListValuesOnly = fileAndWordListValuesOnly.sort(compare_nbMots);\r
- //alert("t: "+fileAndWordListValuesOnly.join(';'));\r
\r
var listToOutput = new Array();\r
-\r
for (var j in fileAndWordListValuesOnly) {\r
for (t in temptab) {\r
if (temptab[t].motsliste == fileAndWordListValuesOnly[j]) {\r
}\r
}\r
}\r
- }\r
+ } \r
+ // Sort results by scoring, descending on the same group\r
+ for (var i in listToOutput) {\r
+ listToOutput[i].sort(function(a, b){\r
+ return b.scoring - a.scoring;\r
+ });\r
+ }\r
+ // If there are groups with the same number of words,\r
+ // sort the groups by the highest scoring within each group\r
+ for (var i = 0; i < listToOutput.length - 1; i++) {\r
+ for (var j = i + 1; j < listToOutput.length; j++) {\r
+ if (listToOutput[i][0].motsnb < listToOutput[j][0].motsnb \r
+ || (listToOutput[i][0].motsnb == listToOutput[j][0].motsnb\r
+ && listToOutput[i][0].scoring < listToOutput[j][0].scoring)\r
+ ) {\r
+ var x = listToOutput[i];\r
+ listToOutput[i] = listToOutput[j];\r
+ listToOutput[j] = x;\r
+ }\r
+ }\r
+ }\r
+\r
return listToOutput;\r
}\r
\r
-function resultPerFile(filenb, motsliste, motsnb, motslisteDisplay) {\r
+// Collapse derived (stemmed) word forms back to the searched words in the list\r
+function removeDerivates(obj){\r
+ var toResultObject = new Array(); \r
+ for (i in obj){\r
+ var filesNo = obj[i].filesNo;\r
+ var wordList = obj[i].wordList;\r
+ var wList = wordList.split(","); \r
+ var searchedWords = searchTextField.toLowerCase().split(" ");\r
+ for (var k = 0 ; k < searchedWords.length ; k++){\r
+ for (var j = 0 ; j < wList.length ; j++){ \r
+ if (wList[j].startsWith(searchedWords[k])){\r
+ wList[j] = searchedWords[k];\r
+ }\r
+ }\r
+ }\r
+ wList = removeDuplicate(wList);\r
+ var recreateList = '';\r
+ for(var x in wList){\r
+ recreateList+=wList[x] + ",";\r
+ }\r
+ recreateList = recreateList.substr(0, recreateList.length - 1);\r
+ toResultObject.push(new newObj(filesNo, recreateList));\r
+ }\r
+ return toResultObject;\r
+}\r
+\r
+function newObj(filesNo, wordList){\r
+ this.filesNo = filesNo;\r
+ this.wordList = wordList;\r
+}\r
+//-------------------------OXYGEN PATCH END-------------------------\r
+\r
+\r
+// Object.\r
+// Oxygen. Add a new parameter. Scoring.\r
+function resultPerFile(filenb, motsliste, motsnb, motslisteDisplay, scoring, group) {\r
+ //10 - spring,time - 2 - spring, time - 55 - 3\r
this.filenb = filenb;\r
this.motsliste = motsliste;\r
this.motsnb = motsnb;\r
this.motslisteDisplay= motslisteDisplay;\r
+ //-------------------------OXYGEN PATCH START-------------------------\r
+ this.scoring = scoring;\r
+ //-------------------------OXYGEN PATCH END-------------------------\r
}\r
\r
+//-------------------------OXYGEN PATCH START-------------------------\r
+function findRating(words, nr){\r
+ var sum = 0;\r
+ var xx = words.split(',');\r
+ for (jj = 0 ; jj < xx.length ; jj++){\r
+ var wrd = w[xx[jj]].split(',');\r
+ for (var ii = 0 ; ii < wrd.length ; ii++){\r
+ var wrdno = wrd[ii].split('*');\r
+ if (wrdno[0] == nr){\r
+ sum+=parseInt(wrdno[1]);\r
+ }\r
+ }\r
+ }\r
+ return sum;\r
+}\r
+//-------------------------OXYGEN PATCH END-------------------------\r
function compare_nbMots(s1, s2) {\r
var t1 = s1.split(',');\r
var t2 = s2.split(',');\r
return -1;\r
}\r
//return t1.length - t2.length);\r
-}
\ No newline at end of file
+}\r
+//-------------------------OXYGEN PATCH START-------------------------\r
+// Return false if the browser is Google Chrome and the WebHelp is used on a local machine, not a web server\r
+function verifyBrowser(){\r
+ var returnedValue = true; \r
+ var browser = BrowserDetect.browser;\r
+ var addressBar = window.location.href;\r
+ if (browser == 'Chrome' && addressBar.indexOf('file://') === 0){\r
+ returnedValue = false;\r
+ }\r
+ \r
+ return returnedValue;\r
+}\r
+\r
+// Remove duplicate values from an array\r
+function removeDuplicate(arr) {\r
+ var r = new Array();\r
+ o:for(var i = 0, n = arr.length; i < n; i++) {\r
+ for(var x = 0, y = r.length; x < y; x++) {\r
+ if(r[x]==arr[i]) continue o;\r
+ }\r
+ r[r.length] = arr[i];\r
+ }\r
+ return r;\r
+}\r
+\r
+// Create startsWith method\r
+String.prototype.startsWith = function(str) {\r
+ return (this.match("^"+str)==str);\r
+}\r
+\r
+function trim(str, chars) {\r
+ return ltrim(rtrim(str, chars), chars);\r
+}\r
+ \r
+function ltrim(str, chars) {\r
+ chars = chars || "\\s";\r
+ return str.replace(new RegExp("^[" + chars + "]+", "g"), "");\r
+}\r
+ \r
+function rtrim(str, chars) {\r
+ chars = chars || "\\s";\r
+ return str.replace(new RegExp("[" + chars + "]+$", "g"), "");\r
+}\r
+\r
+//-------------------------OXYGEN PATCH END-------------------------
\ No newline at end of file
<link rel="stylesheet" type="text/css" href="{$webhelp.common.dir}jquery/theme-redmond/jquery-ui-1.8.2.custom.css"/>
<link rel="stylesheet" type="text/css" href="{$webhelp.common.dir}jquery/treeview/jquery.treeview.css"/>
+ <!--
+ browserDetect is an Oxygen addition to warn the user if they're using chrome from the file system.
+ This breaks the Oxygen search highlighting.
+ -->
+ <script type="text/javascript" src="../common/browserDetect.js">
+ <xsl:comment> </xsl:comment>
+ </script>
<script type="text/javascript" src="{$webhelp.common.dir}jquery/jquery-1.4.2.min.js">
<xsl:comment> </xsl:comment>
</script>
<!--Scripts/css stylesheets for Search-->
<!-- TODO: Why THREE files? There's absolutely no need for having separate files.
These should have been identified at the optimization phase! -->
+ <script type="text/javascript" src="search/l10n.js">
+ <xsl:comment></xsl:comment>
+ </script>
<script type="text/javascript" src="search/htmlFileList.js">
<xsl:comment> </xsl:comment>
</script>
<xsl:if test="$webhelp.include.search.tab != 'false'">
<div id="searchDiv">
<div id="search">
- <form onsubmit="Verifie(ditaSearch_Form);return false"
- name="ditaSearch_Form"
+ <form onsubmit="Verifie(searchForm);return false"
+ name="searchForm"
class="searchForm">
<fieldset class="searchFieldSet">
<legend>
<input id="textToSearch" name="textToSearch" type="text"
class="searchText" tabindex="1"/>
<xsl:text disable-output-escaping="yes"> <![CDATA[ ]]> </xsl:text>
- <input onclick="Verifie(ditaSearch_Form)" type="button"
+ <input onclick="Verifie(searchForm)" type="button"
class="searchButton"
value="Go" id="doSearch" tabindex="1"/>
</center>