+++ /dev/null
-/*
- *******************************************************************************
- * Copyright (C) 1996-2009, International Business Machines Corporation and *
- * others. All Rights Reserved. *
- *******************************************************************************
- */
-package com.ibm.icu.dev.tool.rbbi;
-
-import java.io.DataOutputStream;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.OutputStreamWriter;
-import java.io.PrintWriter;
-import java.io.UnsupportedEncodingException;
-import java.util.Vector;
-
-import com.ibm.icu.text.UnicodeSet;
-import com.ibm.icu.util.CompactByteArray;
-
-public class BuildDictionaryFile {
- public static void main(String args[])
- throws FileNotFoundException, UnsupportedEncodingException, IOException {
- String filename = args[0];
- String encoding = "";
- String outputFile = "";
- String listingFile = "";
-
- if (args.length >= 2)
- encoding = args[1];
-
- if(args.length >= 3)
- outputFile = args[2];
-
- if (args.length >= 4)
- listingFile = args[3];
-
- BuildDictionaryFile dictionary = new BuildDictionaryFile();
- dictionary.build(filename, encoding);
-
- DataOutputStream out = null;
- if (outputFile.length() != 0) {
- out = new DataOutputStream(new FileOutputStream(outputFile));
- dictionary.writeDictionaryFile(out);
- }
-
- PrintWriter listing = null;
- if (listingFile.length() != 0) {
- listing = new PrintWriter(new OutputStreamWriter(new FileOutputStream(listingFile), "UnicodeLittle"));
- dictionary.printWordList("", 0, listing);
- listing.close();
- }
- }
-
- public BuildDictionaryFile() {
- }
-
- public void build(String filename, String encoding)
- throws FileNotFoundException, UnsupportedEncodingException, IOException {
- FileInputStream file = new FileInputStream(filename);
- InputStreamReader in;
- if (encoding.length() == 0)
- in = new InputStreamReader(file);
- else
- in = new InputStreamReader(file, encoding);
-
- buildColumnMap(in);
-
- file = new FileInputStream(filename);
- if (encoding.length() == 0)
- in = new InputStreamReader(file);
- else
- in = new InputStreamReader(file, encoding);
-
- buildStateTable(in);
-//printTable();
- }
-
- public void buildColumnMap(InputStreamReader in) throws IOException {
-System.out.println("Building column map...");
- UnicodeSet charsInFile = new UnicodeSet();
- int c = in.read();
-int totalChars = 0;
- while (c >= 0) {
-++totalChars; if (totalChars > 0 && totalChars % 5000 == 0) System.out.println("Read " + totalChars + " characters...");
- if (c > ' ')
- charsInFile.add((char)c);
- c = in.read();
- }
-// Test.debugPrintln(charsInFile.toString());
-
- StringBuffer tempReverseMap = new StringBuffer();
- tempReverseMap.append(' ');
-
- columnMap = new CompactByteArray();
- int n = charsInFile.getRangeCount();
- byte p = 1;
- for (int i=0; i<n; ++i) {
- char start = (char) charsInFile.getRangeStart(i);
- char end = (char) charsInFile.getRangeEnd(i);
- for (char ch = start; ch <= end; ch++) {
- if (columnMap.elementAt(Character.toLowerCase(ch)) == 0) {
- columnMap.setElementAt(Character.toUpperCase(ch), Character.toUpperCase(ch),
- p);
- columnMap.setElementAt(Character.toLowerCase(ch), Character.toLowerCase(ch),
- p);
- ++p;
- tempReverseMap.append(ch);
- }
- }
- }
-//System.out.println("Compacting...");
- columnMap.compact();
-
-//System.out.println(tempReverseMap.toString());
- reverseColumnMap = new char[p];
- if (0 != p) {
- tempReverseMap.getChars(0, p, reverseColumnMap, 0);
- }
-
- System.out.println("total columns = " + p);
- numCols = p;
- numColGroups = (numCols >> 5) + 1;
-
-/*
-short[] index = columnMap.getIndexArray();
-System.out.println("Index:");
-for (int i = 0; i < index.length; i++) {
-if (i % 16 == 0) {
-System.out.println();
-System.out.print(" " + Integer.toHexString(i * 128) + ":");
-}
-System.out.print("\t" + Integer.toHexString(index[i]));
-}
-System.out.println();
-byte[] data = columnMap.getStringArray();
-System.out.print("Values:");
-for (int i = 0; i < data.length; i++) {
-if (i % 16 == 0) {
-System.out.println();
-System.out.print(" " + Integer.toHexString(i) + ":");
-}
-if (data[i] == 0)
-System.out.print("\t.");
-else
-System.out.print("\t" + Integer.toString(data[i]));
-}
-System.out.println();
-*/
- }
-
- public void buildStateTable(InputStreamReader in) throws IOException {
- Vector tempTable = new Vector();
- tempTable.addElement(new int[numCols + 1]);
- int state = 0;
-
- int c = in.read();
- int[] row = null;
- int charsInWord = 0;
- while (c >= 0) {
- charsInWord++;
- short column = columnMap.elementAt((char)c);
-
- row = (int[])(tempTable.elementAt(state));
- if (column != 0) {
- if (row[column] == 0) {
- row[column] = tempTable.size();
- ++row[numCols];
- state = (tempTable.size());
- tempTable.addElement(new int[numCols + 1]);
- }
- else
- state = row[column];
- }
- else if (state != 0) {
- if (row[0] != -1) {
- row[0] = -1;
- ++row[numCols];
- uniqueWords++;
- totalUniqueWordChars += charsInWord;
- }
- totalWords++;
-if (totalWords % 5000 == 0) System.out.println("Read " + totalWords + " words, " + tempTable.size() + " rows...");
- charsInWord = 0;
- state = 0;
- }
- c = in.read();
- }
- if (state != 0) {
- row = (int[])(tempTable.elementAt(state));
- if (row[0] != -1) {
- row[0] = -1;
- uniqueWords++;
- totalUniqueWordChars += charsInWord;
- }
- totalWords++;
- }
-
- compress(tempTable);
-
- table = new short[numCols * tempTable.size()];
- for (int i = 0; i < tempTable.size(); i++) {
- row = (int[])tempTable.elementAt(i);
- for (int j = 0; j < numCols; j++)
- table[i * numCols + j] = (short)row[j];
- }
- }
-
- private void compress(Vector tempTable) {
-System.out.println("Before compression:");
-System.out.println(" Number of rows = " + tempTable.size());
-System.out.println(" Number of columns = " + numCols);
-System.out.println(" Number of cells = " + tempTable.size() * numCols);
- deleteDuplicateRows(tempTable);
-System.out.println("After removing duplicate rows:");
-System.out.println(" Number of rows = " + tempTable.size());
-System.out.println(" Number of columns = " + numCols);
-System.out.println(" Number of cells = " + tempTable.size() * numCols);
- stackRows(tempTable);
-if (tempTable.size() > 32767) throw new IllegalArgumentException("Too many rows in table!");
-System.out.println("After doubling up on rows:");
-System.out.println(" Number of rows = " + tempTable.size());
-System.out.println(" Number of columns = " + numCols);
-System.out.println(" Number of cells = " + tempTable.size() * numCols);
- }
-
-/*
-experimental...
- private void deleteDuplicateRows(Vector tempTable) {
- int[] rowNumMap = new int[tempTable.size()];
- for (int i = 0; i < rowNumMap.length; i++)
- rowNumMap[i] = i;
-
- int nextClass = numCols;
- int currentClass;
- int lastClass;
- boolean split;
- int[] row1, row2, tempRow;
- int tempCat;
-
- do {
-System.out.println("Making a pass (" + nextClass + " classes)...");
- currentClass = 0;
- lastClass = nextClass;
- while (currentClass < nextClass) {
-System.out.println(" currentClass = " + currentClass);
- split = false;
- row1 = row2 = null;
- for (int i = 0; i < tempTable.size(); i++) {
- tempRow = (int[])tempTable.elementAt(i);
- if (tempRow[numCols] == currentClass) {
- if (row1 == null) {
- row1 = (int[])tempTable.elementAt(i);
- }
- else {
- row2 = (int[])tempTable.elementAt(i);
- for (int j = 0; j < numCols; j++) {
- if ((row1[j] == 0) != (row2[j] == 0) ||
- (row1[j] == -1) != (row2[j] == -1)) {
- row2[numCols] = nextClass;
- split = true;
- break;
- }
- else if (row1[j] != 0 && row2[j] != 0 && row1[j] != -1
- && row2[j] != -1) {
- tempRow = (int[])tempTable.elementAt(row1[j]);
- tempCat = tempRow[numCols];
- tempRow = (int[])tempTable.elementAt(row2[j]);
- if (tempCat != tempRow[numCols]) {
- row2[numCols] = nextClass;
- split = true;
- break;
- }
- }
- }
- }
- }
- }
- if (split)
- ++nextClass;
- ++currentClass;
-//System.out.println();
- }
- } while (lastClass != nextClass);
-
- int[] representatives = new int[nextClass];
- for (int i = 1; i < tempTable.size(); i++) {
- tempRow = (int[])tempTable.elementAt(i);
- if (representatives[tempRow[numCols]] == 0)
- representatives[tempRow[numCols]] = i;
- else
- rowNumMap[i] = representatives[tempRow[numCols]];
- }
-System.out.println("Renumbering...");
-
- // renumber all remaining rows
- for (int i = 0; i < rowNumMap.length; i++)
- if (rowNumMap[i] != i)
- tempTable.setElementAt(null, i);
- int newRowNum = 0;
- for (int i = 0; i < rowNumMap.length; i++)
- if (tempTable.elementAt(i) != null)
- rowNumMap[i] = newRowNum++;
- for (int i = 0; i < rowNumMap.length; i++)
- if (tempTable.elementAt(i) == null)
- rowNumMap[i] = rowNumMap[rowNumMap[i]];
-
- for (int i = tempTable.size() - 1; i >= 0; i--) {
- tempRow = (int[])tempTable.elementAt(i);
- if (tempRow == null)
- tempTable.removeElementAt(i);
- else {
- for (int j = 0; j < numCols; j++)
- if (tempRow[j] != -1)
- tempRow[j] = rowNumMap[j];
- }
- }
-//for (int i = 1; i < rowNumMap.length; i++) rowNumMap[i] = i; int newRowNum = rowNumMap.length;
- }
-*/
-
- private void deleteDuplicateRows(Vector tempTable) {
- Vector work = (Vector)(tempTable.clone());
- boolean didDeleteRow = true;
-
- Vector tempMapping = new Vector(work.size());
- int[] mapping = new int[work.size()];
- for (int i = 0; i < mapping.length; i++) {
- mapping[i] = i;
- tempMapping.addElement(new Integer(i));
- }
- boolean[] tbd = new boolean[work.size()];
-
- while (didDeleteRow) {
-System.out.println(" " + work.size() + " rows...");
-int deletedRows = 0;
- didDeleteRow = false;
-
- sortTable(work, tempMapping, mapping, 1, work.size());
- for (int i = 0; i < work.size() - 1; ) {
-System.out.print("Deleting, inspecting row " + i + ", deleted " + deletedRows + " rows...\r");
- int rowToDelete = ((Integer)(tempMapping.elementAt(i + 1))).intValue();
- int rowToMapTo = ((Integer)(tempMapping.elementAt(i))).intValue();
- if (compareRows((int[])work.elementAt(i), (int[])work.elementAt(i + 1),
- mapping) == 0) {
- tbd[rowToDelete] = true;
- tempTable.setElementAt(null, rowToDelete);
- while (tbd[mapping[rowToMapTo]])
- mapping[rowToMapTo] = mapping[mapping[rowToMapTo]];
- mapping[rowToDelete] = mapping[rowToMapTo];
- didDeleteRow = true;
-deletedRows++;
- work.removeElementAt(i + 1);
- tempMapping.removeElementAt(i + 1);
- }
- else
- i++;
- }
- for (int i = 0; i < mapping.length; i++) {
- if (tbd[i] && tbd[mapping[i]])
- mapping[i] = mapping[mapping[i]];
- }
- }
-
- int decrementBy = 0;
- for (int i = 0; i < mapping.length; i++) {
- if (tbd[i])
- decrementBy++;
- else
- mapping[i] -= decrementBy;
- }
- for (int i = 0; i < mapping.length; i++) {
- if (tbd[i])
- mapping[i] = mapping[mapping[i]];
- }
- for (int i = tempTable.size() - 1; i >= 0; i--) {
- if (tbd[i])
- tempTable.removeElementAt(i);
- else {
- int[] row = (int[])tempTable.elementAt(i);
- for (int j = 0; j < numCols; j++)
- row[j] = (row[j] == -1) ? -1 : mapping[row[j]];
- }
- }
- }
-
- private void sortTable(Vector tbl, Vector tempMapping, int[] mapping, int start, int end) {
-System.out.print("Sorting (" + start + ", " + end + ")...\r");
- if (start + 1 >= end)
- return;
- else if (start + 10 >= end) {
- for (int i = start + 1; i < end; i++) {
- int[] row = (int[])tbl.elementAt(i);
- Integer tempMap = (Integer)tempMapping.elementAt(i);
- int j;
- for (j = i - 1; j >= start; j--) {
- if (compareRows((int[])tbl.elementAt(j), row, mapping) > 0) {
- tbl.setElementAt((int[])tbl.elementAt(j), j + 1);
- tempMapping.setElementAt((Integer)tempMapping.elementAt(j), j + 1);
- }
- else {
- tbl.setElementAt(row, j + 1);
- tempMapping.setElementAt(tempMap, j + 1);
- break;
- }
- }
- if (j < start) {
- tbl.setElementAt(row, start);
- tempMapping.setElementAt(tempMap, start);
- }
- }
- }
- else {
- int boundaryPos = (start + end) / 2;
- int i;
- boolean allTheSame = true;
- int firstDifferent = 0;
-
- do {
- int[] boundary = (int[])tbl.elementAt(boundaryPos);
- i = start;
- int j = end - 1;
- int[] row = null;
- byte compResult;
- while (i < j) {
- row = (int[])tbl.elementAt(i);
- while (i <= j && compareRows(row, boundary, mapping) < 0) {
- i++;
- row = (int[])tbl.elementAt(i);
- }
-
- row = (int[])tbl.elementAt(j);
- compResult = compareRows(row, boundary, mapping);
- while (i <= j && (compResult >= 0)) {
- if (compResult != 0) {
- allTheSame = false;
- firstDifferent = j;
- }
- j--;
- row = (int[])tbl.elementAt(j);
- compResult = compareRows(row, boundary, mapping);
- }
- if (i <= j) {
- row = (int[])tbl.elementAt(j);
- tbl.setElementAt(tbl.elementAt(i), j);
- tbl.setElementAt(row, i);
- Object temp = tempMapping.elementAt(j);
- tempMapping.setElementAt(tempMapping.elementAt(i), j);
- tempMapping.setElementAt(temp, i);
- }
- }
- if (i <= start) {
- if (allTheSame)
- return;
- else
- boundaryPos = firstDifferent;
- }
- } while (i <= start);
- sortTable(tbl, tempMapping, mapping, start, i);
- sortTable(tbl, tempMapping, mapping, i, end);
- }
- }
-
- private byte compareRows(int[] row1, int[] row2, int[] mapping) {
- for (int i = 0; i < numCols; i++) {
- int c1 = (row1[i] == -1) ? -1 : mapping[row1[i]];
- int c2 = (row2[i] == -1) ? -1 : mapping[row2[i]];
- if (c1 < c2)
- return -1;
- else if (c1 > c2)
- return 1;
- }
- return 0;
- }
-
- private int[] buildRowIndex(Vector tempTable) {
- int[] tempRowIndex = new int[tempTable.size()];
- rowIndexFlagsIndex = new short[tempTable.size()];
- Vector tempRowIndexFlags = new Vector();
- rowIndexShifts = new byte[tempTable.size()];
-
- // build the row index. Each entry in the row index starts out referring
- // to the original row (so it doesn't actually do any mapping), and we set
- // up the index flags to show which cells in the row are populated
- for (int i = 0; i < tempTable.size(); i++) {
- tempRowIndex[i] = i;
-
- int[] row = (int[])tempTable.elementAt(i);
- if (row[numCols] == 1 && row[0] == 0) {
- int j = 0;
- while (row[j] == 0)
- ++j;
- rowIndexFlagsIndex[i] = (short)(-j);
- }
- else {
- int[] flags = new int[numColGroups];
- int nextFlag = 1;
- int colGroup = 0;
- for (int j = 0; j < numCols; j++) {
- if (row[j] != 0)
- flags[colGroup] |= nextFlag;
- nextFlag <<= 1;
- if (nextFlag == 0) {
- ++colGroup;
- nextFlag = 1;
- }
- }
- colGroup = 0;
- int j = 0;
- while (j < tempRowIndexFlags.size()) {
- if (((Integer)tempRowIndexFlags.elementAt(j)).intValue() ==
- flags[colGroup]) {
- ++colGroup;
- ++j;
- if (colGroup >= numColGroups)
- break;
- }
- else if (colGroup != 0)
- colGroup = 0;
- else
- ++j;
- }
- rowIndexFlagsIndex[i] = (short)(j - colGroup);
- while (colGroup < numColGroups) {
- tempRowIndexFlags.addElement(new Integer(flags[colGroup]));
- ++colGroup;
- }
- }
- }
- rowIndexFlags = new int[tempRowIndexFlags.size()];
- for (int i = 0; i < rowIndexFlags.length; i++)
- rowIndexFlags[i] = ((Integer)tempRowIndexFlags.elementAt(i)).intValue();
-System.out.println("Number of column groups = " + numColGroups);
-System.out.println("Size of rowIndexFlags = " + rowIndexFlags.length);
-
- return tempRowIndex;
- }
-
- private void stackRows(Vector tempTable) {
-/*
-System.out.print("Row:\t");
-for (int i = 0; i < numCols; i++)
-System.out.print(reverseColumnMap[i] + "\t");
-System.out.println();
-for (int i = 0; i < tempTable.size(); i++) {
-System.out.print(Integer.toString(i) + ":\t");
-int[] row = (int[])tempTable.elementAt(i);
-for (int j = 0; j < row.length; j++)
-if (row[j] != 0) System.out.print(Integer.toString(row[j]) + "\t");
-else System.out.print(".\t");
-System.out.println();
-}
-*/
-
- int[] tempRowIndex = buildRowIndex(tempTable);
- boolean[] tbd = new boolean[tempTable.size()];
-
- // now we actually go through and stack rows together
- for (int i = 0; i < tempTable.size(); i++) {
- if (tbd[i])
- continue;
-System.out.print("Stacking, inspecting row " + i + "...\r");
-//System.out.println("Stacking, inspecting row " + i + "...");
-
- int[] destRow = (int[])tempTable.elementAt(i);
-
- boolean[] tempFlags = new boolean[numCols];
- boolean[] filledCells = new boolean[numCols];
- for (int j = 0; j < numCols; j++)
- filledCells[j] = destRow[j] != 0;
-
- for (int j = i + 1; destRow[numCols] < numCols && j < tempTable.size(); j++) {
- if (tbd[j])
- continue;
-
- int[] srcRow = (int[])tempTable.elementAt(j);
- if (srcRow[numCols] + destRow[numCols] > numCols)
- continue;
-
- int maxLeftShift = -999;
- int maxRightShift = 0;
- for (int k = 0; k < numCols; k++) {
- tempFlags[k] = srcRow[k] != 0;
- if (tempFlags[k]) {
- if (maxLeftShift == -999)
- maxLeftShift = -k;
- maxRightShift = (numCols - 1) - k;
- }
- }
-
- int shift;
- for (shift = maxLeftShift; shift <= maxRightShift; shift++) {
- int k;
- for (k = 0; k < numCols; k++) {
- if (tempFlags[k] && filledCells[k + shift])
- break;
- }
- if (k >= numCols)
- break;
- }
- if (shift <= maxRightShift) {
-//System.out.println("Packing row " + j + " into row " + i + " with shift = " + shift);
- for (int k = 0; k < numCols; k++) {
- if (tempFlags[k]) {
- filledCells[k + shift] = true;
- destRow[k + shift] = srcRow[k];
- ++destRow[numCols];
- }
- }
- tbd[j] = true;
- tempRowIndex[j] = i;
- rowIndexShifts[j] = (byte)shift;
- }
- }
- }
-
- // finally, we squeeze out all the deleted rows
- int decrementBy = 0;
- for (int i = 0; i < tempRowIndex.length; i++) {
- if (!tbd[i])
- tempRowIndex[i] -= decrementBy;
- else
- ++decrementBy;
- }
- rowIndex = new short[tempRowIndex.length];
- for (int i = tempRowIndex.length - 1; i >= 0; i--) {
- if (tbd[i]) {
- rowIndex[i] = (short)(tempRowIndex[tempRowIndex[i]]);
- tempTable.removeElementAt(i);
- }
- else
- rowIndex[i] = (short)tempRowIndex[i];
- }
- }
-
-// private void printTable() {
-// short cell;
-// int populatedCells = 0;
-///*
-// System.out.println("Conceptual table:");
-// System.out.println(" Row: a b c d e f g h i j k l m n"
-// + " o p q r s t u v w x y z ' #");
-//
-// boolean[] rowPrintFlags = new boolean[rowIndex.length];
-// printConceptualTable("", 0, rowPrintFlags);
-//*/
-//
-// System.out.println();
-// System.out.println("Conceptual table:");
-// System.out.print(" Row:");
-// for (int i = 0; i < reverseColumnMap.length; i++) {
-// System.out.print(" " + reverseColumnMap[i]);
-// }
-// for (int i = 0; i < rowIndex.length; i++) {
-// System.out.println();
-// printNumber(i, 4);
-// System.out.print(":");
-// for (int j = 0; j < numCols; j++)
-// printNumber(at(i, j), 4);
-// }
-// System.out.println('\n');
-//
-// System.out.println();
-// System.out.println("Internally stored table:");
-// System.out.print(" Row:");
-// for (int i = 0; i < reverseColumnMap.length; i++) {
-// System.out.print(" " + reverseColumnMap[i]);
-// }
-// for (int i = 0; i < table.length; i++) {
-// if (i % numCols == 0) {
-// System.out.println();
-// printNumber(i / numCols, 4);
-// System.out.print(":");
-// }
-// cell = table[i];
-// if (cell != 0)
-// populatedCells++;
-// printNumber(cell, 4);
-// }
-// System.out.println('\n');
-//
-//System.out.println("Row index:");
-//for (int i = 0; i < rowIndex.length; i++) {
-// System.out.print(" " + i + " -> " + rowIndex[i]);
-// if (rowIndexFlagsIndex[i] < 0)
-// System.out.print(", flags = " + Integer.toBinaryString((1 << (-rowIndexFlagsIndex[i]))) + " (" + rowIndexFlagsIndex[i]);
-// else
-// System.out.print(", flags = " + Integer.toBinaryString(rowIndexFlags[rowIndexFlagsIndex[i]]) + " (" + rowIndexFlagsIndex[i]);
-// System.out.println("), shift = " + rowIndexShifts[i]);
-//}
-///*
-// int theoreticalMinRows = populatedCells / numCols;
-// if (populatedCells % numCols != 0)
-// theoreticalMinRows++;
-// int oneCellRows = 0;
-// for (int i = 0; i < rowIndexFlags.length; i++) {
-// double temp = Math.log(rowIndexFlags[i]) / Math.log(2);
-// if (temp == (int)temp)
-// oneCellRows++;
-// }
-//
-// System.out.println('\n');
-// System.out.println("Total words in input = " + totalWords);
-// System.out.println("Total unique words = " + uniqueWords + ", comprising " +
-// totalUniqueWordChars + " characters\n");
-// System.out.println("Number of populated cells = " + populatedCells);
-// System.out.println("Total number of cells = " + (table.length));
-// System.out.println("Residency = " + ((float)populatedCells / table.length * 100) + '%');
-// System.out.println("Ratio of populated cells to unique-word characters = " +
-// ((float)populatedCells / totalUniqueWordChars * 100) + '%');
-// System.out.println("Ratio of total cells to unique-word characters = " +
-// ((float)table.length / totalUniqueWordChars * 100) + '%');
-// System.out.println("Number of rows = " + (table.length / numCols));
-// System.out.println("Theoretical minimum number of rows = " + theoreticalMinRows);
-// System.out.println("Ratio of number of rows to theoretical minimum = " +
-// ((float)(table.length / numCols) / theoreticalMinRows * 100) + '%');
-// System.out.println("Number of conceptual rows = " + rowIndex.length);
-// System.out.println("Conceptual rows with only one populated cell = " + oneCellRows);
-// System.out.println("Ratio of one-cell rows to total conceptual rows = " + (((float)oneCellRows)
-// / rowIndex.length * 100) + '%');
-// System.out.println("Average number of populated cells in multi-cell rows = " +
-// ((float)(populatedCells - oneCellRows) / (rowIndex.length - oneCellRows)));
-//
-// int storageUsed = table.length * 2 + rowIndex.length * 2
-// + rowIndexFlags.length * 4 + rowIndexShifts.length;
-// System.out.println("Total number of bytes in table (including indexes) = " +
-// storageUsed);
-// System.out.println("Bytes of overhead per unique-word character = " + ((double)(storageUsed
-// - (totalUniqueWordChars * 2)) / totalUniqueWordChars));
-//*/
-// }
-
-// private void printConceptualTable(String initialString, int state, boolean[] flags) {
-// if (initialString.length() == 0)
-// System.out.println("root:");
-// else
-// System.out.println(initialString + ':');
-//
-// if (!flags[state]) {
-// flags[state] = true;
-// printNumber(state, 4);
-// System.out.print(":");
-// for (int i = 0; i < numCols; i++)
-// printNumber(at(state, i), 4);
-// System.out.println();
-// }
-//
-// int nextState;
-// for (int i = 0; i < numCols; i++) {
-// nextState = at(state, i);
-// if (nextState > 0 && !flags[nextState]) {
-// printNumber(nextState, 4);
-// System.out.print(":");
-// for (int j = 0; j < numCols; j++)
-// printNumber(at(nextState, j), 4);
-// System.out.println();
-// }
-// }
-// for (int i = 0; i < numCols; i++) {
-// nextState = at(state, i);
-// if (nextState > 0 && !flags[nextState]) {
-// char nextChar;
-// if (nextState == 27)
-// nextChar = ' ';
-// else if (nextState == 26)
-// nextChar = '\'';
-// else
-// nextChar = (char)(i + 'a');
-// flags[nextState] = true;
-// printConceptualTable(initialString + nextChar, nextState, flags);
-// }
-// }
-// }
-
- private void printWordList(String partialWord, int state, PrintWriter out)
- throws IOException {
- if (state == -1) {
- System.out.println(partialWord);
- if (out != null)
- out.println(partialWord);
- }
- else {
- for (int i = 0; i < numCols; i++) {
- if (at(state, i) != 0)
- printWordList(partialWord + reverseColumnMap[i], at(state, i), out);
- }
- }
- }
-
- private void writeDictionaryFile(DataOutputStream out) throws IOException {
- out.writeInt(0); // version number
-
- char[] columnMapIndexes = columnMap.getIndexArray();
- out.writeInt(columnMapIndexes.length);
- for (int i = 0; i < columnMapIndexes.length; i++)
- out.writeShort((short)columnMapIndexes[i]);
- byte[] columnMapValues = columnMap.getValueArray();
- out.writeInt(columnMapValues.length);
- for (int i = 0; i < columnMapValues.length; i++)
- out.writeByte((byte)columnMapValues[i]);
-
- out.writeInt(numCols);
- out.writeInt(numColGroups);
-
- out.writeInt(rowIndex.length);
- for (int i = 0; i < rowIndex.length; i++)
- out.writeShort(rowIndex[i]);
-
- out.writeInt(rowIndexFlagsIndex.length);
- for (int i = 0; i < rowIndexFlagsIndex.length; i++)
- out.writeShort(rowIndexFlagsIndex[i]);
- out.writeInt(rowIndexFlags.length);
- for (int i = 0; i < rowIndexFlags.length; i++)
- out.writeInt(rowIndexFlags[i]);
-
- out.writeInt(rowIndexShifts.length);
- for (int i = 0; i < rowIndexShifts.length; i++)
- out.writeByte(rowIndexShifts[i]);
-
- out.writeInt(table.length);
- for (int i = 0; i < table.length; i++)
- out.writeShort(table[i]);
-
- out.close();
- }
-
-// private void printNumber(int x, int width) {
-// String s = String.valueOf(x);
-// if (width > s.length())
-// System.out.print(spaces.substring(0, width - s.length()));
-// if (x != 0)
-// System.out.print(s);
-// else
-// System.out.print('.');
-// }
-
- public final short at(int row, char ch) {
- int col = columnMap.elementAt(ch);
- return at(row, col);
- }
-
- public final short at(int row, int col) {
- if (cellIsPopulated(row, col))
- return internalAt(rowIndex[row], col + rowIndexShifts[row]);
- else
- return 0;
- }
-
- private final boolean cellIsPopulated(int row, int col) {
- if (rowIndexFlagsIndex[row] < 0)
- return col == -rowIndexFlagsIndex[row];
- else {
- int flags = rowIndexFlags[rowIndexFlagsIndex[row] + (col >> 5)];
- return (flags & (1 << (col & 0x1f))) != 0;
- }
- }
-
- private final short internalAt(int row, int col) {
- return table[row * numCols + col];
- }
-
- private CompactByteArray columnMap = null;
- private char[] reverseColumnMap = null;
- private int numCols;
- private int numColGroups;
- private short[] table = null;
- private short[] rowIndex = null;
- private int[] rowIndexFlags = null;
- private short[] rowIndexFlagsIndex = null;
- private byte[] rowIndexShifts = null;
-
- private int totalWords = 0;
- private int uniqueWords = 0;
- private int totalUniqueWordChars = 0;
-
- //private static final String spaces = " ";
-}
-