From 03f9f2307e387e91bb8a89a80c3a2546d1c213c3 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Fri, 16 Dec 2011 06:51:58 +0000 Subject: [PATCH] ICU-8972 genprops: start to read ppucd.txt, use that for Decomposition_Type X-SVN-Rev: 31137 --- tools/unicode/c/genprops/corepropswriter.cpp | 32 ++++- tools/unicode/c/genprops/genprops.cpp | 132 ++++++++----------- tools/unicode/c/genprops/genprops.h | 19 ++- tools/unicode/c/genprops/props2writer.cpp | 44 +++++++ tools/unicode/py/preparseucd.py | 2 + 5 files changed, 138 insertions(+), 91 deletions(-) diff --git a/tools/unicode/c/genprops/corepropswriter.cpp b/tools/unicode/c/genprops/corepropswriter.cpp index 06b1f0b605d..0341fdf24a4 100644 --- a/tools/unicode/c/genprops/corepropswriter.cpp +++ b/tools/unicode/c/genprops/corepropswriter.cpp @@ -251,13 +251,6 @@ static UTrie2 *pTrie=NULL; /* -------------------------------------------------------------------------- */ -U_CFUNC void -setUnicodeVersion(const char *v) { - UVersionInfo version; - u_versionFromString(version, v); - uprv_memcpy(dataInfo.dataVersion, version, 4); -} - U_CFUNC void initStore() { UErrorCode errorCode=U_ZERO_ERROR; @@ -496,6 +489,31 @@ generateData(const char *dataDir, UBool csource) { } } +class CorePropsWriter : public PropsWriter { +public: + virtual void setUnicodeVersion(const UVersionInfo version); + virtual void setProps(const UniProps &, const UnicodeSet &newValues, UErrorCode &errorCode); +}; + +void +CorePropsWriter::setUnicodeVersion(const UVersionInfo version) { + uprv_memcpy(dataInfo.dataVersion, version, 4); +} + +void +CorePropsWriter::setProps(const UniProps &props, const UnicodeSet &newValues, UErrorCode &errorCode) { +} + +PropsWriter * +createCorePropsWriter(UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { return NULL; } + PropsWriter *pw=new CorePropsWriter(); + if(pw==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + } + return pw; +} + /* * Hey, Emacs, please set the following: * diff --git 
a/tools/unicode/c/genprops/genprops.cpp b/tools/unicode/c/genprops/genprops.cpp index b72dd6a5b20..dfe841f8c1e 100644 --- a/tools/unicode/c/genprops/genprops.cpp +++ b/tools/unicode/c/genprops/genprops.cpp @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 1999-2010, International Business Machines +* Copyright (C) 1999-2011, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -23,27 +23,36 @@ #include <stdio.h> #include <stdlib.h> #include "unicode/utypes.h" -#include "unicode/uchar.h" +#include "unicode/localpointer.h" #include "unicode/putil.h" +#include "unicode/uchar.h" #include "unicode/uclean.h" +#include "unicode/uniset.h" +#include "unicode/unistr.h" +#include "charstr.h" #include "cmemory.h" #include "cstring.h" +#include "genprops.h" +#include "propsvec.h" +#include "ppucd.h" +#include "toolutil.h" #include "unewdata.h" #include "uoptions.h" #include "uparse.h" #include "uprops.h" -#include "propsvec.h" - -U_CDECL_BEGIN -#include "genprops.h" -U_CDECL_END #define LENGTHOF(array) (sizeof(array)/sizeof((array)[0])) +// TODO: remove +#define USE_NEW 1 + U_NAMESPACE_USE UBool beVerbose=FALSE, haveCopyright=TRUE; +void PropsWriter::setUnicodeVersion(const UVersionInfo version) {} +void PropsWriter::setProps(const UniProps &, const UnicodeSet &, UErrorCode &) {} + /* prototypes --------------------------------------------------------------- */ static void @@ -59,7 +68,6 @@ enum COPYRIGHT, DESTDIR, SOURCEDIR, - UNICODE_VERSION, ICUDATADIR, CSOURCE }; @@ -72,7 +80,6 @@ static UOption options[]={ UOPTION_COPYRIGHT, UOPTION_DESTDIR, UOPTION_SOURCEDIR, - UOPTION_DEF("unicode", 'u', UOPT_REQUIRES_ARG), UOPTION_ICUDATADIR, UOPTION_DEF("csource", 'C', UOPT_NO_ARG) }; @@ -82,14 +89,12 @@ main(int argc, char* argv[]) { char filename[300]; const char *srcDir=NULL, *destDir=NULL, *suffix=NULL; char *basename=NULL; - 
UErrorCode errorCode=U_ZERO_ERROR; U_MAIN_INIT_ARGS(argc, argv); /* preset then read command line options */ options[DESTDIR].value=u_getDataDirectory(); options[SOURCEDIR].value=""; - options[UNICODE_VERSION].value=""; options[ICUDATADIR].value=u_getDataDirectory(); argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options); @@ -136,17 +141,47 @@ main(int argc, char* argv[]) { srcDir=options[SOURCEDIR].value; destDir=options[DESTDIR].value; + /* initialize */ + initStore(); + + IcuToolErrorCode errorCode("genprops"); + LocalPointer<PropsWriter> corePropsWriter(createCorePropsWriter(errorCode)); + LocalPointer<PropsWriter> props2Writer(createProps2Writer(errorCode)); + if(errorCode.isFailure()) { + fprintf(stderr, "genprops: unable to create PropsWriters - %s\n", errorCode.errorName()); + return errorCode.reset(); + } + + CharString ppucdPath(srcDir, errorCode); + ppucdPath.appendPathPart("ppucd.txt", errorCode); + + PreparsedUCD ppucd(ppucdPath.data(), errorCode); + if(errorCode.isFailure()) { + fprintf(stderr, "genprops: unable to open %s - %s\n", + ppucdPath.data(), errorCode.errorName()); + return errorCode.reset(); + } + PreparsedUCD::LineType lineType; + UnicodeSet newValues; + int i=0; + while((lineType=ppucd.readLine(errorCode))!=PreparsedUCD::NO_LINE) { + if(ppucd.lineHasPropertyValues()) { + const UniProps *props=ppucd.getProps(newValues, errorCode); + props2Writer->setProps(*props, newValues, errorCode); + } else if(lineType==PreparsedUCD::UNICODE_VERSION_LINE) { + const UVersionInfo &version=ppucd.getUnicodeVersion(); + corePropsWriter->setUnicodeVersion(version); + } + ++i; + } + printf("*** parsed %d lines from ppucd.txt\n", i); + if(argc>=2) { suffix=argv[1]; } else { suffix=NULL; } - if(options[UNICODE_VERSION].doesOccur) { - setUnicodeVersion(options[UNICODE_VERSION].value); - } - /* else use the default dataVersion in store.c */ - if (options[ICUDATADIR].doesOccur) { u_setDataDirectory(options[ICUDATADIR].value); } @@ -158,16 +193,13 @@ main(int argc, char* 
argv[]) { *basename++=U_FILE_SEP_CHAR; } - /* initialize */ - initStore(); - /* process UnicodeData.txt */ writeUCDFilename(basename, "UnicodeData", suffix); - parseDB(filename, &errorCode); + parseDB(filename, errorCode); /* process additional properties files */ *basename=0; - generateAdditionalProperties(filename, suffix, &errorCode); + generateAdditionalProperties(filename, suffix, errorCode); /* process parsed data */ if(U_SUCCESS(errorCode)) { @@ -259,28 +291,6 @@ genCategoryNames[U_CHAR_CATEGORY_COUNT]={ "Pi", "Pf" }; -const char *const -decompositionTypeNames[U_DT_COUNT]={ - NULL, - NULL, - "compat", - "circle", - "final", - "font", - "fraction", - "initial", - "isolated", - "medial", - "narrow", - "noBreak", - "small", - "square", - "sub", - "super", - "vertical", - "wide" -}; - static struct { uint32_t first, last, props; char name[80]; @@ -320,34 +330,6 @@ unicodeDataLineFn(void *context, exit(U_PARSE_ERROR); } - /* get decomposition type, field 5 */ - if(fields[5][0]