}
int32_t len;
- UChar *testData = ReadAndConvertFile(srcPath, len, status);
+ UChar *testData = ReadAndConvertFile(srcPath, len, "utf-8", status);
if (U_FAILURE(status)) {
return; /* something went wrong, error already output */
}
}
-//-------------------------------------------------------------------------------
-//
-// Read a text data file, convert it from UTF-8 to UChars, and return the data
-// in one big UChar * buffer, which the caller must delete.
-//
-// (Lightly modified version of a similar function in regextst.cpp)
-//
-//--------------------------------------------------------------------------------
-UChar *DecimalFormatTest::ReadAndConvertFile(const char *fileName, int32_t &ulen,
- UErrorCode &status) {
- UChar *retPtr = NULL;
- char *fileBuf = NULL;
- const char *fileBufNoBOM = NULL;
- FILE *f = NULL;
-
- ulen = 0;
- if (U_FAILURE(status)) {
- return retPtr;
- }
-
- //
- // Open the file.
- //
- f = fopen(fileName, "rb");
- if (f == 0) {
- dataerrln("Error opening test data file %s\n", fileName);
- status = U_FILE_ACCESS_ERROR;
- return NULL;
- }
- //
- // Read it in
- //
- int32_t fileSize;
- int32_t amtRead;
- int32_t amtReadNoBOM;
-
- fseek( f, 0, SEEK_END);
- fileSize = ftell(f);
- fileBuf = new char[fileSize];
- fseek(f, 0, SEEK_SET);
- amtRead = static_cast<int32_t>(fread(fileBuf, 1, fileSize, f));
- if (amtRead != fileSize || fileSize <= 0) {
- errln("Error reading test data file.");
- goto cleanUpAndReturn;
- }
-
- //
- // Look for a UTF-8 BOM on the data just read.
- // The test data file is UTF-8.
- // The BOM needs to be there in the source file to keep the Windows &
- // EBCDIC machines happy, so force an error if it goes missing.
- // Many Linux editors will silently strip it.
- //
- fileBufNoBOM = fileBuf + 3;
- amtReadNoBOM = amtRead - 3;
- if (fileSize<3 || uprv_strncmp(fileBuf, "\xEF\xBB\xBF", 3) != 0) {
- // TODO: restore this check.
- errln("Test data file %s is missing its BOM", fileName);
- fileBufNoBOM = fileBuf;
- amtReadNoBOM = amtRead;
- }
-
- //
- // Find the length of the input in UTF-16 UChars
- // (by preflighting the conversion)
- //
- u_strFromUTF8(NULL, 0, &ulen, fileBufNoBOM, amtReadNoBOM, &status);
-
- //
- // Convert file contents from UTF-8 to UTF-16
- //
- if (status == U_BUFFER_OVERFLOW_ERROR) {
- // Buffer Overflow is expected from the preflight operation.
- status = U_ZERO_ERROR;
- retPtr = new UChar[ulen+1];
- u_strFromUTF8(retPtr, ulen+1, NULL, fileBufNoBOM, amtReadNoBOM, &status);
- }
-
-cleanUpAndReturn:
- fclose(f);
- delete[] fileBuf;
- if (U_FAILURE(status)) {
- errln("ICU Error \"%s\"\n", u_errorName(status));
- delete retPtr;
- retPtr = NULL;
- }
- return retPtr;
-}
-
#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
// The following are test functions that are visible from the intltest test framework.
virtual void DataDrivenTests();
- // The following functions are internal to the decimal format tests.
- virtual UChar *ReadAndConvertFile(const char *fileName, int32_t &len, UErrorCode &status);
virtual const char *getPath(char buffer[2048], const char *filename);
virtual void execParseTest(int32_t lineNum,
const UnicodeString &inputText,
#include "unicode/uidna.h"
#include "unicode/utf16.h"
#include "idnaconf.h"
+#include "charstr.h"
static const UChar C_TAG[] = {0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0}; // =====
static const UChar C_NAMEZONE[] = {0x6E, 0x61, 0x6D, 0x65, 0x7A, 0x6F, 0x6E, 0x65, 0}; // namezone
}
#if !UCONFIG_NO_IDNA
-/* this function is modified from RBBITest::ReadAndConvertFile()
- *
- */
-UBool IdnaConfTest::ReadAndConvertFile(){
-
- char * source = NULL;
- size_t source_len;
-
- // read the test data file to memory
- FILE* f = NULL;
- UErrorCode status = U_ZERO_ERROR;
-
- const char *path = IntlTest::getSourceTestData(status);
- if (U_FAILURE(status)) {
- errln("%s", u_errorName(status));
- return FALSE;
- }
-
- const char* name = "idna_conf.txt"; // test data file
- int t = static_cast<int>(strlen(path) + strlen(name) + 1);
- char* absolute_name = new char[t];
- strcpy(absolute_name, path);
- strcat(absolute_name, name);
- f = fopen(absolute_name, "rb");
- delete [] absolute_name;
-
- if (f == NULL){
- dataerrln("fopen error on %s", name);
- return FALSE;
- }
-
- fseek( f, 0, SEEK_END);
- if ((source_len = ftell(f)) <= 0){
- errln("Error reading test data file.");
- fclose(f);
- return FALSE;
- }
-
- source = new char[source_len];
- fseek(f, 0, SEEK_SET);
- if (fread(source, 1, source_len, f) != source_len) {
- errln("Error reading test data file.");
- delete [] source;
- fclose(f);
- return FALSE;
- }
- fclose(f);
-
- // convert the UTF-8 encoded stream to UTF-16 stream
- UConverter* conv = ucnv_open("utf-8", &status);
- int dest_len = ucnv_toUChars(conv,
- NULL, // dest,
- 0, // destCapacity,
- source,
- static_cast<int32_t>(source_len),
- &status);
- if (status == U_BUFFER_OVERFLOW_ERROR) {
- // Buffer Overflow is expected from the preflight operation.
- status = U_ZERO_ERROR;
- UChar * dest = NULL;
- dest = new UChar[ dest_len + 1];
- ucnv_toUChars(conv, dest, dest_len + 1, source, static_cast<int32_t>(source_len), &status);
- // Do not know the "if possible" behavior of ucnv_toUChars()
- // Do it by ourself.
- dest[dest_len] = 0;
- len = dest_len;
- base = dest;
- delete [] source;
- ucnv_close(conv);
- return TRUE; // The buffer will owned by caller.
- }
- errln("UConverter error: %s", u_errorName(status));
- delete [] source;
- ucnv_close(conv);
- return FALSE;
-}
int IdnaConfTest::isNewlineMark(){
static const UChar LF = 0x0a;
}
void IdnaConfTest::Test(void){
- if (!ReadAndConvertFile())return;
+ UErrorCode status = U_ZERO_ERROR;
+ //
+ // Open and read the test data file.
+ //
+ const char *testDataDirectory = IntlTest::getSourceTestData(status);
+ CharString testFileName(testDataDirectory, -1, status);
+ testFileName.append("idna_conf.txt", -1, status);
+
+ base = ReadAndConvertFile(testFileName.data(), len, "UTF-8", status);
+ if (U_FAILURE(status)) {
+ return;
+ }
UnicodeString s;
UnicodeString key;
int len ;
int curOffset;
- UBool ReadAndConvertFile();
int isNewlineMark();
UBool ReadOneLine(UnicodeString&);
return val;
}
+//-------------------------------------------------------------------------------
+//
+//  ReadAndConvertFile   Read a text data file, convert it to UChars, and
+//      return the data in one big UChar * buffer, which the caller must delete.
+//
+//    parameters:
+//          fileName:   the path of the file to read.  It is handed to fopen()
+//                      unchanged; no directory is prepended here, so the caller
+//                      must supply the test data directory as part of the path.
+//          ulen        an out parameter, receives the actual length (in UChars) of the file data.
+//                      It is set to 0 whenever the function fails.
+//          encoding    The file encoding.  If the file contains a BOM, that will override the encoding
+//                      specified here.  The BOM, if it exists, will be stripped from the returned data.
+//                      Pass NULL for the system default encoding.
+//          status      In/out ICU error code.  If it already indicates a failure on
+//                      entry, nothing is done.  It is set to a failure code for
+//                      every error detected here, including an unreadable or
+//                      empty file (U_FILE_ACCESS_ERROR).
+//    returns:
+//          The file data, converted to UChar, in a buffer of ulen+1 UChars.
+//          NULL whenever status indicates a failure on return.
+//          The caller must delete this when done with
+//               delete [] theBuffer;
+//
+//
+//--------------------------------------------------------------------------------
+UChar *IntlTest::ReadAndConvertFile(const char *fileName, int &ulen, const char *encoding, UErrorCode &status) {
+    UChar       *retPtr  = NULL;
+    char        *fileBuf = NULL;
+    UConverter  *conv    = NULL;
+    FILE        *f       = NULL;
+
+    ulen = 0;
+    if (U_FAILURE(status)) {
+        return retPtr;
+    }
+
+    //
+    //  Open the file.
+    //
+    f = fopen(fileName, "rb");
+    if (f == NULL) {
+        dataerrln("Error opening test data file %s\n", fileName);
+        status = U_FILE_ACCESS_ERROR;
+        return NULL;
+    }
+
+    // All locals used after this point are declared (and initialized) here,
+    // ahead of the first goto, so no jump can bypass an initialization.
+    long         fileLength      = 0;
+    int          fileSize        = 0;
+    int          amt_read        = 0;
+    int32_t      signatureLength = 0;
+    const char  *fileBufC        = NULL;
+    const char  *bomEncoding     = NULL;
+
+    //
+    //  Determine the file size.  It is validated BEFORE the buffer is
+    //  allocated: ftell() reports -1 on error, and the size must also fit
+    //  the int32_t source length taken by ucnv_toUChars().
+    //
+    if (fseek(f, 0, SEEK_END) != 0) {
+        errln("Error reading test data file.");
+        status = U_FILE_ACCESS_ERROR;
+        goto cleanUpAndReturn;
+    }
+    fileLength = ftell(f);
+    if (fileLength <= 0 || fileLength > INT32_MAX || fseek(f, 0, SEEK_SET) != 0) {
+        errln("Error reading test data file.");
+        status = U_FILE_ACCESS_ERROR;
+        goto cleanUpAndReturn;
+    }
+    fileSize = static_cast<int>(fileLength);
+
+    //
+    //  Read it in
+    //
+    fileBuf  = new char[fileSize];
+    amt_read = static_cast<int>(fread(fileBuf, 1, fileSize, f));
+    if (amt_read != fileSize) {
+        errln("Error reading test data file.");
+        // Report the failure through status as well; callers test only
+        // U_FAILURE(status) before using the returned buffer.
+        status = U_FILE_ACCESS_ERROR;
+        goto cleanUpAndReturn;
+    }
+
+    //
+    //  Look for a Unicode Signature (BOM) on the data just read.
+    //  A detected signature overrides the caller-supplied encoding and is
+    //  skipped so that it does not appear in the converted text.
+    //
+    fileBufC = fileBuf;
+    bomEncoding = ucnv_detectUnicodeSignature(
+        fileBuf, fileSize, &signatureLength, &status);
+    if (bomEncoding != NULL) {
+        fileBufC += signatureLength;
+        fileSize -= signatureLength;
+        encoding = bomEncoding;
+    }
+
+    //
+    // Open a converter to take the file contents to UTF-16
+    //
+    conv = ucnv_open(encoding, &status);
+    if (U_FAILURE(status)) {
+        goto cleanUpAndReturn;
+    }
+
+    //
+    // Convert the file contents to UChar.
+    //  Preflight first to determine required buffer size.
+    //
+    ulen = ucnv_toUChars(conv,
+        NULL,           //  dest,
+        0,              //  destCapacity,
+        fileBufC,
+        fileSize,
+        &status);
+    if (status == U_BUFFER_OVERFLOW_ERROR || U_SUCCESS(status)) {
+        // Buffer Overflow is expected from the preflight operation.
+        // A success/warning status can only mean zero UChars of output
+        // (e.g. the file held nothing but a BOM); an empty buffer is still
+        // returned in that case so success never comes with a NULL result.
+        status = U_ZERO_ERROR;
+
+        retPtr = new UChar[ulen+1];
+        ucnv_toUChars(conv,
+            retPtr,       //  dest,
+            ulen+1,
+            fileBufC,
+            fileSize,
+            &status);
+    }
+
+cleanUpAndReturn:
+    fclose(f);
+    delete []fileBuf;
+    ucnv_close(conv);
+    if (U_FAILURE(status)) {
+        errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
+        delete []retPtr;
+        retPtr = 0;
+        ulen   = 0;
+    }
+    return retPtr;
+}
+
+
/*
* Hey, Emacs, please set the following:
*
virtual const char* getTestDataPath(UErrorCode& err);
static const char* getSourceTestData(UErrorCode& err);
static char *getUnidataPath(char path[]);
+ UChar *ReadAndConvertFile(const char *fileName, int &ulen, const char *encoding, UErrorCode &status);
+
// static members
public:
-//-------------------------------------------------------------------------------
-//
-// ReadAndConvertFile Read a text data file, convert it to UChars, and
-// return the data in one big UChar * buffer, which the caller must delete.
-//
-// parameters:
-// fileName: the name of the file, with no directory part. The test data directory
-// is assumed.
-// ulen an out parameter, receives the actual length (in UChars) of the file data.
-// encoding The file encoding. If the file contains a BOM, that will override the encoding
-// specified here. The BOM, if it exists, will be stripped from the returned data.
-// Pass NULL for the system default encoding.
-// status
-// returns:
-// The file data, converted to UChar.
-// The caller must delete this when done with
-// delete [] theBuffer;
-//
-// TODO: This is a clone of RegexTest::ReadAndConvertFile.
-// Move this function to some common place.
-//
-//--------------------------------------------------------------------------------
-UChar *RBBITest::ReadAndConvertFile(const char *fileName, int &ulen, const char *encoding, UErrorCode &status) {
- UChar *retPtr = NULL;
- char *fileBuf = NULL;
- UConverter* conv = NULL;
- FILE *f = NULL;
-
- ulen = 0;
- if (U_FAILURE(status)) {
- return retPtr;
- }
-
- //
- // Open the file.
- //
- f = fopen(fileName, "rb");
- if (f == 0) {
- dataerrln("Error opening test data file %s\n", fileName);
- status = U_FILE_ACCESS_ERROR;
- return NULL;
- }
- //
- // Read it in
- //
- int fileSize;
- int amt_read;
-
- fseek( f, 0, SEEK_END);
- fileSize = ftell(f);
- fileBuf = new char[fileSize];
- fseek(f, 0, SEEK_SET);
- amt_read = static_cast<int>(fread(fileBuf, 1, fileSize, f));
- if (amt_read != fileSize || fileSize <= 0) {
- errln("Error reading test data file.");
- goto cleanUpAndReturn;
- }
-
- //
- // Look for a Unicode Signature (BOM) on the data just read
- //
- int32_t signatureLength;
- const char * fileBufC;
- const char* bomEncoding;
-
- fileBufC = fileBuf;
- bomEncoding = ucnv_detectUnicodeSignature(
- fileBuf, fileSize, &signatureLength, &status);
- if(bomEncoding!=NULL ){
- fileBufC += signatureLength;
- fileSize -= signatureLength;
- encoding = bomEncoding;
- }
-
- //
- // Open a converter to take the rule file to UTF-16
- //
- conv = ucnv_open(encoding, &status);
- if (U_FAILURE(status)) {
- goto cleanUpAndReturn;
- }
-
- //
- // Convert the rules to UChar.
- // Preflight first to determine required buffer size.
- //
- ulen = ucnv_toUChars(conv,
- NULL, // dest,
- 0, // destCapacity,
- fileBufC,
- fileSize,
- &status);
- if (status == U_BUFFER_OVERFLOW_ERROR) {
- // Buffer Overflow is expected from the preflight operation.
- status = U_ZERO_ERROR;
-
- retPtr = new UChar[ulen+1];
- ucnv_toUChars(conv,
- retPtr, // dest,
- ulen+1,
- fileBufC,
- fileSize,
- &status);
- }
-
-cleanUpAndReturn:
- fclose(f);
- delete []fileBuf;
- ucnv_close(conv);
- if (U_FAILURE(status)) {
- errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
- delete []retPtr;
- retPtr = 0;
- ulen = 0;
- }
- return retPtr;
-}
-
-
-
//--------------------------------------------------------------------------------------------
//
// Run tests from each of the boundary test data files distributed by the Unicode Consortium
void TestMonkey();
void TestExtended();
- UChar *ReadAndConvertFile(const char *fileName, int &ulen, const char *encoding, UErrorCode &status);
void executeTest(TestParams *, UErrorCode &status);
void TestWordBreaks();
}
-
-//-------------------------------------------------------------------------------
-//
-// Read a text data file, convert it to UChars, and return the data
-// in one big UChar * buffer, which the caller must delete.
-//
-//--------------------------------------------------------------------------------
-UChar *RegexTest::ReadAndConvertFile(const char *fileName, int32_t &ulen,
- const char *defEncoding, UErrorCode &status) {
- UChar *retPtr = NULL;
- char *fileBuf = NULL;
- UConverter* conv = NULL;
- FILE *f = NULL;
-
- ulen = 0;
- if (U_FAILURE(status)) {
- return retPtr;
- }
-
- //
- // Open the file.
- //
- f = fopen(fileName, "rb");
- if (f == 0) {
- dataerrln("Error opening test data file %s\n", fileName);
- status = U_FILE_ACCESS_ERROR;
- return NULL;
- }
- //
- // Read it in
- //
- int32_t fileSize;
- int32_t amt_read;
-
- fseek( f, 0, SEEK_END);
- fileSize = ftell(f);
- fileBuf = new char[fileSize];
- fseek(f, 0, SEEK_SET);
- amt_read = static_cast<int32_t>(fread(fileBuf, 1, fileSize, f));
- if (amt_read != fileSize || fileSize <= 0) {
- errln("Error reading test data file.");
- goto cleanUpAndReturn;
- }
-
- //
- // Look for a Unicode Signature (BOM) on the data just read
- //
- int32_t signatureLength;
- const char * fileBufC;
- const char* encoding;
-
- fileBufC = fileBuf;
- encoding = ucnv_detectUnicodeSignature(
- fileBuf, fileSize, &signatureLength, &status);
- if(encoding!=NULL ){
- fileBufC += signatureLength;
- fileSize -= signatureLength;
- } else {
- encoding = defEncoding;
- if (strcmp(encoding, "utf-8") == 0) {
- errln("file %s is missing its BOM", fileName);
- }
- }
-
- //
- // Open a converter to take the rule file to UTF-16
- //
- conv = ucnv_open(encoding, &status);
- if (U_FAILURE(status)) {
- goto cleanUpAndReturn;
- }
-
- //
- // Convert the rules to UChar.
- // Preflight first to determine required buffer size.
- //
- ulen = ucnv_toUChars(conv,
- NULL, // dest,
- 0, // destCapacity,
- fileBufC,
- fileSize,
- &status);
- if (status == U_BUFFER_OVERFLOW_ERROR) {
- // Buffer Overflow is expected from the preflight operation.
- status = U_ZERO_ERROR;
-
- retPtr = new UChar[ulen+1];
- ucnv_toUChars(conv,
- retPtr, // dest,
- ulen+1,
- fileBufC,
- fileSize,
- &status);
- }
-
-cleanUpAndReturn:
- fclose(f);
- delete[] fileBuf;
- ucnv_close(conv);
- if (U_FAILURE(status)) {
- errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
- delete []retPtr;
- retPtr = 0;
- ulen = 0;
- }
- return retPtr;
-}
-
-
//-------------------------------------------------------------------------------
//
// PerlTests - Run Perl's regular expression tests
const UnicodeString &input, const char *srcPath, int32_t line);
virtual void regex_err(const char *pat, int32_t errline, int32_t errcol,
UErrorCode expectedStatus, int32_t line);
- virtual UChar *ReadAndConvertFile(const char *fileName, int32_t &len, const char *charset, UErrorCode &status);
virtual const char *getPath(char buffer[2048], const char *filename);
virtual void TestCase11049(const char *pattern, const char *data, UBool expectMatch, int32_t lineNumber);