------------------------------------------------------------------------ */
-
-#include "ConvertUTF.h"
#ifdef CVTUTF_DEBUG
-#include <stdio.h>
+ #include <stdio.h>
#endif
+#include <string.h> /* strlen() */
+#include <unistd.h> /* ssize_t */
+#include "ConvertUTF.h"
static const int halfShift = 10; /* used for shifting by 10 bits */
}
}
+/**
+ * This is a variation of isLegalUTF8Sequence() that behaves like g_utf8_validate().
+ * In addition to knowing if the sequence is legal, it also tells you the last good character.
+ *
+ * @param str     bytes to check; may be NULL
+ * @param max_len number of bytes to check, or a negative value to check
+ *                everything up to the terminating NUL
+ * @param end     if non-NULL, always receives the position where checking stopped:
+ *                the first byte of the first truncated or illegal sequence on failure,
+ *                the end of the checked range on success, or `str' itself when
+ *                nothing was examined (max_len == 0 or str == NULL)
+ * @return true if every examined sequence is legal UTF-8 (trivially so when
+ *         max_len is 0); false otherwise, including when str is NULL and
+ *         max_len is nonzero
+ */
+Boolean
+tr_utf8_validate( const char * str, ssize_t max_len, const char ** end )
+{
+    const UTF8* source = (const UTF8*) str;
+    const UTF8* sourceEnd;
+
+    /* give the caller a defined stopping point even on the early exits below,
+       so that code which reads *end after a failure never sees garbage */
+    if( end != NULL )
+        *end = str;
+
+    if( max_len == 0 )
+        return true;
+
+    if( str == NULL )
+        return false;
+
+    sourceEnd = source + ((max_len < 0) ? strlen(str) : (size_t)max_len);
+
+    while( source < sourceEnd )
+    {
+        const int length = trailingBytesForUTF8[*source] + 1;
+
+        /* compare against the bytes remaining instead of forming source+length,
+           which could point beyond one-past-the-end of the buffer */
+        if( ( length > sourceEnd - source ) || !isLegalUTF8( source, length ) )
+        {
+            if( end != NULL )
+                *end = (const char*) source;
+            return false;
+        }
+
+        source += length;
+    }
+
+    if( end != NULL )
+        *end = (const char*) source;
+    return true;
+}
+
+
/* --------------------------------------------------------------------- */
ConversionResult ConvertUTF8toUTF16 (
#error only libtransmission should #include this header.
#endif
+#include <unistd.h> /* ssize_t */
+
/*
* Copyright 2001-2004 Unicode, Inc.
*
Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
+
+/**
+ * Check whether `str' holds well-formed UTF-8.
+ * Intended to work the same as g_utf8_validate.
+ *
+ * @param str     bytes to examine
+ * @param max_len number of bytes to examine; a negative value means
+ *                "everything up to the terminating NUL"
+ * @param end     optional; once bytes have been examined it is set to the point
+ *                where checking stopped -- the start of the first illegal or
+ *                truncated sequence, or the end of the examined range if
+ *                everything was valid
+ * @return true if the examined bytes are legal UTF-8 (a max_len of 0 is
+ *         trivially valid); false otherwise, including when str is NULL
+ *         and max_len is nonzero
+ */
+Boolean tr_utf8_validate( const char * str, ssize_t max_len, const char ** end );
+
+
#ifdef __cplusplus
}
#endif
***/
static int
-getfile( char ** setme,
- const char * root,
- tr_benc * path )
+getfile( char ** setme,
+ const char * root,
+ tr_benc * path )
{
int err;
const uint8_t * raw;
tr_benc * beInfo;
tr_benc * meta = (tr_benc *) meta_in;
+ tr_bool err;
/* info_hash: urlencoded 20-byte SHA1 hash of the value of the info key
* from the Metainfo file. Note that the value will be a bencoded
if( !str || !*str )
return "name";
tr_free( inf->name );
- inf->name = tr_strdup( str );
+ inf->name = tr_utf8clean( str, -1, &err );
/* comment */
if( !tr_bencDictFindStr( meta, "comment.utf-8", &str ) )
if( !tr_bencDictFindStr( meta, "comment", &str ) )
str = "";
tr_free( inf->comment );
- inf->comment = tr_strdup( str );
+ inf->comment = tr_utf8clean( str, -1, &err );
/* created by */
if( !tr_bencDictFindStr( meta, "created by.utf-8", &str ) )
if( !tr_bencDictFindStr( meta, "created by", &str ) )
str = "";
tr_free( inf->creator );
- inf->creator = tr_strdup( str );
+ inf->creator = tr_utf8clean( str, -1, &err );
/* creation date */
if( !tr_bencDictFindInt( meta, "creation date", &i ) )
#include <stdio.h> /* fprintf */
#include <string.h> /* strcmp */
#include "transmission.h"
+#include <unistd.h> /* ssize_t */
+#include "ConvertUTF.h" /* tr_utf8_validate*/
#include "platform.h"
#include "utils.h"
#include "crypto.h"
return 0;
}
+/**
+ * Exercise tr_utf8clean(): clean input must come back unchanged with no
+ * error flagged, and input containing bytes that are not UTF-8 must come
+ * back as valid UTF-8 with the error flag set.
+ *
+ * Returns 0 on success; the check() macro is expected to return early
+ * with a nonzero value when a condition fails.
+ */
+static int
+test_utf8( void )
+{
+    const char * in;
+    char * out;
+    tr_bool err;
+
+    /* plain ASCII, whole string */
+    in = "hello world";
+    out = tr_utf8clean( in, -1, &err );
+    check( err == FALSE )
+    check( out != NULL )
+    check( !strcmp( out, in ) )
+    tr_free( out );
+
+    /* plain ASCII, only the first five bytes */
+    in = "hello world";
+    out = tr_utf8clean( in, 5, &err );
+    check( err == FALSE )
+    check( out != NULL )
+    check( !strcmp( out, "hello" ) )
+    tr_free( out );
+
+    /* this version is not utf-8: 17 bytes in a single-byte legacy encoding.
+       The bytes are spelled with hex escapes so that the test data cannot be
+       silently turned into valid UTF-8 when this file is re-encoded.
+       Each of the 15 high-bit bytes is replaced by one '?', so the cleaned
+       string keeps its 17-byte length. */
+    in = "\xD2\xF0\xF3\xE4\xED\xEE \xE1\xFB\xF2\xFC \xC1\xEE\xE3\xEE\xEC";
+    out = tr_utf8clean( in, 17, &err );
+    check( out != NULL )
+    check( err != 0 )
+    check( strlen( out ) == 17 )
+    check( tr_utf8_validate( out, -1, NULL ) )
+    tr_free( out );
+
+    /* same string, but utf-8 clean */
+    in = "Òðóäíî áûòü Áîãîì";
+    out = tr_utf8clean( in, -1, &err );
+    check( out != NULL )
+    check( !err )
+    check( tr_utf8_validate( out, -1, NULL ) )
+    check( !strcmp( in, out ) )
+    tr_free( out );
+
+    return 0;
+}
+
int
main( void )
{
return i;
if( ( i = test_buildpath( ) ) )
return i;
+ if( ( i = test_utf8( ) ) )
+ return i;
/* test that tr_cryptoRandInt() stays in-bounds */
for( i = 0; i < 100000; ++i )
#endif
#include "transmission.h"
+#include "ConvertUTF.h"
#include "list.h"
#include "utils.h"
#include "platform.h"
return first;
}
+
+/***
+****
+***/
+
+/**
+ * Build a NUL-terminated copy of `str' in which every byte that
+ * tr_utf8_validate() refuses is replaced by a single '?'.
+ *
+ * @param str     text to clean; NULL is treated like an empty string
+ * @param max_len number of bytes of `str' to use, or a negative value to use
+ *                everything up to the terminating NUL
+ * @param err     if non-NULL, set to TRUE when at least one byte had to be
+ *                replaced and to FALSE otherwise
+ * @return the cleaned copy, obtained from tr_memdup()
+ */
+char*
+tr_utf8clean( const char * str, ssize_t max_len, tr_bool * err )
+{
+    const char zero = '\0';
+    char * ret;
+    struct evbuffer * buf;
+    const char * end;
+
+    if( err != NULL )
+        *err = FALSE;
+
+    /* A NULL input yields an empty result, the same as max_len == 0 does.
+       This avoids strlen(NULL) and keeps the validator from being handed
+       a NULL pointer with a nonzero length. */
+    if( str == NULL )
+    {
+        str = "";
+        max_len = 0;
+    }
+    else if( max_len < 0 )
+    {
+        max_len = (ssize_t) strlen( str );
+    }
+
+    buf = evbuffer_new( );
+    end = str; /* defined even if the validator does not write it */
+
+    while( !tr_utf8_validate ( str, max_len, &end ) )
+    {
+        /* `end' marks the first offending byte: keep what precedes it,
+           substitute one '?' for it, and resume right after it */
+        const ssize_t good_len = end - str;
+
+        evbuffer_add( buf, str, good_len );
+        max_len -= ( good_len + 1 );
+        str += ( good_len + 1 );
+        evbuffer_add( buf, "?", 1 );
+
+        if( err != NULL )
+            *err = TRUE;
+    }
+
+    /* whatever is left is valid; append it and the terminator */
+    evbuffer_add( buf, str, max_len );
+    evbuffer_add( buf, &zero, 1 );
+    ret = tr_memdup( EVBUFFER_DATA( buf ), EVBUFFER_LENGTH( buf ) );
+    assert( tr_utf8_validate( ret, -1, NULL ) );
+    evbuffer_free( buf );
+    return ret;
+}
/* wait the specified number of milliseconds */
void tr_wait( uint64_t delay_milliseconds );
+/**
+ * Return a NUL-terminated copy of `str' in which each byte rejected by
+ * tr_utf8_validate() has been replaced with '?'.
+ *
+ * @param str     text to clean
+ * @param max_len number of bytes of `str' to use; a negative value means
+ *                "up to the terminating NUL"
+ * @param err     optional; set to TRUE if any byte was replaced,
+ *                FALSE otherwise
+ */
+char* tr_utf8clean( const char * str,
+ ssize_t max_len,
+ tr_bool * err );
+
+
/***
****
***/