Some improvements of String

author Tsuda Kageyu <tsuda.kageyu@gmail.com>

Sun, 14 Apr 2013 20:03:54 +0000 (05:03 +0900)

committer Tsuda Kageyu <tsuda.kageyu@gmail.com>

Sun, 14 Apr 2013 20:03:54 +0000 (05:03 +0900)
author Tsuda Kageyu <tsuda.kageyu@gmail.com>
Sun, 14 Apr 2013 20:03:54 +0000 (05:03 +0900)
committer Tsuda Kageyu <tsuda.kageyu@gmail.com>
Sun, 14 Apr 2013 20:03:54 +0000 (05:03 +0900)
diff --git a/taglib/toolkit/taglib.h b/taglib/toolkit/taglib.h

index e941ca784d178d3708f5aa1c9f5dfe6953918ba1..ed82b0fee5670285810f74cd41b1c5940ad32b50 100755 (executable)
--- a/taglib/toolkit/taglib.h
+++ b/taglib/toolkit/taglib.h
@@ -63,6 +63,18 @@
  #  define TAGLIB_ATOMIC_GCC
  #endif
  
+// Detect CPU endianness at compile time rather than at run time if possible.
+// This list is incomplete; additions for other compilers and CPUs are welcome.
+#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) \
+  || (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) \
+  || (defined(__clang__) && (defined(__i386__) || defined(__x86_64__)))
+# define TAGLIB_LITTLE_ENDIAN
+/*
+#elif ....
+# define TAGLIB_BIG_ENDIAN
+*/
+#endif
+
  //! A namespace for all TagLib related classes and functions
  
  /*!
diff --git a/taglib/toolkit/tstring.cpp b/taglib/toolkit/tstring.cpp

index 292f35333fdbe5559110fe8de6c38fe1835148b9..42505cbe7b5efc0305b9ea73efee258c6264267a 100644 (file)
--- a/taglib/toolkit/tstring.cpp
+++ b/taglib/toolkit/tstring.cpp
@@ -23,8 +23,10 @@
   *   http://www.mozilla.org/MPL/                                           *
   ***************************************************************************/
  
+// This class assumes that std::basic_string<T> stores its characters in a
+// contiguous, null-terminated buffer.
+
  #include "tstring.h"
-#include "unicode.h"
  #include "tdebug.h"
  #include "tstringlist.h"
  
@@ -32,167 +34,170 @@
  
  #include <string.h>
  
-namespace TagLib {
+// Determine if the compiler supports codecvt.
+
+#if (defined(_MSC_VER) && _MSC_VER >= 1600)
+# define TAGLIB_USE_CODECVT
+#endif
+
+#ifdef TAGLIB_USE_CODECVT
+# include <codecvt>
+typedef std::codecvt_utf8_utf16<wchar_t> utf8_utf16_t;
+#else
+# include "unicode.h"
+#endif
+
+namespace {
  
    inline unsigned short byteSwap(unsigned short x)
    {
+#if defined(_MSC_VER) && (_MSC_VER >= 1400) 
+
+    return _byteswap_ushort(x);
+
+#else
+
      return (((x) >> 8) & 0xff) | (((x) & 0xff) << 8);
+
+#endif
    }
  
    inline unsigned short combine(unsigned char c1, unsigned char c2)
    {
      return (c1 << 8) | c2;
    }
+
+#if !defined(TAGLIB_LITTLE_ENDIAN) && !defined(TAGLIB_BIG_ENDIAN)
+
+  TagLib::String::Type wcharByteOrder() 
+  {
+    // Detect CPU endianness at run time.
+    union {
+      TagLib::ushort w;
+      char c;
+    } x = { 0x1234 };
+
+    if(x.c == 0x34)
+      return String::UTF16LE;
+    else
+      return String::UTF16BE;
+  }
+
+#endif
  }
  
-using namespace TagLib;
+namespace TagLib {
  
  class String::StringPrivate : public RefCounter
  {
  public:
-  StringPrivate(const wstring &s) :
-    RefCounter(),
-    data(s),
-    CString(0) {}
-
-  StringPrivate() :
-    RefCounter(),
-    CString(0) {}
+  StringPrivate(const wstring &s) : RefCounter(), data(s) {}
+  StringPrivate() : RefCounter() {}
  
-  ~StringPrivate() {
-    delete [] CString;
-  }
-
-  wstring data;
+  /*!
+   * Stores the string in UTF-16. The byte order matches the CPU endianness.
+   */
+  TagLib::wstring data;
  
    /*!
-   * This is only used to hold the a pointer to the most recent value of
-   * toCString.
+   * This is only used to hold the most recent value of toCString().
     */
-  char *CString;
+  std::string cstring;
  };
  
  String String::null;
  
  ////////////////////////////////////////////////////////////////////////////////
  
-String::String()
+String::String() 
+  : d(new StringPrivate())
  {
-  d = new StringPrivate;
  }
  
-String::String(const String &s) : d(s.d)
+String::String(const String &s) 
+  : d(s.d)
  {
    d->ref();
  }
  
  String::String(const std::string &s, Type t)
+  : d(new StringPrivate())
  {
-  d = new StringPrivate;
-
-  if(t == UTF16 || t == UTF16BE || t == UTF16LE) {
+  if(t == Latin1)
+    copyFromLatin1(&s[0], s.length());
+  else if(t == String::UTF8)
+    copyFromUTF8(&s[0], s.length());
+  else {
      debug("String::String() -- A std::string should not contain UTF16.");
-    return;
    }
-
-  int length = s.length();
-  d->data.resize(length);
-  wstring::iterator targetIt = d->data.begin();
-
-  for(std::string::const_iterator it = s.begin(); it != s.end(); it++) {
-    *targetIt = uchar(*it);
-    ++targetIt;
-  }
-
-  prepare(t);
  }
  
  String::String(const wstring &s, Type t)
+  : d(new StringPrivate())
  {
-  d = new StringPrivate(s);
-  prepare(t);
+  if(t == UTF16 || t == UTF16BE || t == UTF16LE)
+    copyFromUTF16(s.c_str(), s.length(), t);
+  else {
+    debug("String::String() -- A TagLib::wstring should not contain Latin1 or UTF-8.");
+  }
  }
  
  String::String(const wchar_t *s, Type t)
+  : d(new StringPrivate())
  {
-  d = new StringPrivate(s);
-  prepare(t);
+  if(t == UTF16 || t == UTF16BE || t == UTF16LE)
+    copyFromUTF16(s, ::wcslen(s), t);
+  else {
+    debug("String::String() -- A const wchar_t * should not contain Latin1 or UTF-8.");
+  }
  }
  
  String::String(const char *s, Type t)
+  : d(new StringPrivate())
  {
-  d = new StringPrivate;
-
-  if(t == UTF16 || t == UTF16BE || t == UTF16LE) {
+  if(t == Latin1)
+    copyFromLatin1(s, ::strlen(s));
+  else if(t == String::UTF8)
+    copyFromUTF8(s, ::strlen(s));
+  else {
      debug("String::String() -- A const char * should not contain UTF16.");
-    return;
    }
-
-  int length = ::strlen(s);
-  d->data.resize(length);
-
-  wstring::iterator targetIt = d->data.begin();
-
-  for(int i = 0; i < length; i++) {
-    *targetIt = uchar(s[i]);
-    ++targetIt;
-  }
-
-  prepare(t);
  }
  
  String::String(wchar_t c, Type t)
+  : d(new StringPrivate())
  {
-  d = new StringPrivate;
-  d->data += c;
-  prepare(t);
+  if(t == UTF16 || t == UTF16BE || t == UTF16LE)
+    copyFromUTF16(&c, 1, t);
+  else {
+    debug("String::String() -- A const wchar_t should not contain Latin1 or UTF-8.");
+  }
  }
  
  String::String(char c, Type t)
+  : d(new StringPrivate())
  {
-  d = new StringPrivate;
-
-  if(t == UTF16 || t == UTF16BE || t == UTF16LE) {
-    debug("String::String() -- A std::string should not contain UTF16.");
-    return;
+  if(t == Latin1 || t == UTF8) {
+    d->data.resize(1);
+    d->data[0] = static_cast<uchar>(c);
+  }
+  else {
+    debug("String::String() -- A char  should not contain UTF16.");
    }
-
-  d->data += uchar(c);
-  prepare(t);
  }
  
  String::String(const ByteVector &v, Type t)
+  : d(new StringPrivate())
  {
-  d = new StringPrivate;
-
    if(v.isEmpty())
      return;
  
-  if(t == Latin1 || t == UTF8) {
-
-    int length = 0;
-    d->data.resize(v.size());
-    wstring::iterator targetIt = d->data.begin();
-    for(ByteVector::ConstIterator it = v.begin(); it != v.end() && (*it); ++it) {
-      *targetIt = uchar(*it);
-      ++targetIt;
-      ++length;
-    }
-    d->data.resize(length);
-  }
-  else  {
-    d->data.resize(v.size() / 2);
-    wstring::iterator targetIt = d->data.begin();
-
-    for(ByteVector::ConstIterator it = v.begin();
-        it != v.end() && it + 1 != v.end() && combine(*it, *(it + 1));
-        it += 2)
-    {
-      *targetIt = combine(*it, *(it + 1));
-      ++targetIt;
-    }
-  }
-  prepare(t);
+  if(t == Latin1) 
+    copyFromLatin1(v.data(), v.size());
+  else if(t == UTF8) 
+    copyFromUTF8(v.data(), v.size());
+  else 
+    copyFromUTF16(v.data(), v.size(), t);
  }
  
  ////////////////////////////////////////////////////////////////////////////////
@@ -206,45 +211,49 @@ String::~String()
  std::string String::to8Bit(bool unicode) const
  {
    std::string s;
-  s.resize(d->data.size());
  
    if(!unicode) {
+    s.resize(d->data.size());
+
      std::string::iterator targetIt = s.begin();
      for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++) {
-      *targetIt = char(*it);
+      *targetIt = static_cast<char>(*it);
        ++targetIt;
      }
-    return s;
    }
+  else {
+    s.resize(d->data.size() * 4 + 1);
  
-  const int outputBufferSize = d->data.size() * 3 + 1;
+#ifdef TAGLIB_USE_CODECVT
  
-  Unicode::UTF16 *sourceBuffer = new Unicode::UTF16[d->data.size() + 1];
-  Unicode::UTF8  *targetBuffer = new Unicode::UTF8[outputBufferSize];
+    std::mbstate_t st = 0;
+    const wchar_t *source;
+    char *target;
+    std::codecvt_base::result result = utf8_utf16_t().out(
+      st, &d->data[0], &d->data[d->data.size()], source, &s[0], &s[s.size()], target);
  
-  for(unsigned int i = 0; i < d->data.size(); i++)
-    sourceBuffer[i] = Unicode::UTF16(d->data[i]);
+    if(result != utf8_utf16_t::ok) {
+      debug("String::copyFromUTF8() - Unicode conversion error.");
+    }
  
-  const Unicode::UTF16 *source = sourceBuffer;
-  Unicode::UTF8 *target = targetBuffer;
+#else
  
-  Unicode::ConversionResult result =
-    Unicode::ConvertUTF16toUTF8(&source, sourceBuffer + d->data.size(),
-                                &target, targetBuffer + outputBufferSize,
-                                Unicode::lenientConversion);
+    const Unicode::UTF16 *source = &d->data[0];
+    Unicode::UTF8 *target = reinterpret_cast<Unicode::UTF8*>(&s[0]);
  
-  if(result != Unicode::conversionOK) {
-    debug("String::to8Bit() - Unicode conversion error.");
-  }
+    Unicode::ConversionResult result = Unicode::ConvertUTF16toUTF8(
+      &source, source + d->data.size(),
+      &target, target + s.size(),
+      Unicode::lenientConversion);
  
-  int newSize = target - targetBuffer;
-  s.resize(newSize);
-  targetBuffer[newSize] = 0;
+    if(result != Unicode::conversionOK) {
+      debug("String::to8Bit() - Unicode conversion error.");
+    }
  
-  s = (char *) targetBuffer;
+#endif
  
-  delete [] sourceBuffer;
-  delete [] targetBuffer;
+    s.resize(::strlen(s.c_str()));
+  }
  
    return s;
  }
@@ -256,22 +265,8 @@ TagLib::wstring String::toWString() const
  
  const char *String::toCString(bool unicode) const
  {
-  delete [] d->CString;
-
-  std::string buffer = to8Bit(unicode);
-  d->CString = new char[buffer.size() + 1];
-
-#if defined(_MSC_VER) && (_MSC_VER >= 1400)  // VC++2005 or later
-
-  strcpy_s(d->CString, buffer.size() + 1, buffer.c_str());
-
-#else
-
-  strcpy(d->CString, buffer.c_str());
-
-#endif                                          
-
-  return d->CString;
+  d->cstring = to8Bit(unicode);
+  return d->cstring.c_str();
  }
  
  String::Iterator String::begin()
@@ -296,23 +291,12 @@ String::ConstIterator String::end() const
  
  int String::find(const String &s, int offset) const
  {
-  wstring::size_type position = d->data.find(s.d->data, offset);
-
-  if(position != wstring::npos)
-    return position;
-  else
-    return -1;
+  return d->data.find(s.d->data, offset);
  }
  
  int String::rfind(const String &s, int offset) const
  {
-  wstring::size_type position =
-    d->data.rfind(s.d->data, offset == -1 ? wstring::npos : offset);
-
-  if(position != wstring::npos)
-    return position;
-  else
-    return -1;
+  return d->data.rfind(s.d->data, offset);
  }
  
  StringList String::split(const String &separator) const
@@ -345,9 +329,7 @@ bool String::startsWith(const String &s) const
  
  String String::substr(uint position, uint n) const
  {
-  String s;
-  s.d->data = d->data.substr(position, n);
-  return s;
+  return String(d->data.substr(position, n));
  }
  
  String &String::append(const String &s)
@@ -395,67 +377,102 @@ bool String::isNull() const
  
  ByteVector String::data(Type t) const
  {
-  ByteVector v;
+  switch(t) 
+  {
+  case Latin1:
+    {
+      ByteVector v(size(), 0);
+      char *p = v.data();
  
-  switch(t) {
+      for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++)
+        *p++ = static_cast<char>(*it);
  
-  case Latin1:
-  {
-    for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++)
-      v.append(char(*it));
-    break;
-  }
+      return v;
+    }
    case UTF8:
-  {
-    std::string s = to8Bit(true);
-    v.setData(s.c_str(), s.length());
-    break;
-  }
+    {
+      ByteVector v(size() * 4 + 1, 0);
+
+#ifdef TAGLIB_USE_CODECVT
+
+      std::mbstate_t st = 0;
+      const wchar_t *source;
+      char *target;
+      std::codecvt_base::result result = utf8_utf16_t().out(
+        st, &d->data[0], &d->data[d->data.size()], source, v.data(), v.data() + v.size(), target);
+
+      if(result != utf8_utf16_t::ok) {
+        debug("String::data() - Unicode conversion error.");
+      }
+
+#else
+
+      const Unicode::UTF16 *source = &d->data[0];
+      Unicode::UTF8 *target = reinterpret_cast<Unicode::UTF8*>(v.data());
+
+      Unicode::ConversionResult result = Unicode::ConvertUTF16toUTF8(
+        &source, source + d->data.size(),
+        &target, target + v.size(),
+        Unicode::lenientConversion);
+
+      if(result != Unicode::conversionOK) {
+        debug("String::data() - Unicode conversion error.");
+      }
+
+#endif
+
+      v.resize(::strlen(v.data()));
+
+      return v;
+    }
    case UTF16:
-  {
-    // Assume that if we're doing UTF16 and not UTF16BE that we want little
-    // endian encoding.  (Byte Order Mark)
+    {
+      ByteVector v(2 + size() * 2, 0);
+      char *p = v.data();
  
-    v.append(char(0xff));
-    v.append(char(0xfe));
+      // Assume that if we're doing UTF16 and not UTF16BE that we want little
+      // endian encoding.  (Byte Order Mark)
  
-    for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++) {
+      *p++ = '\xff';
+      *p++ = '\xfe';
  
-      char c1 = *it & 0xff;
-      char c2 = *it >> 8;
+      for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++) {
+        *p++ = static_cast<char>(*it & 0xff);
+        *p++ = static_cast<char>(*it >> 8);
+      }
  
-      v.append(c1);
-      v.append(c2);
+      return v;
      }
-    break;
-  }
    case UTF16BE:
-  {
-    for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++) {
+    {
+      ByteVector v(size() * 2, 0);
+      char *p = v.data();
  
-      char c1 = *it >> 8;
-      char c2 = *it & 0xff;
+      for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++) {
+        *p++ = static_cast<char>(*it >> 8);
+        *p++ = static_cast<char>(*it & 0xff);
+      }
  
-      v.append(c1);
-      v.append(c2);
+      return v;
      }
-    break;
-  }
    case UTF16LE:
-  {
-    for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++) {
+    {
+      ByteVector v(size() * 2, 0);
+      char *p = v.data();
  
-      char c1 = *it & 0xff;
-      char c2 = *it >> 8;
+      for(wstring::const_iterator it = d->data.begin(); it != d->data.end(); it++) {
+        *p++ = static_cast<char>(*it & 0xff);
+        *p++ = static_cast<char>(*it >> 8);
+      }
  
-      v.append(c1);
-      v.append(c2);
+      return v;
+    }
+  default:
+    {
+      debug("String::data() - Invalid Type value.");
+      return ByteVector();
      }
-    break;
-  }
    }
-
-  return v;
  }
  
  int String::toInt() const
@@ -560,7 +577,6 @@ String String::number(int n) // static
  TagLib::wchar &String::operator[](int i)
  {
    detach();
-
    return d->data[i];
  }
  
@@ -638,14 +654,7 @@ String &String::operator=(const std::string &s)
      delete d;
  
    d = new StringPrivate;
-
-  d->data.resize(s.size());
-
-  wstring::iterator targetIt = d->data.begin();
-  for(std::string::const_iterator it = s.begin(); it != s.end(); it++) {
-    *targetIt = uchar(*it);
-    ++targetIt;
-  }
+  copyFromLatin1(s.c_str(), s.length());
  
    return *this;
  }
@@ -690,15 +699,7 @@ String &String::operator=(const char *s)
      delete d;
  
    d = new StringPrivate;
-
-  int length = ::strlen(s);
-  d->data.resize(length);
-
-  wstring::iterator targetIt = d->data.begin();
-  for(int i = 0; i < length; i++) {
-    *targetIt = uchar(s[i]);
-    ++targetIt;
-  }
+  copyFromLatin1(s, ::strlen(s));
  
    return *this;
  }
@@ -709,20 +710,10 @@ String &String::operator=(const ByteVector &v)
      delete d;
  
    d = new StringPrivate;
-  d->data.resize(v.size());
-  wstring::iterator targetIt = d->data.begin();
-
-  uint i = 0;
-
-  for(ByteVector::ConstIterator it = v.begin(); it != v.end() && (*it); ++it) {
-    *targetIt = uchar(*it);
-    ++targetIt;
-    ++i;
-  }
+  copyFromLatin1(v.data(), v.size());
  
    // If we hit a null in the ByteVector, shrink the string again.
-
-  d->data.resize(i);
+  d->data.resize(::wcslen(d->data.c_str()));
  
    return *this;
  }
@@ -748,68 +739,132 @@ void String::detach()
  // private members
  ////////////////////////////////////////////////////////////////////////////////
  
-void String::prepare(Type t)
+void String::copyFromLatin1(const char *s, size_t length)
  {
-  switch(t) {
-  case UTF16:
-  {
-    if(d->data.size() >= 1 && (d->data[0] == 0xfeff || d->data[0] == 0xfffe)) {
-      bool swap = d->data[0] != 0xfeff;
-      d->data.erase(d->data.begin(), d->data.begin() + 1);
-      if(swap) {
-        for(uint i = 0; i < d->data.size(); i++)
-          d->data[i] = byteSwap((unsigned short)d->data[i]);
-      }
-    }
-    else {
-      debug("String::prepare() - Invalid UTF16 string.");
-      d->data.erase(d->data.begin(), d->data.end());
-    }
-    break;
+  d->data.resize(length);
+
+  for(size_t i = 0; i < length; ++i)
+    d->data[i] = static_cast<uchar>(s[i]);
+}
+
+void String::copyFromUTF8(const char *s, size_t length)
+{
+  d->data.resize(length);
+
+#ifdef TAGLIB_USE_CODECVT
+
+  std::mbstate_t st = 0;
+  const char *source;
+  wchar_t *target;
+  std::codecvt_base::result result = utf8_utf16_t().in(
+    st, s, s + length, source, &d->data[0], &d->data[d->data.size()], target);
+
+  if(result != utf8_utf16_t::ok) {
+    debug("String::copyFromUTF8() - Unicode conversion error.");
    }
-  case UTF8:
-  {
-    int bufferSize = d->data.size() + 1;
-    Unicode::UTF8  *sourceBuffer = new Unicode::UTF8[bufferSize];
-    Unicode::UTF16 *targetBuffer = new Unicode::UTF16[bufferSize];
  
-    unsigned int i = 0;
-    for(; i < d->data.size(); i++)
-      sourceBuffer[i] = Unicode::UTF8(d->data[i]);
-    sourceBuffer[i] = 0;
+#else
  
-    const Unicode::UTF8 *source = sourceBuffer;
-    Unicode::UTF16 *target = targetBuffer;
+  const Unicode::UTF8 *source = reinterpret_cast<const Unicode::UTF8 *>(s);
+  Unicode::UTF16 *target = &d->data[0];
  
-    Unicode::ConversionResult result =
-      Unicode::ConvertUTF8toUTF16(&source, sourceBuffer + bufferSize,
-                                  &target, targetBuffer + bufferSize,
-                                  Unicode::lenientConversion);
+  Unicode::ConversionResult result = Unicode::ConvertUTF8toUTF16(
+    &source, source + length,
+    &target, target + length,
+    Unicode::lenientConversion);
  
-    if(result != Unicode::conversionOK) {
-      debug("String::prepare() - Unicode conversion error.");
-    }
+  if(result != Unicode::conversionOK) {
+    debug("String::copyFromUTF8() - Unicode conversion error.");
+  }
  
-    int newSize = target != targetBuffer ? target - targetBuffer - 1 : 0;
-    d->data.resize(newSize);
+#endif
  
-    for(int i = 0; i < newSize; i++)
-      d->data[i] = targetBuffer[i];
+  d->data.resize(::wcslen(d->data.c_str()));
+}
  
-    delete [] sourceBuffer;
-    delete [] targetBuffer;
+void String::copyFromUTF16(const wchar_t *s, size_t length, Type t)
+{
+  bool swap;
+  if(t == UTF16) {
+    if(length >= 1 && s[0] == 0xfeff) 
+      swap = false; // Same as CPU endian. No need to swap bytes.
+    else if(length >= 1 && s[0] == 0xfffe) 
+      swap = true;  // Not same as CPU endian. Need to swap bytes.
+    else {
+      debug("String::copyFromUTF16() - Invalid UTF16 string.");
+      return;
+    }
  
-    break;
+    s++;
+    length--;
    }
-  case UTF16LE:
-  {
-    for(uint i = 0; i < d->data.size(); i++)
-      d->data[i] = byteSwap((unsigned short)d->data[i]);
-    break;
+  else 
+    swap = (t != WCharByteOrder);
+
+  d->data.resize(length);
+  memcpy(&d->data[0], s, length * sizeof(wchar_t));
+
+  if(swap) {
+    for(size_t i = 0; i < length; ++i)
+      d->data[i] = byteSwap(static_cast<unsigned short>(s[i]));
    }
-  default:
-    break;
+}
+
+template <size_t sizeOfWcharT>
+void String::internalCopyFromUTF16(const char *s, size_t length, Type t)
+{
+  // Non-specialized version, used where sizeof(wchar_t) != 2.
+
+  bool swap;
+  if(t == UTF16) {
+    if(length >= 2 && *reinterpret_cast<const TagLib::ushort*>(s) == 0xfeff) 
+      swap = false; // Same as CPU endian. No need to swap bytes.
+    else if(length >= 2 && *reinterpret_cast<const TagLib::ushort*>(s) == 0xfffe) 
+      swap = true;  // Not same as CPU endian. Need to swap bytes.
+    else {
+      debug("String::copyFromUTF16() - Invalid UTF16 string.");
+      return;
+    }
+
+    s += 2;
+    length -= 2;
    }
+  else 
+    swap = (t != WCharByteOrder);
+
+  d->data.resize(length / 2);
+  for(size_t i = 0; i < length / 2; ++i) {
+    d->data[i] = swap ? combine(*s, *(s + 1)) : combine(*(s + 1), *s);
+    s += 2;
+  }
+}
+
+template <>
+void String::internalCopyFromUTF16<2>(const char *s, size_t length, Type t)
+{
+  // Specialized version, used where sizeof(wchar_t) == 2.
+
+  copyFromUTF16(reinterpret_cast<const wchar_t*>(s), length / 2, t);
+}
+
+void String::copyFromUTF16(const char *s, size_t length, Type t)
+{
+  internalCopyFromUTF16<sizeof(wchar_t)>(s, length, t);
+}
+
+#if defined(TAGLIB_LITTLE_ENDIAN)
+
+const String::Type String::WCharByteOrder = String::UTF16LE;
+
+#elif defined(TAGLIB_BIG_ENDIAN)
+
+const String::Type String::WCharByteOrder = String::UTF16BE;
+
+#else
+
+const String::Type String::WCharByteOrder = wcharByteOrder();
+
+#endif
  }
  
  ////////////////////////////////////////////////////////////////////////////////
@@ -818,27 +873,28 @@ void String::prepare(Type t)
  
  const TagLib::String operator+(const TagLib::String &s1, const TagLib::String &s2)
  {
-  String s(s1);
+  TagLib::String s(s1);
    s.append(s2);
    return s;
  }
  
  const TagLib::String operator+(const char *s1, const TagLib::String &s2)
  {
-  String s(s1);
+  TagLib::String s(s1);
    s.append(s2);
    return s;
  }
  
  const TagLib::String operator+(const TagLib::String &s1, const char *s2)
  {
-  String s(s1);
+  TagLib::String s(s1);
    s.append(s2);
    return s;
  }
  
-std::ostream &operator<<(std::ostream &s, const String &str)
+std::ostream &operator<<(std::ostream &s, const TagLib::String &str)
  {
    s << str.to8Bit();
    return s;
  }
+
diff --git a/taglib/toolkit/tstring.h b/taglib/toolkit/tstring.h

index 759a175ae64fd3c331bac57ed71b7b7aded3675d..150d7c37a61765003f38f470aea6e562cb921cbe 100644 (file)
--- a/taglib/toolkit/tstring.h
+++ b/taglib/toolkit/tstring.h
@@ -135,12 +135,12 @@ namespace TagLib {
      /*!
       * Makes a deep copy of the data in \a s.
       */
-    String(const wstring &s, Type t = UTF16BE);
+    String(const wstring &s, Type t = WCharByteOrder);
  
      /*!
       * Makes a deep copy of the data in \a s.
       */
-    String(const wchar_t *s, Type t = UTF16BE);
+    String(const wchar_t *s, Type t = WCharByteOrder);
  
      /*!
       * Makes a deep copy of the data in \a c.
@@ -451,17 +451,42 @@ namespace TagLib {
  
    private:
      /*!
-     * This checks to see if the string is in \e UTF-16 (with BOM) or \e UTF-8
-     * format and if so converts it to \e UTF-16BE for internal use.  \e Latin1
-     * does not require conversion since it is a subset of \e UTF-16BE and
-     * \e UTF16-BE requires no conversion since it is used internally.
+     * Converts a \e Latin-1 string into \e UTF-16 (without BOM, in CPU byte
+     * order) and copies it to the internal buffer.
       */
-    void prepare(Type t);
+    void copyFromLatin1(const char *s, size_t length);
+
+    /*!
+     * Converts a \e UTF-8 string into \e UTF-16 (without BOM, in CPU byte
+     * order) and copies it to the internal buffer.
+     */
+    void copyFromUTF8(const char *s, size_t length);
+
+    /*!
+     * Converts a \e UTF-16 (with BOM), \e UTF-16LE or \e UTF-16BE string into
+     * \e UTF-16 (without BOM, in CPU byte order) and copies it to the internal buffer.
+     */
+    void copyFromUTF16(const wchar_t *s, size_t length, Type t);
+
+    /*!
+     * Converts a \e UTF-16 (with BOM), \e UTF-16LE or \e UTF-16BE string into
+     * \e UTF-16 (without BOM, in CPU byte order) and copies it to the internal buffer.
+     */
+    void copyFromUTF16(const char *s, size_t length, Type t);
+    
+    template <size_t sizeOfWcharT>
+    void internalCopyFromUTF16(const char *s, size_t length, Type t);
+
+    /*!
+     * Indicates which byte order of UTF-16 is used to store strings internally. 
+     *
+     * \note The value is either \e String::UTF16BE or \e String::UTF16LE.
+     */
+    static const Type WCharByteOrder;
  
      class StringPrivate;
      StringPrivate *d;
    };
-
  }
  
  /*!
diff --git a/taglib/toolkit/unicode.h b/taglib/toolkit/unicode.h

index cf7eb3c56944fc3cfc2e6acaa570c1cf6d65f904..b9de0ea21707823b1f1de34574005133e8e87948 100644 (file)
--- a/taglib/toolkit/unicode.h
+++ b/taglib/toolkit/unicode.h
@@ -115,8 +115,8 @@
  namespace Unicode {
  
  typedef unsigned long  UTF32;  /* at least 32 bits */
-typedef unsigned short UTF16;  /* at least 16 bits */
-typedef unsigned char  UTF8;   /* typically 8 bits */
+typedef wchar_t              UTF16;    /* TagLib assumes that wchar_t is sufficient for UTF-16. */
+typedef unsigned char  UTF8;     /* typically 8 bits */
  typedef unsigned char  Boolean; /* 0 or 1 */
  
  typedef enum {
author	Tsuda Kageyu <tsuda.kageyu@gmail.com>
	Sun, 14 Apr 2013 20:03:54 +0000 (05:03 +0900)
committer	Tsuda Kageyu <tsuda.kageyu@gmail.com>
	Sun, 14 Apr 2013 20:03:54 +0000 (05:03 +0900)
taglib/toolkit/taglib.h		patch \| blob \| history
taglib/toolkit/tstring.cpp		patch \| blob \| history
taglib/toolkit/tstring.h		patch \| blob \| history
taglib/toolkit/unicode.h		patch \| blob \| history