From: tsuda.kageyu@gmail.com Date: Thu, 23 Aug 2012 02:40:32 +0000 (+0900) Subject: Support broken Latin-1 encodings in ID3V2 X-Git-Tag: v1.8~12^2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=fe8053c7d57c217fbce584fafada7a9b734fb171;p=taglib Support broken Latin-1 encodings in ID3V2 --- diff --git a/taglib/mpeg/id3v2/frames/commentsframe.cpp b/taglib/mpeg/id3v2/frames/commentsframe.cpp index 2c6c49f9..ec70e891 100644 --- a/taglib/mpeg/id3v2/frames/commentsframe.cpp +++ b/taglib/mpeg/id3v2/frames/commentsframe.cpp @@ -158,8 +158,13 @@ void CommentsFrame::parseFields(const ByteVector &data) ByteVectorList l = ByteVectorList::split(data.mid(4), textDelimiter(d->textEncoding), byteAlign, 2); if(l.size() == 2) { - d->description = String(l.front(), d->textEncoding); - d->text = String(l.back(), d->textEncoding); + if(d->textEncoding == String::Latin1) { + d->description = Tag::latin1StringHandler()->parse(l.front()); + d->text = Tag::latin1StringHandler()->parse(l.back()); + } else { + d->description = String(l.front(), d->textEncoding); + d->text = String(l.back(), d->textEncoding); + } } } diff --git a/taglib/mpeg/id3v2/frames/textidentificationframe.cpp b/taglib/mpeg/id3v2/frames/textidentificationframe.cpp index b6a02b06..d9f696b7 100644 --- a/taglib/mpeg/id3v2/frames/textidentificationframe.cpp +++ b/taglib/mpeg/id3v2/frames/textidentificationframe.cpp @@ -213,8 +213,10 @@ void TextIdentificationFrame::parseFields(const ByteVector &data) for(ByteVectorList::Iterator it = l.begin(); it != l.end(); it++) { if(!(*it).isEmpty()) { - String s(*it, d->textEncoding); - d->fieldList.append(s); + if(d->textEncoding == String::Latin1) + d->fieldList.append(Tag::latin1StringHandler()->parse(*it)); + else + d->fieldList.append(String(*it, d->textEncoding)); } } } diff --git a/taglib/mpeg/id3v2/frames/unsynchronizedlyricsframe.cpp b/taglib/mpeg/id3v2/frames/unsynchronizedlyricsframe.cpp index 9d76164d..0b140912 100644 --- 
a/taglib/mpeg/id3v2/frames/unsynchronizedlyricsframe.cpp +++ b/taglib/mpeg/id3v2/frames/unsynchronizedlyricsframe.cpp @@ -158,8 +158,13 @@ void UnsynchronizedLyricsFrame::parseFields(const ByteVector &data) ByteVectorList::split(data.mid(4), textDelimiter(d->textEncoding), byteAlign, 2); if(l.size() == 2) { - d->description = String(l.front(), d->textEncoding); - d->text = String(l.back(), d->textEncoding); + if(d->textEncoding == String::Latin1) { + d->description = Tag::latin1StringHandler()->parse(l.front()); + d->text = Tag::latin1StringHandler()->parse(l.back()); + } else { + d->description = String(l.front(), d->textEncoding); + d->text = String(l.back(), d->textEncoding); + } } } diff --git a/taglib/mpeg/id3v2/id3v2frame.cpp b/taglib/mpeg/id3v2/id3v2frame.cpp index 9c9c640c..e15adc8c 100644 --- a/taglib/mpeg/id3v2/id3v2frame.cpp +++ b/taglib/mpeg/id3v2/id3v2frame.cpp @@ -36,6 +36,7 @@ #include #include +#include "id3v2tag.h" #include "id3v2frame.h" #include "id3v2synchdata.h" #include "tpropertymap.h" @@ -273,7 +274,11 @@ String Frame::readStringField(const ByteVector &data, String::Type encoding, int if(end < *position) return String::null; - String str = String(data.mid(*position, end - *position), encoding); + String str; + if(encoding == String::Latin1) + str = Tag::latin1StringHandler()->parse(data.mid(*position, end - *position)); + else + str = String(data.mid(*position, end - *position), encoding); *position = end + delimiter.size(); diff --git a/taglib/mpeg/id3v2/id3v2tag.cpp b/taglib/mpeg/id3v2/id3v2tag.cpp index 662fd159..8b623f82 100644 --- a/taglib/mpeg/id3v2/id3v2tag.cpp +++ b/taglib/mpeg/id3v2/id3v2tag.cpp @@ -70,8 +70,26 @@ public: FrameListMap frameListMap; FrameList frameList; + + static const Latin1StringHandler *stringHandler; }; +static const Latin1StringHandler defaultStringHandler; +const ID3v2::Latin1StringHandler *ID3v2::Tag::TagPrivate::stringHandler = &defaultStringHandler; + 
+//////////////////////////////////////////////////////////////////////////////// +// Latin1StringHandler implementation +//////////////////////////////////////////////////////////////////////////////// + +Latin1StringHandler::~Latin1StringHandler() +{ +} + +String Latin1StringHandler::parse(const ByteVector &data) const +{ + return String(data, String::Latin1); +} + //////////////////////////////////////////////////////////////////////////////// // public members //////////////////////////////////////////////////////////////////////////////// @@ -584,6 +602,19 @@ ByteVector ID3v2::Tag::render(int version) const return d->header.render() + tagData; } +Latin1StringHandler const *ID3v2::Tag::latin1StringHandler() +{ + return TagPrivate::stringHandler; +} + +void ID3v2::Tag::setLatin1StringHandler(const Latin1StringHandler *handler) +{ + if(handler) + TagPrivate::stringHandler = handler; + else + TagPrivate::stringHandler = &defaultStringHandler; +} + //////////////////////////////////////////////////////////////////////////////// // protected members //////////////////////////////////////////////////////////////////////////////// diff --git a/taglib/mpeg/id3v2/id3v2tag.h b/taglib/mpeg/id3v2/id3v2tag.h index 94784e76..58bc5b5a 100644 --- a/taglib/mpeg/id3v2/id3v2tag.h +++ b/taglib/mpeg/id3v2/id3v2tag.h @@ -57,6 +57,35 @@ namespace TagLib { typedef List<Frame *> FrameList; typedef Map<ByteVector, FrameList> FrameListMap; + //! An abstraction for the ISO-8859-1 string to data encoding in ID3v2 tags. + + /*! + * ID3v2 tags can store strings in ISO-8859-1 (Latin1), and TagLib only + * supports genuine ISO-8859-1 by default. However, in practice, non + * ISO-8859-1 encodings are often used instead of ISO-8859-1, such as + * Windows-1252 for western languages, Shift_JIS for Japanese and so on. + * + * Here is an option to read such tags by subclassing this class, + * reimplementing parse() and setting your reimplementation as the default + * with ID3v2::Tag::setLatin1StringHandler(). 
+ * + * \note Writing non-ISO-8859-1 tags is intentionally not implemented. + * Use UTF-16 or UTF-8 instead. + * + * \see ID3v2::Tag::setLatin1StringHandler() + */ + class TAGLIB_EXPORT Latin1StringHandler + { + public: + virtual ~Latin1StringHandler(); + + /*! + * Decode a string from \a data. The default implementation assumes that + * \a data is an ISO-8859-1 (Latin1) character array. + */ + virtual String parse(const ByteVector &data) const; + }; + //! The main class in the ID3v2 implementation /*! @@ -323,6 +352,25 @@ namespace TagLib { */ // BIC: combine with the above method ByteVector render(int version) const; + + /*! + * Gets the current string handler that decides how the "Latin-1" data + * will be decoded from binary data. + * + * \see Latin1StringHandler + */ + static Latin1StringHandler const *latin1StringHandler(); + + /*! + * Sets the string handler that decides how the "Latin-1" data will be + * decoded from binary data. + * + * If the parameter \a handler is null, the default ISO-8859-1 handler + * is restored. + * + * \see Latin1StringHandler + */ + static void setLatin1StringHandler(const Latin1StringHandler *handler); protected: /*!