From: Tsuda Kageyu Date: Thu, 19 Nov 2015 08:29:25 +0000 (+0900) Subject: Efficient lookup for an ID3v2 tag in a MPEG file. X-Git-Tag: v1.11beta~124^2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e10684312e641f0d4da668a2851e97e174f8abee;p=taglib Efficient lookup for an ID3v2 tag in a MPEG file. An ID3v2 tag or MPEG frame is most likely be at the beginning of the file. --- diff --git a/taglib/mpeg/mpegfile.cpp b/taglib/mpeg/mpegfile.cpp index c5f9bbf6..8b7af692 100644 --- a/taglib/mpeg/mpegfile.cpp +++ b/taglib/mpeg/mpegfile.cpp @@ -489,7 +489,7 @@ void MPEG::File::read(bool readProperties) { // Look for an ID3v2 tag - d->ID3v2Location = findID3v2(0); + d->ID3v2Location = findID3v2(); if(d->ID3v2Location >= 0) { @@ -532,115 +532,40 @@ void MPEG::File::read(bool readProperties) ID3v1Tag(true); } -long MPEG::File::findID3v2(long offset) +long MPEG::File::findID3v2() { - // This method is based on the contents of TagLib::File::find(), but because - // of some subtleties -- specifically the need to look for the bit pattern of - // an MPEG sync, it has been modified for use here. + if(!isValid()) + return -1; - if(isValid() && ID3v2::Header::fileIdentifier().size() <= bufferSize()) { + // An ID3v2 tag or MPEG frame is most likely be at the beginning of the file. - // The position in the file that the current buffer starts at. + const ByteVector headerID = ID3v2::Header::fileIdentifier(); - long bufferOffset = 0; - ByteVector buffer; + seek(0); - // These variables are used to keep track of a partial match that happens at - // the end of a buffer. + const ByteVector data = readBlock(headerID.size()); + if(data.size() < headerID.size()) + return -1; - int previousPartialMatch = -1; - bool previousPartialSynchMatch = false; + if(data == headerID) + return 0; - // Save the location of the current read pointer. We will restore the - // position using seek() before all returns. + if(firstSyncByte(data[0]) && secondSynchByte(data[1])) + return -1; - long originalPosition = tell(); + // Look for the entire file, if neither an MEPG frame or ID3v2 tag was found + // at the beginning of the file. + // We don't care about the inefficiency of the code, since this is a seldom case. - // Start the search at the offset. + const long tagOffset = find(headerID); + if(tagOffset < 0) + return -1; - seek(offset); + const long frameOffset = firstFrameOffset(); + if(frameOffset < tagOffset) + return -1; - // This loop is the crux of the find method. There are three cases that we - // want to account for: - // (1) The previously searched buffer contained a partial match of the search - // pattern and we want to see if the next one starts with the remainder of - // that pattern. - // - // (2) The search pattern is wholly contained within the current buffer. - // - // (3) The current buffer ends with a partial match of the pattern. We will - // note this for use in the next iteration, where we will check for the rest - // of the pattern. - - for(buffer = readBlock(bufferSize()); buffer.size() > 0; buffer = readBlock(bufferSize())) { - - // (1) previous partial match - - if(previousPartialSynchMatch && secondSynchByte(buffer[0])) - return -1; - - if(previousPartialMatch >= 0 && int(bufferSize()) > previousPartialMatch) { - const int patternOffset = (bufferSize() - previousPartialMatch); - if(buffer.containsAt(ID3v2::Header::fileIdentifier(), 0, patternOffset)) { - seek(originalPosition); - return offset + bufferOffset - bufferSize() + previousPartialMatch; - } - } - - // (2) pattern contained in current buffer - - long location = buffer.find(ID3v2::Header::fileIdentifier()); - if(location >= 0) { - seek(originalPosition); - return offset + bufferOffset + location; - } - - int firstSynchByte = buffer.find(char(uchar(255))); - - // Here we have to loop because there could be several of the first - // (11111111) byte, and we want to check all such instances until we find - // a full match (11111111 111) or hit the end of the buffer. - - while(firstSynchByte >= 0) { - - // if this *is not* at the end of the buffer - - if(firstSynchByte < int(buffer.size()) - 1) { - if(secondSynchByte(buffer[firstSynchByte + 1])) { - // We've found the frame synch pattern. - seek(originalPosition); - return -1; - } - else { - - // We found 11111111 at the end of the current buffer indicating a - // partial match of the synch pattern. The find() below should - // return -1 and break out of the loop. - - previousPartialSynchMatch = true; - } - } - - // Check in the rest of the buffer. - - firstSynchByte = buffer.find(char(uchar(255)), firstSynchByte + 1); - } - - // (3) partial match - - previousPartialMatch = buffer.endsWithPartialMatch(ID3v2::Header::fileIdentifier()); - - bufferOffset += bufferSize(); - } - - // Since we hit the end of the file, reset the status before continuing. - - clear(); - - seek(originalPosition); - } - - return -1; + return tagOffset; } long MPEG::File::findID3v1() diff --git a/taglib/mpeg/mpegfile.h b/taglib/mpeg/mpegfile.h index 858a6a5c..8478d093 100644 --- a/taglib/mpeg/mpegfile.h +++ b/taglib/mpeg/mpegfile.h @@ -390,7 +390,7 @@ namespace TagLib { File &operator=(const File &); void read(bool readProperties); - long findID3v2(long offset); + long findID3v2(); long findID3v1(); void findAPE();