From 3bce77a3591f012d924d001a9fe7d263e797f1a6 Mon Sep 17 00:00:00 2001 From: Tsuda Kageyu Date: Mon, 22 Jun 2015 10:37:20 +0900 Subject: [PATCH] Use linear search instead of the Knuth-Morris-Pratt algorithm in ByteVector::find(). In almost all cases, it handles too small data for KMP to work effectively. --- taglib/toolkit/tbytevector.cpp | 51 ++++++++++------------------------ 1 file changed, 15 insertions(+), 36 deletions(-) diff --git a/taglib/toolkit/tbytevector.cpp b/taglib/toolkit/tbytevector.cpp index 70939dda..636a1a34 100644 --- a/taglib/toolkit/tbytevector.cpp +++ b/taglib/toolkit/tbytevector.cpp @@ -99,10 +99,6 @@ static const uint crcTable[256] = { 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4 }; -/*! - * A templatized straightforward find that works with the types - * std::vector::iterator and std::vector::reverse_iterator. - */ template int findChar( const TIterator dataBegin, const TIterator dataEnd, @@ -125,10 +121,6 @@ int findChar( return -1; } -/*! - * A templatized KMP find that works with the types - * std::vector::iterator and std::vector::reverse_iterator. - */ template int findVector( const TIterator dataBegin, const TIterator dataEnd, @@ -140,46 +132,33 @@ int findVector( if(patternSize == 0 || offset + patternSize > dataSize) return -1; - // n % 0 is invalid - - if(byteAlign == 0) - return -1; - // Special case that pattern contains just single char. if(patternSize == 1) return findChar(dataBegin, dataEnd, *patternBegin, offset, byteAlign); - size_t lastOccurrence[256]; + // n % 0 is invalid - for(size_t i = 0; i < 256; ++i) - lastOccurrence[i] = patternSize; + if(byteAlign == 0) + return -1; - for(size_t i = 0; i < patternSize - 1; ++i) - lastOccurrence[static_cast(*(patternBegin + i))] = patternSize - i - 1; + // We don't use sophisticated algorithms like Knuth-Morris-Pratt here. - TIterator it = dataBegin + patternSize - 1 + offset; - while(true) { - TIterator itBuffer = it; - TIterator itPattern = patternBegin + patternSize - 1; + // In the current implementation of TagLib, data and patterns are too small + // for such algorithms to work effectively. - while(*itBuffer == *itPattern) { - if(itPattern == patternBegin) { - if((itBuffer - dataBegin - offset) % byteAlign == 0) - return (itBuffer - dataBegin); - else - break; - } + for(TIterator it = dataBegin + offset; it < dataEnd - patternSize + 1; it += byteAlign) { - --itBuffer; - --itPattern; - } + TIterator itData = it; + TIterator itPattern = patternBegin; - const size_t step = lastOccurrence[static_cast(*it)]; - if(dataEnd - step <= it) - break; + while(*itData == *itPattern) { + ++itData; + ++itPattern; - it += step; + if(itPattern == patternEnd) + return (it - dataBegin); + } } return -1; -- 2.40.0