From 63e6b0b5bf8049ab2d80863cc132ea667f7bbf76 Mon Sep 17 00:00:00 2001 From: Hartmut Holzgraefe Date: Wed, 13 Dec 2000 23:26:19 +0000 Subject: [PATCH] levenshtein() fixed, regression tests added (bug id #6562 and #7368) # fallback to unoptimized version for 4.0.4 release --- ext/standard/levenshtein.c | 198 ++++-------------- ext/standard/tests/general_functions/003.phpt | 54 +++++ 2 files changed, 97 insertions(+), 155 deletions(-) create mode 100644 ext/standard/tests/general_functions/003.phpt diff --git a/ext/standard/levenshtein.c b/ext/standard/levenshtein.c index d157cf7fab..080c86a327 100644 --- a/ext/standard/levenshtein.c +++ b/ext/standard/levenshtein.c @@ -23,162 +23,47 @@ #include #include "php_string.h" -/* faster, but obfuscated, all operations have a cost of 1 */ -static int fastest_levdist(const char *s1, const char *s2) +/* reference implementation, only optimized for memory usage, not speed */ +static int reference_levdist(const char *s1, int l1, + const char *s2, int l2, + int cost_ins, int cost_rep, int cost_del ) { - register char *p1,*p2; - register int i,j,n; - int l1=0,l2=0; - char r[512]; - const char *tmp; - - /* skip equal start sequence, if any */ - while(*s1==*s2) { - if(!*s1) break; - s1++; s2++; - } - - /* if we already used up one string, then - the result is the length of the other */ - if(*s1=='\0') return strlen(s2); - if(*s2=='\0') return strlen(s1); - - /* length count */ - while(*s1++) l1++; - while(*s2++) l2++; - - /* cut of equal tail sequence, if any */ - while(*--s1 == *--s2) { - l1--; l2--; - } - - - /* reset pointers, adjust length */ - s1-=l1++; - s2-=l2++; + int *p1,*p2,*tmp; + int i1,i2,c0,c1,c2; + if(l1==0) return l2*cost_ins; + if(l2==0) return l1*cost_del; + + p1=malloc(l2*sizeof(int)); + p2=malloc(l2*sizeof(int)); + + p1[0]=(s1[0]==s2[0])?0:cost_rep; + + for(i2=1;i2=255) return -1; + c0=p1[l2-1]; - /* swap if l2 longer than l1 */ - if(l1=255) return -1; - - /* swap if l2 longer than l1 */ - if(l1value.str.val, (*str2)->value.str.val); + + distance = reference_levdist((*str1)->value.str.val,(*str1)->value.str.len, + (*str2)->value.str.val,(*str2)->value.str.len, + 1,1,1); + break; case 5: /* more general version: calc cost by ins/rep/del weights */ @@ -216,11 +104,11 @@ PHP_FUNCTION(levenshtein) convert_to_long_ex(cost_rep); convert_to_long_ex(cost_del); - distance = weighted_levdist((*str1)->value.str.val - , (*str2)->value.str.val - , (*cost_ins)->value.lval - , (*cost_rep)->value.lval - , (*cost_del)->value.lval + distance = reference_levdist((*str1)->value.str.val,(*str1)->value.str.len, + (*str2)->value.str.val,(*str2)->value.str.len, + (*cost_ins)->value.lval, + (*cost_rep)->value.lval, + (*cost_del)->value.lval ); break; diff --git a/ext/standard/tests/general_functions/003.phpt b/ext/standard/tests/general_functions/003.phpt new file mode 100644 index 0000000000..22cbd4eb4e --- /dev/null +++ b/ext/standard/tests/general_functions/003.phpt @@ -0,0 +1,54 @@ +--TEST-- +levenshtein() function test +--POST-- +--GET-- +--FILE-- + +--EXPECT-- +all passed -- 2.40.0