From 8ec9f097b67b41429530eda5914f9df2611d3529 Mon Sep 17 00:00:00 2001 From: Rolland Santimano Date: Fri, 30 Sep 2005 06:20:47 +0000 Subject: [PATCH] - Unicode impl of levenshtein() --- ext/standard/levenshtein.c | 132 ++++++++++++++++++++----------------- 1 file changed, 73 insertions(+), 59 deletions(-) diff --git a/ext/standard/levenshtein.c b/ext/standard/levenshtein.c index fb1db6a967..08213516c1 100644 --- a/ext/standard/levenshtein.c +++ b/ext/standard/levenshtein.c @@ -27,39 +27,58 @@ /* {{{ reference_levdist * reference implementation, only optimized for memory usage, not speed */ -static int reference_levdist(const char *s1, int l1, - const char *s2, int l2, - int cost_ins, int cost_rep, int cost_del ) +static int reference_levdist(void *s1, int32_t l1, void *s2, int32_t l2, zend_uchar str_type, int cost_ins, int cost_rep, int cost_del ) { int *p1, *p2, *tmp; - int i1, i2, c0, c1, c2; - - if(l1==0) return l2*cost_ins; - if(l2==0) return l1*cost_del; + int32_t i1, i2, j1, j2, cp1, cp2; + int32_t c0, c1, c2; + UChar32 ch1, ch2; + + if (str_type == IS_UNICODE) { + cp1 = u_countChar32((UChar *)s1, l1); + cp2 = u_countChar32((UChar *)s2, l2); + + if (cp1 == 0) return cp2*cost_ins; + if (cp2 == 0) return cp1*cost_del; + if ((cp1>LEVENSHTEIN_MAX_LENTH)||(cp2>LEVENSHTEIN_MAX_LENTH)) { + return -1; + } - if((l1>LEVENSHTEIN_MAX_LENTH)||(l2>LEVENSHTEIN_MAX_LENTH)) - return -1; + p1 = safe_emalloc((cp2+1), sizeof(int), 0); + p2 = safe_emalloc((cp2+1), sizeof(int), 0); + } else { + if (l1 == 0) return l2*cost_ins; + if (l2 == 0) return l1*cost_del; + if ((l1>LEVENSHTEIN_MAX_LENTH)||(l2>LEVENSHTEIN_MAX_LENTH)) { + return -1; + } - p1 = safe_emalloc((l2+1), sizeof(int), 0); - p2 = safe_emalloc((l2+1), sizeof(int), 0); + p1 = safe_emalloc((l2+1), sizeof(int), 0); + p2 = safe_emalloc((l2+1), sizeof(int), 0); + } - for(i2=0;i2<=l2;i2++) + for (i2 = 0 ; i2 <= l2 ; i2++) p1[i2] = i2*cost_ins; - for(i1=0;i1