]> granicus.if.org Git - php/commitdiff
added function "int levdist(string str1, string str2)"
authorHartmut Holzgraefe <hholzgra@php.net>
Tue, 23 May 2000 14:10:37 +0000 (14:10 +0000)
committerHartmut Holzgraefe <hholzgra@php.net>
Tue, 23 May 2000 14:10:37 +0000 (14:10 +0000)
that will calculate the Levenshtein distance between two
strings (faster and possibly more accurate than similar_text())

ext/standard/Makefile.in
ext/standard/basic_functions.c
ext/standard/levenshtein.c [new file with mode: 0644]
ext/standard/php_string.h

index 221922f52cbe6c9de7ce09d1c822655006279745..d83ed1a49af7fe524d6e080046d7b873b85d00f6 100644 (file)
@@ -7,7 +7,7 @@ LTLIBRARY_SOURCES=\
        link.c mail.c math.c md5.c metaphone.c microtime.c pack.c pageinfo.c \
        parsedate.c quot_print.c rand.c reg.c soundex.c string.c \
        syslog.c type.c uniqid.c url.c url_scanner.c var.c output.c assert.c \
-       strnatcmp.c
+       strnatcmp.c levenshtein.c
 
 include $(top_srcdir)/build/dynlib.mk
 
index 4a1aa32a0dbed15f35870a5aff3b80193e57f3ca..8a45900d6706b1795899bfb409698dfa05dbc6d2 100644 (file)
@@ -162,6 +162,7 @@ function_entry basic_functions[] = {
        PHP_FE(implode,                                                                 NULL)
        PHP_FE(setlocale,                                                               NULL)
        PHP_FE(soundex,                                                                 NULL)
+       PHP_FE(levdist,                                                                 NULL)
        PHP_FE(chr,                                                                             NULL)
        PHP_FE(ord,                                                                             NULL)
        PHP_FE(parse_str,                                                               NULL)
diff --git a/ext/standard/levenshtein.c b/ext/standard/levenshtein.c
new file mode 100644 (file)
index 0000000..946f364
--- /dev/null
@@ -0,0 +1,117 @@
+/*
+   +----------------------------------------------------------------------+
+   | PHP version 4.0                                                      |
+   +----------------------------------------------------------------------+
+   | Copyright (c) 1997, 1998, 1999, 2000 The PHP Group                   |
+   +----------------------------------------------------------------------+
+   | This source file is subject to version 2.02 of the PHP license,      |
+   | that is bundled with this package in the file LICENSE, and is        |
+   | available at through the world-wide-web at                           |
+   | http://www.php.net/license/2_02.txt.                                 |
+   | If you did not receive a copy of the PHP license and are unable to   |
+   | obtain it through the world-wide-web, please send a note to          |
+   | license@php.net so we can mail you a copy immediately.               |
+   +----------------------------------------------------------------------+
+   | Author: Bjørn Borud - Guardian Networks AS <borud@guardian.no>       |
+   +----------------------------------------------------------------------+
+ */
+/* $Id$ */
+
+#include "php.h"
+#include <stdlib.h>
+#include <errno.h>
+#include <ctype.h>
+#include "php_string.h"
+
+int calc_levdist(const char *s1, const char *s2) /* faster, but obfuscated */
+{
+       register char *p1,*p2;
+       register int i,j,n;
+       int l1=0,l2=0;
+       char r[512];
+
+       /* skip equal start sequence, if any */
+       while(*s1==*s2) {
+               if(!*s1) break;
+               s1++; s2++;
+       }
+       
+       /* if we already used up one string, then
+      the result is the length of the other */
+       if(*s1=='\0') return strlen(s2);
+       if(*s2=='\0') return strlen(s1);
+
+       /* length count */
+       while(*s1++) l1++;
+       while(*s2++) l2++;
+       
+       /* cut of equal tail sequence, if any */
+       while(*--s1 == *--s2) {
+               l1--; l2--;             
+       }
+       
+
+       /* reset pointers, adjust length */
+       s1-=l1++;
+       s2-=l2++;
+
+       /* possible dist to great? */
+       if(abs(l1-l2)>=255) return -1;
+
+       /* swap if l2 longer than l1 */
+       if(l1<l2) {
+               (long)s1 ^= (long)s2; (long)s2 ^= (long)s1; (long)s1 ^= (long)s2;
+               l1 ^= l2; l2 ^= l1; l1 ^= l2;
+       }
+
+       
+       /* fill initial row */
+       n=(*s1!=*s2);
+       for(i=0,p1=r;i<l1;i++,*p1++=n++,p1++) {/*empty*/}
+       
+       /* calc. rowwise */
+       for(j=1;j<l2;j++) {
+               /* init pointers and col#0 */
+               p1 = r + !(j&1);
+               p2 = r + (j&1);
+               n=*p1+1;
+               *p2++=n;p2++;
+               s2++;
+               
+               /* foreach column */
+               for(i=1;i<l1;i++) {
+                       if(*p1<n) n=*p1+(*(s1+i)!=*(s2)); /* replace cheaper than delete? */
+                       p1++;
+                       if(*++p1<n) n=*p1+1; /* insert cheaper then insert ? */
+                       *p2++=n++; /* update field and cost for next col's delete */
+                       p2++;
+               }       
+       }
+
+       /* return result */
+       return n-1;
+}
+
+
+/* {{{ proto int levdist(string str1, string str2)
+   Calculate Levenshtein distance between two strings */
+PHP_FUNCTION(levdist){
+       zval **str1, **str2;
+       int l;
+
+       if (ARG_COUNT(ht) != 2 || zend_get_parameters_ex(2, &str1, &str2) == FAILURE) {
+               WRONG_PARAM_COUNT;
+       }
+       convert_to_string_ex(str1);
+       convert_to_string_ex(str2);
+       
+       l = calc_levdist((*str1)->value.str.val, (*str2)->value.str.val);
+
+       if(l<0) {
+               php_error(E_WARNING,"levdist(): argument string(s) to long");
+       }
+
+       RETURN_LONG(l);
+}
+/* }}} */
+
index e71cf8f997b3804b855d9d2c1f991059927f6701..537c405fe79a8b3a83e12889fe8513480be8fad3 100644 (file)
@@ -43,6 +43,7 @@ PHP_FUNCTION(chop);
 PHP_FUNCTION(trim);
 PHP_FUNCTION(ltrim);
 PHP_FUNCTION(soundex);
+PHP_FUNCTION(levdist);
 
 PHP_FUNCTION(count_chars);
 PHP_FUNCTION(explode);