From 0eb51100b77996efbf0a728efe55ded996814025 Mon Sep 17 00:00:00 2001 From: Hartmut Holzgraefe Date: Tue, 23 May 2000 14:10:37 +0000 Subject: [PATCH] added function "int levdist(string str1, string str2)" that will calculate the Levenshtein distance between two strings (faster and possibly more accurate than similar_text()) --- ext/standard/Makefile.in | 2 +- ext/standard/basic_functions.c | 1 + ext/standard/levenshtein.c | 117 +++++++++++++++++++++++++++++++++ ext/standard/php_string.h | 1 + 4 files changed, 120 insertions(+), 1 deletion(-) create mode 100644 ext/standard/levenshtein.c diff --git a/ext/standard/Makefile.in b/ext/standard/Makefile.in index 221922f52c..d83ed1a49a 100644 --- a/ext/standard/Makefile.in +++ b/ext/standard/Makefile.in @@ -7,7 +7,7 @@ LTLIBRARY_SOURCES=\ link.c mail.c math.c md5.c metaphone.c microtime.c pack.c pageinfo.c \ parsedate.c quot_print.c rand.c reg.c soundex.c string.c \ syslog.c type.c uniqid.c url.c url_scanner.c var.c output.c assert.c \ - strnatcmp.c + strnatcmp.c levenshtein.c include $(top_srcdir)/build/dynlib.mk diff --git a/ext/standard/basic_functions.c b/ext/standard/basic_functions.c index 4a1aa32a0d..8a45900d67 100644 --- a/ext/standard/basic_functions.c +++ b/ext/standard/basic_functions.c @@ -162,6 +162,7 @@ function_entry basic_functions[] = { PHP_FE(implode, NULL) PHP_FE(setlocale, NULL) PHP_FE(soundex, NULL) + PHP_FE(levdist, NULL) PHP_FE(chr, NULL) PHP_FE(ord, NULL) PHP_FE(parse_str, NULL) diff --git a/ext/standard/levenshtein.c b/ext/standard/levenshtein.c new file mode 100644 index 0000000000..946f36466c --- /dev/null +++ b/ext/standard/levenshtein.c @@ -0,0 +1,117 @@ +/* + +----------------------------------------------------------------------+ + | PHP version 4.0 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997, 1998, 1999, 2000 The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 2.02 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available at through the world-wide-web at | + | http://www.php.net/license/2_02.txt. | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Author: Bjørn Borud - Guardian Networks AS | + +----------------------------------------------------------------------+ + */ +/* $Id$ */ + +#include "php.h" +#include +#include +#include +#include "php_string.h" + +int calc_levdist(const char *s1, const char *s2) /* faster, but obfuscated */ +{ + register char *p1,*p2; + register int i,j,n; + int l1=0,l2=0; + char r[512]; + + /* skip equal start sequence, if any */ + while(*s1==*s2) { + if(!*s1) break; + s1++; s2++; + } + + /* if we already used up one string, then + the result is the length of the other */ + if(*s1=='\0') return strlen(s2); + if(*s2=='\0') return strlen(s1); + + /* length count */ + while(*s1++) l1++; + while(*s2++) l2++; + + /* cut of equal tail sequence, if any */ + while(*--s1 == *--s2) { + l1--; l2--; + } + + + /* reset pointers, adjust length */ + s1-=l1++; + s2-=l2++; + + /* possible dist to great? */ + if(abs(l1-l2)>=255) return -1; + + /* swap if l2 longer than l1 */ + if(l1value.str.val, (*str2)->value.str.val); + + if(l<0) { + php_error(E_WARNING,"levdist(): argument string(s) to long"); + } + + RETURN_LONG(l); +} +/* }}} */ + diff --git a/ext/standard/php_string.h b/ext/standard/php_string.h index e71cf8f997..537c405fe7 100644 --- a/ext/standard/php_string.h +++ b/ext/standard/php_string.h @@ -43,6 +43,7 @@ PHP_FUNCTION(chop); PHP_FUNCTION(trim); PHP_FUNCTION(ltrim); PHP_FUNCTION(soundex); +PHP_FUNCTION(levdist); PHP_FUNCTION(count_chars); PHP_FUNCTION(explode); -- 2.50.1