From eb38a6f82924cb5ad5ed3b611fa2364dcc4ba5eb Mon Sep 17 00:00:00 2001 From: Christos Zoulas Date: Thu, 5 Dec 2013 17:02:34 +0000 Subject: [PATCH] locale and strcasestr changes --- ChangeLog | 6 ++++ configure.ac | 2 +- src/file.h | 15 +++++---- src/funcs.c | 16 +++++++--- src/readcdf.c | 14 +++++++-- src/softmagic.c | 13 +++++--- src/strcasestr.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 129 insertions(+), 19 deletions(-) create mode 100644 src/strcasestr.c diff --git a/ChangeLog b/ChangeLog index 2580d22e..8e424d53 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2013-12-05 12:00 Christos Zoulas + + * use strcasestr() for cdf strings + * reset to the "C" locale while doing regex operations, or case + insensitive comparisons; this is provisional + 2013-11-19 20:10 Christos Zoulas * always leave magic file loaded, don't unload for magic_check, etc. diff --git a/configure.ac b/configure.ac index cff2e002..cfc74b1f 100644 --- a/configure.ac +++ b/configure.ac @@ -139,7 +139,7 @@ dnl Checks for functions AC_CHECK_FUNCS(strerror strndup strtoul mkstemp mkostemp utimes utime wcwidth strtof) dnl Provide implementation of some required functions if necessary -AC_REPLACE_FUNCS(getopt_long asprintf vasprintf strlcpy strlcat getline ctime_r asctime_r pread) +AC_REPLACE_FUNCS(getopt_long asprintf vasprintf strlcpy strlcat getline ctime_r asctime_r pread strcasestr) dnl Checks for libraries AC_CHECK_LIB(z,gzopen) diff --git a/src/file.h b/src/file.h index b08a0806..5faa9ae5 100644 --- a/src/file.h +++ b/src/file.h @@ -27,7 +27,7 @@ */ /* * file.h - definitions for file(1) program - * @(#)$File: file.h,v 1.144 2013/02/18 15:40:59 christos Exp $ + * @(#)$File: file.h,v 1.145 2013/04/22 15:30:11 christos Exp $ */ #ifndef __file_h__ @@ -491,18 +491,21 @@ ssize_t pread(int, void *, size_t, off_t); int vasprintf(char **, const char *, va_list); #endif #ifndef HAVE_ASPRINTF -int asprintf(char **ptr, const char *format_string, ...); +int 
asprintf(char **, const char *, ...); #endif #ifndef HAVE_STRLCPY -size_t strlcpy(char *dst, const char *src, size_t siz); +size_t strlcpy(char *, const char *, size_t); #endif #ifndef HAVE_STRLCAT -size_t strlcat(char *dst, const char *src, size_t siz); +size_t strlcat(char *, const char *, size_t); +#endif +#ifndef HAVE_STRCASESTR +char *strcasestr(const char *, const char *); #endif #ifndef HAVE_GETLINE -ssize_t getline(char **dst, size_t *len, FILE *fp); -ssize_t getdelim(char **dst, size_t *len, int delimiter, FILE *fp); +ssize_t getline(char **, size_t *, FILE *); +ssize_t getdelim(char **, size_t *, int, FILE *); #endif #ifndef HAVE_CTIME_R char *ctime_r(const time_t *, char *); diff --git a/src/funcs.c b/src/funcs.c index 1068d04d..90738f05 100644 --- a/src/funcs.c +++ b/src/funcs.c @@ -27,7 +27,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: funcs.c,v 1.63 2013/09/03 08:31:48 christos Exp $") +FILE_RCSID("@(#)$File: funcs.c,v 1.64 2013/11/19 23:49:44 christos Exp $") #endif /* lint */ #include "magic.h" @@ -44,6 +44,9 @@ FILE_RCSID("@(#)$File: funcs.c,v 1.63 2013/09/03 08:31:48 christos Exp $") #if defined(HAVE_LIMITS_H) #include <limits.h> #endif +#if defined(HAVE_LOCALE_H) +#include <locale.h> +#endif #ifndef SIZE_MAX #define SIZE_MAX ((size_t)~0) @@ -437,14 +440,14 @@ protected int file_replace(struct magic_set *ms, const char *pat, const char *rep) { regex_t rx; - int rc; + int rc, rv = -1; + (void)setlocale(LC_CTYPE, "C"); rc = regcomp(&rx, pat, REG_EXTENDED); if (rc) { char errmsg[512]; (void)regerror(rc, &rx, errmsg, sizeof(errmsg)); file_magerror(ms, "regex error %d, (%s)", rc, errmsg); - return -1; } else { regmatch_t rm; int nm = 0; @@ -452,10 +455,13 @@ file_replace(struct magic_set *ms, const char *pat, const char *rep) ms->o.buf[rm.rm_so] = '\0'; if (file_printf(ms, "%s%s", rep, rm.rm_eo != 0 ? 
ms->o.buf + rm.rm_eo : "") == -1) - return -1; + goto out; /* rv stays -1, but rx is still freed below */ nm++; } + rv = nm; +out: /* reached on success and on file_printf() failure */ regfree(&rx); - return nm; } + (void)setlocale(LC_CTYPE, ""); + return rv; } diff --git a/src/readcdf.c b/src/readcdf.c index 2e0250f7..b3ca60ce 100644 --- a/src/readcdf.c +++ b/src/readcdf.c @@ -26,7 +26,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: readcdf.c,v 1.34 2013/10/29 18:22:45 christos Exp $") +FILE_RCSID("@(#)$File: readcdf.c,v 1.35 2013/10/29 18:30:45 christos Exp $") #endif #include @@ -34,6 +34,9 @@ FILE_RCSID("@(#)$File: readcdf.c,v 1.34 2013/10/29 18:22:45 christos Exp $") #include #include #include +#if defined(HAVE_LOCALE_H) +#include <locale.h> +#endif #include "cdf.h" #include "magic.h" @@ -70,10 +73,15 @@ private const char * cdf_app_to_mime(const char *vbuf, const struct nv *nv) { size_t i; + const char *rv = NULL; + (void)setlocale(LC_CTYPE, "C"); for (i = 0; nv[i].pattern != NULL; i++) - if (strstr(vbuf, nv[i].pattern) != NULL) - return nv[i].mime; + if (strcasestr(vbuf, nv[i].pattern) != NULL) { + rv = nv[i].mime; + break; + } + (void)setlocale(LC_CTYPE, ""); - return NULL; + return rv; /* MIME type of the first matching pattern, or NULL */ } diff --git a/src/softmagic.c b/src/softmagic.c index c77dcc0f..edcf0189 100644 --- a/src/softmagic.c +++ b/src/softmagic.c @@ -32,7 +32,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: softmagic.c,v 1.167 2013/04/22 15:30:11 christos Exp $") +FILE_RCSID("@(#)$File: softmagic.c,v 1.168 2013/05/30 15:53:33 christos Exp $") #endif /* lint */ #include "magic.h" @@ -40,6 +40,9 @@ FILE_RCSID("@(#)$File: softmagic.c,v 1.167 2013/04/22 15:30:11 christos Exp $") #include #include #include +#if defined(HAVE_LOCALE_H) +#include <locale.h> +#endif private int match(struct magic_set *, struct magic *, uint32_t, @@ -337,22 +340,24 @@ private int check_fmt(struct magic_set *ms, struct magic *m) { regex_t rx; - int rc; + int rc, rv = -1; if (strchr(m->desc, '%') == NULL) return 0; + (void)setlocale(LC_CTYPE, "C"); rc = regcomp(&rx, "%[-0-9\\.]*s", REG_EXTENDED|REG_NOSUB); if (rc) { char errmsg[512]; 
(void)regerror(rc, &rx, errmsg, sizeof(errmsg)); file_magerror(ms, "regex error %d, (%s)", rc, errmsg); - return -1; } else { rc = regexec(&rx, m->desc, 0, 0, 0); regfree(&rx); - return !rc; + rv = !rc; } + (void)setlocale(LC_CTYPE, ""); + return rv; } #ifndef HAVE_STRNDUP diff --git a/src/strcasestr.c b/src/strcasestr.c new file mode 100644 index 00000000..546ed3f9 --- /dev/null +++ b/src/strcasestr.c @@ -0,0 +1,82 @@ +/* $NetBSD: strcasestr.c,v 1.3 2005/11/29 03:12:00 christos Exp $ */ + +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Chris Torek. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: strcasestr.c,v 1.3 2005/11/29 03:12:00 christos Exp $"); +__RCSID("$NetBSD: strncasecmp.c,v 1.2 2007/06/04 18:19:27 christos Exp $"); +#endif /* LIBC_SCCS and not lint */ + +#include <ctype.h> +#include <stdint.h> +#include <string.h> +/* Compare at most n bytes of s1 and s2 ignoring case, stopping at a NUL; returns 0 when equal, else the difference of the first mismatching lowercased bytes. */ +static int +ci_strncmp(const char *s1, const char *s2, size_t n) +{ + if (n != 0) { + const unsigned char *us1 = (const unsigned char *)s1, + *us2 = (const unsigned char *)s2; + + do { + if (tolower(*us1) != tolower(*us2++)) + return tolower(*us1) - tolower(*--us2); + if (*us1++ == '\0') + break; + } while (--n != 0); + } + return 0; +} + +/* + * Find the first occurrence of find in s, ignore case; returns a pointer to the match inside s, s itself when find is empty, or NULL when there is no match. + */ +char * +strcasestr(const char *s, const char *find) +{ + char c, sc; + size_t len; + + if ((c = *find++) != 0) { + c = tolower((unsigned char)c); + len = strlen(find); + do { + do { + if ((sc = *s++) == 0) + return (NULL); + } while ((char)tolower((unsigned char)sc) != c); + } while (ci_strncmp(s, find, len) != 0); + s--; + } + return (char *)(intptr_t)(s); +} -- 2.49.0