From 0eb357710f7d679e9df1e97ad02e2b4f30a94eb0 Mon Sep 17 00:00:00 2001 From: Matthew Fernandez Date: Sun, 24 Jul 2022 10:44:09 -0700 Subject: [PATCH] ast onematch: rephrase hash-based switch into string comparisons MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This code was using a hand rolled hash function to implement a series of string comparisons as a jump table. I guess at some point this must have been a necessary optimization due to limitations of the day’s compilers/machines. In a modern environment, this is a deoptimization, impeding the compiler’s ability to understand your intent. Modern compilers know the string comparison functions as built-ins and can use SIMD¹/SWAR² tricks to emit a short string comparison as a single instruction. They are also capable of transforming an if-then-else ladder into a switch if their heuristics predict it will be worthwhile. ¹ https://en.wikipedia.org/wiki/Single_instruction,_multiple_data ² https://en.wikipedia.org/wiki/SWAR --- ci/clang_format.py | 1 - lib/ast/CMakeLists.txt | 1 - lib/ast/Makefile.am | 2 +- lib/ast/ast.vcxproj | 1 - lib/ast/ast.vcxproj.filters | 3 --- lib/ast/hashkey.h | 41 ------------------------------------- lib/ast/strmatch.c | 40 +++++++++++------------------------- 7 files changed, 13 insertions(+), 76 deletions(-) delete mode 100644 lib/ast/hashkey.h diff --git a/ci/clang_format.py b/ci/clang_format.py index 133ea99c7..d03597659 100644 --- a/ci/clang_format.py +++ b/ci/clang_format.py @@ -236,7 +236,6 @@ EXCLUDE = ( "lib/ast/error.h", "lib/ast/fmtbuf.c", "lib/ast/fmtesc.c", - "lib/ast/hashkey.h", "lib/ast/pathaccess.c", "lib/ast/pathcanon.c", "lib/ast/pathcat.c", diff --git a/lib/ast/CMakeLists.txt b/lib/ast/CMakeLists.txt index 29b759119..32920f30e 100644 --- a/lib/ast/CMakeLists.txt +++ b/lib/ast/CMakeLists.txt @@ -2,7 +2,6 @@ add_library(ast STATIC # Header files ast.h error.h - hashkey.h # Source files pathpath.c diff --git a/lib/ast/Makefile.am b/lib/ast/Makefile.am index 2502eadb5..e5154ef45 100644 --- a/lib/ast/Makefile.am +++ b/lib/ast/Makefile.am @@ -2,7 +2,7 @@ AM_CPPFLAGS = -I$(top_srcdir)/lib -noinst_HEADERS = ast.h error.h hashkey.h +noinst_HEADERS = ast.h error.h noinst_LTLIBRARIES = libast_C.la libast_C_la_SOURCES = pathpath.c sfstr.h chresc.c chrtoi.c error.c \ diff --git a/lib/ast/ast.vcxproj b/lib/ast/ast.vcxproj index 717262509..3447ed83d 100644 --- a/lib/ast/ast.vcxproj +++ b/lib/ast/ast.vcxproj @@ -77,7 +77,6 @@ - diff --git a/lib/ast/ast.vcxproj.filters b/lib/ast/ast.vcxproj.filters index 34e45e636..4f0cdb89c 100644 --- a/lib/ast/ast.vcxproj.filters +++ b/lib/ast/ast.vcxproj.filters @@ -21,9 +21,6 @@ Header Files - - Header Files - Header Files diff --git a/lib/ast/hashkey.h b/lib/ast/hashkey.h deleted file mode 100644 index 089353266..000000000 --- a/lib/ast/hashkey.h +++ /dev/null @@ -1,41 +0,0 @@ -/************************************************************************* - * Copyright (c) 2011 AT&T Intellectual Property - * All rights reserved. This program and the accompanying materials - * are made available under the terms of the Eclipse Public License v1.0 - * which accompanies this distribution, and is available at - * http://www.eclipse.org/legal/epl-v10.html - * - * Contributors: Details at https://graphviz.org - *************************************************************************/ - -#pragma once - -/* - * Glenn Fowler - * AT&T Research - * - * 1-6 char lower-case keyword -> long hash - * digit args passed as HASHKEYN('2') - */ - -#define HASHKEYMAX 6 -#define HASHKEYBIT 5 -#define HASHKEYOFF ('a'-1) -#define HASHKEYPART(h,c) (((h)< #include -#include #include #include @@ -392,57 +391,42 @@ onematch(Match_t * mp, int g, char *s, char *p, char *e, char *r, if (ok) /*NOP*/; else if (n == ':') { - switch (HASHNKEY5 - (x, oldp[0], oldp[1], oldp[2], oldp[3], - oldp[4])) { - case HASHNKEY5(5, 'a', 'l', 'n', 'u', 'm'): + if (x == 5 && strncmp(oldp, "alnum", 5) == 0) { if (isalnum(sc)) ok = 1; - break; - case HASHNKEY5(5, 'a', 'l', 'p', 'h', 'a'): + } else if (x == 5 && strncmp(oldp, "alpha", 5) == 0) { if (isalpha(sc)) ok = 1; - break; - case HASHNKEY5(5, 'b', 'l', 'a', 'n', 'k'): + } else if (x == 5 && strncmp(oldp, "blank", 5) == 0) { if (isblank(sc)) ok = 1; - break; - case HASHNKEY5(5, 'c', 'n', 't', 'r', 'l'): + } else if (x == 5 && strncmp(oldp, "cntrl", 5) == 0) { if (iscntrl(sc)) ok = 1; - break; - case HASHNKEY5(5, 'd', 'i', 'g', 'i', 't'): + } else if (x == 5 && strncmp(oldp, "digit", 5) == 0) { if (isdigit(sc)) ok = 1; - break; - case HASHNKEY5(5, 'g', 'r', 'a', 'p', 'h'): + } else if (x == 5 && strncmp(oldp, "graph", 5) == 0) { if (isgraph(sc)) ok = 1; - break; - case HASHNKEY5(5, 'l', 'o', 'w', 'e', 'r'): + } else if (x == 5 && strncmp(oldp, "lower", 5) == 0) { if (islower(sc)) ok = 1; - break; - case HASHNKEY5(5, 'p', 'r', 'i', 'n', 't'): + } else if (x == 5 && strncmp(oldp, "print", 5) == 0) { if (isprint(sc)) ok = 1; - break; - case HASHNKEY5(5, 'p', 'u', 'n', 'c', 't'): + } else if (x == 5 && strncmp(oldp, "punct", 5) == 0) { if (ispunct(sc)) ok = 1; - break; - case HASHNKEY5(5, 's', 'p', 'a', 'c', 'e'): + } else if (x == 5 && strncmp(oldp, "space", 5) == 0) { if (isspace(sc)) ok = 1; - break; - case HASHNKEY5(5, 'u', 'p', 'p', 'e', 'r'): + } else if (x == 5 && strncmp(oldp, "upper", 5) == 0) { if (icase ? islower(sc) : isupper(sc)) ok = 1; - break; - case HASHNKEY5(6, 'x', 'd', 'i', 'g', 'i'): + } else if (x == 6 && strncmp(oldp, "xdigi", 5) == 0) { if (oldp[5] == 't' && isxdigit(sc)) ok = 1; - break; } } else if (range) -- 2.40.0