]> granicus.if.org Git - graphviz/commitdiff
ast onematch: rephrase hash-based switch into string comparisons
authorMatthew Fernandez <matthew.fernandez@gmail.com>
Sun, 24 Jul 2022 17:44:09 +0000 (10:44 -0700)
committerMatthew Fernandez <matthew.fernandez@gmail.com>
Sat, 30 Jul 2022 00:02:14 +0000 (17:02 -0700)
This code was using a hand rolled hash function to implement a series of string
comparisons as a jump table. I guess at some point this must have been a
necessary optimization due to limitations of the day’s compilers/machines. In a
modern environment, this is a deoptimization, impeding the compiler’s ability to
understand your intent. Modern compilers know the string comparison functions as
built-ins and can use SIMD¹/SWAR² tricks to emit a short string comparison as a
single instruction. They are also capable of transforming an if-then-else ladder
into a switch if their heuristics predict it will be worthwhile.

¹ https://en.wikipedia.org/wiki/Single_instruction,_multiple_data
² https://en.wikipedia.org/wiki/SWAR

ci/clang_format.py
lib/ast/CMakeLists.txt
lib/ast/Makefile.am
lib/ast/ast.vcxproj
lib/ast/ast.vcxproj.filters
lib/ast/hashkey.h [deleted file]
lib/ast/strmatch.c

index 133ea99c75f752c2c2838fbf527183c8edbd8570..d03597659fb16a3788b50227b22930ab2746db8c 100644 (file)
@@ -236,7 +236,6 @@ EXCLUDE = (
   "lib/ast/error.h",
   "lib/ast/fmtbuf.c",
   "lib/ast/fmtesc.c",
-  "lib/ast/hashkey.h",
   "lib/ast/pathaccess.c",
   "lib/ast/pathcanon.c",
   "lib/ast/pathcat.c",
index 29b759119e04abb80b313b3ffe7fde699c76006e..32920f30e152e55a285b1a9f993ece378705a0d5 100644 (file)
@@ -2,7 +2,6 @@ add_library(ast STATIC
   # Header files
   ast.h
   error.h
-  hashkey.h
 
   # Source files
   pathpath.c
index 2502eadb5fadbc9c2f3651af8ceecd2b2de0d1d1..e5154ef457bff81889e287c61fcda3c361002087 100644 (file)
@@ -2,7 +2,7 @@
 
 AM_CPPFLAGS = -I$(top_srcdir)/lib
 
-noinst_HEADERS = ast.h error.h hashkey.h
+noinst_HEADERS = ast.h error.h
 noinst_LTLIBRARIES = libast_C.la
 
 libast_C_la_SOURCES = pathpath.c sfstr.h chresc.c chrtoi.c error.c \
index 717262509384eea8a4e811c94763ef3fa47ac244..3447ed83da02ade6cee551fdecef2dc04e9b8a8e 100644 (file)
@@ -77,7 +77,6 @@
   <ItemGroup>
     <ClInclude Include="ast.h" />
     <ClInclude Include="error.h" />
-    <ClInclude Include="hashkey.h" />
     <ClInclude Include="sfstr.h" />
   </ItemGroup>
   <ItemGroup>
index 34e45e6368a06167e39bac13143046df5c2930bd..4f0cdb89c34d5b8543949fdb0ae2873a6d3f6786 100644 (file)
@@ -21,9 +21,6 @@
     <ClInclude Include="error.h">
       <Filter>Header Files</Filter>
     </ClInclude>
-    <ClInclude Include="hashkey.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
     <ClInclude Include="sfstr.h">
       <Filter>Header Files</Filter>
     </ClInclude>
diff --git a/lib/ast/hashkey.h b/lib/ast/hashkey.h
deleted file mode 100644 (file)
index 0893532..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-/*************************************************************************
- * Copyright (c) 2011 AT&T Intellectual Property 
- * All rights reserved. This program and the accompanying materials
- * are made available under the terms of the Eclipse Public License v1.0
- * which accompanies this distribution, and is available at
- * http://www.eclipse.org/legal/epl-v10.html
- *
- * Contributors: Details at https://graphviz.org
- *************************************************************************/
-
-#pragma once
-
-/*
- * Glenn Fowler
- * AT&T Research
- *
- * 1-6 char lower-case keyword -> long hash
- * digit args passed as HASHKEYN('2')
- */
-
-#define HASHKEYMAX                     6
-#define HASHKEYBIT                     5
-#define HASHKEYOFF                     ('a'-1)
-#define HASHKEYPART(h,c)               (((h)<<HASHKEYBIT)+HASHKEY1(c))
-
-#define HASHKEYN(n)                    ((n)-'0'+'z'+1)
-
-#define HASHKEY1(c1)                   ((c1)-HASHKEYOFF)
-#define HASHKEY2(c1,c2)                        HASHKEYPART(HASHKEY1(c1),c2)
-#define HASHKEY3(c1,c2,c3)             HASHKEYPART(HASHKEY2(c1,c2),c3)
-#define HASHKEY4(c1,c2,c3,c4)          HASHKEYPART(HASHKEY3(c1,c2,c3),c4)
-#define HASHKEY5(c1,c2,c3,c4,c5)       HASHKEYPART(HASHKEY4(c1,c2,c3,c4),c5)
-#define HASHKEY6(c1,c2,c3,c4,c5,c6)    HASHKEYPART(HASHKEY5(c1,c2,c3,c4,c5),c6)
-
-#define HASHNKEY1(n,c1)                        HASHKEY2((n)+HASHKEYOFF,c1)
-#define HASHNKEY2(n,c2,c1)             HASHKEY3((n)+HASHKEYOFF,c2,c1)
-#define HASHNKEY3(n,c3,c2,c1)          HASHKEY4((n)+HASHKEYOFF,c3,c2,c1)
-#define HASHNKEY4(n,c4,c3,c2,c1)       HASHKEY5((n)+'a',c4,c3,c2,c1)
-#define HASHNKEY5(n,c5,c4,c3,c2,c1)    HASHKEY6((n)+'a',c5,c4,c3,c2,c1)
-
-#undef extern
index 447b72723cd967ce6f893714badd94cb92215705..c61182c495d44fe03e9befa379b3511faad8a4d8 100644 (file)
@@ -45,7 +45,6 @@
 
 #include <ast/ast.h>
 #include <ctype.h>
-#include <ast/hashkey.h>
 #include <stddef.h>
 #include <string.h>
 
@@ -392,57 +391,42 @@ onematch(Match_t * mp, int g, char *s, char *p, char *e, char *r,
                        if (ok)
                            /*NOP*/;
                        else if (n == ':') {
-                           switch (HASHNKEY5
-                                   (x, oldp[0], oldp[1], oldp[2], oldp[3],
-                                    oldp[4])) {
-                           case HASHNKEY5(5, 'a', 'l', 'n', 'u', 'm'):
+                           if (x == 5 && strncmp(oldp, "alnum", 5) == 0) {
                                if (isalnum(sc))
                                    ok = 1;
-                               break;
-                           case HASHNKEY5(5, 'a', 'l', 'p', 'h', 'a'):
+                           } else if (x == 5 && strncmp(oldp, "alpha", 5) == 0) {
                                if (isalpha(sc))
                                    ok = 1;
-                               break;
-                           case HASHNKEY5(5, 'b', 'l', 'a', 'n', 'k'):
+                           } else if (x == 5 && strncmp(oldp, "blank", 5) == 0) {
                                if (isblank(sc))
                                    ok = 1;
-                               break;
-                           case HASHNKEY5(5, 'c', 'n', 't', 'r', 'l'):
+                           } else if (x == 5 && strncmp(oldp, "cntrl", 5) == 0) {
                                if (iscntrl(sc))
                                    ok = 1;
-                               break;
-                           case HASHNKEY5(5, 'd', 'i', 'g', 'i', 't'):
+                           } else if (x == 5 && strncmp(oldp, "digit", 5) == 0) {
                                if (isdigit(sc))
                                    ok = 1;
-                               break;
-                           case HASHNKEY5(5, 'g', 'r', 'a', 'p', 'h'):
+                           } else if (x == 5 && strncmp(oldp, "graph", 5) == 0) {
                                if (isgraph(sc))
                                    ok = 1;
-                               break;
-                           case HASHNKEY5(5, 'l', 'o', 'w', 'e', 'r'):
+                           } else if (x == 5 && strncmp(oldp, "lower", 5) == 0) {
                                if (islower(sc))
                                    ok = 1;
-                               break;
-                           case HASHNKEY5(5, 'p', 'r', 'i', 'n', 't'):
+                           } else if (x == 5 && strncmp(oldp, "print", 5) == 0) {
                                if (isprint(sc))
                                    ok = 1;
-                               break;
-                           case HASHNKEY5(5, 'p', 'u', 'n', 'c', 't'):
+                           } else if (x == 5 && strncmp(oldp, "punct", 5) == 0) {
                                if (ispunct(sc))
                                    ok = 1;
-                               break;
-                           case HASHNKEY5(5, 's', 'p', 'a', 'c', 'e'):
+                           } else if (x == 5 && strncmp(oldp, "space", 5) == 0) {
                                if (isspace(sc))
                                    ok = 1;
-                               break;
-                           case HASHNKEY5(5, 'u', 'p', 'p', 'e', 'r'):
+                           } else if (x == 5 && strncmp(oldp, "upper", 5) == 0) {
                                if (icase ? islower(sc) : isupper(sc))
                                    ok = 1;
-                               break;
-                           case HASHNKEY5(6, 'x', 'd', 'i', 'g', 'i'):
+                           } else if (x == 6 && strncmp(oldp, "xdigi", 5) == 0) {
                                if (oldp[5] == 't' && isxdigit(sc))
                                    ok = 1;
-                               break;
                            }
                        }
                        else if (range)