granicus.if.org Git - file/commitdiff
Prefix regex magic with search for keywords first for efficiency
author Christos Zoulas <christos@zoulas.com>
Wed, 27 Feb 2019 16:46:23 +0000 (16:46 +0000)
committer Christos Zoulas <christos@zoulas.com>
Wed, 27 Feb 2019 16:46:23 +0000 (16:46 +0000)
(Michael Schroeder)

magic/Magdir/c-lang
magic/Magdir/m4
magic/Magdir/python
magic/Magdir/ruby

index 7b3f703ef3ec6425c1d67529628fb422911e0f6d..becf6b02eccac1c6c582deae0d27cb711023db34 100644 (file)
@@ -1,5 +1,5 @@
 #------------------------------------------------------------------------------
-# $File: c-lang,v 1.26 2017/08/14 07:40:38 christos Exp $
+# $File: c-lang,v 1.27 2019/02/27 16:46:23 christos Exp $
 # c-lang:  file(1) magic for C and related languages programs
 #
 # The strength is to beat standard HTML
 
 # C
 # Check for class if include is found, otherwise class is beaten by include because of lowered strength
-0      regex   \^#include                      C
->0     regex   \^class[[:space:]]+
->>&0   regex   \\{[\.\*]\\}(;)?$                       \b++
->&0    clear   x                               source text
+0      search/8192     #include
+>0     regex   \^#include                      C
+>>0    regex   \^class[[:space:]]+
+>>>&0  regex   \\{[\.\*]\\}(;)?$                       \b++
+>>&0   clear   x                               source text
 !:strength + 13
 !:mime text/x-c
-0      regex   \^#[[:space:]]*pragma   C source text
+0      search/8192     pragma
+>0     regex   \^#[[:space:]]*pragma   C source text
 !:mime text/x-c
-0      regex   \^#[[:space:]]*(if\|ifn)def
->&0    regex   \^#[[:space:]]*endif$   C source text
+0      search/8192     endif
+>0     regex   \^#[[:space:]]*(if\|ifn)def
+>>&0   regex   \^#[[:space:]]*endif$   C source text
 !:mime text/x-c
-0      regex   \^#[[:space:]]*(if\|ifn)def
->&0    regex   \^#[[:space:]]*define   C source text
+0      search/8192     define
+>0     regex   \^#[[:space:]]*(if\|ifn)def
+>>&0   regex   \^#[[:space:]]*define   C source text
 !:mime text/x-c
-0      regex   \^[[:space:]]*char(\ \\*|\\*)(.+)(=.*)?;[[:space:]]*$                   C source text
+0      search/8192     char
+>0     regex   \^[[:space:]]*char(\ \\*|\\*)(.+)(=.*)?;[[:space:]]*$                   C source text
 !:mime text/x-c
-0      regex   \^[[:space:]]*double(\ \\*|\\*)(.+)(=.*)?;[[:space:]]*$                 C source text
+0      search/8192     double
+>0     regex   \^[[:space:]]*double(\ \\*|\\*)(.+)(=.*)?;[[:space:]]*$                 C source text
 !:mime text/x-c
-0      regex   \^[[:space:]]*extern[[:space:]]+                C source text
+0      search/8192     extern
+>0     regex   \^[[:space:]]*extern[[:space:]]+                C source text
 !:mime text/x-c
-0      regex   \^[[:space:]]*float(\ \\*|\\*)(.+)(=.*)?;[[:space:]]*$                  C source text
+0      search/8192     float
+>0     regex   \^[[:space:]]*float(\ \\*|\\*)(.+)(=.*)?;[[:space:]]*$                  C source text
 !:mime text/x-c
-0      regex   \^struct[[:space:]]+            C source text
+0      search/8192     struct
+>0     regex   \^struct[[:space:]]+            C source text
 !:mime text/x-c
-0      regex   \^union[[:space:]]+             C source text
+0      search/8192     union
+>0     regex   \^union[[:space:]]+             C source text
 !:mime text/x-c
 0      search/8192     main(
 >&0 regex      \\)[[:space:]]*\\{              C source text
 
 # C++
 # The strength of these rules is increased so they beat the C rules above
-0      regex   \^namespace[[:space:]]+[_[:alpha:]]{1,30}[[:space:]]*\\{        C++ source text
+0      search/8192     namespace
+>0     regex   \^namespace[[:space:]]+[_[:alpha:]]{1,30}[[:space:]]*\\{        C++ source text
 !:strength + 30
 !:mime text/x-c++
 # using namespace [namespace] or using std::[lib]
-0      regex   \^using[[:space:]]+(namespace\ )?std(::)?[[:alpha:]]*[[:space:]]*;              C++ source text
+0      search/8192     using
+>0     regex   \^using[[:space:]]+(namespace\ )?std(::)?[[:alpha:]]*[[:space:]]*;              C++ source text
 !:strength + 30
 !:mime text/x-c++
-0      regex   \^[[:space:]]*template[[:space:]]*<.*>[[:space:]]*$     C++ source text
+0      search/8192     template
+>0     regex   \^[[:space:]]*template[[:space:]]*<.*>[[:space:]]*$     C++ source text
 !:strength + 30
 !:mime text/x-c++
-0      regex   \^[[:space:]]*virtual[[:space:]]+.*[};][[:space:]]*$            C++ source text
+0      search/8192     virtual
+>0     regex   \^[[:space:]]*virtual[[:space:]]+.*[};][[:space:]]*$            C++ source text
 !:strength + 30
 !:mime text/x-c++
 # But class alone is reduced to avoid beating php (Jens Schleusener)
-0      regex   \^[[:space:]]*class[[:space:]]+[[:digit:][:alpha:]:_]+[[:space:]]*\\{(.*[\n]*)*\\}(;)?$         C++ source text
+0      search/8192     class
+>0     regex   \^[[:space:]]*class[[:space:]]+[[:digit:][:alpha:]:_]+[[:space:]]*\\{(.*[\n]*)*\\}(;)?$         C++ source text
 !:strength + 13
 !:mime text/x-c++
-0      regex   \^[[:space:]]*public:           C++ source text
+0      search/8192     public
+>0     regex   \^[[:space:]]*public:           C++ source text
 !:strength + 30
 !:mime text/x-c++
-0      regex   \^[[:space:]]*private:          C++ source text
+0      search/8192     private
+>0     regex   \^[[:space:]]*private:          C++ source text
 !:strength + 30
 !:mime text/x-c++
-0      regex   \^[[:space:]]*protected:                C++ source text
+0      search/8192     protected
+>0     regex   \^[[:space:]]*protected:                C++ source text
 !:strength + 30
 !:mime text/x-c++
 
 # Objective-C
-0      regex   \^#import                       Objective-C source text
+0      search/8192     #import
+>0     regex   \^#import                       Objective-C source text
 !:strength + 25
 !:mime text/x-objective-c
 
index 3a1c6d19f7dbb8a1ad75fc127c12088d5b98fd86..587ebe80c66dc48af4ba5974bd5d62a5100dba23 100644 (file)
@@ -1,9 +1,11 @@
 #------------------------------------------------------------------------------
-# $File: m4,v 1.2 2017/08/14 07:40:38 christos Exp $
+# $File: m4,v 1.3 2019/02/27 16:46:23 christos Exp $
 # m4:  file(1) magic for M4 scripts
 #
-0      regex   \^dnl\          M4 macro processor script text
+0      search/8192     dnl
+>0     regex   \^dnl\          M4 macro processor script text
 !:mime text/x-m4
-0      regex   \^AC_DEFUN\\(\\[        M4 macro processor script text
+0      search/8192     AC_DEFUN
+>0     regex   \^AC_DEFUN\\(\\[        M4 macro processor script text
 !:strength + 15
 !:mime text/x-m4
index f21ff659b4533678417b24c171de474cbdf7fa65..d2fbc2d37c6db78642fd779c40c31253405fb4c5 100644 (file)
@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: python,v 1.34 2017/08/14 07:40:38 christos Exp $
+# $File: python,v 1.35 2019/02/27 16:46:23 christos Exp $
 # python:  file(1) magic for python
 #
 # Outlook puts """ too for urgent messages
@@ -45,7 +45,8 @@
 
 
 # from module.submodule import func1, func2
-0      regex           \^from[\040\t\f\r\n]+([A-Za-z0-9_]|\\.)+[\040\t\f\r\n]+import.*$        Python script text executable
+0      search/8192     import
+>0     regex           \^from[\040\t\f\r\n]+([A-Za-z0-9_]|\\.)+[\040\t\f\r\n]+import.*$        Python script text executable
 !:strength + 15
 !:mime text/x-python
 
@@ -63,7 +64,8 @@
 !:mime text/x-python
 
 # import module [as abbrev]
-0      regex   \^import\ [_[:alpha:]]+\ as\ [[:alpha:]][[:space:]]*$ Python script text executable
+0      search/8192     import
+>0     regex   \^import\ [_[:alpha:]]+\ as\ [[:alpha:]][[:space:]]*$ Python script text executable
 !:mime text/x-python
 
 # comments
 !:mime text/x-python
 
 # class name[(base classes,)]: [pass]
-0      regex   \^class\ [_[:alpha:]]+(\\(.*\\))?(\ )*:([\ \t]+pass)?$          Python script text executable
+0      search/8192     class
+>0     regex   \^class\ [_[:alpha:]]+(\\(.*\\))?(\ )*:([\ \t]+pass)?$          Python script text executable
 !:strength + 15
 !:mime text/x-python
 
 # def name(*args, **kwargs):
-0      regex    \^[[:space:]]{0,50}def\ {1,50}[_a-zA-Z]{1,100}
->&0    regex    \\(([[:alpha:]*_,\ ]){0,255}\\):$ Python script text executable
+0      search/8192     def\ 
+>0     regex    \^[[:space:]]{0,50}def\ {1,50}[_a-zA-Z]{1,100}
+>>&0   regex    \\(([[:alpha:]*_,\ ]){0,255}\\):$ Python script text executable
 !:strength + 15
 !:mime text/x-python
index 45a253cb5f697bdbff8f5bd8547c8e82a639d2f3..d208ffec3f78c88d7751ade18fd66d3c11fa022a 100644 (file)
@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: ruby,v 1.7 2017/08/14 13:39:18 christos Exp $
+# $File: ruby,v 1.8 2019/02/27 16:46:23 christos Exp $
 # ruby:  file(1) magic for Ruby scripting language
 # URL:  http://www.ruby-lang.org/
 # From: Reuben Thomas <rrt@sc3d.org>
 # What looks like ruby, but does not have a shebang
 # (modules and such)
 # From: Lubomir Rintel <lkundrak@v3.sk>
-0      regex           \^[[:space:]]*require[[:space:]]'[A-Za-z_/]+'
->0     regex           def\ [a-z]|\ do$
->>&0   regex           \^[[:space:]]*end([[:space:]]+[;#].*)?$         Ruby script text
+0      search/8192     require
+>0     regex           \^[[:space:]]*require[[:space:]]'[A-Za-z_/]+'
+>>0    regex           def\ [a-z]|\ do$
+>>>&0  regex           \^[[:space:]]*end([[:space:]]+[;#].*)?$         Ruby script text
 !:strength + 30
 !:mime text/x-ruby
 0      regex           \^[[:space:]]*(class|module)[[:space:]][A-Z]
 # Looks for function definition to balance python magic
 # def name (args)
 # end
-0      regex           \^[[:space:]]*def\ [a-z]|def\ [[:alpha:]]+::[a-z]
->&0    regex           \^[[:space:]]*end([[:space:]]+[;#].*)?$         Ruby script text
+0      search/8192     def\ 
+>0     regex           \^[[:space:]]*def\ [a-z]|def\ [[:alpha:]]+::[a-z]
+>>&0   regex           \^[[:space:]]*end([[:space:]]+[;#].*)?$         Ruby script text
 !:strength + 10
 !:mime text/x-ruby
 
-0      regex           \^[[:space:]]*require[[:space:]]'[A-Za-z_/]+'   Ruby script text
+0      search/8192     require
+>0     regex           \^[[:space:]]*require[[:space:]]'[A-Za-z_/]+'   Ruby script text
 !:mime text/x-ruby
-0 regex        \^[[:space:]]*include\ ([A-Z]+[a-z]*(::))+      Ruby script text
+0      search/8192     include
+>0 regex       \^[[:space:]]*include\ ([A-Z]+[a-z]*(::))+      Ruby script text
 !:mime text/x-ruby