From 756a4ed5ad3e57c26a247234de371a6ca21806cd Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 27 Oct 2011 13:50:57 -0400 Subject: [PATCH] Add simple script to check for right recursion in Bison grammars. We should generally use left-recursion not right-recursion to parse lists. Bison hasn't got any built-in way to check for this type of inefficiency, and I didn't find anything on the net in a quick search, so I wrote a little Perl script to do it. Add to src/tools/ so we don't have to re-invent this wheel next time we wonder if we're doing anything stupid. Currently, the only place that seems to need fixing is plpgsql's stmt_else production, so the problem doesn't appear to be common enough to warrant trying to include such a test in our standard build process. If we did want to do that, we'd need a way to ignore some false positives, such as a_expr := '-' a_expr --- src/tools/check_bison_recursion.pl | 74 ++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100755 src/tools/check_bison_recursion.pl diff --git a/src/tools/check_bison_recursion.pl b/src/tools/check_bison_recursion.pl new file mode 100755 index 0000000000..34f6ed8466 --- /dev/null +++ b/src/tools/check_bison_recursion.pl @@ -0,0 +1,74 @@ +#! /usr/bin/perl + +################################################################# +# +# check_bison_recursion.pl -- check for right recursion in Bison grammars +# +# The standard way to parse list constructs in Bison grammars is via left +# recursion, wherein a nonterminal symbol has itself as the first symbol +# in one of its expansion rules. It is also possible to parse a list via +# right recursion, wherein a nonterminal symbol has itself as the last +# symbol of an expansion; but that's a bad way to write it because a long +# enough list will result in parser stack overflow. Since Bison doesn't +# have any built-in way to warn about use of right recursion, we use this +# script when we want to check for the problem. +# +# To use: run bison with the -v switch, then feed the produced y.output +# file to this script. +# +# Copyright (c) 2011, PostgreSQL Global Development Group +# +# src/tools/check_bison_recursion.pl +################################################################# + +use strict; +use warnings; + +my $debug = 0; + +# must retain this across input lines +my $cur_nonterminal; + +# We parse the input and emit warnings on the fly. +my $in_grammar = 0; + +while (<>) { + my $rule_number; + my $rhs; + + # We only care about the "Grammar" part of the input. + if (m/^Grammar$/) { + $in_grammar = 1; + } elsif (m/^Terminal/) { + $in_grammar = 0; + } elsif ($in_grammar) { + if (m/^\s*(\d+)\s+(\S+):\s+(.*)$/) { + # first rule for nonterminal + $rule_number = $1; + $cur_nonterminal = $2; + $rhs = $3; + } elsif (m/^\s*(\d+)\s+\|\s+(.*)$/) { + # additional rule for nonterminal + $rule_number = $1; + $rhs = $2; + } + } + + # Process rule if we found one + if (defined $rule_number) { + # deconstruct the RHS + $rhs =~ s|^/\* empty \*/$||; + my @rhs = split '\s', $rhs; + print "Rule $rule_number: $cur_nonterminal := @rhs\n" if $debug; + # We complain if the nonterminal appears as the last RHS element + # but not elsewhere, since "expr := expr + expr" is reasonable + my $lastrhs = pop @rhs; + if (defined $lastrhs && + $cur_nonterminal eq $lastrhs && + !grep { $cur_nonterminal eq $_ } @rhs) { + print "Right recursion in rule $rule_number: $cur_nonterminal := $rhs\n"; + } + } +} + +exit 0; -- 2.40.0