]> granicus.if.org Git - php/commitdiff
have a go at writing lexer for input
authorkrakjoe <joe.watkins@live.co.uk>
Sat, 15 Feb 2014 04:52:04 +0000 (04:52 +0000)
committerkrakjoe <joe.watkins@live.co.uk>
Sat, 15 Feb 2014 04:52:04 +0000 (04:52 +0000)
phpdbg_lexer.l [new file with mode: 0644]
phpdbg_parser.y [new file with mode: 0644]

diff --git a/phpdbg_lexer.l b/phpdbg_lexer.l
new file mode 100644 (file)
index 0000000..eb93d13
--- /dev/null
@@ -0,0 +1,89 @@
+%{
+/*
+ * phpdbg_lexer.l
+ */
+
+#include "phpdbg.h"
+#include "phpdbg_cmd.h"
+#define YYSTYPE phpdbg_param_t
+
+#include "phpdbg_parser.h"
+#include <stdio.h>
+#include <string.h>
+%}
+
+%option outfile="phpdbg_lexer.c" header-file="phpdbg_lexer.h"
+%option warn nodefault
+%option reentrant noyywrap never-interactive nounistd
+%option bison-bridge
+
+/* Literal spellings used by the "truthy" rule below (matched exactly as written). */
+C_TRUE         "true"
+C_YES          "yes"
+C_ON           "on"
+C_ENABLED      "enabled"
+/* Literal spellings used by the "falsy" rule below. */
+C_FALSE                "false"
+C_NO           "no"
+C_OFF          "off"
+C_DISABLED     "disabled"
+
+/* One or more decimal digits. */
+DIGITS         [0-9]+
+/* A letter followed by ONE OR MORE letters, digits or underscores.
+ * NOTE(review): this rejects single-character identifiers and identifiers
+ * starting with '_' - confirm that is intended. */
+ID                     [a-zA-Z][a-zA-Z0-9_]+
+/* Two identifiers joined by "::". */
+METHOD         {ID}::{ID}
+/* One or more characters that are not a space, ':' or a digit, then ':' and
+ * a run of digits.
+ * NOTE(review): a name containing a digit (e.g. "test1.php:10") cannot match
+ * as a whole - confirm that is intended. */
+FILE           [^ :0-9]{1,}:[0-9]+
+/* "0x" followed by one or more hexadecimal digits. */
+OPLINE         0x[a-fA-F0-9]+
+/* Double-quoted text; a backslash escapes the single character after it. */
+LITERAL                \"(\\.|[^\\"])*\"
+/* A run of spaces, carriage returns, newlines or tabs. */
+WS          [ \r\n\t]+
+%%
+
+{WS}                                                           { /* whitespace is dropped: the action returns no token */ }
+{C_TRUE}|{C_YES}|{C_ON}|{C_ENABLED}    {
+       /* Every affirmative spelling is reported as the numeric value 1. */
+       yylval->num = 1;
+       yylval->type = NUMERIC_PARAM;
+       return C_TRUTHY;
+}
+{C_FALSE}|{C_NO}|{C_OFF}|{C_DISABLED} {
+       /* Every negative spelling is reported as the numeric value 0. */
+       yylval->num = 0;
+       yylval->type = NUMERIC_PARAM;
+       return C_FALSY;
+}
+{DIGITS}         {
+       /* Decimal integer argument.
+        * strtol() is used instead of atoi(): atoi() has undefined behaviour
+        * when the digits do not fit in an int, whereas strtol() saturates at
+        * LONG_MAX and yields a long. */
+       yylval->type = NUMERIC_PARAM;
+       yylval->num = strtol(yytext, NULL, 10);
+       return T_DIGITS;
+}
+{METHOD}               {
+       /* yytext holds "Class::method". Neither identifier can contain ':'
+        * (see the ID pattern), so the first "::" is the separator. */
+       const char *sep = strstr(yytext, "::");
+
+       yylval->type = METHOD_PARAM;
+       /* Heap copies of both halves, as the other string-valued rules
+        * produce, instead of fixed placeholder strings. */
+       yylval->method.class = strndup(yytext, (size_t) (sep - yytext));
+       yylval->method.name = strdup(sep + 2);
+       return T_METHOD;
+}
+{FILE}                 {
+       /* yytext holds "name:line". The FILE pattern forbids ':' inside the
+        * name, so the first colon separates the two parts. */
+       const char *colon = strchr(yytext, ':');
+
+       yylval->type = FILE_PARAM;
+       /* Store only the name part, without the ":line" suffix ... */
+       yylval->file.name = strndup(yytext, (size_t) (colon - yytext));
+       /* ... and the digits after the colon as the line number. */
+       yylval->file.line = strtol(colon + 1, NULL, 10);
+       return T_FILE;
+}
+{OPLINE}               {
+       /* The token text is "0x" plus hex digits, so it must be converted
+        * in base 16 (strtoul accepts the "0x" prefix in that base).
+        * Base 10 would stop at the 'x' and always yield 0. */
+       yylval->type = ADDR_PARAM;
+       yylval->addr = strtoul(yytext, NULL, 16);
+       return T_OPLINE;
+}
+{LITERAL}              {
+       /* The whole match is copied, so the stored text and length still
+        * include the surrounding double quotes. */
+       yylval->len = yyleng;
+       yylval->str = strndup(yytext, yyleng);
+       yylval->type = STR_PARAM;
+       return T_LITERAL;
+}
+[a-zA-Z]+       {
+       /* A bare alphabetic word: hand the parser a heap copy of it as a
+        * command name. */
+       yylval->len = yyleng;
+       yylval->str = strndup(yytext, yyleng);
+       yylval->type = STR_PARAM;
+
+       return C_CMD;
+}
+.                      {
+       /* Catch-all for any single character (other than newline) that no
+        * earlier rule matched. The action is empty, so the character is
+        * dropped and no token is returned. */
+       /** command detection here **/
+}
+%%
diff --git a/phpdbg_parser.y b/phpdbg_parser.y
new file mode 100644 (file)
index 0000000..ef3dca5
--- /dev/null
@@ -0,0 +1,140 @@
+%error-verbose
+%{
+/*
+ * phpdbg_parser.y
+ *
+ * flex phpdbg_lexer.l
+ * bison phpdbg_parser.y
+ * gcc -g -o parser phpdbg_lexer.c phpdbg_parser.c -I/usr/src/php-src/main -I/usr/src/php-src/Zend -I/usr/src/php-src/TSRM -I/usr/src/php-src
+ */
+#include "phpdbg.h"
+#include "phpdbg_cmd.h"
+
+#define YYSTYPE phpdbg_param_t
+
+/*
+ * Write a one-line description of `param` to stderr, prefixed with `msg`.
+ *
+ * Nothing is printed when `param` is NULL, when its type is zero, or when
+ * the type is not one of the five kinds handled below.
+ */
+void phpdbg_debug_param(const phpdbg_param_t *param, const char *msg) {
+       if (!param || !param->type) {
+               return;
+       }
+
+       switch (param->type) {
+               case NUMERIC_PARAM:
+                       fprintf(stderr, "%s NUMERIC_PARAM(%ld)\n", msg, param->num);
+                       break;
+
+               case ADDR_PARAM:
+                       fprintf(stderr, "%s ADDR_PARAM(%lu)\n", msg, param->addr);
+                       break;
+
+               case STR_PARAM:
+                       fprintf(stderr, "%s STR_PARAM(%s=%d)\n", msg, param->str, param->len);
+                       break;
+
+               case FILE_PARAM:
+                       fprintf(stderr, "%s FILE_PARAM(%s:%d)\n", msg, param->file.name, param->file.line);
+                       break;
+
+               case METHOD_PARAM:
+                       fprintf(stderr, "%s METHOD_PARAM(%s::%s)\n", msg, param->method.class, param->method.name);
+                       break;
+
+               default:
+                       /* any other type: print nothing */
+                       break;
+       }
+}
+
+#include "phpdbg_parser.h"
+#include "phpdbg_lexer.h"
+
+/*
+ * Error callback for the generated parser: prints `msg` to stderr.
+ *
+ * The leading arguments mirror the %parse-param declarations and are not
+ * used here. Always returns 0, so the declared int return type is honoured
+ * and a caller reading the result never sees an indeterminate value.
+ */
+int yyerror(phpdbg_param_t **param, yyscan_t scanner, const char *msg) {
+    (void) param;
+    (void) scanner;
+
+    fprintf(stderr, "Parse Error: %s\n", msg);
+    return 0;
+}
+
+/*
+ * Stand-alone test driver: reads stdin one line at a time and runs each
+ * line through a freshly initialised scanner and the parser.
+ *
+ * Returns 0 once input is exhausted, or 1 if the scanner cannot be
+ * initialised or reading stdin fails.
+ */
+int main(int argc, char **argv) {
+       char buffer[8096];
+
+       (void) argc;
+       (void) argv;
+
+       /* fgets() returns NULL on end of input and on read error; either
+        * way the loop ends instead of spinning on a closed stream. */
+       while (fgets(buffer, sizeof(buffer), stdin) != NULL) {
+               phpdbg_param_t *expression = NULL;
+               yyscan_t scanner;
+               YY_BUFFER_STATE state;
+
+               if (yylex_init(&scanner)) {
+                       fprintf(stderr, "could not initialize scanner\n");
+                       return 1;
+               }
+
+               state = yy_scan_string(buffer, scanner);
+
+               /* yyparse() returns 0 on success and non-zero on failure. */
+               if (yyparse(&expression, scanner) != 0) {
+                       fprintf(stderr, "could not parse input (%s) !!\n", buffer);
+               }
+
+               /* Release the per-line scanner state whether or not parsing
+                * succeeded, so a rejected line does not leak it. */
+               yy_delete_buffer(state, scanner);
+               yylex_destroy(scanner);
+       }
+
+       if (ferror(stdin)) {
+               fprintf(stderr, "could not get input !!\n");
+               return 1;
+       }
+
+       return 0;
+}
+%}
+/* Emitted into the generated header: makes yyscan_t available to code that
+ * includes phpdbg_parser.h, unless it has already been defined (guarded by
+ * YY_TYPEDEF_YY_SCANNER_T). */
+%code requires {
+#ifndef YY_TYPEDEF_YY_SCANNER_T
+#define YY_TYPEDEF_YY_SCANNER_T
+typedef void* yyscan_t;
+#endif
+}
+/* Names of the generated parser source and header. */
+%output  "phpdbg_parser.c"
+%defines "phpdbg_parser.h"
+/* Pure (reentrant) parser; pairs with the reentrant, bison-bridge lexer. */
+%define api.pure
+/* Extra argument passed to yylex(). */
+%lex-param   { yyscan_t scanner }
+/* Extra arguments of yyparse(), also passed to yyerror() in this order. */
+%parse-param { phpdbg_param_t **expression }
+%parse-param { yyscan_t scanner }
+
+/* Bare alphabetic word, treated as a command or sub-command name. */
+%token C_CMD           "command (possibly automatically detected)"
+
+/* Boolean words; the lexer gives both a numeric value (1 or 0). */
+%token C_TRUTHY                "truthy (true, on, yes or enabled)"
+%token C_FALSY         "falsy (false, off, no or disabled)"
+/* NOTE(review): no lexer rule in this commit returns C_STRING. */
+%token C_STRING                "string (some input, perhaps)"
+
+/* Parameter tokens. */
+%token T_DIGITS         "digits (numbers)"
+%token T_LITERAL "literal (T_LITERAL)"
+%token T_METHOD         "method (T_METHOD)"
+%token T_OPLINE         "opline (T_OPLINE)"
+%token T_FILE   "file (T_FILE)"
+/* NOTE(review): no lexer rule in this commit returns T_ID. */
+%token T_ID             "identifier (T_ID)"
+
+%%
+
+/* Start symbol: one handler per parse. The action is empty, so nothing is
+ * stored through the `expression` parse parameter. */
+input
+    : handler                                                          {} 
+    ;
+
+/* One or more parameters (left-recursive list). The actions only log each
+ * parameter to stderr; neither assigns a value to $$ explicitly. */
+parameters
+       : parameters parameter                                  { phpdbg_debug_param(&$2, "got another parameter"); }
+       | parameter                                                             { phpdbg_debug_param(&$1, "got first parameter");   }
+       ;
+
+/* Optional parameter list: either nothing at all, or `parameters`. */
+params
+       : /* empty */                                                   { /* do nothing */ }
+       | parameters                                                    { $$ = $1; }
+       ;
+
+/* A command word, optionally followed by a second word that is logged as
+ * its sub-command. Both actions only print to stderr. */
+command
+       : C_CMD                                                                 { fprintf(stderr, "got cmd: %s\n", $1.str);    }
+       | C_CMD C_CMD                                                   { fprintf(stderr, "got sub: %s -> %s\n", $1.str, $2.str); }
+       ;
+       
+/* A single parameter: any value-carrying token, passed through unchanged.
+ * C_TRUTHY and C_FALSY are accepted too, because the lexer returns them for
+ * boolean words; without these alternatives input such as "on" or "false"
+ * could only produce a syntax error. */
+parameter
+       : T_DIGITS                                                              { $$ = $1; }
+       | T_FILE                                                                { $$ = $1; }
+       | T_METHOD                                                              { $$ = $1; }
+       | T_OPLINE                                                              { $$ = $1; }
+       | T_ID                                                                  { $$ = $1; }
+       | T_LITERAL                                                             { $$ = $1; }
+       | C_TRUTHY                                                              { $$ = $1; }
+       | C_FALSY                                                               { $$ = $1; }
+       ;
+
+/* A full input line: a command followed by its optional parameters. */
+handler
+       : command params                                                {}
+       ;
+%%