From: krakjoe
Date: Sat, 15 Feb 2014 04:52:04 +0000 (+0000)
Subject: have a go at writing lexer for input
X-Git-Tag: php-5.6.0beta2~1^2~37^2~20^2~51
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5778e010b7a5db3229d925c6a93c88f6dc78bc9e;p=php

have a go at writing lexer for input
---

diff --git a/phpdbg_lexer.l b/phpdbg_lexer.l
new file mode 100644
index 0000000000..eb93d130f0
--- /dev/null
+++ b/phpdbg_lexer.l
@@ -0,0 +1,103 @@
+%{
+
+/*
+ * phpdbg_lexer.l
+ *
+ * Tokenizes phpdbg input for the grammar in phpdbg_parser.y. Token values
+ * are handed to the parser through *yylval (a phpdbg_param_t). Strings
+ * stored there are heap copies (strdup/strndup); nothing here frees them.
+ */
+
+#include "phpdbg.h"
+#include "phpdbg_cmd.h"
+#define YYSTYPE phpdbg_param_t
+
+#include "phpdbg_parser.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+%}
+
+%option outfile="phpdbg_lexer.c" header-file="phpdbg_lexer.h"
+%option warn nodefault
+
+%option reentrant noyywrap never-interactive nounistd
+%option bison-bridge
+
+C_TRUE		"true"
+C_YES		"yes"
+C_ON		"on"
+C_ENABLED	"enabled"
+C_FALSE		"false"
+C_NO		"no"
+C_OFF		"off"
+C_DISABLED	"disabled"
+
+DIGITS		[0-9]+
+ID		[a-zA-Z][a-zA-Z0-9_]+
+METHOD		{ID}::{ID}
+FILE		[^ :0-9]{1,}:[0-9]+
+OPLINE		0x[a-fA-F0-9]+
+LITERAL		\"(\\.|[^\\"])*\"
+WS		[ \r\n\t]+
+%%
+
+{WS}		{ }
+{C_YES}|{C_ON}|{C_ENABLED}|{C_TRUE}	{
+	yylval->type = NUMERIC_PARAM;
+	yylval->num = 1;
+	return C_TRUTHY;
+}
+{C_NO}|{C_OFF}|{C_DISABLED}|{C_FALSE}	{
+	yylval->type = NUMERIC_PARAM;
+	yylval->num = 0;
+	return C_FALSY;
+}
+{DIGITS}	{
+	/* strtol rather than atoi: atoi has undefined behaviour on overflow */
+	yylval->type = NUMERIC_PARAM;
+	yylval->num = strtol(yytext, NULL, 10);
+	return T_DIGITS;
+}
+{METHOD}	{
+	/* split "Class::method" at the "::" that the METHOD pattern guarantees */
+	const char *sep = strstr(yytext, "::");
+
+	yylval->type = METHOD_PARAM;
+	yylval->method.class = strndup(yytext, (size_t)(sep - yytext));
+	yylval->method.name = strdup(sep + 2);
+	return T_METHOD;
+}
+{FILE}		{
+	/* split "name:line" at the only ':' that the FILE pattern allows */
+	const char *colon = strrchr(yytext, ':');
+
+	yylval->type = FILE_PARAM;
+	yylval->file.name = strndup(yytext, (size_t)(colon - yytext));
+	yylval->file.line = strtol(colon + 1, NULL, 10);
+	return T_FILE;
+}
+{OPLINE}	{
+	/* base 16: in base 10 strtoul stops at the 'x' and always yields 0 */
+	yylval->type = ADDR_PARAM;
+	yylval->addr = strtoul(yytext, NULL, 16);
+	return T_OPLINE;
+}
+{LITERAL}	{
+	/* NOTE: the copied text still includes the surrounding double quotes */
+	yylval->type = STR_PARAM;
+	yylval->str = strndup(yytext, yyleng);
+	yylval->len = yyleng;
+	return T_LITERAL;
+}
+[a-zA-Z]+	{
+	yylval->type = STR_PARAM;
+	yylval->str = strndup(yytext, yyleng);
+	yylval->len = yyleng;
+
+	return C_CMD;
+}
+.		{
+	/** command detection here **/
+}
+%%
diff --git a/phpdbg_parser.y b/phpdbg_parser.y
new file mode 100644
index 0000000000..ef3dca58bb
--- /dev/null
+++ b/phpdbg_parser.y
@@ -0,0 +1,159 @@
+%error-verbose
+%{
+
+/*
+ * phpdbg_parser.y
+ *
+ * flex phpdbg_lexer.l
+ * bison phpdbg_parser.y
+ * gcc -g -o parser phpdbg_lexer.c phpdbg_parser.c -I/usr/src/php-src/main -I/usr/src/php-src/Zend -I/usr/src/php-src/TSRM -I/usr/src/php-src
+ */
+
+#include "phpdbg.h"
+#include "phpdbg_cmd.h"
+
+#define YYSTYPE phpdbg_param_t
+
+/* Dump a parameter to stderr, prefixed with msg. A NULL param, or one whose
+ * type is 0, prints nothing. */
+void phpdbg_debug_param(const phpdbg_param_t *param, const char *msg) {
+	if (param && param->type) {
+		switch (param->type) {
+			case STR_PARAM:
+				fprintf(stderr, "%s STR_PARAM(%s=%d)\n", msg, param->str, param->len);
+			break;
+
+			case ADDR_PARAM:
+				fprintf(stderr, "%s ADDR_PARAM(%lu)\n", msg, param->addr);
+			break;
+
+			case FILE_PARAM:
+				fprintf(stderr, "%s FILE_PARAM(%s:%d)\n", msg, param->file.name, param->file.line);
+			break;
+
+			case METHOD_PARAM:
+				fprintf(stderr, "%s METHOD_PARAM(%s::%s)\n", msg, param->method.class, param->method.name);
+			break;
+
+			case NUMERIC_PARAM:
+				fprintf(stderr, "%s NUMERIC_PARAM(%ld)\n", msg, param->num);
+			break;
+
+			default:
+			break;
+		}
+	}
+}
+
+#include "phpdbg_parser.h"
+#include "phpdbg_lexer.h"
+
+/* Error callback called by the generated parser; writes msg to stderr. */
+int yyerror(phpdbg_param_t **param, yyscan_t scanner, const char *msg) {
+	fprintf(stderr, "Parse Error: %s\n", msg);
+	return 0;
+}
+
+/* Stand-alone test driver: parses each line read from stdin until EOF. */
+int main(int argc, char **argv) {
+	char buffer[8096];
+
+	(void) argc;
+	(void) argv;
+
+	while (fgets(buffer, sizeof buffer, stdin) != NULL) {
+		phpdbg_param_t *expression = NULL;
+		yyscan_t scanner;
+		YY_BUFFER_STATE state;
+		int status;
+
+		if (yylex_init(&scanner)) {
+			fprintf(stderr, "could not initialize scanner\n");
+			return 1;
+		}
+
+		state = yy_scan_string(buffer, scanner);
+		/* yyparse returns 0 on success and non-zero on failure */
+		status = yyparse(&expression, scanner);
+
+		/* release the scanner state whether or not the parse succeeded */
+		yy_delete_buffer(state, scanner);
+		yylex_destroy(scanner);
+
+		if (status != 0) {
+			fprintf(stderr, "could not parse input (%s) !!\n", buffer);
+		}
+	}
+
+	/* fgets returned NULL (EOF or read error): stop instead of spinning */
+	if (ferror(stdin)) {
+		fprintf(stderr, "could not get input !!\n");
+		return 1;
+	}
+
+	return 0;
+}
+%}
+
+%code requires {
+#ifndef YY_TYPEDEF_YY_SCANNER_T
+#define YY_TYPEDEF_YY_SCANNER_T
+typedef void* yyscan_t;
+#endif
+}
+
+%output "phpdbg_parser.c"
+%defines "phpdbg_parser.h"
+
+%define api.pure
+%lex-param { yyscan_t scanner }
+%parse-param { phpdbg_param_t **expression }
+%parse-param { yyscan_t scanner }
+
+%token C_CMD		"command (possibly automatically detected)"
+
+%token C_TRUTHY		"truthy (true, on, yes or enabled)"
+%token C_FALSY		"falsy (false, off, no or disabled)"
+%token C_STRING		"string (some input, perhaps)"
+
+%token T_DIGITS		"digits (numbers)"
+%token T_LITERAL	"literal (T_LITERAL)"
+%token T_METHOD		"method (T_METHOD)"
+%token T_OPLINE		"opline (T_OPLINE)"
+%token T_FILE		"file (T_FILE)"
+%token T_ID		"identifier (T_ID)"
+
+%%
+
+input
+	: handler				{}
+	;
+
+parameters
+	: parameters parameter			{ phpdbg_debug_param(&$2, "got another parameter"); }
+	| parameter				{ phpdbg_debug_param(&$1, "got first parameter"); }
+	;
+
+params
+	: /* empty */				{ /* do nothing */ }
+	| parameters				{ $$ = $1; }
+	;
+
+command
+	: C_CMD					{ fprintf(stderr, "got cmd: %s\n", $1.str); }
+	| C_CMD C_CMD				{ fprintf(stderr, "got sub: %s -> %s\n", $1.str, $2.str); }
+	;
+
+parameter
+	: T_DIGITS				{ $$ = $1; }
+	| T_FILE				{ $$ = $1; }
+	| T_METHOD				{ $$ = $1; }
+	| T_OPLINE				{ $$ = $1; }
+	| T_ID					{ $$ = $1; }
+	| T_LITERAL				{ $$ = $1; }
+	;
+
+handler
+	: command params			{}
+	;
+%%