granicus.if.org Git - postgresql/blob - contrib/test_parser/test_parser.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * test_parser.c
   4  *        Simple example of a text search parser
   5  *
   6  * Copyright (c) 2007-2014, PostgreSQL Global Development Group
   7  *
   8  * IDENTIFICATION
   9  *        contrib/test_parser/test_parser.c
  10  *
  11  *-------------------------------------------------------------------------
  12  */
  13 #include "postgres.h"
  14
  15 #include "fmgr.h"
  16
  17 PG_MODULE_MAGIC;
  18
  19 /*
  20  * types
  21  */
  22
  23 /* self-defined type */
  24 typedef struct
  25 {
  26         char       *buffer;                     /* text to parse */
  27         int                     len;                    /* length of the text in buffer */
  28         int                     pos;                    /* position of the parser */
  29 } ParserState;
  30
  31 /* copy-paste from wparser.h of tsearch2 */
  32 typedef struct
  33 {
  34         int                     lexid;
  35         char       *alias;
  36         char       *descr;
  37 } LexDescr;
  38
  39 /*
  40  * functions
  41  */
  42 PG_FUNCTION_INFO_V1(testprs_start);
  43 PG_FUNCTION_INFO_V1(testprs_getlexeme);
  44 PG_FUNCTION_INFO_V1(testprs_end);
  45 PG_FUNCTION_INFO_V1(testprs_lextype);
  46
  47 Datum
  48 testprs_start(PG_FUNCTION_ARGS)
  49 {
  50         ParserState *pst = (ParserState *) palloc0(sizeof(ParserState));
  51
  52         pst->buffer = (char *) PG_GETARG_POINTER(0);
  53         pst->len = PG_GETARG_INT32(1);
  54         pst->pos = 0;
  55
  56         PG_RETURN_POINTER(pst);
  57 }
  58
  59 Datum
  60 testprs_getlexeme(PG_FUNCTION_ARGS)
  61 {
  62         ParserState *pst = (ParserState *) PG_GETARG_POINTER(0);
  63         char      **t = (char **) PG_GETARG_POINTER(1);
  64         int                *tlen = (int *) PG_GETARG_POINTER(2);
  65         int                     startpos = pst->pos;
  66         int                     type;
  67
  68         *t = pst->buffer + pst->pos;
  69
  70         if (pst->pos < pst->len &&
  71                 (pst->buffer)[pst->pos] == ' ')
  72         {
  73                 /* blank type */
  74                 type = 12;
  75                 /* go to the next non-space character */
  76                 while (pst->pos < pst->len &&
  77                            (pst->buffer)[pst->pos] == ' ')
  78                         (pst->pos)++;
  79         }
  80         else
  81         {
  82                 /* word type */
  83                 type = 3;
  84                 /* go to the next space character */
  85                 while (pst->pos < pst->len &&
  86                            (pst->buffer)[pst->pos] != ' ')
  87                         (pst->pos)++;
  88         }
  89
  90         *tlen = pst->pos - startpos;
  91
  92         /* we are finished if (*tlen == 0) */
  93         if (*tlen == 0)
  94                 type = 0;
  95
  96         PG_RETURN_INT32(type);
  97 }
  98
  99 Datum
 100 testprs_end(PG_FUNCTION_ARGS)
 101 {
 102         ParserState *pst = (ParserState *) PG_GETARG_POINTER(0);
 103
 104         pfree(pst);
 105         PG_RETURN_VOID();
 106 }
 107
 108 Datum
 109 testprs_lextype(PG_FUNCTION_ARGS)
 110 {
 111         /*
 112          * Remarks: - we have to return the blanks for headline reason - we use
 113          * the same lexids like Teodor in the default word parser; in this way we
 114          * can reuse the headline function of the default word parser.
 115          */
 116         LexDescr   *descr = (LexDescr *) palloc(sizeof(LexDescr) * (2 + 1));
 117
 118         /* there are only two types in this parser */
 119         descr[0].lexid = 3;
 120         descr[0].alias = pstrdup("word");
 121         descr[0].descr = pstrdup("Word");
 122         descr[1].lexid = 12;
 123         descr[1].alias = pstrdup("blank");
 124         descr[1].descr = pstrdup("Space symbols");
 125         descr[2].lexid = 0;
 126
 127         PG_RETURN_POINTER(descr);
 128 }