granicus.if.org Git - postgresql/blob - src/backend/utils/misc/tzparser.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * tzparser.c
   4  *        Functions for parsing timezone offset files
   5  *
   6  * Note: this code is invoked from the check_hook for the GUC variable
   7  * timezone_abbreviations.  Therefore, it should report problems using
   8  * GUC_check_errmsg() and related functions, and try to avoid throwing
   9  * elog(ERROR).  This is not completely bulletproof at present --- in
  10  * particular out-of-memory will throw an error.  Could probably fix with
  11  * PG_TRY if necessary.
  12  *
  13  *
  14  * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
  15  * Portions Copyright (c) 1994, Regents of the University of California
  16  *
  17  * IDENTIFICATION
  18  *        src/backend/utils/misc/tzparser.c
  19  *
  20  *-------------------------------------------------------------------------
  21  */
  22
  23 #include "postgres.h"
  24
  25 #include <ctype.h>
  26
  27 #include "miscadmin.h"
  28 #include "storage/fd.h"
  29 #include "utils/guc.h"
  30 #include "utils/memutils.h"
  31 #include "utils/tzparser.h"
  32
  33
  34 #define WHITESPACE " \t\n\r"
  35
  36 static bool validateTzEntry(tzEntry *tzentry);
  37 static bool splitTzLine(const char *filename, int lineno,
  38                         char *line, tzEntry *tzentry);
  39 static int addToArray(tzEntry **base, int *arraysize, int n,
  40                    tzEntry *entry, bool override);
  41 static int ParseTzFile(const char *filename, int depth,
  42                         tzEntry **base, int *arraysize, int n);
  43
  44
  45 /*
  46  * Apply additional validation checks to a tzEntry
  47  *
  48  * Returns TRUE if OK, else false
  49  */
  50 static bool
  51 validateTzEntry(tzEntry *tzentry)
  52 {
  53         unsigned char *p;
  54
  55         /*
  56          * Check restrictions imposed by datetkntbl storage format (see
  57          * datetime.c)
  58          */
  59         if (strlen(tzentry->abbrev) > TOKMAXLEN)
  60         {
  61                 GUC_check_errmsg("time zone abbreviation \"%s\" is too long (maximum %d characters) in time zone file \"%s\", line %d",
  62                                                  tzentry->abbrev, TOKMAXLEN,
  63                                                  tzentry->filename, tzentry->lineno);
  64                 return false;
  65         }
  66
  67         /*
  68          * Sanity-check the offset: shouldn't exceed 14 hours
  69          */
  70         if (tzentry->offset > 14 * 60 * 60 ||
  71                 tzentry->offset < -14 * 60 * 60)
  72         {
  73                 GUC_check_errmsg("time zone offset %d is out of range in time zone file \"%s\", line %d",
  74                                                  tzentry->offset,
  75                                                  tzentry->filename, tzentry->lineno);
  76                 return false;
  77         }
  78
  79         /*
  80          * Convert abbrev to lowercase (must match datetime.c's conversion)
  81          */
  82         for (p = (unsigned char *) tzentry->abbrev; *p; p++)
  83                 *p = pg_tolower(*p);
  84
  85         return true;
  86 }
  87
  88 /*
  89  * Attempt to parse the line as a timezone abbrev spec
  90  *
  91  * Valid formats are:
  92  *      name  zone
  93  *      name  offset  dst
  94  *
  95  * Returns TRUE if OK, else false; data is stored in *tzentry
  96  */
  97 static bool
  98 splitTzLine(const char *filename, int lineno, char *line, tzEntry *tzentry)
  99 {
 100         char       *abbrev;
 101         char       *offset;
 102         char       *offset_endptr;
 103         char       *remain;
 104         char       *is_dst;
 105
 106         tzentry->lineno = lineno;
 107         tzentry->filename = filename;
 108
 109         abbrev = strtok(line, WHITESPACE);
 110         if (!abbrev)
 111         {
 112                 GUC_check_errmsg("missing time zone abbreviation in time zone file \"%s\", line %d",
 113                                                  filename, lineno);
 114                 return false;
 115         }
 116         tzentry->abbrev = pstrdup(abbrev);
 117
 118         offset = strtok(NULL, WHITESPACE);
 119         if (!offset)
 120         {
 121                 GUC_check_errmsg("missing time zone offset in time zone file \"%s\", line %d",
 122                                                  filename, lineno);
 123                 return false;
 124         }
 125
 126         /* We assume zone names don't begin with a digit or sign */
 127         if (isdigit((unsigned char) *offset) || *offset == '+' || *offset == '-')
 128         {
 129                 tzentry->zone = NULL;
 130                 tzentry->offset = strtol(offset, &offset_endptr, 10);
 131                 if (offset_endptr == offset || *offset_endptr != '\0')
 132                 {
 133                         GUC_check_errmsg("invalid number for time zone offset in time zone file \"%s\", line %d",
 134                                                          filename, lineno);
 135                         return false;
 136                 }
 137
 138                 is_dst = strtok(NULL, WHITESPACE);
 139                 if (is_dst && pg_strcasecmp(is_dst, "D") == 0)
 140                 {
 141                         tzentry->is_dst = true;
 142                         remain = strtok(NULL, WHITESPACE);
 143                 }
 144                 else
 145                 {
 146                         /* there was no 'D' dst specifier */
 147                         tzentry->is_dst = false;
 148                         remain = is_dst;
 149                 }
 150         }
 151         else
 152         {
 153                 /*
 154                  * Assume entry is a zone name.  We do not try to validate it by
 155                  * looking up the zone, because that would force loading of a lot of
 156                  * zones that probably will never be used in the current session.
 157                  */
 158                 tzentry->zone = pstrdup(offset);
 159                 tzentry->offset = 0;
 160                 tzentry->is_dst = false;
 161                 remain = strtok(NULL, WHITESPACE);
 162         }
 163
 164         if (!remain)                            /* no more non-whitespace chars */
 165                 return true;
 166
 167         if (remain[0] != '#')           /* must be a comment */
 168         {
 169                 GUC_check_errmsg("invalid syntax in time zone file \"%s\", line %d",
 170                                                  filename, lineno);
 171                 return false;
 172         }
 173         return true;
 174 }
 175
 176 /*
 177  * Insert entry into sorted array
 178  *
 179  * *base: base address of array (changeable if must enlarge array)
 180  * *arraysize: allocated length of array (changeable if must enlarge array)
 181  * n: current number of valid elements in array
 182  * entry: new data to insert
 183  * override: TRUE if OK to override
 184  *
 185  * Returns the new array length (new value for n), or -1 if error
 186  */
 187 static int
 188 addToArray(tzEntry **base, int *arraysize, int n,
 189                    tzEntry *entry, bool override)
 190 {
 191         tzEntry    *arrayptr;
 192         int                     low;
 193         int                     high;
 194
 195         /*
 196          * Search the array for a duplicate; as a useful side effect, the array is
 197          * maintained in sorted order.  We use strcmp() to ensure we match the
 198          * sort order datetime.c expects.
 199          */
 200         arrayptr = *base;
 201         low = 0;
 202         high = n - 1;
 203         while (low <= high)
 204         {
 205                 int                     mid = (low + high) >> 1;
 206                 tzEntry    *midptr = arrayptr + mid;
 207                 int                     cmp;
 208
 209                 cmp = strcmp(entry->abbrev, midptr->abbrev);
 210                 if (cmp < 0)
 211                         high = mid - 1;
 212                 else if (cmp > 0)
 213                         low = mid + 1;
 214                 else
 215                 {
 216                         /*
 217                          * Found a duplicate entry; complain unless it's the same.
 218                          */
 219                         if ((midptr->zone == NULL && entry->zone == NULL &&
 220                                  midptr->offset == entry->offset &&
 221                                  midptr->is_dst == entry->is_dst) ||
 222                                 (midptr->zone != NULL && entry->zone != NULL &&
 223                                  strcmp(midptr->zone, entry->zone) == 0))
 224                         {
 225                                 /* return unchanged array */
 226                                 return n;
 227                         }
 228                         if (override)
 229                         {
 230                                 /* same abbrev but something is different, override */
 231                                 midptr->zone = entry->zone;
 232                                 midptr->offset = entry->offset;
 233                                 midptr->is_dst = entry->is_dst;
 234                                 return n;
 235                         }
 236                         /* same abbrev but something is different, complain */
 237                         GUC_check_errmsg("time zone abbreviation \"%s\" is multiply defined",
 238                                                          entry->abbrev);
 239                         GUC_check_errdetail("Entry in time zone file \"%s\", line %d, conflicts with entry in file \"%s\", line %d.",
 240                                                                 midptr->filename, midptr->lineno,
 241                                                                 entry->filename, entry->lineno);
 242                         return -1;
 243                 }
 244         }
 245
 246         /*
 247          * No match, insert at position "low".
 248          */
 249         if (n >= *arraysize)
 250         {
 251                 *arraysize *= 2;
 252                 *base = (tzEntry *) repalloc(*base, *arraysize * sizeof(tzEntry));
 253         }
 254
 255         arrayptr = *base + low;
 256
 257         memmove(arrayptr + 1, arrayptr, (n - low) * sizeof(tzEntry));
 258
 259         memcpy(arrayptr, entry, sizeof(tzEntry));
 260
 261         return n + 1;
 262 }
 263
 264 /*
 265  * Parse a single timezone abbrev file --- can recurse to handle @INCLUDE
 266  *
 267  * filename: user-specified file name (does not include path)
 268  * depth: current recursion depth
 269  * *base: array for results (changeable if must enlarge array)
 270  * *arraysize: allocated length of array (changeable if must enlarge array)
 271  * n: current number of valid elements in array
 272  *
 273  * Returns the new array length (new value for n), or -1 if error
 274  */
 275 static int
 276 ParseTzFile(const char *filename, int depth,
 277                         tzEntry **base, int *arraysize, int n)
 278 {
 279         char            share_path[MAXPGPATH];
 280         char            file_path[MAXPGPATH];
 281         FILE       *tzFile;
 282         char            tzbuf[1024];
 283         char       *line;
 284         tzEntry         tzentry;
 285         int                     lineno = 0;
 286         bool            override = false;
 287         const char *p;
 288
 289         /*
 290          * We enforce that the filename is all alpha characters.  This may be
 291          * overly restrictive, but we don't want to allow access to anything
 292          * outside the timezonesets directory, so for instance '/' *must* be
 293          * rejected.
 294          */
 295         for (p = filename; *p; p++)
 296         {
 297                 if (!isalpha((unsigned char) *p))
 298                 {
 299                         /* at level 0, just use guc.c's regular "invalid value" message */
 300                         if (depth > 0)
 301                                 GUC_check_errmsg("invalid time zone file name \"%s\"",
 302                                                                  filename);
 303                         return -1;
 304                 }
 305         }
 306
 307         /*
 308          * The maximal recursion depth is a pretty arbitrary setting. It is hard
 309          * to imagine that someone needs more than 3 levels so stick with this
 310          * conservative setting until someone complains.
 311          */
 312         if (depth > 3)
 313         {
 314                 GUC_check_errmsg("time zone file recursion limit exceeded in file \"%s\"",
 315                                                  filename);
 316                 return -1;
 317         }
 318
 319         get_share_path(my_exec_path, share_path);
 320         snprintf(file_path, sizeof(file_path), "%s/timezonesets/%s",
 321                          share_path, filename);
 322         tzFile = AllocateFile(file_path, "r");
 323         if (!tzFile)
 324         {
 325                 /*
 326                  * Check to see if the problem is not the filename but the directory.
 327                  * This is worth troubling over because if the installation share/
 328                  * directory is missing or unreadable, this is likely to be the first
 329                  * place we notice a problem during postmaster startup.
 330                  */
 331                 int                     save_errno = errno;
 332                 DIR                *tzdir;
 333
 334                 snprintf(file_path, sizeof(file_path), "%s/timezonesets",
 335                                  share_path);
 336                 tzdir = AllocateDir(file_path);
 337                 if (tzdir == NULL)
 338                 {
 339                         GUC_check_errmsg("could not open directory \"%s\": %m",
 340                                                          file_path);
 341                         GUC_check_errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
 342                                                           my_exec_path);
 343                         return -1;
 344                 }
 345                 FreeDir(tzdir);
 346                 errno = save_errno;
 347
 348                 /*
 349                  * otherwise, if file doesn't exist and it's level 0, guc.c's
 350                  * complaint is enough
 351                  */
 352                 if (errno != ENOENT || depth > 0)
 353                         GUC_check_errmsg("could not read time zone file \"%s\": %m",
 354                                                          filename);
 355
 356                 return -1;
 357         }
 358
 359         while (!feof(tzFile))
 360         {
 361                 lineno++;
 362                 if (fgets(tzbuf, sizeof(tzbuf), tzFile) == NULL)
 363                 {
 364                         if (ferror(tzFile))
 365                         {
 366                                 GUC_check_errmsg("could not read time zone file \"%s\": %m",
 367                                                                  filename);
 368                                 return -1;
 369                         }
 370                         /* else we're at EOF after all */
 371                         break;
 372                 }
 373                 if (strlen(tzbuf) == sizeof(tzbuf) - 1)
 374                 {
 375                         /* the line is too long for tzbuf */
 376                         GUC_check_errmsg("line is too long in time zone file \"%s\", line %d",
 377                                                          filename, lineno);
 378                         return -1;
 379                 }
 380
 381                 /* skip over whitespace */
 382                 line = tzbuf;
 383                 while (*line && isspace((unsigned char) *line))
 384                         line++;
 385
 386                 if (*line == '\0')              /* empty line */
 387                         continue;
 388                 if (*line == '#')               /* comment line */
 389                         continue;
 390
 391                 if (pg_strncasecmp(line, "@INCLUDE", strlen("@INCLUDE")) == 0)
 392                 {
 393                         /* pstrdup so we can use filename in result data structure */
 394                         char       *includeFile = pstrdup(line + strlen("@INCLUDE"));
 395
 396                         includeFile = strtok(includeFile, WHITESPACE);
 397                         if (!includeFile || !*includeFile)
 398                         {
 399                                 GUC_check_errmsg("@INCLUDE without file name in time zone file \"%s\", line %d",
 400                                                                  filename, lineno);
 401                                 return -1;
 402                         }
 403                         n = ParseTzFile(includeFile, depth + 1,
 404                                                         base, arraysize, n);
 405                         if (n < 0)
 406                                 return -1;
 407                         continue;
 408                 }
 409
 410                 if (pg_strncasecmp(line, "@OVERRIDE", strlen("@OVERRIDE")) == 0)
 411                 {
 412                         override = true;
 413                         continue;
 414                 }
 415
 416                 if (!splitTzLine(filename, lineno, line, &tzentry))
 417                         return -1;
 418                 if (!validateTzEntry(&tzentry))
 419                         return -1;
 420                 n = addToArray(base, arraysize, n, &tzentry, override);
 421                 if (n < 0)
 422                         return -1;
 423         }
 424
 425         FreeFile(tzFile);
 426
 427         return n;
 428 }
 429
 430 /*
 431  * load_tzoffsets --- read and parse the specified timezone offset file
 432  *
 433  * On success, return a filled-in TimeZoneAbbrevTable, which must have been
 434  * malloc'd not palloc'd.  On failure, return NULL, using GUC_check_errmsg
 435  * and friends to give details of the problem.
 436  */
 437 TimeZoneAbbrevTable *
 438 load_tzoffsets(const char *filename)
 439 {
 440         TimeZoneAbbrevTable *result = NULL;
 441         MemoryContext tmpContext;
 442         MemoryContext oldContext;
 443         tzEntry    *array;
 444         int                     arraysize;
 445         int                     n;
 446
 447         /*
 448          * Create a temp memory context to work in.  This makes it easy to clean
 449          * up afterwards.
 450          */
 451         tmpContext = AllocSetContextCreate(CurrentMemoryContext,
 452                                                                            "TZParserMemory",
 453                                                                            ALLOCSET_SMALL_MINSIZE,
 454                                                                            ALLOCSET_SMALL_INITSIZE,
 455                                                                            ALLOCSET_SMALL_MAXSIZE);
 456         oldContext = MemoryContextSwitchTo(tmpContext);
 457
 458         /* Initialize array at a reasonable size */
 459         arraysize = 128;
 460         array = (tzEntry *) palloc(arraysize * sizeof(tzEntry));
 461
 462         /* Parse the file(s) */
 463         n = ParseTzFile(filename, 0, &array, &arraysize, 0);
 464
 465         /* If no errors so far, let datetime.c allocate memory & convert format */
 466         if (n >= 0)
 467         {
 468                 result = ConvertTimeZoneAbbrevs(array, n);
 469                 if (!result)
 470                         GUC_check_errmsg("out of memory");
 471         }
 472
 473         /* Clean up */
 474         MemoryContextSwitchTo(oldContext);
 475         MemoryContextDelete(tmpContext);
 476
 477         return result;
 478 }