]> granicus.if.org Git - postgresql/blob - src/backend/access/transam/timeline.c
Reduce pinning and buffer content locking for btree scans.
[postgresql] / src / backend / access / transam / timeline.c
1 /*-------------------------------------------------------------------------
2  *
3  * timeline.c
4  *              Functions for reading and writing timeline history files.
5  *
6  * A timeline history file lists the timeline changes of the timeline, in
7  * a simple text format. They are archived along with the WAL segments.
8  *
9  * The files are named like "<tli>.history". For example, if the database
10  * starts up and switches to timeline 5, the timeline history file would be
11  * called "00000005.history".
12  *
13  * Each line in the file represents a timeline switch:
14  *
15  * <parentTLI> <switchpoint> <reason>
16  *
17  *      parentTLI       ID of the parent timeline
18  *      switchpoint XLogRecPtr of the WAL position where the switch happened
19  *      reason          human-readable explanation of why the timeline was changed
20  *
21  * The fields are separated by tabs. Lines beginning with # are comments, and
22  * are ignored. Empty lines are also ignored.
23  *
24  * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
25  * Portions Copyright (c) 1994, Regents of the University of California
26  *
27  * src/backend/access/transam/timeline.c
28  *
29  *-------------------------------------------------------------------------
30  */
31
32 #include "postgres.h"
33
34 #include <sys/stat.h>
35 #include <stdio.h>
36 #include <unistd.h>
37
38 #include "access/timeline.h"
39 #include "access/xlog.h"
40 #include "access/xlog_internal.h"
41 #include "access/xlogdefs.h"
42 #include "storage/fd.h"
43
44 /*
45  * Copies all timeline history files with id's between 'begin' and 'end'
46  * from archive to pg_xlog.
47  */
48 void
49 restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
50 {
51         char            path[MAXPGPATH];
52         char            histfname[MAXFNAMELEN];
53         TimeLineID      tli;
54
55         for (tli = begin; tli < end; tli++)
56         {
57                 if (tli == 1)
58                         continue;
59
60                 TLHistoryFileName(histfname, tli);
61                 if (RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false))
62                         KeepFileRestoredFromArchive(path, histfname);
63         }
64 }
65
66 /*
67  * Try to read a timeline's history file.
68  *
69  * If successful, return the list of component TLIs (the given TLI followed by
70  * its ancestor TLIs).  If we can't find the history file, assume that the
71  * timeline has no parents, and return a list of just the specified timeline
72  * ID.
73  */
74 List *
75 readTimeLineHistory(TimeLineID targetTLI)
76 {
77         List       *result;
78         char            path[MAXPGPATH];
79         char            histfname[MAXFNAMELEN];
80         char            fline[MAXPGPATH];
81         FILE       *fd;
82         TimeLineHistoryEntry *entry;
83         TimeLineID      lasttli = 0;
84         XLogRecPtr      prevend;
85         bool            fromArchive = false;
86
87         /* Timeline 1 does not have a history file, so no need to check */
88         if (targetTLI == 1)
89         {
90                 entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
91                 entry->tli = targetTLI;
92                 entry->begin = entry->end = InvalidXLogRecPtr;
93                 return list_make1(entry);
94         }
95
96         if (ArchiveRecoveryRequested)
97         {
98                 TLHistoryFileName(histfname, targetTLI);
99                 fromArchive =
100                         RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
101         }
102         else
103                 TLHistoryFilePath(path, targetTLI);
104
105         fd = AllocateFile(path, "r");
106         if (fd == NULL)
107         {
108                 if (errno != ENOENT)
109                         ereport(FATAL,
110                                         (errcode_for_file_access(),
111                                          errmsg("could not open file \"%s\": %m", path)));
112                 /* Not there, so assume no parents */
113                 entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
114                 entry->tli = targetTLI;
115                 entry->begin = entry->end = InvalidXLogRecPtr;
116                 return list_make1(entry);
117         }
118
119         result = NIL;
120
121         /*
122          * Parse the file...
123          */
124         prevend = InvalidXLogRecPtr;
125         while (fgets(fline, sizeof(fline), fd) != NULL)
126         {
127                 /* skip leading whitespace and check for # comment */
128                 char       *ptr;
129                 TimeLineID      tli;
130                 uint32          switchpoint_hi;
131                 uint32          switchpoint_lo;
132                 int                     nfields;
133
134                 for (ptr = fline; *ptr; ptr++)
135                 {
136                         if (!isspace((unsigned char) *ptr))
137                                 break;
138                 }
139                 if (*ptr == '\0' || *ptr == '#')
140                         continue;
141
142                 nfields = sscanf(fline, "%u\t%X/%X", &tli, &switchpoint_hi, &switchpoint_lo);
143
144                 if (nfields < 1)
145                 {
146                         /* expect a numeric timeline ID as first field of line */
147                         ereport(FATAL,
148                                         (errmsg("syntax error in history file: %s", fline),
149                                          errhint("Expected a numeric timeline ID.")));
150                 }
151                 if (nfields != 3)
152                         ereport(FATAL,
153                                         (errmsg("syntax error in history file: %s", fline),
154                            errhint("Expected a transaction log switchpoint location.")));
155
156                 if (result && tli <= lasttli)
157                         ereport(FATAL,
158                                         (errmsg("invalid data in history file: %s", fline),
159                                    errhint("Timeline IDs must be in increasing sequence.")));
160
161                 lasttli = tli;
162
163                 entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
164                 entry->tli = tli;
165                 entry->begin = prevend;
166                 entry->end = ((uint64) (switchpoint_hi)) << 32 | (uint64) switchpoint_lo;
167                 prevend = entry->end;
168
169                 /* Build list with newest item first */
170                 result = lcons(entry, result);
171
172                 /* we ignore the remainder of each line */
173         }
174
175         FreeFile(fd);
176
177         if (result && targetTLI <= lasttli)
178                 ereport(FATAL,
179                                 (errmsg("invalid data in history file \"%s\"", path),
180                         errhint("Timeline IDs must be less than child timeline's ID.")));
181
182         /*
183          * Create one more entry for the "tip" of the timeline, which has no entry
184          * in the history file.
185          */
186         entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry));
187         entry->tli = targetTLI;
188         entry->begin = prevend;
189         entry->end = InvalidXLogRecPtr;
190
191         result = lcons(entry, result);
192
193         /*
194          * If the history file was fetched from archive, save it in pg_xlog for
195          * future reference.
196          */
197         if (fromArchive)
198                 KeepFileRestoredFromArchive(path, histfname);
199
200         return result;
201 }
202
203 /*
204  * Probe whether a timeline history file exists for the given timeline ID
205  */
206 bool
207 existsTimeLineHistory(TimeLineID probeTLI)
208 {
209         char            path[MAXPGPATH];
210         char            histfname[MAXFNAMELEN];
211         FILE       *fd;
212
213         /* Timeline 1 does not have a history file, so no need to check */
214         if (probeTLI == 1)
215                 return false;
216
217         if (ArchiveRecoveryRequested)
218         {
219                 TLHistoryFileName(histfname, probeTLI);
220                 RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
221         }
222         else
223                 TLHistoryFilePath(path, probeTLI);
224
225         fd = AllocateFile(path, "r");
226         if (fd != NULL)
227         {
228                 FreeFile(fd);
229                 return true;
230         }
231         else
232         {
233                 if (errno != ENOENT)
234                         ereport(FATAL,
235                                         (errcode_for_file_access(),
236                                          errmsg("could not open file \"%s\": %m", path)));
237                 return false;
238         }
239 }
240
241 /*
242  * Find the newest existing timeline, assuming that startTLI exists.
243  *
244  * Note: while this is somewhat heuristic, it does positively guarantee
245  * that (result + 1) is not a known timeline, and therefore it should
246  * be safe to assign that ID to a new timeline.
247  */
248 TimeLineID
249 findNewestTimeLine(TimeLineID startTLI)
250 {
251         TimeLineID      newestTLI;
252         TimeLineID      probeTLI;
253
254         /*
255          * The algorithm is just to probe for the existence of timeline history
256          * files.  XXX is it useful to allow gaps in the sequence?
257          */
258         newestTLI = startTLI;
259
260         for (probeTLI = startTLI + 1;; probeTLI++)
261         {
262                 if (existsTimeLineHistory(probeTLI))
263                 {
264                         newestTLI = probeTLI;           /* probeTLI exists */
265                 }
266                 else
267                 {
268                         /* doesn't exist, assume we're done */
269                         break;
270                 }
271         }
272
273         return newestTLI;
274 }
275
276 /*
277  * Create a new timeline history file.
278  *
279  *      newTLI: ID of the new timeline
280  *      parentTLI: ID of its immediate parent
281  *      switchpoint: XLOG position where the system switched to the new timeline
282  *      reason: human-readable explanation of why the timeline was switched
283  *
284  * Currently this is only used at the end recovery, and so there are no locking
285  * considerations.  But we should be just as tense as XLogFileInit to avoid
286  * emplacing a bogus file.
287  */
288 void
289 writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
290                                          XLogRecPtr switchpoint, char *reason)
291 {
292         char            path[MAXPGPATH];
293         char            tmppath[MAXPGPATH];
294         char            histfname[MAXFNAMELEN];
295         char            buffer[BLCKSZ];
296         int                     srcfd;
297         int                     fd;
298         int                     nbytes;
299
300         Assert(newTLI > parentTLI); /* else bad selection of newTLI */
301
302         /*
303          * Write into a temp file name.
304          */
305         snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
306
307         unlink(tmppath);
308
309         /* do not use get_sync_bit() here --- want to fsync only at end of fill */
310         fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL,
311                                                    S_IRUSR | S_IWUSR);
312         if (fd < 0)
313                 ereport(ERROR,
314                                 (errcode_for_file_access(),
315                                  errmsg("could not create file \"%s\": %m", tmppath)));
316
317         /*
318          * If a history file exists for the parent, copy it verbatim
319          */
320         if (ArchiveRecoveryRequested)
321         {
322                 TLHistoryFileName(histfname, parentTLI);
323                 RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
324         }
325         else
326                 TLHistoryFilePath(path, parentTLI);
327
328         srcfd = OpenTransientFile(path, O_RDONLY, 0);
329         if (srcfd < 0)
330         {
331                 if (errno != ENOENT)
332                         ereport(ERROR,
333                                         (errcode_for_file_access(),
334                                          errmsg("could not open file \"%s\": %m", path)));
335                 /* Not there, so assume parent has no parents */
336         }
337         else
338         {
339                 for (;;)
340                 {
341                         errno = 0;
342                         nbytes = (int) read(srcfd, buffer, sizeof(buffer));
343                         if (nbytes < 0 || errno != 0)
344                                 ereport(ERROR,
345                                                 (errcode_for_file_access(),
346                                                  errmsg("could not read file \"%s\": %m", path)));
347                         if (nbytes == 0)
348                                 break;
349                         errno = 0;
350                         if ((int) write(fd, buffer, nbytes) != nbytes)
351                         {
352                                 int                     save_errno = errno;
353
354                                 /*
355                                  * If we fail to make the file, delete it to release disk
356                                  * space
357                                  */
358                                 unlink(tmppath);
359
360                                 /*
361                                  * if write didn't set errno, assume problem is no disk space
362                                  */
363                                 errno = save_errno ? save_errno : ENOSPC;
364
365                                 ereport(ERROR,
366                                                 (errcode_for_file_access(),
367                                          errmsg("could not write to file \"%s\": %m", tmppath)));
368                         }
369                 }
370                 CloseTransientFile(srcfd);
371         }
372
373         /*
374          * Append one line with the details of this timeline split.
375          *
376          * If we did have a parent file, insert an extra newline just in case the
377          * parent file failed to end with one.
378          */
379         snprintf(buffer, sizeof(buffer),
380                          "%s%u\t%X/%X\t%s\n",
381                          (srcfd < 0) ? "" : "\n",
382                          parentTLI,
383                          (uint32) (switchpoint >> 32), (uint32) (switchpoint),
384                          reason);
385
386         nbytes = strlen(buffer);
387         errno = 0;
388         if ((int) write(fd, buffer, nbytes) != nbytes)
389         {
390                 int                     save_errno = errno;
391
392                 /*
393                  * If we fail to make the file, delete it to release disk space
394                  */
395                 unlink(tmppath);
396                 /* if write didn't set errno, assume problem is no disk space */
397                 errno = save_errno ? save_errno : ENOSPC;
398
399                 ereport(ERROR,
400                                 (errcode_for_file_access(),
401                                  errmsg("could not write to file \"%s\": %m", tmppath)));
402         }
403
404         if (pg_fsync(fd) != 0)
405                 ereport(ERROR,
406                                 (errcode_for_file_access(),
407                                  errmsg("could not fsync file \"%s\": %m", tmppath)));
408
409         if (CloseTransientFile(fd))
410                 ereport(ERROR,
411                                 (errcode_for_file_access(),
412                                  errmsg("could not close file \"%s\": %m", tmppath)));
413
414
415         /*
416          * Now move the completed history file into place with its final name.
417          */
418         TLHistoryFilePath(path, newTLI);
419
420         /*
421          * Prefer link() to rename() here just to be really sure that we don't
422          * overwrite an existing file.  However, there shouldn't be one, so
423          * rename() is an acceptable substitute except for the truly paranoid.
424          */
425 #if HAVE_WORKING_LINK
426         if (link(tmppath, path) < 0)
427                 ereport(ERROR,
428                                 (errcode_for_file_access(),
429                                  errmsg("could not link file \"%s\" to \"%s\": %m",
430                                                 tmppath, path)));
431         unlink(tmppath);
432 #else
433         if (rename(tmppath, path) < 0)
434                 ereport(ERROR,
435                                 (errcode_for_file_access(),
436                                  errmsg("could not rename file \"%s\" to \"%s\": %m",
437                                                 tmppath, path)));
438 #endif
439
440         /* The history file can be archived immediately. */
441         if (XLogArchivingActive())
442         {
443                 TLHistoryFileName(histfname, newTLI);
444                 XLogArchiveNotify(histfname);
445         }
446 }
447
448 /*
449  * Writes a history file for given timeline and contents.
450  *
451  * Currently this is only used in the walreceiver process, and so there are
452  * no locking considerations.  But we should be just as tense as XLogFileInit
453  * to avoid emplacing a bogus file.
454  */
455 void
456 writeTimeLineHistoryFile(TimeLineID tli, char *content, int size)
457 {
458         char            path[MAXPGPATH];
459         char            tmppath[MAXPGPATH];
460         int                     fd;
461
462         /*
463          * Write into a temp file name.
464          */
465         snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
466
467         unlink(tmppath);
468
469         /* do not use get_sync_bit() here --- want to fsync only at end of fill */
470         fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL,
471                                                    S_IRUSR | S_IWUSR);
472         if (fd < 0)
473                 ereport(ERROR,
474                                 (errcode_for_file_access(),
475                                  errmsg("could not create file \"%s\": %m", tmppath)));
476
477         errno = 0;
478         if ((int) write(fd, content, size) != size)
479         {
480                 int                     save_errno = errno;
481
482                 /*
483                  * If we fail to make the file, delete it to release disk space
484                  */
485                 unlink(tmppath);
486                 /* if write didn't set errno, assume problem is no disk space */
487                 errno = save_errno ? save_errno : ENOSPC;
488
489                 ereport(ERROR,
490                                 (errcode_for_file_access(),
491                                  errmsg("could not write to file \"%s\": %m", tmppath)));
492         }
493
494         if (pg_fsync(fd) != 0)
495                 ereport(ERROR,
496                                 (errcode_for_file_access(),
497                                  errmsg("could not fsync file \"%s\": %m", tmppath)));
498
499         if (CloseTransientFile(fd))
500                 ereport(ERROR,
501                                 (errcode_for_file_access(),
502                                  errmsg("could not close file \"%s\": %m", tmppath)));
503
504
505         /*
506          * Now move the completed history file into place with its final name.
507          */
508         TLHistoryFilePath(path, tli);
509
510         /*
511          * Prefer link() to rename() here just to be really sure that we don't
512          * overwrite an existing logfile.  However, there shouldn't be one, so
513          * rename() is an acceptable substitute except for the truly paranoid.
514          */
515 #if HAVE_WORKING_LINK
516         if (link(tmppath, path) < 0)
517                 ereport(ERROR,
518                                 (errcode_for_file_access(),
519                                  errmsg("could not link file \"%s\" to \"%s\": %m",
520                                                 tmppath, path)));
521         unlink(tmppath);
522 #else
523         if (rename(tmppath, path) < 0)
524                 ereport(ERROR,
525                                 (errcode_for_file_access(),
526                                  errmsg("could not rename file \"%s\" to \"%s\": %m",
527                                                 tmppath, path)));
528 #endif
529 }
530
531 /*
532  * Returns true if 'expectedTLEs' contains a timeline with id 'tli'
533  */
534 bool
535 tliInHistory(TimeLineID tli, List *expectedTLEs)
536 {
537         ListCell   *cell;
538
539         foreach(cell, expectedTLEs)
540         {
541                 if (((TimeLineHistoryEntry *) lfirst(cell))->tli == tli)
542                         return true;
543         }
544
545         return false;
546 }
547
548 /*
549  * Returns the ID of the timeline in use at a particular point in time, in
550  * the given timeline history.
551  */
552 TimeLineID
553 tliOfPointInHistory(XLogRecPtr ptr, List *history)
554 {
555         ListCell   *cell;
556
557         foreach(cell, history)
558         {
559                 TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
560
561                 if ((XLogRecPtrIsInvalid(tle->begin) || tle->begin <= ptr) &&
562                         (XLogRecPtrIsInvalid(tle->end) || ptr < tle->end))
563                 {
564                         /* found it */
565                         return tle->tli;
566                 }
567         }
568
569         /* shouldn't happen. */
570         elog(ERROR, "timeline history was not contiguous");
571         return 0;                                       /* keep compiler quiet */
572 }
573
574 /*
575  * Returns the point in history where we branched off the given timeline,
576  * and the timeline we branched to (*nextTLI). Returns InvalidXLogRecPtr if
577  * the timeline is current, ie. we have not branched off from it, and throws
578  * an error if the timeline is not part of this server's history.
579  */
580 XLogRecPtr
581 tliSwitchPoint(TimeLineID tli, List *history, TimeLineID *nextTLI)
582 {
583         ListCell   *cell;
584
585         if (nextTLI)
586                 *nextTLI = 0;
587         foreach(cell, history)
588         {
589                 TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell);
590
591                 if (tle->tli == tli)
592                         return tle->end;
593                 if (nextTLI)
594                         *nextTLI = tle->tli;
595         }
596
597         ereport(ERROR,
598                         (errmsg("requested timeline %u is not in this server's history",
599                                         tli)));
600         return InvalidXLogRecPtr;       /* keep compiler quiet */
601 }