1 /*-------------------------------------------------------------------------
4 * A data structure for keeping track of files that have changed.
6 * Copyright (c) 2013-2019, PostgreSQL Global Development Group
8 *-------------------------------------------------------------------------
11 #include "postgres_fe.h"
16 #include "datapagemap.h"
18 #include "pg_rewind.h"
20 #include "common/string.h"
21 #include "catalog/pg_tablespace_d.h"
22 #include "storage/fd.h"
24 filemap_t *filemap = NULL;
26 static bool isRelDataFile(const char *path);
27 static char *datasegpath(RelFileNode rnode, ForkNumber forknum,
29 static int path_cmp(const void *a, const void *b);
30 static int final_filemap_cmp(const void *a, const void *b);
31 static void filemap_list_to_array(filemap_t *map);
32 static bool check_file_excluded(const char *path, bool is_source);
35 * The contents of these directories are removed or recreated during server
36 * start so they are not included in data processed by pg_rewind.
38 * Note: those lists should be kept in sync with what basebackup.c provides.
39 * Some of the values, contrary to what basebackup.c uses, are hardcoded as
40 * they are defined in backend-only headers. So this list is maintained
41 * with a best effort in mind.
43 static const char *excludeDirContents[] =
46 * Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped even
47 * when stats_temp_directory is set because PGSS_TEXT_FILE is always
50 "pg_stat_tmp", /* defined as PG_STAT_TMP_DIR */
53 * It is generally not useful to back up the contents of this directory
54 * even if the intention is to restore to another master. See backup.sgml
55 * for a more detailed description.
59 /* Contents removed on startup, see dsm_cleanup_for_mmap(). */
60 "pg_dynshmem", /* defined as PG_DYNSHMEM_DIR */
62 /* Contents removed on startup, see AsyncShmemInit(). */
66 * Old contents are loaded for possible debugging but are not required for
67 * normal operation, see OldSerXidInit().
71 /* Contents removed on startup, see DeleteAllExportedSnapshotFiles(). */
74 /* Contents zeroed on startup, see StartupSUBTRANS(). */
82 * List of files excluded from filemap processing.
84 static const char *excludeFiles[] =
86 /* Skip auto conf temporary file. */
87 "postgresql.auto.conf.tmp", /* defined as PG_AUTOCONF_FILENAME */
89 /* Skip current log file temporary file */
90 "current_logfiles.tmp", /* defined as LOG_METAINFO_DATAFILE_TMP */
92 /* Skip relation cache because it is rebuilt on startup */
93 "pg_internal.init", /* defined as RELCACHE_INIT_FILENAME */
96 * If there's a backup_label or tablespace_map file, it belongs to a
97 * backup started by the user with pg_start_backup(). It is *not* correct
98 * for this backup. Our backup_label is written later on separately.
100 "backup_label", /* defined as BACKUP_LABEL_FILE */
101 "tablespace_map", /* defined as TABLESPACE_MAP */
111 * Create a new file map (stored in the global pointer "filemap").
118 map = pg_malloc(sizeof(filemap_t));
119 map->first = map->last = NULL;
124 Assert(filemap == NULL);
129 * Callback for processing source file list.
131 * This is called once for every file in the source server. We decide what
132 * action needs to be taken for the file, depending on whether the file
133 * exists in the target and whether the size matches.
136 process_source_file(const char *path, file_type_t type, size_t newsize,
137 const char *link_target)
140 char localpath[MAXPGPATH];
142 filemap_t *map = filemap;
143 file_action_t action = FILE_ACTION_NONE;
147 Assert(map->array == NULL);
150 * Skip any files matching the exclusion filters. This has the effect of
151 * removing all those files on the target.
153 if (check_file_excluded(path, true))
157 * Pretend that pg_wal is a directory, even if it's really a symlink. We
158 * don't want to mess with the symlink itself, nor complain if it's a
159 * symlink in source but not in target or vice versa.
161 if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
162 type = FILE_TYPE_DIRECTORY;
165 * Skip temporary files, .../pgsql_tmp/... and .../pgsql_tmp.* in source.
166 * This has the effect that all temporary files in the destination will be
169 if (strstr(path, "/" PG_TEMP_FILE_PREFIX) != NULL)
171 if (strstr(path, "/" PG_TEMP_FILES_DIR "/") != NULL)
175 * sanity check: a filename that looks like a data file better be a
178 if (type != FILE_TYPE_REGULAR && isRelDataFile(path))
179 pg_fatal("data file \"%s\" in source is not a regular file", path);
181 snprintf(localpath, sizeof(localpath), "%s/%s", datadir_target, path);
183 /* Does the corresponding file exist in the target data dir? */
184 if (lstat(localpath, &statbuf) < 0)
187 pg_fatal("could not stat file \"%s\": %m",
197 case FILE_TYPE_DIRECTORY:
198 if (exists && !S_ISDIR(statbuf.st_mode) && strcmp(path, "pg_wal") != 0)
200 /* it's a directory in source, but not in target. Strange.. */
201 pg_fatal("\"%s\" is not a directory", localpath);
205 action = FILE_ACTION_CREATE;
207 action = FILE_ACTION_NONE;
211 case FILE_TYPE_SYMLINK:
214 !S_ISLNK(statbuf.st_mode)
216 !pgwin32_is_junction(localpath)
221 * It's a symbolic link in source, but not in target.
224 pg_fatal("\"%s\" is not a symbolic link", localpath);
228 action = FILE_ACTION_CREATE;
230 action = FILE_ACTION_NONE;
234 case FILE_TYPE_REGULAR:
235 if (exists && !S_ISREG(statbuf.st_mode))
236 pg_fatal("\"%s\" is not a regular file", localpath);
238 if (!exists || !isRelDataFile(path))
241 * File exists in source, but not in target. Or it's a
242 * non-data file that we have no special processing for. Copy
245 * An exception: PG_VERSIONs should be identical, but avoid
246 * overwriting it for paranoia.
248 if (pg_str_endswith(path, "PG_VERSION"))
250 action = FILE_ACTION_NONE;
251 oldsize = statbuf.st_size;
255 action = FILE_ACTION_COPY;
262 * It's a data file that exists in both.
264 * If it's larger in target, we can truncate it. There will
265 * also be a WAL record of the truncation in the source
266 * system, so WAL replay would eventually truncate the target
267 * too, but we might as well do it now.
269 * If it's smaller in the target, it means that it has been
270 * truncated in the target, or enlarged in the source, or
271 * both. If it was truncated in the target, we need to copy
272 * the missing tail from the source system. If it was enlarged
273 * in the source system, there will be WAL records in the
274 * source system for the new blocks, so we wouldn't need to
275 * copy them here. But we don't know which scenario we're
276 * dealing with, and there's no harm in copying the missing
277 * blocks now, so do it now.
279 * If it's the same size, do nothing here. Any blocks modified
280 * in the target will be copied based on parsing the target
281 * system's WAL, and any blocks modified in the source will be
282 * updated after rewinding, when the source system's WAL is
285 oldsize = statbuf.st_size;
286 if (oldsize < newsize)
287 action = FILE_ACTION_COPY_TAIL;
288 else if (oldsize > newsize)
289 action = FILE_ACTION_TRUNCATE;
291 action = FILE_ACTION_NONE;
296 /* Create a new entry for this file */
297 entry = pg_malloc(sizeof(file_entry_t));
298 entry->path = pg_strdup(path);
300 entry->action = action;
301 entry->oldsize = oldsize;
302 entry->newsize = newsize;
303 entry->link_target = link_target ? pg_strdup(link_target) : NULL;
305 entry->pagemap.bitmap = NULL;
306 entry->pagemap.bitmapsize = 0;
307 entry->isrelfile = isRelDataFile(path);
311 map->last->next = entry;
315 map->first = map->last = entry;
320 * Callback for processing target file list.
322 * All source files must be already processed before calling this. This only
323 * marks target data directory's files that didn't exist in the source for
327 process_target_file(const char *path, file_type_t type, size_t oldsize,
328 const char *link_target)
331 char localpath[MAXPGPATH];
334 file_entry_t *key_ptr;
335 filemap_t *map = filemap;
339 * Do not apply any exclusion filters here. This has the advantage of
340 * removing from the target data folder all paths which have been filtered
341 * out from the source data folder when processing the source files.
344 snprintf(localpath, sizeof(localpath), "%s/%s", datadir_target, path);
345 if (lstat(localpath, &statbuf) < 0)
348 pg_fatal("could not stat file \"%s\": %m",
354 if (map->array == NULL)
356 /* on first call, initialize lookup array */
359 /* should not happen */
360 pg_fatal("source file list is empty");
363 filemap_list_to_array(map);
365 Assert(map->array != NULL);
367 qsort(map->array, map->narray, sizeof(file_entry_t *), path_cmp);
371 * Like in process_source_file, pretend that pg_wal is always a directory.
373 if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
374 type = FILE_TYPE_DIRECTORY;
376 key.path = (char *) path;
378 exists = (bsearch(&key_ptr, map->array, map->narray, sizeof(file_entry_t *),
381 /* Remove any file or folder that doesn't exist in the source system. */
384 entry = pg_malloc(sizeof(file_entry_t));
385 entry->path = pg_strdup(path);
387 entry->action = FILE_ACTION_REMOVE;
388 entry->oldsize = oldsize;
390 entry->link_target = link_target ? pg_strdup(link_target) : NULL;
392 entry->pagemap.bitmap = NULL;
393 entry->pagemap.bitmapsize = 0;
394 entry->isrelfile = isRelDataFile(path);
396 if (map->last == NULL)
399 map->last->next = entry;
406 * We already handled all files that exist in the source system in
407 * process_source_file().
413 * This callback gets called while we read the WAL in the target, for every
414 * block that has changed in the target system. It makes note of all the
415 * changed blocks in the pagemap of the file.
418 process_block_change(ForkNumber forknum, RelFileNode rnode, BlockNumber blkno)
422 file_entry_t *key_ptr;
424 BlockNumber blkno_inseg;
426 filemap_t *map = filemap;
431 segno = blkno / RELSEG_SIZE;
432 blkno_inseg = blkno % RELSEG_SIZE;
434 path = datasegpath(rnode, forknum, segno);
436 key.path = (char *) path;
439 e = bsearch(&key_ptr, map->array, map->narray, sizeof(file_entry_t *),
449 Assert(entry->isrelfile);
451 switch (entry->action)
453 case FILE_ACTION_NONE:
454 case FILE_ACTION_TRUNCATE:
455 /* skip if we're truncating away the modified block anyway */
456 if ((blkno_inseg + 1) * BLCKSZ <= entry->newsize)
457 datapagemap_add(&entry->pagemap, blkno_inseg);
460 case FILE_ACTION_COPY_TAIL:
463 * skip the modified block if it is part of the "tail" that
464 * we're copying anyway.
466 if ((blkno_inseg + 1) * BLCKSZ <= entry->oldsize)
467 datapagemap_add(&entry->pagemap, blkno_inseg);
470 case FILE_ACTION_COPY:
471 case FILE_ACTION_REMOVE:
474 case FILE_ACTION_CREATE:
475 pg_fatal("unexpected page modification for directory or symbolic link \"%s\"", entry->path);
481 * If we don't have any record of this file in the file map, it means
482 * that it's a relation that doesn't exist in the source system, and
483 * it was subsequently removed in the target system, too. We can
490 * Is this the path of a file that pg_rewind can skip copying?
493 check_file_excluded(const char *path, bool is_source)
495 char localpath[MAXPGPATH];
497 const char *filename;
499 /* check individual files... */
500 for (excludeIdx = 0; excludeFiles[excludeIdx] != NULL; excludeIdx++)
502 filename = last_dir_separator(path);
503 if (filename == NULL)
507 if (strcmp(filename, excludeFiles[excludeIdx]) == 0)
510 pg_log_debug("entry \"%s\" excluded from source file list",
513 pg_log_debug("entry \"%s\" excluded from target file list",
520 * ... And check some directories. Note that this includes any contents
521 * within the directories themselves.
523 for (excludeIdx = 0; excludeDirContents[excludeIdx] != NULL; excludeIdx++)
525 snprintf(localpath, sizeof(localpath), "%s/",
526 excludeDirContents[excludeIdx]);
527 if (strstr(path, localpath) == path)
530 pg_log_debug("entry \"%s\" excluded from source file list",
533 pg_log_debug("entry \"%s\" excluded from target file list",
543 * Convert the linked list of entries in map->first/last to the array,
547 filemap_list_to_array(filemap_t *map)
553 map->array = (file_entry_t **)
554 pg_realloc(map->array,
555 (map->nlist + map->narray) * sizeof(file_entry_t *));
557 narray = map->narray;
558 for (entry = map->first; entry != NULL; entry = next)
560 map->array[narray++] = entry;
564 Assert(narray == map->nlist + map->narray);
565 map->narray = narray;
567 map->first = map->last = NULL;
571 filemap_finalize(void)
573 filemap_t *map = filemap;
575 filemap_list_to_array(map);
576 qsort(map->array, map->narray, sizeof(file_entry_t *),
581 action_to_str(file_action_t action)
585 case FILE_ACTION_NONE:
587 case FILE_ACTION_COPY:
589 case FILE_ACTION_TRUNCATE:
591 case FILE_ACTION_COPY_TAIL:
593 case FILE_ACTION_CREATE:
595 case FILE_ACTION_REMOVE:
604 * Calculate the totals needed for progress reports.
607 calculate_totals(void)
611 filemap_t *map = filemap;
616 for (i = 0; i < map->narray; i++)
618 entry = map->array[i];
620 if (entry->type != FILE_TYPE_REGULAR)
623 map->total_size += entry->newsize;
625 if (entry->action == FILE_ACTION_COPY)
627 map->fetch_size += entry->newsize;
631 if (entry->action == FILE_ACTION_COPY_TAIL)
632 map->fetch_size += (entry->newsize - entry->oldsize);
634 if (entry->pagemap.bitmapsize > 0)
636 datapagemap_iterator_t *iter;
639 iter = datapagemap_iterate(&entry->pagemap);
640 while (datapagemap_next(iter, &blk))
641 map->fetch_size += BLCKSZ;
651 filemap_t *map = filemap;
655 for (i = 0; i < map->narray; i++)
657 entry = map->array[i];
658 if (entry->action != FILE_ACTION_NONE ||
659 entry->pagemap.bitmapsize > 0)
661 pg_log_debug("%s (%s)", entry->path,
662 action_to_str(entry->action));
664 if (entry->pagemap.bitmapsize > 0)
665 datapagemap_print(&entry->pagemap);
672 * Does it look like a relation data file?
674 * For our purposes, only files belonging to the main fork are considered
675 * relation files. Other forks are always copied in toto, because we cannot
676 * reliably track changes to them, because WAL only contains block references
680 isRelDataFile(const char *path)
688 * Relation data files can be in one of the following directories:
694 * regular relations, default tablespace
696 * pg_tblspc/<tblspc oid>/<tblspc version>/
697 * within a non-default tablespace (the name of the directory
698 * depends on version)
700 * And the relation data files themselves have a filename like:
702 * <oid>.<segment number>
706 rnode.spcNode = InvalidOid;
707 rnode.dbNode = InvalidOid;
708 rnode.relNode = InvalidOid;
712 nmatch = sscanf(path, "global/%u.%u", &rnode.relNode, &segNo);
713 if (nmatch == 1 || nmatch == 2)
715 rnode.spcNode = GLOBALTABLESPACE_OID;
721 nmatch = sscanf(path, "base/%u/%u.%u",
722 &rnode.dbNode, &rnode.relNode, &segNo);
723 if (nmatch == 2 || nmatch == 3)
725 rnode.spcNode = DEFAULTTABLESPACE_OID;
730 nmatch = sscanf(path, "pg_tblspc/%u/" TABLESPACE_VERSION_DIRECTORY "/%u/%u.%u",
731 &rnode.spcNode, &rnode.dbNode, &rnode.relNode,
733 if (nmatch == 3 || nmatch == 4)
739 * The sscanf tests above can match files that have extra characters at
740 * the end. To eliminate such cases, cross-check that GetRelationPath
741 * creates the exact same filename, when passed the RelFileNode
742 * information we extracted from the filename.
746 char *check_path = datasegpath(rnode, MAIN_FORKNUM, segNo);
748 if (strcmp(check_path, path) != 0)
758 * A helper function to create the path of a relation file and segment.
760 * The returned path is palloc'd
763 datasegpath(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
768 path = relpathperm(rnode, forknum);
771 segpath = psprintf("%s.%u", path, segno);
780 path_cmp(const void *a, const void *b)
782 file_entry_t *fa = *((file_entry_t **) a);
783 file_entry_t *fb = *((file_entry_t **) b);
785 return strcmp(fa->path, fb->path);
789 * In the final stage, the filemap is sorted so that removals come last.
790 * From disk space usage point of view, it would be better to do removals
791 * first, but for now, safety first. If a whole directory is deleted, all
792 * files and subdirectories inside it need to be removed first. On creation,
793 * parent directory needs to be created before files and directories inside
794 * it. To achieve that, the file_action_t enum is ordered so that we can
795 * just sort on that first. Furthermore, sort REMOVE entries in reverse
796 * path order, so that "foo/bar" subdirectory is removed before "foo".
799 final_filemap_cmp(const void *a, const void *b)
801 file_entry_t *fa = *((file_entry_t **) a);
802 file_entry_t *fb = *((file_entry_t **) b);
804 if (fa->action > fb->action)
806 if (fa->action < fb->action)
809 if (fa->action == FILE_ACTION_REMOVE)
810 return strcmp(fb->path, fa->path);
812 return strcmp(fa->path, fb->path);