]> granicus.if.org Git - postgresql/blob - contrib/pg_prewarm/autoprewarm.c
Replace heapam.h includes with {table, relation}.h where applicable.
[postgresql] / contrib / pg_prewarm / autoprewarm.c
1 /*-------------------------------------------------------------------------
2  *
3  * autoprewarm.c
4  *              Periodically dump information about the blocks present in
5  *              shared_buffers, and reload them on server restart.
6  *
7  *              Due to locking considerations, we can't actually begin prewarming
8  *              until the server reaches a consistent state.  We need the catalogs
9  *              to be consistent so that we can figure out which relation to lock,
10  *              and we need to lock the relations so that we don't try to prewarm
11  *              pages from a relation that is in the process of being dropped.
12  *
13  *              While prewarming, autoprewarm will use two workers.  There's a
14  *              master worker that reads and sorts the list of blocks to be
15  *              prewarmed and then launches a per-database worker for each
16  *              relevant database in turn.  The former keeps running after the
17  *              initial prewarm is complete to update the dump file periodically.
18  *
19  *      Copyright (c) 2016-2019, PostgreSQL Global Development Group
20  *
21  *      IDENTIFICATION
22  *              contrib/pg_prewarm/autoprewarm.c
23  *
24  *-------------------------------------------------------------------------
25  */
26
27 #include "postgres.h"
28
29 #include <unistd.h>
30
31 #include "access/relation.h"
32 #include "access/xact.h"
33 #include "catalog/pg_class.h"
34 #include "catalog/pg_type.h"
35 #include "miscadmin.h"
36 #include "pgstat.h"
37 #include "postmaster/bgworker.h"
38 #include "storage/buf_internals.h"
39 #include "storage/dsm.h"
40 #include "storage/ipc.h"
41 #include "storage/latch.h"
42 #include "storage/lwlock.h"
43 #include "storage/proc.h"
44 #include "storage/procsignal.h"
45 #include "storage/shmem.h"
46 #include "storage/smgr.h"
47 #include "tcop/tcopprot.h"
48 #include "utils/acl.h"
49 #include "utils/guc.h"
50 #include "utils/memutils.h"
51 #include "utils/rel.h"
52 #include "utils/relfilenodemap.h"
53 #include "utils/resowner.h"
54
55 #define AUTOPREWARM_FILE "autoprewarm.blocks"
56
57 /* Metadata for each block we dump: one record identifies one buffered page. */
58 typedef struct BlockInfoRecord
59 {
60         Oid                     database;       /* copied from the buffer tag's rnode.dbNode */
61         Oid                     tablespace;     /* copied from the buffer tag's rnode.spcNode */
62         Oid                     filenode;       /* copied from the buffer tag's rnode.relNode */
63         ForkNumber      forknum;        /* copied from the buffer tag's forkNum */
64         BlockNumber blocknum;   /* copied from the buffer tag's blockNum */
65 } BlockInfoRecord;
66
67 /* Shared state information for autoprewarm bgworker. */
68 typedef struct AutoPrewarmSharedState
69 {
70         LWLock          lock;                   /* mutual exclusion */
71         pid_t           bgworker_pid;   /* for main bgworker */
72         pid_t           pid_using_dumpfile; /* for autoprewarm or block dump */
73
74         /* Following items are for communication with per-database worker */
75         dsm_handle      block_info_handle;      /* DSM segment holding the BlockInfoRecord array */
76         Oid                     database;       /* database the per-database worker connects to */
77         int                     prewarm_start_idx;      /* first array index the worker processes */
78         int                     prewarm_stop_idx;       /* worker stops before this array index */
79         int                     prewarmed_blocks;       /* incremented for each block read into a buffer */
80 } AutoPrewarmSharedState;
81
82 void            _PG_init(void);        /* module load callback */
83 void            autoprewarm_main(Datum main_arg);      /* master worker entry point */
84 void            autoprewarm_database_main(Datum main_arg);     /* per-database worker entry point */
85
86 PG_FUNCTION_INFO_V1(autoprewarm_start_worker);
87 PG_FUNCTION_INFO_V1(autoprewarm_dump_now);
88
89 static void apw_load_buffers(void);
90 static int      apw_dump_now(bool is_bgworker, bool dump_unlogged);
91 static void apw_start_master_worker(void);
92 static void apw_start_database_worker(void);
93 static bool apw_init_shmem(void);
94 static void apw_detach_shmem(int code, Datum arg);
95 static int      apw_compare_blockinfo(const void *p, const void *q);
96 static void apw_sigterm_handler(SIGNAL_ARGS);
97 static void apw_sighup_handler(SIGNAL_ARGS);
98
99 /* Flags set by signal handlers; polled by the main loop in autoprewarm_main */
100 static volatile sig_atomic_t got_sigterm = false;
101 static volatile sig_atomic_t got_sighup = false;
102
103 /* Pointer to shared-memory state; set by apw_init_shmem(). */
104 static AutoPrewarmSharedState *apw_state = NULL;
105
106 /* GUC variables. */
107 static bool autoprewarm = true; /* start worker? */
108 static int      autoprewarm_interval;   /* dump interval in seconds; zero disables time-based dumps */
109
110 /*
111  * Module load callback.
112  */
113 void
114 _PG_init(void)
115 {
116         DefineCustomIntVariable("pg_prewarm.autoprewarm_interval",
117                                                         "Sets the interval between dumps of shared buffers",
118                                                         "If set to zero, time-based dumping is disabled.",
119                                                         &autoprewarm_interval,
120                                                         300,
121                                                         0, INT_MAX / 1000,
122                                                         PGC_SIGHUP,
123                                                         GUC_UNIT_S,
124                                                         NULL,
125                                                         NULL,
126                                                         NULL);
127
128         if (!process_shared_preload_libraries_in_progress)
129                 return;
130
131         /* can't define PGC_POSTMASTER variable after startup */
132         DefineCustomBoolVariable("pg_prewarm.autoprewarm",
133                                                          "Starts the autoprewarm worker.",
134                                                          NULL,
135                                                          &autoprewarm,
136                                                          true,
137                                                          PGC_POSTMASTER,
138                                                          0,
139                                                          NULL,
140                                                          NULL,
141                                                          NULL);
142
143         EmitWarningsOnPlaceholders("pg_prewarm");
144
145         RequestAddinShmemSpace(MAXALIGN(sizeof(AutoPrewarmSharedState)));
146
147         /* Register autoprewarm worker, if enabled. */
148         if (autoprewarm)
149                 apw_start_master_worker();
150 }
151
152 /*
153  * Main entry point for the master autoprewarm process.  Per-database workers
154  * have a separate entry point.
155  */
156 void
157 autoprewarm_main(Datum main_arg)
158 {
159         bool            first_time = true;
160         TimestampTz last_dump_time = 0;
161
162         /* Establish signal handlers; once that's done, unblock signals. */
163         pqsignal(SIGTERM, apw_sigterm_handler);
164         pqsignal(SIGHUP, apw_sighup_handler);
165         pqsignal(SIGUSR1, procsignal_sigusr1_handler);
166         BackgroundWorkerUnblockSignals();
167
168         /* Create (if necessary) and attach to our shared memory area. */
169         if (apw_init_shmem())
170                 first_time = false;
171
172         /* Set on-detach hook so that our PID will be cleared on exit. */
173         on_shmem_exit(apw_detach_shmem, 0);
174
175         /*
176          * Store our PID in the shared memory area --- unless there's already
177          * another worker running, in which case just exit.
178          */
179         LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
180         if (apw_state->bgworker_pid != InvalidPid)
181         {
182                 LWLockRelease(&apw_state->lock);
183                 ereport(LOG,
184                                 (errmsg("autoprewarm worker is already running under PID %lu",
185                                                 (unsigned long) apw_state->bgworker_pid)));
186                 return;
187         }
188         apw_state->bgworker_pid = MyProcPid;
189         LWLockRelease(&apw_state->lock);
190
191         /*
192          * Preload buffers from the dump file only if we just created the shared
193          * memory region.  Otherwise, it's either already been done or shouldn't
194          * be done - e.g. because the old dump file has been overwritten since the
195          * server was started.
196          *
197          * There's not much point in performing a dump immediately after we finish
198          * preloading; so, if we do end up preloading, consider the last dump time
199          * to be equal to the current time.
200          */
201         if (first_time)
202         {
203                 apw_load_buffers();
204                 last_dump_time = GetCurrentTimestamp();
205         }
206
207         /* Periodically dump buffers until terminated. */
208         while (!got_sigterm)
209         {
210                 /* In case of a SIGHUP, just reload the configuration. */
211                 if (got_sighup)
212                 {
213                         got_sighup = false;
214                         ProcessConfigFile(PGC_SIGHUP);
215                 }
216
217                 if (autoprewarm_interval <= 0)
218                 {
219                         /* We're only dumping at shutdown, so just wait forever. */
220                         (void) WaitLatch(&MyProc->procLatch,
221                                                          WL_LATCH_SET | WL_EXIT_ON_PM_DEATH,
222                                                          -1L,
223                                                          PG_WAIT_EXTENSION);
224                 }
225                 else
226                 {
227                         long            delay_in_ms = 0;
228                         TimestampTz next_dump_time = 0;
229                         long            secs = 0;
230                         int                     usecs = 0;
231
232                         /* Compute the next dump time. */
233                         next_dump_time =
234                                 TimestampTzPlusMilliseconds(last_dump_time,
235                                                                                         autoprewarm_interval * 1000);
236                         TimestampDifference(GetCurrentTimestamp(), next_dump_time,
237                                                                 &secs, &usecs);
238                         delay_in_ms = secs + (usecs / 1000);
239
240                         /* Perform a dump if it's time. */
241                         if (delay_in_ms <= 0)
242                         {
243                                 last_dump_time = GetCurrentTimestamp();
244                                 apw_dump_now(true, false);
245                                 continue;
246                         }
247
248                         /* Sleep until the next dump time. */
249                         (void) WaitLatch(&MyProc->procLatch,
250                                                          WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
251                                                          delay_in_ms,
252                                                          PG_WAIT_EXTENSION);
253                 }
254
255                 /* Reset the latch, loop. */
256                 ResetLatch(&MyProc->procLatch);
257         }
258
259         /*
260          * Dump one last time.  We assume this is probably the result of a system
261          * shutdown, although it's possible that we've merely been terminated.
262          */
263         apw_dump_now(true, true);
264 }
265
266 /*
267  * Read the dump file and launch per-database workers one at a time to
268  * prewarm the buffers found there.
269  */
270 static void
271 apw_load_buffers(void)
272 {
273         FILE       *file = NULL;
274         int                     num_elements,
275                                 i;
276         BlockInfoRecord *blkinfo;
277         dsm_segment *seg;
278
279         /*
280          * Skip the prewarm if the dump file is in use; otherwise, prevent any
281          * other process from writing it while we're using it.
282          */
283         LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
284         if (apw_state->pid_using_dumpfile == InvalidPid)
285                 apw_state->pid_using_dumpfile = MyProcPid;
286         else
287         {
288                 LWLockRelease(&apw_state->lock);
289                 ereport(LOG,
290                                 (errmsg("skipping prewarm because block dump file is being written by PID %lu",
291                                                 (unsigned long) apw_state->pid_using_dumpfile)));
292                 return;
293         }
294         LWLockRelease(&apw_state->lock);
295
296         /*
297          * Open the block dump file.  Exit quietly if it doesn't exist, but report
298          * any other error.
299          */
300         file = AllocateFile(AUTOPREWARM_FILE, "r");
301         if (!file)
302         {
303                 if (errno == ENOENT)
304                 {
305                         LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
306                         apw_state->pid_using_dumpfile = InvalidPid;
307                         LWLockRelease(&apw_state->lock);
308                         return;                         /* No file to load. */
309                 }
310                 ereport(ERROR,
311                                 (errcode_for_file_access(),
312                                  errmsg("could not read file \"%s\": %m",
313                                                 AUTOPREWARM_FILE)));
314         }
315
316         /* First line of the file is a record count. */
317         if (fscanf(file, "<<%d>>\n", &num_elements) != 1)
318                 ereport(ERROR,
319                                 (errcode_for_file_access(),
320                                  errmsg("could not read from file \"%s\": %m",
321                                                 AUTOPREWARM_FILE)));
322
323         /* Allocate a dynamic shared memory segment to store the record data. */
324         seg = dsm_create(sizeof(BlockInfoRecord) * num_elements, 0);
325         blkinfo = (BlockInfoRecord *) dsm_segment_address(seg);
326
327         /* Read records, one per line. */
328         for (i = 0; i < num_elements; i++)
329         {
330                 unsigned        forknum;
331
332                 if (fscanf(file, "%u,%u,%u,%u,%u\n", &blkinfo[i].database,
333                                    &blkinfo[i].tablespace, &blkinfo[i].filenode,
334                                    &forknum, &blkinfo[i].blocknum) != 5)
335                         ereport(ERROR,
336                                         (errmsg("autoprewarm block dump file is corrupted at line %d",
337                                                         i + 1)));
338                 blkinfo[i].forknum = forknum;
339         }
340
341         FreeFile(file);
342
343         /* Sort the blocks to be loaded. */
344         pg_qsort(blkinfo, num_elements, sizeof(BlockInfoRecord),
345                          apw_compare_blockinfo);
346
347         /* Populate shared memory state. */
348         apw_state->block_info_handle = dsm_segment_handle(seg);
349         apw_state->prewarm_start_idx = apw_state->prewarm_stop_idx = 0;
350         apw_state->prewarmed_blocks = 0;
351
352         /* Get the info position of the first block of the next database. */
353         while (apw_state->prewarm_start_idx < num_elements)
354         {
355                 int                     j = apw_state->prewarm_start_idx;
356                 Oid                     current_db = blkinfo[j].database;
357
358                 /*
359                  * Advance the prewarm_stop_idx to the first BlockRecordInfo that does
360                  * not belong to this database.
361                  */
362                 j++;
363                 while (j < num_elements)
364                 {
365                         if (current_db != blkinfo[j].database)
366                         {
367                                 /*
368                                  * Combine BlockRecordInfos for global objects with those of
369                                  * the database.
370                                  */
371                                 if (current_db != InvalidOid)
372                                         break;
373                                 current_db = blkinfo[j].database;
374                         }
375
376                         j++;
377                 }
378
379                 /*
380                  * If we reach this point with current_db == InvalidOid, then only
381                  * BlockRecordInfos belonging to global objects exist.  We can't
382                  * prewarm without a database connection, so just bail out.
383                  */
384                 if (current_db == InvalidOid)
385                         break;
386
387                 /* Configure stop point and database for next per-database worker. */
388                 apw_state->prewarm_stop_idx = j;
389                 apw_state->database = current_db;
390                 Assert(apw_state->prewarm_start_idx < apw_state->prewarm_stop_idx);
391
392                 /* If we've run out of free buffers, don't launch another worker. */
393                 if (!have_free_buffer())
394                         break;
395
396                 /*
397                  * Start a per-database worker to load blocks for this database; this
398                  * function will return once the per-database worker exits.
399                  */
400                 apw_start_database_worker();
401
402                 /* Prepare for next database. */
403                 apw_state->prewarm_start_idx = apw_state->prewarm_stop_idx;
404         }
405
406         /* Clean up. */
407         dsm_detach(seg);
408         LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
409         apw_state->block_info_handle = DSM_HANDLE_INVALID;
410         apw_state->pid_using_dumpfile = InvalidPid;
411         LWLockRelease(&apw_state->lock);
412
413         /* Report our success. */
414         ereport(LOG,
415                         (errmsg("autoprewarm successfully prewarmed %d of %d previously-loaded blocks",
416                                         apw_state->prewarmed_blocks, num_elements)));
417 }
418
419 /*
420  * Prewarm all blocks for one database (and possibly also global objects, if
421  * those got grouped with this database).
422  */
423 void
424 autoprewarm_database_main(Datum main_arg)
425 {
426         int                     pos;
427         BlockInfoRecord *block_info;
428         Relation        rel = NULL;
429         BlockNumber nblocks = 0;
430         BlockInfoRecord *old_blk = NULL;
431         dsm_segment *seg;
432
433         /* Establish signal handlers; once that's done, unblock signals. */
434         pqsignal(SIGTERM, die);
435         BackgroundWorkerUnblockSignals();
436
437         /* Connect to correct database and get block information. */
438         apw_init_shmem();
439         seg = dsm_attach(apw_state->block_info_handle);
440         if (seg == NULL)
441                 ereport(ERROR,
442                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
443                                  errmsg("could not map dynamic shared memory segment")));
444         BackgroundWorkerInitializeConnectionByOid(apw_state->database, InvalidOid, 0);
445         block_info = (BlockInfoRecord *) dsm_segment_address(seg);
446         pos = apw_state->prewarm_start_idx;
447
448         /*
449          * Loop until we run out of blocks to prewarm or until we run out of free
450          * buffers.
451          */
452         while (pos < apw_state->prewarm_stop_idx && have_free_buffer())
453         {
454                 BlockInfoRecord *blk = &block_info[pos++];
455                 Buffer          buf;
456
457                 CHECK_FOR_INTERRUPTS();
458
459                 /*
460                  * Quit if we've reached records for another database. If previous
461                  * blocks are of some global objects, then continue pre-warming.
462                  */
463                 if (old_blk != NULL && old_blk->database != blk->database &&
464                         old_blk->database != 0)
465                         break;
466
467                 /*
468                  * As soon as we encounter a block of a new relation, close the old
469                  * relation. Note that rel will be NULL if try_relation_open failed
470                  * previously; in that case, there is nothing to close.
471                  */
472                 if (old_blk != NULL && old_blk->filenode != blk->filenode &&
473                         rel != NULL)
474                 {
475                         relation_close(rel, AccessShareLock);
476                         rel = NULL;
477                         CommitTransactionCommand();
478                 }
479
480                 /*
481                  * Try to open each new relation, but only once, when we first
482                  * encounter it. If it's been dropped, skip the associated blocks.
483                  */
484                 if (old_blk == NULL || old_blk->filenode != blk->filenode)
485                 {
486                         Oid                     reloid;
487
488                         Assert(rel == NULL);
489                         StartTransactionCommand();
490                         reloid = RelidByRelfilenode(blk->tablespace, blk->filenode);
491                         if (OidIsValid(reloid))
492                                 rel = try_relation_open(reloid, AccessShareLock);
493
494                         if (!rel)
495                                 CommitTransactionCommand();
496                 }
497                 if (!rel)
498                 {
499                         old_blk = blk;
500                         continue;
501                 }
502
503                 /* Once per fork, check for fork existence and size. */
504                 if (old_blk == NULL ||
505                         old_blk->filenode != blk->filenode ||
506                         old_blk->forknum != blk->forknum)
507                 {
508                         RelationOpenSmgr(rel);
509
510                         /*
511                          * smgrexists is not safe for illegal forknum, hence check whether
512                          * the passed forknum is valid before using it in smgrexists.
513                          */
514                         if (blk->forknum > InvalidForkNumber &&
515                                 blk->forknum <= MAX_FORKNUM &&
516                                 smgrexists(rel->rd_smgr, blk->forknum))
517                                 nblocks = RelationGetNumberOfBlocksInFork(rel, blk->forknum);
518                         else
519                                 nblocks = 0;
520                 }
521
522                 /* Check whether blocknum is valid and within fork file size. */
523                 if (blk->blocknum >= nblocks)
524                 {
525                         /* Move to next forknum. */
526                         old_blk = blk;
527                         continue;
528                 }
529
530                 /* Prewarm buffer. */
531                 buf = ReadBufferExtended(rel, blk->forknum, blk->blocknum, RBM_NORMAL,
532                                                                  NULL);
533                 if (BufferIsValid(buf))
534                 {
535                         apw_state->prewarmed_blocks++;
536                         ReleaseBuffer(buf);
537                 }
538
539                 old_blk = blk;
540         }
541
542         dsm_detach(seg);
543
544         /* Release lock on previous relation. */
545         if (rel)
546         {
547                 relation_close(rel, AccessShareLock);
548                 CommitTransactionCommand();
549         }
550 }
551
552 /*
553  * Dump information on blocks in shared buffers.  We use a text format here
554  * so that it's easy to understand and even change the file contents if
555  * necessary.
556  * Returns the number of blocks dumped.
557  */
558 static int
559 apw_dump_now(bool is_bgworker, bool dump_unlogged)
560 {
561         int                     num_blocks;
562         int                     i;
563         int                     ret;
564         BlockInfoRecord *block_info_array;
565         BufferDesc *bufHdr;
566         FILE       *file;
567         char            transient_dump_file_path[MAXPGPATH];
568         pid_t           pid;
569
570         LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
571         pid = apw_state->pid_using_dumpfile;
572         if (apw_state->pid_using_dumpfile == InvalidPid)
573                 apw_state->pid_using_dumpfile = MyProcPid;
574         LWLockRelease(&apw_state->lock);
575
576         if (pid != InvalidPid)
577         {
578                 if (!is_bgworker)
579                         ereport(ERROR,
580                                         (errmsg("could not perform block dump because dump file is being used by PID %lu",
581                                                         (unsigned long) apw_state->pid_using_dumpfile)));
582
583                 ereport(LOG,
584                                 (errmsg("skipping block dump because it is already being performed by PID %lu",
585                                                 (unsigned long) apw_state->pid_using_dumpfile)));
586                 return 0;
587         }
588
589         block_info_array =
590                 (BlockInfoRecord *) palloc(sizeof(BlockInfoRecord) * NBuffers);
591
592         for (num_blocks = 0, i = 0; i < NBuffers; i++)
593         {
594                 uint32          buf_state;
595
596                 CHECK_FOR_INTERRUPTS();
597
598                 bufHdr = GetBufferDescriptor(i);
599
600                 /* Lock each buffer header before inspecting. */
601                 buf_state = LockBufHdr(bufHdr);
602
603                 /*
604                  * Unlogged tables will be automatically truncated after a crash or
605                  * unclean shutdown. In such cases we need not prewarm them. Dump them
606                  * only if requested by caller.
607                  */
608                 if (buf_state & BM_TAG_VALID &&
609                         ((buf_state & BM_PERMANENT) || dump_unlogged))
610                 {
611                         block_info_array[num_blocks].database = bufHdr->tag.rnode.dbNode;
612                         block_info_array[num_blocks].tablespace = bufHdr->tag.rnode.spcNode;
613                         block_info_array[num_blocks].filenode = bufHdr->tag.rnode.relNode;
614                         block_info_array[num_blocks].forknum = bufHdr->tag.forkNum;
615                         block_info_array[num_blocks].blocknum = bufHdr->tag.blockNum;
616                         ++num_blocks;
617                 }
618
619                 UnlockBufHdr(bufHdr, buf_state);
620         }
621
622         snprintf(transient_dump_file_path, MAXPGPATH, "%s.tmp", AUTOPREWARM_FILE);
623         file = AllocateFile(transient_dump_file_path, "w");
624         if (!file)
625                 ereport(ERROR,
626                                 (errcode_for_file_access(),
627                                  errmsg("could not open file \"%s\": %m",
628                                                 transient_dump_file_path)));
629
630         ret = fprintf(file, "<<%d>>\n", num_blocks);
631         if (ret < 0)
632         {
633                 int                     save_errno = errno;
634
635                 FreeFile(file);
636                 unlink(transient_dump_file_path);
637                 errno = save_errno;
638                 ereport(ERROR,
639                                 (errcode_for_file_access(),
640                                  errmsg("could not write to file \"%s\": %m",
641                                                 transient_dump_file_path)));
642         }
643
644         for (i = 0; i < num_blocks; i++)
645         {
646                 CHECK_FOR_INTERRUPTS();
647
648                 ret = fprintf(file, "%u,%u,%u,%u,%u\n",
649                                           block_info_array[i].database,
650                                           block_info_array[i].tablespace,
651                                           block_info_array[i].filenode,
652                                           (uint32) block_info_array[i].forknum,
653                                           block_info_array[i].blocknum);
654                 if (ret < 0)
655                 {
656                         int                     save_errno = errno;
657
658                         FreeFile(file);
659                         unlink(transient_dump_file_path);
660                         errno = save_errno;
661                         ereport(ERROR,
662                                         (errcode_for_file_access(),
663                                          errmsg("could not write to file \"%s\": %m",
664                                                         transient_dump_file_path)));
665                 }
666         }
667
668         pfree(block_info_array);
669
670         /*
671          * Rename transient_dump_file_path to AUTOPREWARM_FILE to make things
672          * permanent.
673          */
674         ret = FreeFile(file);
675         if (ret != 0)
676         {
677                 int                     save_errno = errno;
678
679                 unlink(transient_dump_file_path);
680                 errno = save_errno;
681                 ereport(ERROR,
682                                 (errcode_for_file_access(),
683                                  errmsg("could not close file \"%s\": %m",
684                                                 transient_dump_file_path)));
685         }
686
687         (void) durable_rename(transient_dump_file_path, AUTOPREWARM_FILE, ERROR);
688         apw_state->pid_using_dumpfile = InvalidPid;
689
690         ereport(DEBUG1,
691                         (errmsg("wrote block details for %d blocks", num_blocks)));
692         return num_blocks;
693 }
694
695 /*
696  * SQL-callable function to launch autoprewarm.
697  */
698 Datum
699 autoprewarm_start_worker(PG_FUNCTION_ARGS)
700 {
701         pid_t           pid;
702
703         if (!autoprewarm)
704                 ereport(ERROR,
705                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
706                                  errmsg("autoprewarm is disabled")));
707
708         apw_init_shmem();
709         LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
710         pid = apw_state->bgworker_pid;
711         LWLockRelease(&apw_state->lock);
712
713         if (pid != InvalidPid)
714                 ereport(ERROR,
715                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
716                                  errmsg("autoprewarm worker is already running under PID %lu",
717                                                 (unsigned long) pid)));
718
719         apw_start_master_worker();
720
721         PG_RETURN_VOID();
722 }
723
724 /*
725  * SQL-callable function to perform an immediate block dump.
726  *
727  * Note: this is declared to return int8, as insurance against some
728  * very distant day when we might make NBuffers wider than int.
729  */
730 Datum
731 autoprewarm_dump_now(PG_FUNCTION_ARGS)
732 {
733         int                     num_blocks;
734
735         apw_init_shmem();
736
737         PG_ENSURE_ERROR_CLEANUP(apw_detach_shmem, 0);
738         {
739                 num_blocks = apw_dump_now(false, true);
740         }
741         PG_END_ENSURE_ERROR_CLEANUP(apw_detach_shmem, 0);
742
743         PG_RETURN_INT64((int64) num_blocks);
744 }
745
746 /*
747  * Allocate and initialize autoprewarm related shared memory, if not already
748  * done, and set up backend-local pointer to that state.  Returns true if an
749  * existing shared memory segment was found.
750  */
751 static bool
752 apw_init_shmem(void)
753 {
754         bool            found;
755
756         LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
757         apw_state = ShmemInitStruct("autoprewarm",
758                                                                 sizeof(AutoPrewarmSharedState),
759                                                                 &found);
760         if (!found)
761         {
762                 /* First time through ... */
763                 LWLockInitialize(&apw_state->lock, LWLockNewTrancheId());
764                 apw_state->bgworker_pid = InvalidPid;
765                 apw_state->pid_using_dumpfile = InvalidPid;
766         }
767         LWLockRelease(AddinShmemInitLock);
768
769         LWLockRegisterTranche(apw_state->lock.tranche, "autoprewarm");
770
771         return found;
772 }
773
774 /*
775  * Clear our PID from autoprewarm shared state.
776  */
777 static void
778 apw_detach_shmem(int code, Datum arg)
779 {
780         LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
781         if (apw_state->pid_using_dumpfile == MyProcPid)
782                 apw_state->pid_using_dumpfile = InvalidPid;
783         if (apw_state->bgworker_pid == MyProcPid)
784                 apw_state->bgworker_pid = InvalidPid;
785         LWLockRelease(&apw_state->lock);
786 }
787
788 /*
789  * Start autoprewarm master worker process.
790  */
791 static void
792 apw_start_master_worker(void)
793 {
794         BackgroundWorker worker;
795         BackgroundWorkerHandle *handle;
796         BgwHandleStatus status;
797         pid_t           pid;
798
799         MemSet(&worker, 0, sizeof(BackgroundWorker));
800         worker.bgw_flags = BGWORKER_SHMEM_ACCESS;
801         worker.bgw_start_time = BgWorkerStart_ConsistentState;
802         strcpy(worker.bgw_library_name, "pg_prewarm");
803         strcpy(worker.bgw_function_name, "autoprewarm_main");
804         strcpy(worker.bgw_name, "autoprewarm master");
805         strcpy(worker.bgw_type, "autoprewarm master");
806
807         if (process_shared_preload_libraries_in_progress)
808         {
809                 RegisterBackgroundWorker(&worker);
810                 return;
811         }
812
813         /* must set notify PID to wait for startup */
814         worker.bgw_notify_pid = MyProcPid;
815
816         if (!RegisterDynamicBackgroundWorker(&worker, &handle))
817                 ereport(ERROR,
818                                 (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
819                                  errmsg("could not register background process"),
820                                  errhint("You may need to increase max_worker_processes.")));
821
822         status = WaitForBackgroundWorkerStartup(handle, &pid);
823         if (status != BGWH_STARTED)
824                 ereport(ERROR,
825                                 (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
826                                  errmsg("could not start background process"),
827                                  errhint("More details may be available in the server log.")));
828 }
829
830 /*
831  * Start autoprewarm per-database worker process.
832  */
833 static void
834 apw_start_database_worker(void)
835 {
836         BackgroundWorker worker;
837         BackgroundWorkerHandle *handle;
838
839         MemSet(&worker, 0, sizeof(BackgroundWorker));
840         worker.bgw_flags =
841                 BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION;
842         worker.bgw_start_time = BgWorkerStart_ConsistentState;
843         strcpy(worker.bgw_library_name, "pg_prewarm");
844         strcpy(worker.bgw_function_name, "autoprewarm_database_main");
845         strcpy(worker.bgw_name, "autoprewarm worker");
846         strcpy(worker.bgw_type, "autoprewarm worker");
847
848         /* must set notify PID to wait for shutdown */
849         worker.bgw_notify_pid = MyProcPid;
850
851         if (!RegisterDynamicBackgroundWorker(&worker, &handle))
852                 ereport(ERROR,
853                                 (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
854                                  errmsg("registering dynamic bgworker autoprewarm failed"),
855                                  errhint("Consider increasing configuration parameter \"max_worker_processes\".")));
856
857         /*
858          * Ignore return value; if it fails, postmaster has died, but we have
859          * checks for that elsewhere.
860          */
861         WaitForBackgroundWorkerShutdown(handle);
862 }
863
/*
 * Compare field "fld" of the two records named "a" and "b" at the point of
 * use.  If the values differ, return from the enclosing function with -1
 * (a's is smaller) or 1 (a's is larger); if they are equal, fall through so
 * that the next field can be compared.
 */
#define cmp_member_elem(fld)	\
do { \
	if (a->fld != b->fld)		\
		return (a->fld < b->fld) ? -1 : 1;	\
} while(0)
872
873 /*
874  * apw_compare_blockinfo
875  *
876  * We depend on all records for a particular database being consecutive
877  * in the dump file; each per-database worker will preload blocks until
878  * it sees a block for some other database.  Sorting by tablespace,
879  * filenode, forknum, and blocknum isn't critical for correctness, but
880  * helps us get a sequential I/O pattern.
881  */
882 static int
883 apw_compare_blockinfo(const void *p, const void *q)
884 {
885         const BlockInfoRecord *a = (const BlockInfoRecord *) p;
886         const BlockInfoRecord *b = (const BlockInfoRecord *) q;
887
888         cmp_member_elem(database);
889         cmp_member_elem(tablespace);
890         cmp_member_elem(filenode);
891         cmp_member_elem(forknum);
892         cmp_member_elem(blocknum);
893
894         return 0;
895 }
896
897 /*
898  * Signal handler for SIGTERM
899  */
900 static void
901 apw_sigterm_handler(SIGNAL_ARGS)
902 {
903         int                     save_errno = errno;
904
905         got_sigterm = true;
906
907         if (MyProc)
908                 SetLatch(&MyProc->procLatch);
909
910         errno = save_errno;
911 }
912
913 /*
914  * Signal handler for SIGHUP
915  */
916 static void
917 apw_sighup_handler(SIGNAL_ARGS)
918 {
919         int                     save_errno = errno;
920
921         got_sighup = true;
922
923         if (MyProc)
924                 SetLatch(&MyProc->procLatch);
925
926         errno = save_errno;
927 }