granicus.if.org Git - postgresql/blob - src/backend/commands/dbcommands.c
Drop no-longer-needed buffers during ALTER DATABASE SET TABLESPACE.
[postgresql] / src / backend / commands / dbcommands.c
1 /*-------------------------------------------------------------------------
2  *
3  * dbcommands.c
4  *              Database management commands (create/drop database).
5  *
6  * Note: database creation/destruction commands use exclusive locks on
7  * the database objects (as expressed by LockSharedObject()) to avoid
8  * stepping on each others' toes.  Formerly we used table-level locks
9  * on pg_database, but that's too coarse-grained.
10  *
11  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
12  * Portions Copyright (c) 1994, Regents of the University of California
13  *
14  *
15  * IDENTIFICATION
16  *        src/backend/commands/dbcommands.c
17  *
18  *-------------------------------------------------------------------------
19  */
20 #include "postgres.h"
21
22 #include <fcntl.h>
23 #include <locale.h>
24 #include <unistd.h>
25 #include <sys/stat.h>
26
27 #include "access/genam.h"
28 #include "access/heapam.h"
29 #include "access/htup_details.h"
30 #include "access/xact.h"
31 #include "access/xlogutils.h"
32 #include "catalog/catalog.h"
33 #include "catalog/dependency.h"
34 #include "catalog/indexing.h"
35 #include "catalog/objectaccess.h"
36 #include "catalog/pg_authid.h"
37 #include "catalog/pg_database.h"
38 #include "catalog/pg_db_role_setting.h"
39 #include "catalog/pg_tablespace.h"
40 #include "commands/comment.h"
41 #include "commands/dbcommands.h"
42 #include "commands/defrem.h"
43 #include "commands/seclabel.h"
44 #include "commands/tablespace.h"
45 #include "mb/pg_wchar.h"
46 #include "miscadmin.h"
47 #include "pgstat.h"
48 #include "postmaster/bgwriter.h"
49 #include "replication/slot.h"
50 #include "storage/copydir.h"
51 #include "storage/fd.h"
52 #include "storage/lmgr.h"
53 #include "storage/ipc.h"
54 #include "storage/procarray.h"
55 #include "storage/smgr.h"
56 #include "utils/acl.h"
57 #include "utils/builtins.h"
58 #include "utils/fmgroids.h"
59 #include "utils/pg_locale.h"
60 #include "utils/snapmgr.h"
61 #include "utils/syscache.h"
62 #include "utils/tqual.h"
63
64
/*
 * Argument block for createdb_failure_callback().  createdb() fills in both
 * fields and registers a pointer to the struct (wrapped as a Datum) with
 * PG_ENSURE_ERROR_CLEANUP just before it starts copying the template's
 * per-tablespace directories, so that a failure during the copy can be
 * cleaned up.
 */
65 typedef struct
66 {
67         Oid                     src_dboid;              /* OID of the source (template) DB */
68         Oid                     dest_dboid;             /* OID of the DB we are trying to create */
69 } createdb_failure_params;
70
/*
 * Identifies a database move in progress: which database is being moved and
 * which tablespace it is headed to.
 *
 * NOTE(review): presumably the argument block handed to
 * movedb_failure_callback(); the use site is outside this view - confirm.
 */
71 typedef struct
72 {
73         Oid                     dest_dboid;             /* OID of the DB we are trying to move */
74         Oid                     dest_tsoid;             /* OID of the tablespace we are trying to move to */
75 } movedb_failure_params;
76
77 /* non-export (file-local) function prototypes */

/* Error-cleanup callback that createdb() registers via PG_ENSURE_ERROR_CLEANUP. */
78 static void createdb_failure_callback(int code, Datum arg);

/*
 * NOTE(review): movedb() and its failure callback are not used in the part of
 * the file visible here; presumably they implement ALTER DATABASE SET
 * TABLESPACE - confirm against the definitions.
 */
79 static void movedb(const char *dbname, const char *tblspcname);
80 static void movedb_failure_callback(int code, Datum arg);

/*
 * Look up a database by name under the requested lock mode and hand back its
 * properties through the output pointers.  createdb() treats a false result
 * as "database does not exist".
 */
81 static bool get_db_info(const char *name, LOCKMODE lockmode,
82                         Oid *dbIdP, Oid *ownerIdP,
83                         int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP,
84                         Oid *dbLastSysOidP, TransactionId *dbFrozenXidP,
85                         MultiXactId *dbMinMultiP,
86                         Oid *dbTablespace, char **dbCollate, char **dbCtype);

/* createdb() raises "permission denied to create database" when this is false. */
87 static bool have_createdb_privilege(void);

/* NOTE(review): not called in the visible portion; see the definition for its contract. */
88 static void remove_dbtablespaces(Oid db_id);

/*
 * createdb() keeps drawing new OIDs for as long as this returns true, i.e.
 * true means the candidate OID conflicts with something already present in
 * the tablespace directories.
 */
89 static bool check_db_file_conflict(Oid db_id);

/*
 * Auxiliary ereport() function: createdb() passes it the backend and
 * prepared-transaction counts obtained from CountOtherDBBackends().
 */
90 static int      errdetail_busy_db(int notherbackends, int npreparedxacts);
91
92
93 /*
94  * CREATE DATABASE
95  */
96 Oid
97 createdb(const CreatedbStmt *stmt)
98 {
99         HeapScanDesc scan;
100         Relation        rel;
101         Oid                     src_dboid;
102         Oid                     src_owner;
103         int                     src_encoding;
104         char       *src_collate;
105         char       *src_ctype;
106         bool            src_istemplate;
107         bool            src_allowconn;
108         Oid                     src_lastsysoid;
109         TransactionId src_frozenxid;
110         MultiXactId src_minmxid;
111         Oid                     src_deftablespace;
112         volatile Oid dst_deftablespace;
113         Relation        pg_database_rel;
114         HeapTuple       tuple;
115         Datum           new_record[Natts_pg_database];
116         bool            new_record_nulls[Natts_pg_database];
117         Oid                     dboid;
118         Oid                     datdba;
119         ListCell   *option;
120         DefElem    *dtablespacename = NULL;
121         DefElem    *downer = NULL;
122         DefElem    *dtemplate = NULL;
123         DefElem    *dencoding = NULL;
124         DefElem    *dcollate = NULL;
125         DefElem    *dctype = NULL;
126         DefElem    *distemplate = NULL;
127         DefElem    *dallowconnections = NULL;
128         DefElem    *dconnlimit = NULL;
129         char       *dbname = stmt->dbname;
130         char       *dbowner = NULL;
131         const char *dbtemplate = NULL;
132         char       *dbcollate = NULL;
133         char       *dbctype = NULL;
134         char       *canonname;
135         int                     encoding = -1;
136         bool            dbistemplate = false;
137         bool            dballowconnections = true;
138         int                     dbconnlimit = -1;
139         int                     notherbackends;
140         int                     npreparedxacts;
141         createdb_failure_params fparms;
142
143         /* Extract options from the statement node tree */
144         foreach(option, stmt->options)
145         {
146                 DefElem    *defel = (DefElem *) lfirst(option);
147
148                 if (strcmp(defel->defname, "tablespace") == 0)
149                 {
150                         if (dtablespacename)
151                                 ereport(ERROR,
152                                                 (errcode(ERRCODE_SYNTAX_ERROR),
153                                                  errmsg("conflicting or redundant options")));
154                         dtablespacename = defel;
155                 }
156                 else if (strcmp(defel->defname, "owner") == 0)
157                 {
158                         if (downer)
159                                 ereport(ERROR,
160                                                 (errcode(ERRCODE_SYNTAX_ERROR),
161                                                  errmsg("conflicting or redundant options")));
162                         downer = defel;
163                 }
164                 else if (strcmp(defel->defname, "template") == 0)
165                 {
166                         if (dtemplate)
167                                 ereport(ERROR,
168                                                 (errcode(ERRCODE_SYNTAX_ERROR),
169                                                  errmsg("conflicting or redundant options")));
170                         dtemplate = defel;
171                 }
172                 else if (strcmp(defel->defname, "encoding") == 0)
173                 {
174                         if (dencoding)
175                                 ereport(ERROR,
176                                                 (errcode(ERRCODE_SYNTAX_ERROR),
177                                                  errmsg("conflicting or redundant options")));
178                         dencoding = defel;
179                 }
180                 else if (strcmp(defel->defname, "lc_collate") == 0)
181                 {
182                         if (dcollate)
183                                 ereport(ERROR,
184                                                 (errcode(ERRCODE_SYNTAX_ERROR),
185                                                  errmsg("conflicting or redundant options")));
186                         dcollate = defel;
187                 }
188                 else if (strcmp(defel->defname, "lc_ctype") == 0)
189                 {
190                         if (dctype)
191                                 ereport(ERROR,
192                                                 (errcode(ERRCODE_SYNTAX_ERROR),
193                                                  errmsg("conflicting or redundant options")));
194                         dctype = defel;
195                 }
196                 else if (strcmp(defel->defname, "is_template") == 0)
197                 {
198                         if (distemplate)
199                                 ereport(ERROR,
200                                                 (errcode(ERRCODE_SYNTAX_ERROR),
201                                                  errmsg("conflicting or redundant options")));
202                         distemplate = defel;
203                 }
204                 else if (strcmp(defel->defname, "allow_connections") == 0)
205                 {
206                         if (dallowconnections)
207                                 ereport(ERROR,
208                                                 (errcode(ERRCODE_SYNTAX_ERROR),
209                                                  errmsg("conflicting or redundant options")));
210                         dallowconnections = defel;
211                 }
212                 else if (strcmp(defel->defname, "connection_limit") == 0)
213                 {
214                         if (dconnlimit)
215                                 ereport(ERROR,
216                                                 (errcode(ERRCODE_SYNTAX_ERROR),
217                                                  errmsg("conflicting or redundant options")));
218                         dconnlimit = defel;
219                 }
220                 else if (strcmp(defel->defname, "location") == 0)
221                 {
222                         ereport(WARNING,
223                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
224                                          errmsg("LOCATION is not supported anymore"),
225                                          errhint("Consider using tablespaces instead.")));
226                 }
227                 else
228                         ereport(ERROR,
229                                         (errcode(ERRCODE_SYNTAX_ERROR),
230                                          errmsg("option \"%s\" not recognized", defel->defname)));
231         }
232
233         if (downer && downer->arg)
234                 dbowner = defGetString(downer);
235         if (dtemplate && dtemplate->arg)
236                 dbtemplate = defGetString(dtemplate);
237         if (dencoding && dencoding->arg)
238         {
239                 const char *encoding_name;
240
241                 if (IsA(dencoding->arg, Integer))
242                 {
243                         encoding = defGetInt32(dencoding);
244                         encoding_name = pg_encoding_to_char(encoding);
245                         if (strcmp(encoding_name, "") == 0 ||
246                                 pg_valid_server_encoding(encoding_name) < 0)
247                                 ereport(ERROR,
248                                                 (errcode(ERRCODE_UNDEFINED_OBJECT),
249                                                  errmsg("%d is not a valid encoding code",
250                                                                 encoding)));
251                 }
252                 else
253                 {
254                         encoding_name = defGetString(dencoding);
255                         encoding = pg_valid_server_encoding(encoding_name);
256                         if (encoding < 0)
257                                 ereport(ERROR,
258                                                 (errcode(ERRCODE_UNDEFINED_OBJECT),
259                                                  errmsg("%s is not a valid encoding name",
260                                                                 encoding_name)));
261                 }
262         }
263         if (dcollate && dcollate->arg)
264                 dbcollate = defGetString(dcollate);
265         if (dctype && dctype->arg)
266                 dbctype = defGetString(dctype);
267         if (distemplate && distemplate->arg)
268                 dbistemplate = defGetBoolean(distemplate);
269         if (dallowconnections && dallowconnections->arg)
270                 dballowconnections = defGetBoolean(dallowconnections);
271         if (dconnlimit && dconnlimit->arg)
272         {
273                 dbconnlimit = defGetInt32(dconnlimit);
274                 if (dbconnlimit < -1)
275                         ereport(ERROR,
276                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
277                                          errmsg("invalid connection limit: %d", dbconnlimit)));
278         }
279
280         /* obtain OID of proposed owner */
281         if (dbowner)
282                 datdba = get_role_oid(dbowner, false);
283         else
284                 datdba = GetUserId();
285
286         /*
287          * To create a database, must have createdb privilege and must be able to
288          * become the target role (this does not imply that the target role itself
289          * must have createdb privilege).  The latter provision guards against
290          * "giveaway" attacks.  Note that a superuser will always have both of
291          * these privileges a fortiori.
292          */
293         if (!have_createdb_privilege())
294                 ereport(ERROR,
295                                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
296                                  errmsg("permission denied to create database")));
297
298         check_is_member_of_role(GetUserId(), datdba);
299
300         /*
301          * Lookup database (template) to be cloned, and obtain share lock on it.
302          * ShareLock allows two CREATE DATABASEs to work from the same template
303          * concurrently, while ensuring no one is busy dropping it in parallel
304          * (which would be Very Bad since we'd likely get an incomplete copy
305          * without knowing it).  This also prevents any new connections from being
306          * made to the source until we finish copying it, so we can be sure it
307          * won't change underneath us.
308          */
309         if (!dbtemplate)
310                 dbtemplate = "template1";               /* Default template database name */
311
312         if (!get_db_info(dbtemplate, ShareLock,
313                                          &src_dboid, &src_owner, &src_encoding,
314                                          &src_istemplate, &src_allowconn, &src_lastsysoid,
315                                          &src_frozenxid, &src_minmxid, &src_deftablespace,
316                                          &src_collate, &src_ctype))
317                 ereport(ERROR,
318                                 (errcode(ERRCODE_UNDEFINED_DATABASE),
319                                  errmsg("template database \"%s\" does not exist",
320                                                 dbtemplate)));
321
322         /*
323          * Permission check: to copy a DB that's not marked datistemplate, you
324          * must be superuser or the owner thereof.
325          */
326         if (!src_istemplate)
327         {
328                 if (!pg_database_ownercheck(src_dboid, GetUserId()))
329                         ereport(ERROR,
330                                         (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
331                                          errmsg("permission denied to copy database \"%s\"",
332                                                         dbtemplate)));
333         }
334
335         /* If encoding or locales are defaulted, use source's setting */
336         if (encoding < 0)
337                 encoding = src_encoding;
338         if (dbcollate == NULL)
339                 dbcollate = src_collate;
340         if (dbctype == NULL)
341                 dbctype = src_ctype;
342
343         /* Some encodings are client only */
344         if (!PG_VALID_BE_ENCODING(encoding))
345                 ereport(ERROR,
346                                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
347                                  errmsg("invalid server encoding %d", encoding)));
348
349         /* Check that the chosen locales are valid, and get canonical spellings */
350         if (!check_locale(LC_COLLATE, dbcollate, &canonname))
351                 ereport(ERROR,
352                                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
353                                  errmsg("invalid locale name: \"%s\"", dbcollate)));
354         dbcollate = canonname;
355         if (!check_locale(LC_CTYPE, dbctype, &canonname))
356                 ereport(ERROR,
357                                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
358                                  errmsg("invalid locale name: \"%s\"", dbctype)));
359         dbctype = canonname;
360
361         check_encoding_locale_matches(encoding, dbcollate, dbctype);
362
363         /*
364          * Check that the new encoding and locale settings match the source
365          * database.  We insist on this because we simply copy the source data ---
366          * any non-ASCII data would be wrongly encoded, and any indexes sorted
367          * according to the source locale would be wrong.
368          *
369          * However, we assume that template0 doesn't contain any non-ASCII data
370          * nor any indexes that depend on collation or ctype, so template0 can be
371          * used as template for creating a database with any encoding or locale.
372          */
373         if (strcmp(dbtemplate, "template0") != 0)
374         {
375                 if (encoding != src_encoding)
376                         ereport(ERROR,
377                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
378                                          errmsg("new encoding (%s) is incompatible with the encoding of the template database (%s)",
379                                                         pg_encoding_to_char(encoding),
380                                                         pg_encoding_to_char(src_encoding)),
381                                          errhint("Use the same encoding as in the template database, or use template0 as template.")));
382
383                 if (strcmp(dbcollate, src_collate) != 0)
384                         ereport(ERROR,
385                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
386                                          errmsg("new collation (%s) is incompatible with the collation of the template database (%s)",
387                                                         dbcollate, src_collate),
388                                          errhint("Use the same collation as in the template database, or use template0 as template.")));
389
390                 if (strcmp(dbctype, src_ctype) != 0)
391                         ereport(ERROR,
392                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
393                                          errmsg("new LC_CTYPE (%s) is incompatible with the LC_CTYPE of the template database (%s)",
394                                                         dbctype, src_ctype),
395                                          errhint("Use the same LC_CTYPE as in the template database, or use template0 as template.")));
396         }
397
398         /* Resolve default tablespace for new database */
399         if (dtablespacename && dtablespacename->arg)
400         {
401                 char       *tablespacename;
402                 AclResult       aclresult;
403
404                 tablespacename = defGetString(dtablespacename);
405                 dst_deftablespace = get_tablespace_oid(tablespacename, false);
406                 /* check permissions */
407                 aclresult = pg_tablespace_aclcheck(dst_deftablespace, GetUserId(),
408                                                                                    ACL_CREATE);
409                 if (aclresult != ACLCHECK_OK)
410                         aclcheck_error(aclresult, ACL_KIND_TABLESPACE,
411                                                    tablespacename);
412
413                 /* pg_global must never be the default tablespace */
414                 if (dst_deftablespace == GLOBALTABLESPACE_OID)
415                         ereport(ERROR,
416                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
417                                   errmsg("pg_global cannot be used as default tablespace")));
418
419                 /*
420                  * If we are trying to change the default tablespace of the template,
421                  * we require that the template not have any files in the new default
422                  * tablespace.  This is necessary because otherwise the copied
423                  * database would contain pg_class rows that refer to its default
424                  * tablespace both explicitly (by OID) and implicitly (as zero), which
425                  * would cause problems.  For example another CREATE DATABASE using
426                  * the copied database as template, and trying to change its default
427                  * tablespace again, would yield outright incorrect results (it would
428                  * improperly move tables to the new default tablespace that should
429                  * stay in the same tablespace).
430                  */
431                 if (dst_deftablespace != src_deftablespace)
432                 {
433                         char       *srcpath;
434                         struct stat st;
435
436                         srcpath = GetDatabasePath(src_dboid, dst_deftablespace);
437
438                         if (stat(srcpath, &st) == 0 &&
439                                 S_ISDIR(st.st_mode) &&
440                                 !directory_is_empty(srcpath))
441                                 ereport(ERROR,
442                                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
443                                                  errmsg("cannot assign new default tablespace \"%s\"",
444                                                                 tablespacename),
445                                                  errdetail("There is a conflict because database \"%s\" already has some tables in this tablespace.",
446                                                                    dbtemplate)));
447                         pfree(srcpath);
448                 }
449         }
450         else
451         {
452                 /* Use template database's default tablespace */
453                 dst_deftablespace = src_deftablespace;
454                 /* Note there is no additional permission check in this path */
455         }
456
457         /*
458          * Check for db name conflict.  This is just to give a more friendly error
459          * message than "unique index violation".  There's a race condition but
460          * we're willing to accept the less friendly message in that case.
461          */
462         if (OidIsValid(get_database_oid(dbname, true)))
463                 ereport(ERROR,
464                                 (errcode(ERRCODE_DUPLICATE_DATABASE),
465                                  errmsg("database \"%s\" already exists", dbname)));
466
467         /*
468          * The source DB can't have any active backends, except this one
469          * (exception is to allow CREATE DB while connected to template1).
470          * Otherwise we might copy inconsistent data.
471          *
472          * This should be last among the basic error checks, because it involves
473          * potential waiting; we may as well throw an error first if we're gonna
474          * throw one.
475          */
476         if (CountOtherDBBackends(src_dboid, &notherbackends, &npreparedxacts))
477                 ereport(ERROR,
478                                 (errcode(ERRCODE_OBJECT_IN_USE),
479                         errmsg("source database \"%s\" is being accessed by other users",
480                                    dbtemplate),
481                                  errdetail_busy_db(notherbackends, npreparedxacts)));
482
483         /*
484          * Select an OID for the new database, checking that it doesn't have a
485          * filename conflict with anything already existing in the tablespace
486          * directories.
487          */
488         pg_database_rel = heap_open(DatabaseRelationId, RowExclusiveLock);
489
490         do
491         {
492                 dboid = GetNewOid(pg_database_rel);
493         } while (check_db_file_conflict(dboid));
494
495         /*
496          * Insert a new tuple into pg_database.  This establishes our ownership of
497          * the new database name (anyone else trying to insert the same name will
498          * block on the unique index, and fail after we commit).
499          */
500
501         /* Form tuple */
502         MemSet(new_record, 0, sizeof(new_record));
503         MemSet(new_record_nulls, false, sizeof(new_record_nulls));
504
505         new_record[Anum_pg_database_datname - 1] =
506                 DirectFunctionCall1(namein, CStringGetDatum(dbname));
507         new_record[Anum_pg_database_datdba - 1] = ObjectIdGetDatum(datdba);
508         new_record[Anum_pg_database_encoding - 1] = Int32GetDatum(encoding);
509         new_record[Anum_pg_database_datcollate - 1] =
510                 DirectFunctionCall1(namein, CStringGetDatum(dbcollate));
511         new_record[Anum_pg_database_datctype - 1] =
512                 DirectFunctionCall1(namein, CStringGetDatum(dbctype));
513         new_record[Anum_pg_database_datistemplate - 1] = BoolGetDatum(dbistemplate);
514         new_record[Anum_pg_database_datallowconn - 1] = BoolGetDatum(dballowconnections);
515         new_record[Anum_pg_database_datconnlimit - 1] = Int32GetDatum(dbconnlimit);
516         new_record[Anum_pg_database_datlastsysoid - 1] = ObjectIdGetDatum(src_lastsysoid);
517         new_record[Anum_pg_database_datfrozenxid - 1] = TransactionIdGetDatum(src_frozenxid);
518         new_record[Anum_pg_database_datminmxid - 1] = TransactionIdGetDatum(src_minmxid);
519         new_record[Anum_pg_database_dattablespace - 1] = ObjectIdGetDatum(dst_deftablespace);
520
521         /*
522          * We deliberately set datacl to default (NULL), rather than copying it
523          * from the template database.  Copying it would be a bad idea when the
524          * owner is not the same as the template's owner.
525          */
526         new_record_nulls[Anum_pg_database_datacl - 1] = true;
527
528         tuple = heap_form_tuple(RelationGetDescr(pg_database_rel),
529                                                         new_record, new_record_nulls);
530
531         HeapTupleSetOid(tuple, dboid);
532
533         simple_heap_insert(pg_database_rel, tuple);
534
535         /* Update indexes */
536         CatalogUpdateIndexes(pg_database_rel, tuple);
537
538         /*
539          * Now generate additional catalog entries associated with the new DB
540          */
541
542         /* Register owner dependency */
543         recordDependencyOnOwner(DatabaseRelationId, dboid, datdba);
544
545         /* Create pg_shdepend entries for objects within database */
546         copyTemplateDependencies(src_dboid, dboid);
547
548         /* Post creation hook for new database */
549         InvokeObjectPostCreateHook(DatabaseRelationId, dboid, 0);
550
551         /*
552          * Force a checkpoint before starting the copy. This will force all dirty
553          * buffers, including those of unlogged tables, out to disk, to ensure
554          * source database is up-to-date on disk for the copy.
555          * FlushDatabaseBuffers() would suffice for that, but we also want
556          * to process any pending unlink requests. Otherwise, if a checkpoint
557          * happened while we're copying files, a file might be deleted just when
558          * we're about to copy it, causing the lstat() call in copydir() to fail
559          * with ENOENT.
560          */
561         RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT
562                                           | CHECKPOINT_FLUSH_ALL);
563
564         /*
565          * Once we start copying subdirectories, we need to be able to clean 'em
566          * up if we fail.  Use an ENSURE block to make sure this happens.  (This
567          * is not a 100% solution, because of the possibility of failure during
568          * transaction commit after we leave this routine, but it should handle
569          * most scenarios.)
570          */
571         fparms.src_dboid = src_dboid;
572         fparms.dest_dboid = dboid;
573         PG_ENSURE_ERROR_CLEANUP(createdb_failure_callback,
574                                                         PointerGetDatum(&fparms));
575         {
576                 /*
577                  * Iterate through all tablespaces of the template database, and copy
578                  * each one to the new database.
579                  */
580                 rel = heap_open(TableSpaceRelationId, AccessShareLock);
581                 scan = heap_beginscan_catalog(rel, 0, NULL);
582                 while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
583                 {
584                         Oid                     srctablespace = HeapTupleGetOid(tuple);
585                         Oid                     dsttablespace;
586                         char       *srcpath;
587                         char       *dstpath;
588                         struct stat st;
589
590                         /* No need to copy global tablespace */
591                         if (srctablespace == GLOBALTABLESPACE_OID)
592                                 continue;
593
594                         srcpath = GetDatabasePath(src_dboid, srctablespace);
595
596                         if (stat(srcpath, &st) < 0 || !S_ISDIR(st.st_mode) ||
597                                 directory_is_empty(srcpath))
598                         {
599                                 /* Assume we can ignore it */
600                                 pfree(srcpath);
601                                 continue;
602                         }
603
604                         if (srctablespace == src_deftablespace)
605                                 dsttablespace = dst_deftablespace;
606                         else
607                                 dsttablespace = srctablespace;
608
609                         dstpath = GetDatabasePath(dboid, dsttablespace);
610
611                         /*
612                          * Copy this subdirectory to the new location
613                          *
614                          * We don't need to copy subdirectories
615                          */
616                         copydir(srcpath, dstpath, false);
617
618                         /* Record the filesystem change in XLOG */
619                         {
620                                 xl_dbase_create_rec xlrec;
621                                 XLogRecData rdata[1];
622
623                                 xlrec.db_id = dboid;
624                                 xlrec.tablespace_id = dsttablespace;
625                                 xlrec.src_db_id = src_dboid;
626                                 xlrec.src_tablespace_id = srctablespace;
627
628                                 rdata[0].data = (char *) &xlrec;
629                                 rdata[0].len = sizeof(xl_dbase_create_rec);
630                                 rdata[0].buffer = InvalidBuffer;
631                                 rdata[0].next = NULL;
632
633                                 (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE, rdata);
634                         }
635                 }
636                 heap_endscan(scan);
637                 heap_close(rel, AccessShareLock);
638
639                 /*
640                  * We force a checkpoint before committing.  This effectively means
641                  * that committed XLOG_DBASE_CREATE operations will never need to be
642                  * replayed (at least not in ordinary crash recovery; we still have to
643                  * make the XLOG entry for the benefit of PITR operations). This
644                  * avoids two nasty scenarios:
645                  *
646                  * #1: When PITR is off, we don't XLOG the contents of newly created
647                  * indexes; therefore the drop-and-recreate-whole-directory behavior
648                  * of DBASE_CREATE replay would lose such indexes.
649                  *
650                  * #2: Since we have to recopy the source database during DBASE_CREATE
651                  * replay, we run the risk of copying changes in it that were
652                  * committed after the original CREATE DATABASE command but before the
653                  * system crash that led to the replay.  This is at least unexpected
654                  * and at worst could lead to inconsistencies, eg duplicate table
655                  * names.
656                  *
657                  * (Both of these were real bugs in releases 8.0 through 8.0.3.)
658                  *
659                  * In PITR replay, the first of these isn't an issue, and the second
660                  * is only a risk if the CREATE DATABASE and subsequent template
661                  * database change both occur while a base backup is being taken.
662                  * There doesn't seem to be much we can do about that except document
663                  * it as a limitation.
664                  *
665                  * Perhaps if we ever implement CREATE DATABASE in a less cheesy way,
666                  * we can avoid this.
667                  */
668                 RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
669
670                 /*
671                  * Close pg_database, but keep lock till commit.
672                  */
673                 heap_close(pg_database_rel, NoLock);
674
675                 /*
676                  * Force synchronous commit, thus minimizing the window between
677                  * creation of the database files and commital of the transaction. If
678                  * we crash before committing, we'll have a DB that's taking up disk
679                  * space but is not in pg_database, which is not good.
680                  */
681                 ForceSyncCommit();
682         }
683         PG_END_ENSURE_ERROR_CLEANUP(createdb_failure_callback,
684                                                                 PointerGetDatum(&fparms));
685
686         return dboid;
687 }
688
689 /*
690  * Check whether chosen encoding matches chosen locale settings.  This
691  * restriction is necessary because libc's locale-specific code usually
692  * fails when presented with data in an encoding it's not expecting. We
693  * allow mismatch in four cases:
694  *
695  * 1. locale encoding = SQL_ASCII, which means that the locale is C/POSIX
696  * which works with any encoding.
697  *
698  * 2. locale encoding = -1, which means that we couldn't determine the
699  * locale's encoding and have to trust the user to get it right.
700  *
701  * 3. selected encoding is UTF8 and platform is win32. This is because
702  * UTF8 is a pseudo codepage that is supported in all locales since it's
703  * converted to UTF16 before being used.
704  *
705  * 4. selected encoding is SQL_ASCII, but only if you're a superuser. This
706  * is risky but we have historically allowed it --- notably, the
707  * regression tests require it.
708  *
709  * Note: if you change this policy, fix initdb to match.
710  */
711 void
712 check_encoding_locale_matches(int encoding, const char *collate, const char *ctype)
713 {
714         int                     ctype_encoding = pg_get_encoding_from_locale(ctype, true);
715         int                     collate_encoding = pg_get_encoding_from_locale(collate, true);
716
717         if (!(ctype_encoding == encoding ||
718                   ctype_encoding == PG_SQL_ASCII ||
719                   ctype_encoding == -1 ||
720 #ifdef WIN32
721                   encoding == PG_UTF8 ||
722 #endif
723                   (encoding == PG_SQL_ASCII && superuser())))
724                 ereport(ERROR,
725                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
726                                  errmsg("encoding \"%s\" does not match locale \"%s\"",
727                                                 pg_encoding_to_char(encoding),
728                                                 ctype),
729                    errdetail("The chosen LC_CTYPE setting requires encoding \"%s\".",
730                                          pg_encoding_to_char(ctype_encoding))));
731
732         if (!(collate_encoding == encoding ||
733                   collate_encoding == PG_SQL_ASCII ||
734                   collate_encoding == -1 ||
735 #ifdef WIN32
736                   encoding == PG_UTF8 ||
737 #endif
738                   (encoding == PG_SQL_ASCII && superuser())))
739                 ereport(ERROR,
740                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
741                                  errmsg("encoding \"%s\" does not match locale \"%s\"",
742                                                 pg_encoding_to_char(encoding),
743                                                 collate),
744                  errdetail("The chosen LC_COLLATE setting requires encoding \"%s\".",
745                                    pg_encoding_to_char(collate_encoding))));
746 }
747
748 /* Error cleanup callback for createdb */
749 static void
750 createdb_failure_callback(int code, Datum arg)
751 {
752         createdb_failure_params *fparms = (createdb_failure_params *) DatumGetPointer(arg);
753
754         /*
755          * Release lock on source database before doing recursive remove. This is
756          * not essential but it seems desirable to release the lock as soon as
757          * possible.
758          */
759         UnlockSharedObject(DatabaseRelationId, fparms->src_dboid, 0, ShareLock);
760
761         /* Throw away any successfully copied subdirectories */
762         remove_dbtablespaces(fparms->dest_dboid);
763 }
764
765
766 /*
767  * DROP DATABASE
768  */
769 void
770 dropdb(const char *dbname, bool missing_ok)
771 {
772         Oid                     db_id;
773         bool            db_istemplate;
774         Relation        pgdbrel;
775         HeapTuple       tup;
776         int                     notherbackends;
777         int                     npreparedxacts;
778         int                     nslots,
779                                 nslots_active;
780
781         /*
782          * Look up the target database's OID, and get exclusive lock on it. We
783          * need this to ensure that no new backend starts up in the target
784          * database while we are deleting it (see postinit.c), and that no one is
785          * using it as a CREATE DATABASE template or trying to delete it for
786          * themselves.
787          */
788         pgdbrel = heap_open(DatabaseRelationId, RowExclusiveLock);
789
790         if (!get_db_info(dbname, AccessExclusiveLock, &db_id, NULL, NULL,
791                                    &db_istemplate, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
792         {
793                 if (!missing_ok)
794                 {
795                         ereport(ERROR,
796                                         (errcode(ERRCODE_UNDEFINED_DATABASE),
797                                          errmsg("database \"%s\" does not exist", dbname)));
798                 }
799                 else
800                 {
801                         /* Close pg_database, release the lock, since we changed nothing */
802                         heap_close(pgdbrel, RowExclusiveLock);
803                         ereport(NOTICE,
804                                         (errmsg("database \"%s\" does not exist, skipping",
805                                                         dbname)));
806                         return;
807                 }
808         }
809
810         /*
811          * Permission checks
812          */
813         if (!pg_database_ownercheck(db_id, GetUserId()))
814                 aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_DATABASE,
815                                            dbname);
816
817         /* DROP hook for the database being removed */
818         InvokeObjectDropHook(DatabaseRelationId, db_id, 0);
819
820         /*
821          * Disallow dropping a DB that is marked istemplate.  This is just to
822          * prevent people from accidentally dropping template0 or template1; they
823          * can do so if they're really determined ...
824          */
825         if (db_istemplate)
826                 ereport(ERROR,
827                                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
828                                  errmsg("cannot drop a template database")));
829
830         /* Obviously can't drop my own database */
831         if (db_id == MyDatabaseId)
832                 ereport(ERROR,
833                                 (errcode(ERRCODE_OBJECT_IN_USE),
834                                  errmsg("cannot drop the currently open database")));
835
836         /*
837          * Check whether there are, possibly unconnected, logical slots that refer
838          * to the to-be-dropped database. The database lock we are holding
839          * prevents the creation of new slots using the database.
840          */
841         if (ReplicationSlotsCountDBSlots(db_id, &nslots, &nslots_active))
842                 ereport(ERROR,
843                                 (errcode(ERRCODE_OBJECT_IN_USE),
844                                  errmsg("database \"%s\" is used by a logical replication slot",
845                                                 dbname),
846                                  errdetail_plural("There is %d slot, %d of them active.",
847                                                                   "There are %d slots, %d of them active.",
848                                                                   nslots,
849                                                                   nslots, nslots_active)));
850
851         /*
852          * Check for other backends in the target database.  (Because we hold the
853          * database lock, no new ones can start after this.)
854          *
855          * As in CREATE DATABASE, check this after other error conditions.
856          */
857         if (CountOtherDBBackends(db_id, &notherbackends, &npreparedxacts))
858                 ereport(ERROR,
859                                 (errcode(ERRCODE_OBJECT_IN_USE),
860                                  errmsg("database \"%s\" is being accessed by other users",
861                                                 dbname),
862                                  errdetail_busy_db(notherbackends, npreparedxacts)));
863
864         /*
865          * Remove the database's tuple from pg_database.
866          */
867         tup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(db_id));
868         if (!HeapTupleIsValid(tup))
869                 elog(ERROR, "cache lookup failed for database %u", db_id);
870
871         simple_heap_delete(pgdbrel, &tup->t_self);
872
873         ReleaseSysCache(tup);
874
875         /*
876          * Delete any comments or security labels associated with the database.
877          */
878         DeleteSharedComments(db_id, DatabaseRelationId);
879         DeleteSharedSecurityLabel(db_id, DatabaseRelationId);
880
881         /*
882          * Remove settings associated with this database
883          */
884         DropSetting(db_id, InvalidOid);
885
886         /*
887          * Remove shared dependency references for the database.
888          */
889         dropDatabaseDependencies(db_id);
890
891         /*
892          * Drop pages for this database that are in the shared buffer cache. This
893          * is important to ensure that no remaining backend tries to write out a
894          * dirty buffer to the dead database later...
895          */
896         DropDatabaseBuffers(db_id);
897
898         /*
899          * Tell the stats collector to forget it immediately, too.
900          */
901         pgstat_drop_database(db_id);
902
903         /*
904          * Tell checkpointer to forget any pending fsync and unlink requests for
905          * files in the database; else the fsyncs will fail at next checkpoint, or
906          * worse, it will delete files that belong to a newly created database
907          * with the same OID.
908          */
909         ForgetDatabaseFsyncRequests(db_id);
910
911         /*
912          * Force a checkpoint to make sure the checkpointer has received the
913          * message sent by ForgetDatabaseFsyncRequests. On Windows, this also
914          * ensures that background procs don't hold any open files, which would
915          * cause rmdir() to fail.
916          */
917         RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
918
919         /*
920          * Remove all tablespace subdirs belonging to the database.
921          */
922         remove_dbtablespaces(db_id);
923
924         /*
925          * Close pg_database, but keep lock till commit.
926          */
927         heap_close(pgdbrel, NoLock);
928
929         /*
930          * Force synchronous commit, thus minimizing the window between removal of
931          * the database files and commital of the transaction. If we crash before
932          * committing, we'll have a DB that's gone on disk but still there
933          * according to pg_database, which is not good.
934          */
935         ForceSyncCommit();
936 }
937
938
939 /*
940  * Rename database
941  */
942 Oid
943 RenameDatabase(const char *oldname, const char *newname)
944 {
945         Oid                     db_id;
946         HeapTuple       newtup;
947         Relation        rel;
948         int                     notherbackends;
949         int                     npreparedxacts;
950
951         /*
952          * Look up the target database's OID, and get exclusive lock on it. We
953          * need this for the same reasons as DROP DATABASE.
954          */
955         rel = heap_open(DatabaseRelationId, RowExclusiveLock);
956
957         if (!get_db_info(oldname, AccessExclusiveLock, &db_id, NULL, NULL,
958                                          NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
959                 ereport(ERROR,
960                                 (errcode(ERRCODE_UNDEFINED_DATABASE),
961                                  errmsg("database \"%s\" does not exist", oldname)));
962
963         /* must be owner */
964         if (!pg_database_ownercheck(db_id, GetUserId()))
965                 aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_DATABASE,
966                                            oldname);
967
968         /* must have createdb rights */
969         if (!have_createdb_privilege())
970                 ereport(ERROR,
971                                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
972                                  errmsg("permission denied to rename database")));
973
974         /*
975          * Make sure the new name doesn't exist.  See notes for same error in
976          * CREATE DATABASE.
977          */
978         if (OidIsValid(get_database_oid(newname, true)))
979                 ereport(ERROR,
980                                 (errcode(ERRCODE_DUPLICATE_DATABASE),
981                                  errmsg("database \"%s\" already exists", newname)));
982
983         /*
984          * XXX Client applications probably store the current database somewhere,
985          * so renaming it could cause confusion.  On the other hand, there may not
986          * be an actual problem besides a little confusion, so think about this
987          * and decide.
988          */
989         if (db_id == MyDatabaseId)
990                 ereport(ERROR,
991                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
992                                  errmsg("current database cannot be renamed")));
993
994         /*
995          * Make sure the database does not have active sessions.  This is the same
996          * concern as above, but applied to other sessions.
997          *
998          * As in CREATE DATABASE, check this after other error conditions.
999          */
1000         if (CountOtherDBBackends(db_id, &notherbackends, &npreparedxacts))
1001                 ereport(ERROR,
1002                                 (errcode(ERRCODE_OBJECT_IN_USE),
1003                                  errmsg("database \"%s\" is being accessed by other users",
1004                                                 oldname),
1005                                  errdetail_busy_db(notherbackends, npreparedxacts)));
1006
1007         /* rename */
1008         newtup = SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(db_id));
1009         if (!HeapTupleIsValid(newtup))
1010                 elog(ERROR, "cache lookup failed for database %u", db_id);
1011         namestrcpy(&(((Form_pg_database) GETSTRUCT(newtup))->datname), newname);
1012         simple_heap_update(rel, &newtup->t_self, newtup);
1013         CatalogUpdateIndexes(rel, newtup);
1014
1015         InvokeObjectPostAlterHook(DatabaseRelationId, db_id, 0);
1016
1017         /*
1018          * Close pg_database, but keep lock till commit.
1019          */
1020         heap_close(rel, NoLock);
1021
1022         return db_id;
1023 }
1024
1025
1026 /*
1027  * ALTER DATABASE SET TABLESPACE
1028  */
1029 static void
1030 movedb(const char *dbname, const char *tblspcname)
1031 {
1032         Oid                     db_id;
1033         Relation        pgdbrel;
1034         int                     notherbackends;
1035         int                     npreparedxacts;
1036         HeapTuple       oldtuple,
1037                                 newtuple;
1038         Oid                     src_tblspcoid,
1039                                 dst_tblspcoid;
1040         Datum           new_record[Natts_pg_database];
1041         bool            new_record_nulls[Natts_pg_database];
1042         bool            new_record_repl[Natts_pg_database];
1043         ScanKeyData scankey;
1044         SysScanDesc sysscan;
1045         AclResult       aclresult;
1046         char       *src_dbpath;
1047         char       *dst_dbpath;
1048         DIR                *dstdir;
1049         struct dirent *xlde;
1050         movedb_failure_params fparms;
1051
1052         /*
1053          * Look up the target database's OID, and get exclusive lock on it. We
1054          * need this to ensure that no new backend starts up in the database while
1055          * we are moving it, and that no one is using it as a CREATE DATABASE
1056          * template or trying to delete it.
1057          */
1058         pgdbrel = heap_open(DatabaseRelationId, RowExclusiveLock);
1059
1060         if (!get_db_info(dbname, AccessExclusiveLock, &db_id, NULL, NULL,
1061                                    NULL, NULL, NULL, NULL, NULL, &src_tblspcoid, NULL, NULL))
1062                 ereport(ERROR,
1063                                 (errcode(ERRCODE_UNDEFINED_DATABASE),
1064                                  errmsg("database \"%s\" does not exist", dbname)));
1065
1066         /*
1067          * We actually need a session lock, so that the lock will persist across
1068          * the commit/restart below.  (We could almost get away with letting the
1069          * lock be released at commit, except that someone could try to move
1070          * relations of the DB back into the old directory while we rmtree() it.)
1071          */
1072         LockSharedObjectForSession(DatabaseRelationId, db_id, 0,
1073                                                            AccessExclusiveLock);
1074
1075         /*
1076          * Permission checks
1077          */
1078         if (!pg_database_ownercheck(db_id, GetUserId()))
1079                 aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_DATABASE,
1080                                            dbname);
1081
1082         /*
1083          * Obviously can't move the tables of my own database
1084          */
1085         if (db_id == MyDatabaseId)
1086                 ereport(ERROR,
1087                                 (errcode(ERRCODE_OBJECT_IN_USE),
1088                                  errmsg("cannot change the tablespace of the currently open database")));
1089
1090         /*
1091          * Get tablespace's oid
1092          */
1093         dst_tblspcoid = get_tablespace_oid(tblspcname, false);
1094
1095         /*
1096          * Permission checks
1097          */
1098         aclresult = pg_tablespace_aclcheck(dst_tblspcoid, GetUserId(),
1099                                                                            ACL_CREATE);
1100         if (aclresult != ACLCHECK_OK)
1101                 aclcheck_error(aclresult, ACL_KIND_TABLESPACE,
1102                                            tblspcname);
1103
1104         /*
1105          * pg_global must never be the default tablespace
1106          */
1107         if (dst_tblspcoid == GLOBALTABLESPACE_OID)
1108                 ereport(ERROR,
1109                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1110                                  errmsg("pg_global cannot be used as default tablespace")));
1111
1112         /*
1113          * No-op if same tablespace
1114          */
1115         if (src_tblspcoid == dst_tblspcoid)
1116         {
1117                 heap_close(pgdbrel, NoLock);
1118                 UnlockSharedObjectForSession(DatabaseRelationId, db_id, 0,
1119                                                                          AccessExclusiveLock);
1120                 return;
1121         }
1122
1123         /*
1124          * Check for other backends in the target database.  (Because we hold the
1125          * database lock, no new ones can start after this.)
1126          *
1127          * As in CREATE DATABASE, check this after other error conditions.
1128          */
1129         if (CountOtherDBBackends(db_id, &notherbackends, &npreparedxacts))
1130                 ereport(ERROR,
1131                                 (errcode(ERRCODE_OBJECT_IN_USE),
1132                                  errmsg("database \"%s\" is being accessed by other users",
1133                                                 dbname),
1134                                  errdetail_busy_db(notherbackends, npreparedxacts)));
1135
1136         /*
1137          * Get old and new database paths
1138          */
1139         src_dbpath = GetDatabasePath(db_id, src_tblspcoid);
1140         dst_dbpath = GetDatabasePath(db_id, dst_tblspcoid);
1141
1142         /*
1143          * Force a checkpoint before proceeding. This will force all dirty
1144          * buffers, including those of unlogged tables, out to disk, to ensure
1145          * source database is up-to-date on disk for the copy.
1146          * FlushDatabaseBuffers() would suffice for that, but we also want to
1147          * process any pending unlink requests. Otherwise, the check for existing
1148          * files in the target directory might fail unnecessarily, not to mention
1149          * that the copy might fail due to source files getting deleted under it.
1150          * On Windows, this also ensures that background procs don't hold any open
1151          * files, which would cause rmdir() to fail.
1152          */
1153         RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT
1154                                           | CHECKPOINT_FLUSH_ALL);
1155
1156         /*
1157          * Now drop all buffers holding data of the target database; they should
1158          * no longer be dirty so DropDatabaseBuffers is safe.
1159          *
1160          * It might seem that we could just let these buffers age out of shared
1161          * buffers naturally, since they should not get referenced anymore.  The
1162          * problem with that is that if the user later moves the database back to
1163          * its original tablespace, any still-surviving buffers would appear to
1164          * contain valid data again --- but they'd be missing any changes made in
1165          * the database while it was in the new tablespace.  In any case, freeing
1166          * buffers that should never be used again seems worth the cycles.
1167          *
1168          * Note: it'd be sufficient to get rid of buffers matching db_id and
1169          * src_tblspcoid, but bufmgr.c presently provides no API for that.
1170          */
1171         DropDatabaseBuffers(db_id);
1172
1173         /*
1174          * Check for existence of files in the target directory, i.e., objects of
1175          * this database that are already in the target tablespace.  We can't
1176          * allow the move in such a case, because we would need to change those
1177          * relations' pg_class.reltablespace entries to zero, and we don't have
1178          * access to the DB's pg_class to do so.
1179          */
1180         dstdir = AllocateDir(dst_dbpath);
1181         if (dstdir != NULL)
1182         {
1183                 while ((xlde = ReadDir(dstdir, dst_dbpath)) != NULL)
1184                 {
1185                         if (strcmp(xlde->d_name, ".") == 0 ||
1186                                 strcmp(xlde->d_name, "..") == 0)
1187                                 continue;
1188
1189                         ereport(ERROR,
1190                                         (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1191                                          errmsg("some relations of database \"%s\" are already in tablespace \"%s\"",
1192                                                         dbname, tblspcname),
1193                                          errhint("You must move them back to the database's default tablespace before using this command.")));
1194                 }
1195
1196                 FreeDir(dstdir);
1197
1198                 /*
1199                  * The directory exists but is empty. We must remove it before using
1200                  * the copydir function.
1201                  */
1202                 if (rmdir(dst_dbpath) != 0)
1203                         elog(ERROR, "could not remove directory \"%s\": %m",
1204                                  dst_dbpath);
1205         }
1206
1207         /*
1208          * Use an ENSURE block to make sure we remove the debris if the copy fails
1209          * (eg, due to out-of-disk-space).  This is not a 100% solution, because
1210          * of the possibility of failure during transaction commit, but it should
1211          * handle most scenarios.
1212          */
1213         fparms.dest_dboid = db_id;
1214         fparms.dest_tsoid = dst_tblspcoid;
1215         PG_ENSURE_ERROR_CLEANUP(movedb_failure_callback,
1216                                                         PointerGetDatum(&fparms));
1217         {
1218                 /*
1219                  * Copy files from the old tablespace to the new one
1220                  */
1221                 copydir(src_dbpath, dst_dbpath, false);
1222
1223                 /*
1224                  * Record the filesystem change in XLOG
1225                  */
1226                 {
1227                         xl_dbase_create_rec xlrec;
1228                         XLogRecData rdata[1];
1229
1230                         xlrec.db_id = db_id;
1231                         xlrec.tablespace_id = dst_tblspcoid;
1232                         xlrec.src_db_id = db_id;
1233                         xlrec.src_tablespace_id = src_tblspcoid;
1234
1235                         rdata[0].data = (char *) &xlrec;
1236                         rdata[0].len = sizeof(xl_dbase_create_rec);
1237                         rdata[0].buffer = InvalidBuffer;
1238                         rdata[0].next = NULL;
1239
1240                         (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE, rdata);
1241                 }
1242
1243                 /*
1244                  * Update the database's pg_database tuple
1245                  */
1246                 ScanKeyInit(&scankey,
1247                                         Anum_pg_database_datname,
1248                                         BTEqualStrategyNumber, F_NAMEEQ,
1249                                         NameGetDatum(dbname));
1250                 sysscan = systable_beginscan(pgdbrel, DatabaseNameIndexId, true,
1251                                                                          NULL, 1, &scankey);
1252                 oldtuple = systable_getnext(sysscan);
1253                 if (!HeapTupleIsValid(oldtuple))                /* shouldn't happen... */
1254                         ereport(ERROR,
1255                                         (errcode(ERRCODE_UNDEFINED_DATABASE),
1256                                          errmsg("database \"%s\" does not exist", dbname)));
1257
1258                 MemSet(new_record, 0, sizeof(new_record));
1259                 MemSet(new_record_nulls, false, sizeof(new_record_nulls));
1260                 MemSet(new_record_repl, false, sizeof(new_record_repl));
1261
1262                 new_record[Anum_pg_database_dattablespace - 1] = ObjectIdGetDatum(dst_tblspcoid);
1263                 new_record_repl[Anum_pg_database_dattablespace - 1] = true;
1264
1265                 newtuple = heap_modify_tuple(oldtuple, RelationGetDescr(pgdbrel),
1266                                                                          new_record,
1267                                                                          new_record_nulls, new_record_repl);
1268                 simple_heap_update(pgdbrel, &oldtuple->t_self, newtuple);
1269
1270                 /* Update indexes */
1271                 CatalogUpdateIndexes(pgdbrel, newtuple);
1272
1273                 InvokeObjectPostAlterHook(DatabaseRelationId,
1274                                                                   HeapTupleGetOid(newtuple), 0);
1275
1276                 systable_endscan(sysscan);
1277
1278                 /*
1279                  * Force another checkpoint here.  As in CREATE DATABASE, this is to
1280                  * ensure that we don't have to replay a committed XLOG_DBASE_CREATE
1281                  * operation, which would cause us to lose any unlogged operations
1282                  * done in the new DB tablespace before the next checkpoint.
1283                  */
1284                 RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
1285
1286                 /*
1287                  * Force synchronous commit, thus minimizing the window between
1288                  * copying the database files and commital of the transaction. If we
1289                  * crash before committing, we'll leave an orphaned set of files on
1290                  * disk, which is not fatal but not good either.
1291                  */
1292                 ForceSyncCommit();
1293
1294                 /*
1295                  * Close pg_database, but keep lock till commit.
1296                  */
1297                 heap_close(pgdbrel, NoLock);
1298         }
1299         PG_END_ENSURE_ERROR_CLEANUP(movedb_failure_callback,
1300                                                                 PointerGetDatum(&fparms));
1301
1302         /*
1303          * Commit the transaction so that the pg_database update is committed. If
1304          * we crash while removing files, the database won't be corrupt, we'll
1305          * just leave some orphaned files in the old directory.
1306          *
1307          * (This is OK because we know we aren't inside a transaction block.)
1308          *
1309          * XXX would it be safe/better to do this inside the ensure block?      Not
1310          * convinced it's a good idea; consider elog just after the transaction
1311          * really commits.
1312          */
1313         PopActiveSnapshot();
1314         CommitTransactionCommand();
1315
1316         /* Start new transaction for the remaining work; don't need a snapshot */
1317         StartTransactionCommand();
1318
1319         /*
1320          * Remove files from the old tablespace
1321          */
1322         if (!rmtree(src_dbpath, true))
1323                 ereport(WARNING,
1324                                 (errmsg("some useless files may be left behind in old database directory \"%s\"",
1325                                                 src_dbpath)));
1326
1327         /*
1328          * Record the filesystem change in XLOG
1329          */
1330         {
1331                 xl_dbase_drop_rec xlrec;
1332                 XLogRecData rdata[1];
1333
1334                 xlrec.db_id = db_id;
1335                 xlrec.tablespace_id = src_tblspcoid;
1336
1337                 rdata[0].data = (char *) &xlrec;
1338                 rdata[0].len = sizeof(xl_dbase_drop_rec);
1339                 rdata[0].buffer = InvalidBuffer;
1340                 rdata[0].next = NULL;
1341
1342                 (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_DROP, rdata);
1343         }
1344
1345         /* Now it's safe to release the database lock */
1346         UnlockSharedObjectForSession(DatabaseRelationId, db_id, 0,
1347                                                                  AccessExclusiveLock);
1348 }
1349
1350 /* Error cleanup callback for movedb */
1351 static void
1352 movedb_failure_callback(int code, Datum arg)
1353 {
1354         movedb_failure_params *fparms = (movedb_failure_params *) DatumGetPointer(arg);
1355         char       *dstpath;
1356
1357         /* Get rid of anything we managed to copy to the target directory */
1358         dstpath = GetDatabasePath(fparms->dest_dboid, fparms->dest_tsoid);
1359
1360         (void) rmtree(dstpath, true);
1361 }
1362
1363
1364 /*
1365  * ALTER DATABASE name ...
1366  */
1367 Oid
1368 AlterDatabase(AlterDatabaseStmt *stmt, bool isTopLevel)
1369 {
1370         Relation        rel;
1371         Oid                     dboid;
1372         HeapTuple       tuple,
1373                                 newtuple;
1374         ScanKeyData scankey;
1375         SysScanDesc scan;
1376         ListCell   *option;
1377         bool            dbistemplate = false;
1378         bool            dballowconnections = true;
1379         int                     dbconnlimit = -1;
1380         DefElem    *distemplate = NULL;
1381         DefElem    *dallowconnections = NULL;
1382         DefElem    *dconnlimit = NULL;
1383         DefElem    *dtablespace = NULL;
1384         Datum           new_record[Natts_pg_database];
1385         bool            new_record_nulls[Natts_pg_database];
1386         bool            new_record_repl[Natts_pg_database];
1387
1388         /* Extract options from the statement node tree */
1389         foreach(option, stmt->options)
1390         {
1391                 DefElem    *defel = (DefElem *) lfirst(option);
1392
1393                 if (strcmp(defel->defname, "is_template") == 0)
1394                 {
1395                         if (distemplate)
1396                                 ereport(ERROR,
1397                                                 (errcode(ERRCODE_SYNTAX_ERROR),
1398                                                  errmsg("conflicting or redundant options")));
1399                         distemplate = defel;
1400                 }
1401                 else if (strcmp(defel->defname, "allow_connections") == 0)
1402                 {
1403                         if (dallowconnections)
1404                                 ereport(ERROR,
1405                                                 (errcode(ERRCODE_SYNTAX_ERROR),
1406                                                  errmsg("conflicting or redundant options")));
1407                         dallowconnections = defel;
1408                 }
1409                 else if (strcmp(defel->defname, "connection_limit") == 0)
1410                 {
1411                         if (dconnlimit)
1412                                 ereport(ERROR,
1413                                                 (errcode(ERRCODE_SYNTAX_ERROR),
1414                                                  errmsg("conflicting or redundant options")));
1415                         dconnlimit = defel;
1416                 }
1417                 else if (strcmp(defel->defname, "tablespace") == 0)
1418                 {
1419                         if (dtablespace)
1420                                 ereport(ERROR,
1421                                                 (errcode(ERRCODE_SYNTAX_ERROR),
1422                                                  errmsg("conflicting or redundant options")));
1423                         dtablespace = defel;
1424                 }
1425                 else
1426                         ereport(ERROR,
1427                                         (errcode(ERRCODE_SYNTAX_ERROR),
1428                                          errmsg("option \"%s\" not recognized", defel->defname)));
1429         }
1430
1431         if (dtablespace)
1432         {
1433                 /*
1434                  * While the SET TABLESPACE syntax doesn't allow any other options,
1435                  * somebody could write "WITH TABLESPACE ...".  Forbid any other
1436                  * options from being specified in that case.
1437                  */
1438                 if (list_length(stmt->options) != 1)
1439                         ereport(ERROR,
1440                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1441                            errmsg("option \"%s\" cannot be specified with other options",
1442                                           dtablespace->defname)));
1443                 /* this case isn't allowed within a transaction block */
1444                 PreventTransactionChain(isTopLevel, "ALTER DATABASE SET TABLESPACE");
1445                 movedb(stmt->dbname, defGetString(dtablespace));
1446                 return InvalidOid;
1447         }
1448
1449         if (distemplate && distemplate->arg)
1450                 dbistemplate = defGetBoolean(distemplate);
1451         if (dallowconnections && dallowconnections->arg)
1452                 dballowconnections = defGetBoolean(dallowconnections);
1453         if (dconnlimit && dconnlimit->arg)
1454         {
1455                 dbconnlimit = defGetInt32(dconnlimit);
1456                 if (dbconnlimit < -1)
1457                         ereport(ERROR,
1458                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1459                                          errmsg("invalid connection limit: %d", dbconnlimit)));
1460         }
1461
1462         /*
1463          * Get the old tuple.  We don't need a lock on the database per se,
1464          * because we're not going to do anything that would mess up incoming
1465          * connections.
1466          */
1467         rel = heap_open(DatabaseRelationId, RowExclusiveLock);
1468         ScanKeyInit(&scankey,
1469                                 Anum_pg_database_datname,
1470                                 BTEqualStrategyNumber, F_NAMEEQ,
1471                                 NameGetDatum(stmt->dbname));
1472         scan = systable_beginscan(rel, DatabaseNameIndexId, true,
1473                                                           NULL, 1, &scankey);
1474         tuple = systable_getnext(scan);
1475         if (!HeapTupleIsValid(tuple))
1476                 ereport(ERROR,
1477                                 (errcode(ERRCODE_UNDEFINED_DATABASE),
1478                                  errmsg("database \"%s\" does not exist", stmt->dbname)));
1479
1480         dboid = HeapTupleGetOid(tuple);
1481
1482         if (!pg_database_ownercheck(HeapTupleGetOid(tuple), GetUserId()))
1483                 aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_DATABASE,
1484                                            stmt->dbname);
1485
1486         /*
1487          * In order to avoid getting locked out and having to go through
1488          * standalone mode, we refuse to disallow connections to the database
1489          * we're currently connected to.  Lockout can still happen with concurrent
1490          * sessions but the likeliness of that is not high enough to worry about.
1491          */
1492         if (!dballowconnections && dboid == MyDatabaseId)
1493                 ereport(ERROR,
1494                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1495                                  errmsg("cannot disallow connections for current database")));
1496
1497         /*
1498          * Build an updated tuple, perusing the information just obtained
1499          */
1500         MemSet(new_record, 0, sizeof(new_record));
1501         MemSet(new_record_nulls, false, sizeof(new_record_nulls));
1502         MemSet(new_record_repl, false, sizeof(new_record_repl));
1503
1504         if (distemplate)
1505         {
1506                 new_record[Anum_pg_database_datistemplate - 1] = BoolGetDatum(dbistemplate);
1507                 new_record_repl[Anum_pg_database_datistemplate - 1] = true;
1508         }
1509         if (dallowconnections)
1510         {
1511                 new_record[Anum_pg_database_datallowconn - 1] = BoolGetDatum(dballowconnections);
1512                 new_record_repl[Anum_pg_database_datallowconn - 1] = true;
1513         }
1514         if (dconnlimit)
1515         {
1516                 new_record[Anum_pg_database_datconnlimit - 1] = Int32GetDatum(dbconnlimit);
1517                 new_record_repl[Anum_pg_database_datconnlimit - 1] = true;
1518         }
1519
1520         newtuple = heap_modify_tuple(tuple, RelationGetDescr(rel), new_record,
1521                                                                  new_record_nulls, new_record_repl);
1522         simple_heap_update(rel, &tuple->t_self, newtuple);
1523
1524         /* Update indexes */
1525         CatalogUpdateIndexes(rel, newtuple);
1526
1527         InvokeObjectPostAlterHook(DatabaseRelationId,
1528                                                           HeapTupleGetOid(newtuple), 0);
1529
1530         systable_endscan(scan);
1531
1532         /* Close pg_database, but keep lock till commit */
1533         heap_close(rel, NoLock);
1534
1535         return dboid;
1536 }
1537
1538
1539 /*
1540  * ALTER DATABASE name SET ...
1541  */
1542 Oid
1543 AlterDatabaseSet(AlterDatabaseSetStmt *stmt)
1544 {
1545         Oid                     datid = get_database_oid(stmt->dbname, false);
1546
1547         /*
1548          * Obtain a lock on the database and make sure it didn't go away in the
1549          * meantime.
1550          */
1551         shdepLockAndCheckObject(DatabaseRelationId, datid);
1552
1553         if (!pg_database_ownercheck(datid, GetUserId()))
1554                 aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_DATABASE,
1555                                            stmt->dbname);
1556
1557         AlterSetting(datid, InvalidOid, stmt->setstmt);
1558
1559         UnlockSharedObject(DatabaseRelationId, datid, 0, AccessShareLock);
1560
1561         return datid;
1562 }
1563
1564
1565 /*
1566  * ALTER DATABASE name OWNER TO newowner
1567  */
1568 Oid
1569 AlterDatabaseOwner(const char *dbname, Oid newOwnerId)
1570 {
1571         Oid                     db_id;
1572         HeapTuple       tuple;
1573         Relation        rel;
1574         ScanKeyData scankey;
1575         SysScanDesc scan;
1576         Form_pg_database datForm;
1577
1578         /*
1579          * Get the old tuple.  We don't need a lock on the database per se,
1580          * because we're not going to do anything that would mess up incoming
1581          * connections.
1582          */
1583         rel = heap_open(DatabaseRelationId, RowExclusiveLock);
1584         ScanKeyInit(&scankey,
1585                                 Anum_pg_database_datname,
1586                                 BTEqualStrategyNumber, F_NAMEEQ,
1587                                 NameGetDatum(dbname));
1588         scan = systable_beginscan(rel, DatabaseNameIndexId, true,
1589                                                           NULL, 1, &scankey);
1590         tuple = systable_getnext(scan);
1591         if (!HeapTupleIsValid(tuple))
1592                 ereport(ERROR,
1593                                 (errcode(ERRCODE_UNDEFINED_DATABASE),
1594                                  errmsg("database \"%s\" does not exist", dbname)));
1595
1596         db_id = HeapTupleGetOid(tuple);
1597         datForm = (Form_pg_database) GETSTRUCT(tuple);
1598
1599         /*
1600          * If the new owner is the same as the existing owner, consider the
1601          * command to have succeeded.  This is to be consistent with other
1602          * objects.
1603          */
1604         if (datForm->datdba != newOwnerId)
1605         {
1606                 Datum           repl_val[Natts_pg_database];
1607                 bool            repl_null[Natts_pg_database];
1608                 bool            repl_repl[Natts_pg_database];
1609                 Acl                *newAcl;
1610                 Datum           aclDatum;
1611                 bool            isNull;
1612                 HeapTuple       newtuple;
1613
1614                 /* Otherwise, must be owner of the existing object */
1615                 if (!pg_database_ownercheck(HeapTupleGetOid(tuple), GetUserId()))
1616                         aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_DATABASE,
1617                                                    dbname);
1618
1619                 /* Must be able to become new owner */
1620                 check_is_member_of_role(GetUserId(), newOwnerId);
1621
1622                 /*
1623                  * must have createdb rights
1624                  *
1625                  * NOTE: This is different from other alter-owner checks in that the
1626                  * current user is checked for createdb privileges instead of the
1627                  * destination owner.  This is consistent with the CREATE case for
1628                  * databases.  Because superusers will always have this right, we need
1629                  * no special case for them.
1630                  */
1631                 if (!have_createdb_privilege())
1632                         ereport(ERROR,
1633                                         (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
1634                                    errmsg("permission denied to change owner of database")));
1635
1636                 memset(repl_null, false, sizeof(repl_null));
1637                 memset(repl_repl, false, sizeof(repl_repl));
1638
1639                 repl_repl[Anum_pg_database_datdba - 1] = true;
1640                 repl_val[Anum_pg_database_datdba - 1] = ObjectIdGetDatum(newOwnerId);
1641
1642                 /*
1643                  * Determine the modified ACL for the new owner.  This is only
1644                  * necessary when the ACL is non-null.
1645                  */
1646                 aclDatum = heap_getattr(tuple,
1647                                                                 Anum_pg_database_datacl,
1648                                                                 RelationGetDescr(rel),
1649                                                                 &isNull);
1650                 if (!isNull)
1651                 {
1652                         newAcl = aclnewowner(DatumGetAclP(aclDatum),
1653                                                                  datForm->datdba, newOwnerId);
1654                         repl_repl[Anum_pg_database_datacl - 1] = true;
1655                         repl_val[Anum_pg_database_datacl - 1] = PointerGetDatum(newAcl);
1656                 }
1657
1658                 newtuple = heap_modify_tuple(tuple, RelationGetDescr(rel), repl_val, repl_null, repl_repl);
1659                 simple_heap_update(rel, &newtuple->t_self, newtuple);
1660                 CatalogUpdateIndexes(rel, newtuple);
1661
1662                 heap_freetuple(newtuple);
1663
1664                 /* Update owner dependency reference */
1665                 changeDependencyOnOwner(DatabaseRelationId, HeapTupleGetOid(tuple),
1666                                                                 newOwnerId);
1667         }
1668
1669         InvokeObjectPostAlterHook(DatabaseRelationId, HeapTupleGetOid(tuple), 0);
1670
1671         systable_endscan(scan);
1672
1673         /* Close pg_database, but keep lock till commit */
1674         heap_close(rel, NoLock);
1675
1676         return db_id;
1677 }
1678
1679
1680 /*
1681  * Helper functions
1682  */
1683
1684 /*
1685  * Look up info about the database named "name".  If the database exists,
1686  * obtain the specified lock type on it, fill in any of the remaining
1687  * parameters that aren't NULL, and return TRUE.  If no such database,
1688  * return FALSE.
1689  */
1690 static bool
1691 get_db_info(const char *name, LOCKMODE lockmode,
1692                         Oid *dbIdP, Oid *ownerIdP,
1693                         int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP,
1694                         Oid *dbLastSysOidP, TransactionId *dbFrozenXidP,
1695                         MultiXactId *dbMinMultiP,
1696                         Oid *dbTablespace, char **dbCollate, char **dbCtype)
1697 {
1698         bool            result = false;
1699         Relation        relation;
1700
1701         AssertArg(name);
1702
1703         /* Caller may wish to grab a better lock on pg_database beforehand... */
1704         relation = heap_open(DatabaseRelationId, AccessShareLock);
1705
1706         /*
1707          * Loop covers the rare case where the database is renamed before we can
1708          * lock it.  We try again just in case we can find a new one of the same
1709          * name.
1710          */
1711         for (;;)
1712         {
1713                 ScanKeyData scanKey;
1714                 SysScanDesc scan;
1715                 HeapTuple       tuple;
1716                 Oid                     dbOid;
1717
1718                 /*
1719                  * there's no syscache for database-indexed-by-name, so must do it the
1720                  * hard way
1721                  */
1722                 ScanKeyInit(&scanKey,
1723                                         Anum_pg_database_datname,
1724                                         BTEqualStrategyNumber, F_NAMEEQ,
1725                                         NameGetDatum(name));
1726
1727                 scan = systable_beginscan(relation, DatabaseNameIndexId, true,
1728                                                                   NULL, 1, &scanKey);
1729
1730                 tuple = systable_getnext(scan);
1731
1732                 if (!HeapTupleIsValid(tuple))
1733                 {
1734                         /* definitely no database of that name */
1735                         systable_endscan(scan);
1736                         break;
1737                 }
1738
1739                 dbOid = HeapTupleGetOid(tuple);
1740
1741                 systable_endscan(scan);
1742
1743                 /*
1744                  * Now that we have a database OID, we can try to lock the DB.
1745                  */
1746                 if (lockmode != NoLock)
1747                         LockSharedObject(DatabaseRelationId, dbOid, 0, lockmode);
1748
1749                 /*
1750                  * And now, re-fetch the tuple by OID.  If it's still there and still
1751                  * the same name, we win; else, drop the lock and loop back to try
1752                  * again.
1753                  */
1754                 tuple = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(dbOid));
1755                 if (HeapTupleIsValid(tuple))
1756                 {
1757                         Form_pg_database dbform = (Form_pg_database) GETSTRUCT(tuple);
1758
1759                         if (strcmp(name, NameStr(dbform->datname)) == 0)
1760                         {
1761                                 /* oid of the database */
1762                                 if (dbIdP)
1763                                         *dbIdP = dbOid;
1764                                 /* oid of the owner */
1765                                 if (ownerIdP)
1766                                         *ownerIdP = dbform->datdba;
1767                                 /* character encoding */
1768                                 if (encodingP)
1769                                         *encodingP = dbform->encoding;
1770                                 /* allowed as template? */
1771                                 if (dbIsTemplateP)
1772                                         *dbIsTemplateP = dbform->datistemplate;
1773                                 /* allowing connections? */
1774                                 if (dbAllowConnP)
1775                                         *dbAllowConnP = dbform->datallowconn;
1776                                 /* last system OID used in database */
1777                                 if (dbLastSysOidP)
1778                                         *dbLastSysOidP = dbform->datlastsysoid;
1779                                 /* limit of frozen XIDs */
1780                                 if (dbFrozenXidP)
1781                                         *dbFrozenXidP = dbform->datfrozenxid;
1782                                 /* minimum MultixactId */
1783                                 if (dbMinMultiP)
1784                                         *dbMinMultiP = dbform->datminmxid;
1785                                 /* default tablespace for this database */
1786                                 if (dbTablespace)
1787                                         *dbTablespace = dbform->dattablespace;
1788                                 /* default locale settings for this database */
1789                                 if (dbCollate)
1790                                         *dbCollate = pstrdup(NameStr(dbform->datcollate));
1791                                 if (dbCtype)
1792                                         *dbCtype = pstrdup(NameStr(dbform->datctype));
1793                                 ReleaseSysCache(tuple);
1794                                 result = true;
1795                                 break;
1796                         }
1797                         /* can only get here if it was just renamed */
1798                         ReleaseSysCache(tuple);
1799                 }
1800
1801                 if (lockmode != NoLock)
1802                         UnlockSharedObject(DatabaseRelationId, dbOid, 0, lockmode);
1803         }
1804
1805         heap_close(relation, AccessShareLock);
1806
1807         return result;
1808 }
1809
1810 /* Check if current user has createdb privileges */
1811 static bool
1812 have_createdb_privilege(void)
1813 {
1814         bool            result = false;
1815         HeapTuple       utup;
1816
1817         /* Superusers can always do everything */
1818         if (superuser())
1819                 return true;
1820
1821         utup = SearchSysCache1(AUTHOID, ObjectIdGetDatum(GetUserId()));
1822         if (HeapTupleIsValid(utup))
1823         {
1824                 result = ((Form_pg_authid) GETSTRUCT(utup))->rolcreatedb;
1825                 ReleaseSysCache(utup);
1826         }
1827         return result;
1828 }
1829
1830 /*
1831  * Remove tablespace directories
1832  *
1833  * We don't know what tablespaces db_id is using, so iterate through all
1834  * tablespaces removing <tablespace>/db_id
1835  */
1836 static void
1837 remove_dbtablespaces(Oid db_id)
1838 {
1839         Relation        rel;
1840         HeapScanDesc scan;
1841         HeapTuple       tuple;
1842
1843         rel = heap_open(TableSpaceRelationId, AccessShareLock);
1844         scan = heap_beginscan_catalog(rel, 0, NULL);
1845         while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1846         {
1847                 Oid                     dsttablespace = HeapTupleGetOid(tuple);
1848                 char       *dstpath;
1849                 struct stat st;
1850
1851                 /* Don't mess with the global tablespace */
1852                 if (dsttablespace == GLOBALTABLESPACE_OID)
1853                         continue;
1854
1855                 dstpath = GetDatabasePath(db_id, dsttablespace);
1856
1857                 if (lstat(dstpath, &st) < 0 || !S_ISDIR(st.st_mode))
1858                 {
1859                         /* Assume we can ignore it */
1860                         pfree(dstpath);
1861                         continue;
1862                 }
1863
1864                 if (!rmtree(dstpath, true))
1865                         ereport(WARNING,
1866                                         (errmsg("some useless files may be left behind in old database directory \"%s\"",
1867                                                         dstpath)));
1868
1869                 /* Record the filesystem change in XLOG */
1870                 {
1871                         xl_dbase_drop_rec xlrec;
1872                         XLogRecData rdata[1];
1873
1874                         xlrec.db_id = db_id;
1875                         xlrec.tablespace_id = dsttablespace;
1876
1877                         rdata[0].data = (char *) &xlrec;
1878                         rdata[0].len = sizeof(xl_dbase_drop_rec);
1879                         rdata[0].buffer = InvalidBuffer;
1880                         rdata[0].next = NULL;
1881
1882                         (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_DROP, rdata);
1883                 }
1884
1885                 pfree(dstpath);
1886         }
1887
1888         heap_endscan(scan);
1889         heap_close(rel, AccessShareLock);
1890 }
1891
1892 /*
1893  * Check for existing files that conflict with a proposed new DB OID;
1894  * return TRUE if there are any
1895  *
1896  * If there were a subdirectory in any tablespace matching the proposed new
1897  * OID, we'd get a create failure due to the duplicate name ... and then we'd
1898  * try to remove that already-existing subdirectory during the cleanup in
1899  * remove_dbtablespaces.  Nuking existing files seems like a bad idea, so
1900  * instead we make this extra check before settling on the OID of the new
1901  * database.  This exactly parallels what GetNewRelFileNode() does for table
1902  * relfilenode values.
1903  */
1904 static bool
1905 check_db_file_conflict(Oid db_id)
1906 {
1907         bool            result = false;
1908         Relation        rel;
1909         HeapScanDesc scan;
1910         HeapTuple       tuple;
1911
1912         rel = heap_open(TableSpaceRelationId, AccessShareLock);
1913         scan = heap_beginscan_catalog(rel, 0, NULL);
1914         while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1915         {
1916                 Oid                     dsttablespace = HeapTupleGetOid(tuple);
1917                 char       *dstpath;
1918                 struct stat st;
1919
1920                 /* Don't mess with the global tablespace */
1921                 if (dsttablespace == GLOBALTABLESPACE_OID)
1922                         continue;
1923
1924                 dstpath = GetDatabasePath(db_id, dsttablespace);
1925
1926                 if (lstat(dstpath, &st) == 0)
1927                 {
1928                         /* Found a conflicting file (or directory, whatever) */
1929                         pfree(dstpath);
1930                         result = true;
1931                         break;
1932                 }
1933
1934                 pfree(dstpath);
1935         }
1936
1937         heap_endscan(scan);
1938         heap_close(rel, AccessShareLock);
1939
1940         return result;
1941 }
1942
1943 /*
1944  * Issue a suitable errdetail message for a busy database
1945  */
1946 static int
1947 errdetail_busy_db(int notherbackends, int npreparedxacts)
1948 {
1949         if (notherbackends > 0 && npreparedxacts > 0)
1950
1951                 /*
1952                  * We don't deal with singular versus plural here, since gettext
1953                  * doesn't support multiple plurals in one string.
1954                  */
1955                 errdetail("There are %d other session(s) and %d prepared transaction(s) using the database.",
1956                                   notherbackends, npreparedxacts);
1957         else if (notherbackends > 0)
1958                 errdetail_plural("There is %d other session using the database.",
1959                                                  "There are %d other sessions using the database.",
1960                                                  notherbackends,
1961                                                  notherbackends);
1962         else
1963                 errdetail_plural("There is %d prepared transaction using the database.",
1964                                         "There are %d prepared transactions using the database.",
1965                                                  npreparedxacts,
1966                                                  npreparedxacts);
1967         return 0;                                       /* just to keep ereport macro happy */
1968 }
1969
1970 /*
1971  * get_database_oid - given a database name, look up the OID
1972  *
1973  * If missing_ok is false, throw an error if database name not found.  If
1974  * true, just return InvalidOid.
1975  */
1976 Oid
1977 get_database_oid(const char *dbname, bool missing_ok)
1978 {
1979         Relation        pg_database;
1980         ScanKeyData entry[1];
1981         SysScanDesc scan;
1982         HeapTuple       dbtuple;
1983         Oid                     oid;
1984
1985         /*
1986          * There's no syscache for pg_database indexed by name, so we must look
1987          * the hard way.
1988          */
1989         pg_database = heap_open(DatabaseRelationId, AccessShareLock);
1990         ScanKeyInit(&entry[0],
1991                                 Anum_pg_database_datname,
1992                                 BTEqualStrategyNumber, F_NAMEEQ,
1993                                 CStringGetDatum(dbname));
1994         scan = systable_beginscan(pg_database, DatabaseNameIndexId, true,
1995                                                           NULL, 1, entry);
1996
1997         dbtuple = systable_getnext(scan);
1998
1999         /* We assume that there can be at most one matching tuple */
2000         if (HeapTupleIsValid(dbtuple))
2001                 oid = HeapTupleGetOid(dbtuple);
2002         else
2003                 oid = InvalidOid;
2004
2005         systable_endscan(scan);
2006         heap_close(pg_database, AccessShareLock);
2007
2008         if (!OidIsValid(oid) && !missing_ok)
2009                 ereport(ERROR,
2010                                 (errcode(ERRCODE_UNDEFINED_DATABASE),
2011                                  errmsg("database \"%s\" does not exist",
2012                                                 dbname)));
2013
2014         return oid;
2015 }
2016
2017
2018 /*
2019  * get_database_name - given a database OID, look up the name
2020  *
2021  * Returns a palloc'd string, or NULL if no such database.
2022  */
2023 char *
2024 get_database_name(Oid dbid)
2025 {
2026         HeapTuple       dbtuple;
2027         char       *result;
2028
2029         dbtuple = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(dbid));
2030         if (HeapTupleIsValid(dbtuple))
2031         {
2032                 result = pstrdup(NameStr(((Form_pg_database) GETSTRUCT(dbtuple))->datname));
2033                 ReleaseSysCache(dbtuple);
2034         }
2035         else
2036                 result = NULL;
2037
2038         return result;
2039 }
2040
2041 /*
2042  * DATABASE resource manager's routines
2043  */
2044 void
2045 dbase_redo(XLogRecPtr lsn, XLogRecord *record)
2046 {
2047         uint8           info = record->xl_info & ~XLR_INFO_MASK;
2048
2049         /* Backup blocks are not used in dbase records */
2050         Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
2051
2052         if (info == XLOG_DBASE_CREATE)
2053         {
2054                 xl_dbase_create_rec *xlrec = (xl_dbase_create_rec *) XLogRecGetData(record);
2055                 char       *src_path;
2056                 char       *dst_path;
2057                 struct stat st;
2058
2059                 src_path = GetDatabasePath(xlrec->src_db_id, xlrec->src_tablespace_id);
2060                 dst_path = GetDatabasePath(xlrec->db_id, xlrec->tablespace_id);
2061
2062                 /*
2063                  * Our theory for replaying a CREATE is to forcibly drop the target
2064                  * subdirectory if present, then re-copy the source data. This may be
2065                  * more work than needed, but it is simple to implement.
2066                  */
2067                 if (stat(dst_path, &st) == 0 && S_ISDIR(st.st_mode))
2068                 {
2069                         if (!rmtree(dst_path, true))
2070                                 /* If this failed, copydir() below is going to error. */
2071                                 ereport(WARNING,
2072                                                 (errmsg("some useless files may be left behind in old database directory \"%s\"",
2073                                                                 dst_path)));
2074                 }
2075
2076                 /*
2077                  * Force dirty buffers out to disk, to ensure source database is
2078                  * up-to-date for the copy.
2079                  */
2080                 FlushDatabaseBuffers(xlrec->src_db_id);
2081
2082                 /*
2083                  * Copy this subdirectory to the new location
2084                  *
2085                  * We don't need to copy subdirectories
2086                  */
2087                 copydir(src_path, dst_path, false);
2088         }
2089         else if (info == XLOG_DBASE_DROP)
2090         {
2091                 xl_dbase_drop_rec *xlrec = (xl_dbase_drop_rec *) XLogRecGetData(record);
2092                 char       *dst_path;
2093
2094                 dst_path = GetDatabasePath(xlrec->db_id, xlrec->tablespace_id);
2095
2096                 if (InHotStandby)
2097                 {
2098                         /*
2099                          * Lock database while we resolve conflicts to ensure that
2100                          * InitPostgres() cannot fully re-execute concurrently. This
2101                          * avoids backends re-connecting automatically to same database,
2102                          * which can happen in some cases.
2103                          */
2104                         LockSharedObjectForSession(DatabaseRelationId, xlrec->db_id, 0, AccessExclusiveLock);
2105                         ResolveRecoveryConflictWithDatabase(xlrec->db_id);
2106                 }
2107
2108                 /* Drop pages for this database that are in the shared buffer cache */
2109                 DropDatabaseBuffers(xlrec->db_id);
2110
2111                 /* Also, clean out any fsync requests that might be pending in md.c */
2112                 ForgetDatabaseFsyncRequests(xlrec->db_id);
2113
2114                 /* Clean out the xlog relcache too */
2115                 XLogDropDatabase(xlrec->db_id);
2116
2117                 /* And remove the physical files */
2118                 if (!rmtree(dst_path, true))
2119                         ereport(WARNING,
2120                                         (errmsg("some useless files may be left behind in old database directory \"%s\"",
2121                                                         dst_path)));
2122
2123                 if (InHotStandby)
2124                 {
2125                         /*
2126                          * Release locks prior to commit. XXX There is a race condition
2127                          * here that may allow backends to reconnect, but the window for
2128                          * this is small because the gap between here and commit is mostly
2129                          * fairly small and it is unlikely that people will be dropping
2130                          * databases that we are trying to connect to anyway.
2131                          */
2132                         UnlockSharedObjectForSession(DatabaseRelationId, xlrec->db_id, 0, AccessExclusiveLock);
2133                 }
2134         }
2135         else
2136                 elog(PANIC, "dbase_redo: unknown op code %u", info);
2137 }