Commit 04011cc9 authored by Tom Lane

Allow backends to start up without use of the flat-file copy of pg_database.

To make this work in the base case, pg_database now has a nailed-in-cache
relation descriptor that is initialized using hardwired knowledge in
relcache.c.  This means pg_database is added to the set of relations that
need to have a Schema_pg_xxx macro maintained in pg_attribute.h.  When this
fallback path is taken (that is, when no usable shared relcache init file is
available), we'll have to do a seqscan of pg_database to find the row we need.

In the normal case, we are able to do an indexscan to find the database's row
by name.  This is made possible by storing a global relcache init file that
describes only the shared catalogs and their indexes (and therefore is usable
by all backends in any database).  A new backend loads this cache file,
finds its database OID after an indexscan on pg_database, and then loads
the local relcache init file for that database.

This change should effectively eliminate the number of databases as a factor
in backend startup time, even with large numbers of databases.  However,
the real reason for doing it is as a first step towards getting rid of
the flat files altogether.  There are still several other sub-projects
to be tackled before that can happen.
parent a1f0c9ba
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.347 2009/08/08 16:39:17 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.348 2009/08/12 20:53:30 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -5249,6 +5249,16 @@ StartupXLOG(void)
*/
ValidateXLOGDirectoryStructure();
/*
* Clear out any old relcache cache files. This is *necessary* if we
* do any WAL replay, since that would probably result in the cache files
* being out of sync with database reality. In theory we could leave
* them in place if the database had been cleanly shut down, but it
* seems safest to just remove them always and let them be rebuilt
* during the first backend startup.
*/
RelationCacheInitFileRemove();
/*
* Initialize on the assumption we want to recover to the same timeline
* that's active according to pg_control.
......
......@@ -55,7 +55,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/postmaster/autovacuum.c,v 1.100 2009/07/31 20:26:22 tgl Exp $
* $PostgreSQL: pgsql/src/backend/postmaster/autovacuum.c,v 1.101 2009/08/12 20:53:30 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -1602,7 +1602,7 @@ AutoVacWorkerMain(int argc, char *argv[])
if (OidIsValid(dbid))
{
char *dbname;
char dbname[NAMEDATALEN];
/*
* Report autovac startup to the stats collector. We deliberately do
......@@ -1620,7 +1620,7 @@ AutoVacWorkerMain(int argc, char *argv[])
* Note: if we have selected a just-deleted database (due to using
* stale stats info), we'll fail and exit here.
*/
InitPostgres(NULL, dbid, NULL, &dbname);
InitPostgres(NULL, dbid, NULL, dbname);
SetProcessingMode(NormalProcessing);
set_ps_display(dbname, false);
ereport(DEBUG1,
......
......@@ -13,7 +13,7 @@
*
* Copyright (c) 2001-2009, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.189 2009/06/11 14:49:01 momjian Exp $
* $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.190 2009/08/12 20:53:30 tgl Exp $
* ----------
*/
#include "postgres.h"
......@@ -2138,6 +2138,7 @@ CreateSharedBackendStatus(void)
* Called from InitPostgres. MyBackendId must be set,
* but we must not have started any transaction yet (since the
* exit hook must run after the last transaction exit).
* NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
* ----------
*/
void
......@@ -2232,7 +2233,14 @@ pgstat_beshutdown_hook(int code, Datum arg)
{
volatile PgBackendStatus *beentry = MyBEEntry;
pgstat_report_stat(true);
/*
* If we got as far as discovering our own database ID, we can report
* what we did to the collector. Otherwise, we'd be sending an invalid
* database ID, so forget it. (This means that accesses to pg_database
* during failed backend starts might never get counted.)
*/
if (OidIsValid(MyDatabaseId))
pgstat_report_stat(true);
/*
* Clear my status entry, following the protocol of bumping st_changecount
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.207 2009/06/11 14:49:02 momjian Exp $
* $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.208 2009/08/12 20:53:30 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -332,22 +332,14 @@ InitProcess(void)
* InitProcessPhase2 -- make MyProc visible in the shared ProcArray.
*
* This is separate from InitProcess because we can't acquire LWLocks until
* we've created a PGPROC, but in the EXEC_BACKEND case there is a good deal
* of stuff to be done before this step that will require LWLock access.
* we've created a PGPROC, but in the EXEC_BACKEND case ProcArrayAdd won't
* work until after we've done CreateSharedMemoryAndSemaphores.
*/
void
InitProcessPhase2(void)
{
Assert(MyProc != NULL);
/*
* We should now know what database we're in, so advertise that. (We need
* not do any locking here, since no other backend can yet see our
* PGPROC.)
*/
Assert(OidIsValid(MyDatabaseId));
MyProc->databaseId = MyDatabaseId;
/*
* Add our PGPROC to the PGPROC array in shared memory.
*/
......
......@@ -8,14 +8,15 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.288 2009/07/29 20:56:19 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.289 2009/08/12 20:53:30 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/*
* INTERFACE ROUTINES
* RelationCacheInitialize - initialize relcache (to empty)
* RelationCacheInitializePhase2 - finish initializing relcache
* RelationCacheInitializePhase2 - initialize shared-catalog entries
* RelationCacheInitializePhase3 - finish initializing relcache
* RelationIdGetRelation - get a reldesc by relation id
* RelationClose - close an open relation
*
......@@ -30,7 +31,6 @@
#include <unistd.h>
#include "access/genam.h"
#include "access/heapam.h"
#include "access/reloptions.h"
#include "access/sysattr.h"
#include "access/xact.h"
......@@ -43,10 +43,12 @@
#include "catalog/pg_attrdef.h"
#include "catalog/pg_authid.h"
#include "catalog/pg_constraint.h"
#include "catalog/pg_database.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_opclass.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_rewrite.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_type.h"
#include "commands/trigger.h"
#include "miscadmin.h"
......@@ -70,20 +72,21 @@
/*
* name of relcache init file, used to speed up backend startup
* name of relcache init file(s), used to speed up backend startup
*/
#define RELCACHE_INIT_FILENAME "pg_internal.init"
#define RELCACHE_INIT_FILEMAGIC 0x573264 /* version ID value */
#define RELCACHE_INIT_FILEMAGIC 0x573265 /* version ID value */
/*
* hardcoded tuple descriptors. see include/catalog/pg_attribute.h
*/
static FormData_pg_attribute Desc_pg_class[Natts_pg_class] = {Schema_pg_class};
static FormData_pg_attribute Desc_pg_attribute[Natts_pg_attribute] = {Schema_pg_attribute};
static FormData_pg_attribute Desc_pg_proc[Natts_pg_proc] = {Schema_pg_proc};
static FormData_pg_attribute Desc_pg_type[Natts_pg_type] = {Schema_pg_type};
static FormData_pg_attribute Desc_pg_index[Natts_pg_index] = {Schema_pg_index};
static const FormData_pg_attribute Desc_pg_class[Natts_pg_class] = {Schema_pg_class};
static const FormData_pg_attribute Desc_pg_attribute[Natts_pg_attribute] = {Schema_pg_attribute};
static const FormData_pg_attribute Desc_pg_proc[Natts_pg_proc] = {Schema_pg_proc};
static const FormData_pg_attribute Desc_pg_type[Natts_pg_type] = {Schema_pg_type};
static const FormData_pg_attribute Desc_pg_database[Natts_pg_database] = {Schema_pg_database};
static const FormData_pg_attribute Desc_pg_index[Natts_pg_index] = {Schema_pg_index};
/*
* Hash tables that index the relation cache
......@@ -105,6 +108,12 @@ static HTAB *RelationIdCache;
*/
bool criticalRelcachesBuilt = false;
/*
* This flag is false until we have prepared the critical relcache entries
* for shared catalogs (specifically, pg_database and its indexes).
*/
bool criticalSharedRelcachesBuilt = false;
/*
* This counter counts relcache inval events received since backend startup
* (but only for rels that are actually in cache). Presently, we use it only
......@@ -114,8 +123,10 @@ bool criticalRelcachesBuilt = false;
static long relcacheInvalsReceived = 0L;
/*
* This list remembers the OIDs of the relations cached in the relcache
* init file.
* This list remembers the OIDs of the non-shared relations cached in the
* database's local relcache init file. Note that there is no corresponding
* list for the shared relcache init file, for reasons explained in the
* comments for RelationCacheInitFileRemove.
*/
static List *initFileRelationIds = NIL;
......@@ -188,12 +199,12 @@ static void RelationClearRelation(Relation relation, bool rebuild);
static void RelationReloadIndexInfo(Relation relation);
static void RelationFlushRelation(Relation relation);
static bool load_relcache_init_file(void);
static void write_relcache_init_file(void);
static bool load_relcache_init_file(bool shared);
static void write_relcache_init_file(bool shared);
static void write_item(const void *data, Size len, FILE *fp);
static void formrdesc(const char *relationName, Oid relationReltype,
bool hasoids, int natts, FormData_pg_attribute *att);
static void formrdesc(const char *relationName, bool isshared,
bool hasoids, int natts, const FormData_pg_attribute *attrs);
static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK);
static Relation AllocateRelationDesc(Relation relation, Form_pg_class relp);
......@@ -201,6 +212,7 @@ static void RelationParseRelOptions(Relation relation, HeapTuple tuple);
static void RelationBuildTupleDesc(Relation relation);
static Relation RelationBuildDesc(Oid targetRelId, Relation oldrelation);
static void RelationInitPhysicalAddr(Relation relation);
static void load_critical_index(Oid indexoid);
static TupleDesc GetPgClassDescriptor(void);
static TupleDesc GetPgIndexDescriptor(void);
static void AttrDefaultFetch(Relation relation);
......@@ -217,6 +229,8 @@ static void IndexSupportInitialize(oidvector *indclass,
static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
StrategyNumber numStrats,
StrategyNumber numSupport);
static void RelationCacheInitFileRemoveInDir(const char *tblspcpath);
static void unlink_initfile(const char *initfilename);
/*
......@@ -239,6 +253,15 @@ ScanPgRelation(Oid targetRelId, bool indexOK)
SysScanDesc pg_class_scan;
ScanKeyData key[1];
/*
* If something goes wrong during backend startup, we might find ourselves
* trying to read pg_class before we've selected a database. That ain't
* gonna work, so bail out with a useful error message. If this happens,
* it probably means a relcache entry that needs to be nailed isn't.
*/
if (!OidIsValid(MyDatabaseId))
elog(FATAL, "cannot read pg_class without having selected a database");
/*
* form a scan key
*/
......@@ -1332,24 +1355,27 @@ LookupOpclassInfo(Oid operatorClassOid,
/*
* formrdesc
*
* This is a special cut-down version of RelationBuildDesc()
* used by RelationCacheInitializePhase2() in initializing the relcache.
* This is a special cut-down version of RelationBuildDesc(),
* used while initializing the relcache.
* The relation descriptor is built just from the supplied parameters,
* without actually looking at any system table entries. We cheat
* quite a lot since we only need to work for a few basic system
* catalogs.
*
* formrdesc is currently used for: pg_class, pg_attribute, pg_proc,
* and pg_type (see RelationCacheInitializePhase2).
* formrdesc is currently used for: pg_database, pg_class, pg_attribute,
* pg_proc, and pg_type (see RelationCacheInitializePhase2/3).
*
* Note that these catalogs can't have constraints (except attnotnull),
* default values, rules, or triggers, since we don't cope with any of that.
* (Well, actually, this only matters for properties that need to be valid
* during bootstrap or before RelationCacheInitializePhase3 runs, and none of
* these properties matter then...)
*
* NOTE: we assume we are already switched into CacheMemoryContext.
*/
static void
formrdesc(const char *relationName, Oid relationReltype,
bool hasoids, int natts, FormData_pg_attribute *att)
formrdesc(const char *relationName, bool isshared,
bool hasoids, int natts, const FormData_pg_attribute *attrs)
{
Relation relation;
int i;
......@@ -1385,21 +1411,21 @@ formrdesc(const char *relationName, Oid relationReltype,
* initialize relation tuple form
*
* The data we insert here is pretty incomplete/bogus, but it'll serve to
* get us launched. RelationCacheInitializePhase2() will read the real
* get us launched. RelationCacheInitializePhase3() will read the real
* data from pg_class and replace what we've done here.
*/
relation->rd_rel = (Form_pg_class) palloc0(CLASS_TUPLE_SIZE);
namestrcpy(&relation->rd_rel->relname, relationName);
relation->rd_rel->relnamespace = PG_CATALOG_NAMESPACE;
relation->rd_rel->reltype = relationReltype;
/*
* It's important to distinguish between shared and non-shared relations,
* even at bootstrap time, to make sure we know where they are stored. At
* present, all relations that formrdesc is used for are not shared.
* even at bootstrap time, to make sure we know where they are stored.
*/
relation->rd_rel->relisshared = false;
relation->rd_rel->relisshared = isshared;
if (isshared)
relation->rd_rel->reltablespace = GLOBALTABLESPACE_OID;
/*
* Likewise, we must know if a relation is temp ... but formrdesc is not
......@@ -1423,9 +1449,6 @@ formrdesc(const char *relationName, Oid relationReltype,
relation->rd_att = CreateTemplateTupleDesc(natts, hasoids);
relation->rd_att->tdrefcount = 1; /* mark as refcounted */
relation->rd_att->tdtypeid = relationReltype;
relation->rd_att->tdtypmod = -1; /* unnecessary, but... */
/*
* initialize tuple desc info
*/
......@@ -1433,9 +1456,9 @@ formrdesc(const char *relationName, Oid relationReltype,
for (i = 0; i < natts; i++)
{
memcpy(relation->rd_att->attrs[i],
&att[i],
&attrs[i],
ATTRIBUTE_FIXED_PART_SIZE);
has_not_null |= att[i].attnotnull;
has_not_null |= attrs[i].attnotnull;
/* make sure attcacheoff is valid */
relation->rd_att->attrs[i]->attcacheoff = -1;
}
......@@ -1636,6 +1659,31 @@ RelationReloadIndexInfo(Relation relation)
/* Should be closed at smgr level */
Assert(relation->rd_smgr == NULL);
/*
* Must reset targblock, fsm_nblocks and vm_nblocks in case rel was
* truncated
*/
relation->rd_targblock = InvalidBlockNumber;
relation->rd_fsm_nblocks = InvalidBlockNumber;
relation->rd_vm_nblocks = InvalidBlockNumber;
/* Must free any AM cached data, too */
if (relation->rd_amcache)
pfree(relation->rd_amcache);
relation->rd_amcache = NULL;
/*
* If it's a shared index, we might be called before backend startup
* has finished selecting a database, in which case we have no way to
* read pg_class yet. However, a shared index can never have any
* significant schema updates, so it's okay to ignore the invalidation
* signal. Just mark it valid and return without doing anything more.
*/
if (relation->rd_rel->relisshared && !criticalRelcachesBuilt)
{
relation->rd_isvalid = true;
return;
}
/*
* Read the pg_class row
*
......@@ -1658,18 +1706,6 @@ RelationReloadIndexInfo(Relation relation)
/* We must recalculate physical address in case it changed */
RelationInitPhysicalAddr(relation);
/*
* Must reset targblock, fsm_nblocks and vm_nblocks in case rel was
* truncated
*/
relation->rd_targblock = InvalidBlockNumber;
relation->rd_fsm_nblocks = InvalidBlockNumber;
relation->rd_vm_nblocks = InvalidBlockNumber;
/* Must free any AM cached data, too */
if (relation->rd_amcache)
pfree(relation->rd_amcache);
relation->rd_amcache = NULL;
/*
* For a non-system index, there are fields of the pg_index row that are
* allowed to change, so re-read that row and update the relcache entry.
......@@ -2304,10 +2340,12 @@ RelationBuildLocalRelation(const char *relname,
/*
* check for creation of a rel that must be nailed in cache.
*
* XXX this list had better match RelationCacheInitializePhase2's list.
* XXX this list had better match the relations specially handled in
* RelationCacheInitializePhase2/3.
*/
switch (relid)
{
case DatabaseRelationId:
case RelationRelationId:
case AttributeRelationId:
case ProcedureRelationId:
......@@ -2489,23 +2527,65 @@ RelationCacheInitialize(void)
/*
* RelationCacheInitializePhase2
*
* This is called as soon as the catcache and transaction system
* are functional. At this point we can actually read data from
* the system catalogs. We first try to read pre-computed relcache
* entries from the pg_internal.init file. If that's missing or
* broken, make phony entries for the minimum set of nailed-in-cache
* relations. Then (unless bootstrapping) make sure we have entries
* for the critical system indexes. Once we've done all this, we
* have enough infrastructure to open any system catalog or use any
* catcache. The last step is to rewrite pg_internal.init if needed.
* This is called to prepare for access to pg_database during startup.
* We must at least set up a nailed reldesc for pg_database. Ideally
* we'd like to have reldescs for its indexes, too. We attempt to
* load this information from the shared relcache init file. If that's
* missing or broken, just make a phony entry for pg_database.
* RelationCacheInitializePhase3 will clean up as needed.
*/
void
RelationCacheInitializePhase2(void)
{
MemoryContext oldcxt;
/*
* In bootstrap mode, pg_database isn't there yet anyway, so do nothing.
*/
if (IsBootstrapProcessingMode())
return;
/*
* switch to cache memory context
*/
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
/*
* Try to load the shared relcache cache file. If unsuccessful,
* bootstrap the cache with a pre-made descriptor for pg_database.
*/
if (!load_relcache_init_file(true))
{
formrdesc("pg_database", true,
true, Natts_pg_database, Desc_pg_database);
#define NUM_CRITICAL_SHARED_RELS 1 /* fix if you change list above */
}
MemoryContextSwitchTo(oldcxt);
}
/*
* RelationCacheInitializePhase3
*
* This is called as soon as the catcache and transaction system
* are functional and we have determined MyDatabaseId. At this point
* we can actually read data from the database's system catalogs.
* We first try to read pre-computed relcache entries from the local
* relcache init file. If that's missing or broken, make phony entries
* for the minimum set of nailed-in-cache relations. Then (unless
* bootstrapping) make sure we have entries for the critical system
* indexes. Once we've done all this, we have enough infrastructure to
* open any system catalog or use any catcache. The last step is to
* rewrite the cache files if needed.
*/
void
RelationCacheInitializePhase3(void)
{
HASH_SEQ_STATUS status;
RelIdCacheEnt *idhentry;
MemoryContext oldcxt;
bool needNewCacheFile = false;
bool needNewCacheFile = !criticalSharedRelcachesBuilt;
/*
* switch to cache memory context
......@@ -2513,25 +2593,25 @@ RelationCacheInitializePhase2(void)
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
/*
* Try to load the relcache cache file. If unsuccessful, bootstrap the
* cache with pre-made descriptors for the critical "nailed-in" system
* catalogs.
* Try to load the local relcache cache file. If unsuccessful,
* bootstrap the cache with pre-made descriptors for the critical
* "nailed-in" system catalogs.
*/
if (IsBootstrapProcessingMode() ||
!load_relcache_init_file())
!load_relcache_init_file(false))
{
needNewCacheFile = true;
formrdesc("pg_class", PG_CLASS_RELTYPE_OID,
formrdesc("pg_class", false,
true, Natts_pg_class, Desc_pg_class);
formrdesc("pg_attribute", PG_ATTRIBUTE_RELTYPE_OID,
formrdesc("pg_attribute", false,
false, Natts_pg_attribute, Desc_pg_attribute);
formrdesc("pg_proc", PG_PROC_RELTYPE_OID,
formrdesc("pg_proc", false,
true, Natts_pg_proc, Desc_pg_proc);
formrdesc("pg_type", PG_TYPE_RELTYPE_OID,
formrdesc("pg_type", false,
true, Natts_pg_type, Desc_pg_type);
#define NUM_CRITICAL_RELS 4 /* fix if you change list above */
#define NUM_CRITICAL_LOCAL_RELS 4 /* fix if you change list above */
}
MemoryContextSwitchTo(oldcxt);
......@@ -2567,35 +2647,39 @@ RelationCacheInitializePhase2(void)
*/
if (!criticalRelcachesBuilt)
{
Relation ird;
#define LOAD_CRIT_INDEX(indexoid) \
do { \
LockRelationOid(indexoid, AccessShareLock); \
ird = RelationBuildDesc(indexoid, NULL); \
if (ird == NULL) \
elog(PANIC, "could not open critical system index %u", \
indexoid); \
ird->rd_isnailed = true; \
ird->rd_refcnt = 1; \
UnlockRelationOid(indexoid, AccessShareLock); \
} while (0)
LOAD_CRIT_INDEX(ClassOidIndexId);
LOAD_CRIT_INDEX(AttributeRelidNumIndexId);
LOAD_CRIT_INDEX(IndexRelidIndexId);
LOAD_CRIT_INDEX(OpclassOidIndexId);
LOAD_CRIT_INDEX(AccessMethodStrategyIndexId);
LOAD_CRIT_INDEX(AccessMethodProcedureIndexId);
LOAD_CRIT_INDEX(OperatorOidIndexId);
LOAD_CRIT_INDEX(RewriteRelRulenameIndexId);
LOAD_CRIT_INDEX(TriggerRelidNameIndexId);
#define NUM_CRITICAL_INDEXES 9 /* fix if you change list above */
load_critical_index(ClassOidIndexId);
load_critical_index(AttributeRelidNumIndexId);
load_critical_index(IndexRelidIndexId);
load_critical_index(OpclassOidIndexId);
load_critical_index(AccessMethodStrategyIndexId);
load_critical_index(AccessMethodProcedureIndexId);
load_critical_index(OperatorOidIndexId);
load_critical_index(RewriteRelRulenameIndexId);
load_critical_index(TriggerRelidNameIndexId);
#define NUM_CRITICAL_LOCAL_INDEXES 9 /* fix if you change list above */
criticalRelcachesBuilt = true;
}
/*
* Process critical shared indexes too.
*
* DatabaseNameIndexId isn't critical for relcache loading, but rather
* for initial lookup of MyDatabaseId, without which we'll never find
* any non-shared catalogs at all. Autovacuum calls InitPostgres with
* a database OID, so it instead depends on DatabaseOidIndexId.
*/
if (!criticalSharedRelcachesBuilt)
{
load_critical_index(DatabaseNameIndexId);
load_critical_index(DatabaseOidIndexId);
#define NUM_CRITICAL_SHARED_INDEXES 2 /* fix if you change list above */
criticalSharedRelcachesBuilt = true;
}
/*
* Now, scan all the relcache entries and update anything that might be
* wrong in the results from formrdesc or the relcache cache file. If we
......@@ -2658,7 +2742,8 @@ RelationCacheInitializePhase2(void)
}
/*
* Lastly, write out a new relcache cache file if one is needed.
* Lastly, write out new relcache cache files if needed. We don't bother
* to distinguish cases where only one of the two needs an update.
*/
if (needNewCacheFile)
{
......@@ -2666,15 +2751,36 @@ RelationCacheInitializePhase2(void)
* Force all the catcaches to finish initializing and thereby open the
* catalogs and indexes they use. This will preload the relcache with
* entries for all the most important system catalogs and indexes, so
* that the init file will be most useful for future backends.
* that the init files will be most useful for future backends.
*/
InitCatalogCachePhase2();
/* now write the file */
write_relcache_init_file();
/* reset initFileRelationIds list; we'll fill it during write */
initFileRelationIds = NIL;
/* now write the files */
write_relcache_init_file(true);
write_relcache_init_file(false);
}
}
/*
 * load_critical_index
 *		Build the relcache entry for one critical system index and nail it.
 *
 * The index is held under AccessShareLock only while its descriptor is being
 * built.  The resulting entry is flagged as nailed and given a reference
 * count of 1.  If the descriptor cannot be built, the failure is reported
 * at PANIC level.
 */
static void
load_critical_index(Oid indexoid)
{
	Relation	indexrel;

	LockRelationOid(indexoid, AccessShareLock);

	indexrel = RelationBuildDesc(indexoid, NULL);
	if (!indexrel)
		elog(PANIC, "could not open critical system index %u", indexoid);

	/* mark the freshly built entry as nailed, with one reference */
	indexrel->rd_refcnt = 1;
	indexrel->rd_isnailed = true;

	UnlockRelationOid(indexoid, AccessShareLock);
}
/*
* GetPgClassDescriptor -- get a predefined tuple descriptor for pg_class
* GetPgIndexDescriptor -- get a predefined tuple descriptor for pg_index
......@@ -2688,7 +2794,8 @@ RelationCacheInitializePhase2(void)
* extracting fields.
*/
static TupleDesc
BuildHardcodedDescriptor(int natts, Form_pg_attribute attrs, bool hasoids)
BuildHardcodedDescriptor(int natts, const FormData_pg_attribute *attrs,
bool hasoids)
{
TupleDesc result;
MemoryContext oldcxt;
......@@ -2745,6 +2852,9 @@ GetPgIndexDescriptor(void)
return pgindexdesc;
}
/*
* Load any default attribute value definitions for the relation.
*/
static void
AttrDefaultFetch(Relation relation)
{
......@@ -2810,6 +2920,9 @@ AttrDefaultFetch(Relation relation)
ndef - found, RelationGetRelationName(relation));
}
/*
* Load any check constraints for the relation.
*/
static void
CheckConstraintFetch(Relation relation)
{
......@@ -3310,7 +3423,10 @@ RelationGetIndexAttrBitmap(Relation relation)
* relation descriptors using sequential scans and write 'em to
* the initialization file for use by subsequent backends.
*
* We could dispense with the initialization file and just build the
* As of Postgres 8.5, there is one local initialization file in each
* database, plus one shared initialization file for shared catalogs.
*
* We could dispense with the initialization files and just build the
* critical reldescs the hard way on every backend startup, but that
* slows down backend startup noticeably.
*
......@@ -3318,24 +3434,26 @@ RelationGetIndexAttrBitmap(Relation relation)
* just the ones that are absolutely critical; this allows us to speed
* up backend startup by not having to build such entries the hard way.
* Presently, all the catalog and index entries that are referred to
* by catcaches are stored in the initialization file.
* by catcaches are stored in the initialization files.
*
* The same mechanism that detects when catcache and relcache entries
* need to be invalidated (due to catalog updates) also arranges to
* unlink the initialization file when its contents may be out of date.
* The file will then be rebuilt during the next backend startup.
* unlink the initialization files when the contents may be out of date.
* The files will then be rebuilt during the next backend startup.
*/
/*
* load_relcache_init_file -- attempt to load cache from the init file
* load_relcache_init_file -- attempt to load cache from the shared
* or local cache init file
*
* If successful, return TRUE and set criticalRelcachesBuilt to true.
* If successful, return TRUE and set criticalRelcachesBuilt or
* criticalSharedRelcachesBuilt to true.
* If not successful, return FALSE.
*
* NOTE: we assume we are already switched into CacheMemoryContext.
*/
static bool
load_relcache_init_file(void)
load_relcache_init_file(bool shared)
{
FILE *fp;
char initfilename[MAXPGPATH];
......@@ -3348,8 +3466,12 @@ load_relcache_init_file(void)
magic;
int i;
snprintf(initfilename, sizeof(initfilename), "%s/%s",
DatabasePath, RELCACHE_INIT_FILENAME);
if (shared)
snprintf(initfilename, sizeof(initfilename), "global/%s",
RELCACHE_INIT_FILENAME);
else
snprintf(initfilename, sizeof(initfilename), "%s/%s",
DatabasePath, RELCACHE_INIT_FILENAME);
fp = AllocateFile(initfilename, PG_BINARY_R);
if (fp == NULL)
......@@ -3364,7 +3486,6 @@ load_relcache_init_file(void)
rels = (Relation *) palloc(max_rels * sizeof(Relation));
num_rels = 0;
nailed_rels = nailed_indexes = 0;
initFileRelationIds = NIL;
/* check for correct magic number (compatible version) */
if (fread(&magic, 1, sizeof(magic), fp) != sizeof(magic))
......@@ -3588,7 +3709,7 @@ load_relcache_init_file(void)
/*
* Rules and triggers are not saved (mainly because the internal
* format is complex and subject to change). They must be rebuilt if
* needed by RelationCacheInitializePhase2. This is not expected to
* needed by RelationCacheInitializePhase3. This is not expected to
* be a big performance hit since few system catalogs have such. Ditto
* for index expressions and predicates.
*/
......@@ -3632,9 +3753,18 @@ load_relcache_init_file(void)
* get the right number of nailed items? (This is a useful crosscheck in
* case the set of critical rels or indexes changes.)
*/
if (nailed_rels != NUM_CRITICAL_RELS ||
nailed_indexes != NUM_CRITICAL_INDEXES)
goto read_failed;
if (shared)
{
if (nailed_rels != NUM_CRITICAL_SHARED_RELS ||
nailed_indexes != NUM_CRITICAL_SHARED_INDEXES)
goto read_failed;
}
else
{
if (nailed_rels != NUM_CRITICAL_LOCAL_RELS ||
nailed_indexes != NUM_CRITICAL_LOCAL_INDEXES)
goto read_failed;
}
/*
* OK, all appears well.
......@@ -3645,14 +3775,18 @@ load_relcache_init_file(void)
{
RelationCacheInsert(rels[relno]);
/* also make a list of their OIDs, for RelationIdIsInInitFile */
initFileRelationIds = lcons_oid(RelationGetRelid(rels[relno]),
initFileRelationIds);
if (!shared)
initFileRelationIds = lcons_oid(RelationGetRelid(rels[relno]),
initFileRelationIds);
}
pfree(rels);
FreeFile(fp);
criticalRelcachesBuilt = true;
if (shared)
criticalSharedRelcachesBuilt = true;
else
criticalRelcachesBuilt = true;
return true;
/*
......@@ -3669,10 +3803,10 @@ read_failed:
/*
* Write out a new initialization file with the current contents
* of the relcache.
* of the relcache (either shared rels or local rels, as indicated).
*/
static void
write_relcache_init_file(void)
write_relcache_init_file(bool shared)
{
FILE *fp;
char tempfilename[MAXPGPATH];
......@@ -3688,10 +3822,20 @@ write_relcache_init_file(void)
* another backend starting at about the same time might crash trying to
* read the partially-complete file.
*/
snprintf(tempfilename, sizeof(tempfilename), "%s/%s.%d",
DatabasePath, RELCACHE_INIT_FILENAME, MyProcPid);
snprintf(finalfilename, sizeof(finalfilename), "%s/%s",
DatabasePath, RELCACHE_INIT_FILENAME);
if (shared)
{
snprintf(tempfilename, sizeof(tempfilename), "global/%s.%d",
RELCACHE_INIT_FILENAME, MyProcPid);
snprintf(finalfilename, sizeof(finalfilename), "global/%s",
RELCACHE_INIT_FILENAME);
}
else
{
snprintf(tempfilename, sizeof(tempfilename), "%s/%s.%d",
DatabasePath, RELCACHE_INIT_FILENAME, MyProcPid);
snprintf(finalfilename, sizeof(finalfilename), "%s/%s",
DatabasePath, RELCACHE_INIT_FILENAME);
}
unlink(tempfilename); /* in case it exists w/wrong permissions */
......@@ -3719,17 +3863,19 @@ write_relcache_init_file(void)
elog(FATAL, "could not write init file");
/*
* Write all the reldescs (in no particular order).
* Write all the appropriate reldescs (in no particular order).
*/
hash_seq_init(&status, RelationIdCache);
initFileRelationIds = NIL;
while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
{
Relation rel = idhentry->reldesc;
Form_pg_class relform = rel->rd_rel;
/* ignore if not correct group */
if (relform->relisshared != shared)
continue;
/* first write the relcache entry proper */
write_item(rel, sizeof(RelationData), fp);
......@@ -3788,10 +3934,13 @@ write_relcache_init_file(void)
}
/* also make a list of their OIDs, for RelationIdIsInInitFile */
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
initFileRelationIds = lcons_oid(RelationGetRelid(rel),
initFileRelationIds);
MemoryContextSwitchTo(oldcxt);
if (!shared)
{
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
initFileRelationIds = lcons_oid(RelationGetRelid(rel),
initFileRelationIds);
MemoryContextSwitchTo(oldcxt);
}
}
if (FreeFile(fp))
......@@ -3852,7 +4001,7 @@ write_item(const void *data, Size len, FILE *fp)
/*
* Detect whether a given relation (identified by OID) is one of the ones
* we store in the init file.
* we store in the local relcache init file.
*
* Note that we effectively assume that all backends running in a database
* would choose to store the same set of relations in the init file;
......@@ -3868,7 +4017,7 @@ RelationIdIsInInitFile(Oid relationId)
/*
* Invalidate (remove) the init file during commit of a transaction that
* changed one or more of the relation cache entries that are kept in the
* init file.
* local init file.
*
* We actually need to remove the init file twice: once just before sending
* the SI messages that include relcache inval for such relations, and once
......@@ -3883,6 +4032,13 @@ RelationIdIsInInitFile(Oid relationId)
*
* Ignore any failure to unlink the file, since it might not be there if
* no backend has been started since the last removal.
*
* Notice this deals only with the local init file, not the shared init file.
* The reason is that there can never be a "significant" change to the
* relcache entry of a shared relation; the most that could happen is
* updates of noncritical fields such as relpages/reltuples. So, while
* it's worth updating the shared init file from time to time, it can never
* be invalid enough to make it necessary to remove it.
*/
void
RelationCacheInitFileInvalidate(bool beforeSend)
......@@ -3914,23 +4070,94 @@ RelationCacheInitFileInvalidate(bool beforeSend)
}
/*
* Remove the init file for a given database during postmaster startup.
* Remove the init files during postmaster startup.
*
* We used to keep the init file across restarts, but that is unsafe in PITR
* We used to keep the init files across restarts, but that is unsafe in PITR
* scenarios, and even in simple crash-recovery cases there are windows for
* the init file to become out-of-sync with the database. So now we just
* remove it during startup and expect the first backend launch to rebuild it.
* Of course, this has to happen in each database of the cluster. For
* simplicity this is driven by flatfiles.c, which has to scan pg_database
* anyway.
* the init files to become out-of-sync with the database. So now we just
* remove them during startup and expect the first backend launch to rebuild
* them. Of course, this has to happen in each database of the cluster.
*/
/*
 * NOTE(review): two signature lines follow.  Nothing in the body below uses
 * dbPath, so the one-argument form looks like the pre-change declaration
 * left over from a diff rendering -- confirm which one is live.
 */
void
RelationCacheInitFileRemove(const char *dbPath)
RelationCacheInitFileRemove(void)
{
const char *tblspcdir = "pg_tblspc";
DIR *dir;
struct dirent *de;
char path[MAXPGPATH];	/* scratch buffer, reused for every path built below */
/*
 * We zap the shared cache file too. In theory it can't get out of sync
 * enough to be a problem, but in data-corruption cases, who knows ...
 */
snprintf(path, sizeof(path), "global/%s",
RELCACHE_INIT_FILENAME);
unlink_initfile(path);
/* Scan everything in the default tablespace */
RelationCacheInitFileRemoveInDir("base");
/* Scan the tablespace link directory to find non-default tablespaces */
dir = AllocateDir(tblspcdir);
if (dir == NULL)
{
/*
 * Not treated as fatal: the failure is logged and we return.  By this
 * point "global" and "base" have already been processed, so only the
 * non-default tablespaces are skipped.
 */
elog(LOG, "could not open tablespace link directory \"%s\": %m",
tblspcdir);
return;
}
while ((de = ReadDir(dir, tblspcdir)) != NULL)
{
/*
 * Consider only entries whose names consist entirely of decimal digits
 * (strspn() spans the whole name); every other entry is ignored.
 */
if (strspn(de->d_name, "0123456789") == strlen(de->d_name))
{
/* Scan the tablespace dir for per-database dirs */
snprintf(path, sizeof(path), "%s/%s",
tblspcdir, de->d_name);
RelationCacheInitFileRemoveInDir(path);
}
}
FreeDir(dir);
}
/*
 * Process one per-tablespace directory for RelationCacheInitFileRemove.
 *
 * tblspcpath is the directory to scan.  For every entry in it whose name is
 * made up solely of decimal digits, we try to remove the file
 * <tblspcpath>/<entry>/<RELCACHE_INIT_FILENAME> via unlink_initfile().
 * If the directory cannot be opened, that is logged at LOG level and nothing
 * further is done.
 */
static void
RelationCacheInitFileRemoveInDir(const char *tblspcpath)
{
DIR *dir;
struct dirent *de;
char initfilename[MAXPGPATH];
/*
 * NOTE(review): the next two statements reference dbPath, which is not
 * declared in this function, and call unlink() directly rather than
 * unlink_initfile().  They look like pre-change lines left over from a
 * diff rendering -- confirm before relying on them.
 */
snprintf(initfilename, sizeof(initfilename), "%s/%s",
dbPath, RELCACHE_INIT_FILENAME);
unlink(initfilename);
/* ignore any error, since it might not be there at all */
/* Scan the tablespace directory to find per-database directories */
dir = AllocateDir(tblspcpath);
if (dir == NULL)
{
/* Logged but not fatal; this tablespace is simply skipped */
elog(LOG, "could not open tablespace directory \"%s\": %m",
tblspcpath);
return;
}
while ((de = ReadDir(dir, tblspcpath)) != NULL)
{
/* Only all-digit entry names are treated as per-database directories */
if (strspn(de->d_name, "0123456789") == strlen(de->d_name))
{
/* Try to remove the init file in each database */
snprintf(initfilename, sizeof(initfilename), "%s/%s/%s",
tblspcpath, de->d_name, RELCACHE_INIT_FILENAME);
unlink_initfile(initfilename);
}
}
FreeDir(dir);
}
/*
 * Remove a single relcache init file, tolerating its absence.
 *
 * initfilename is the path of the file to remove.  A successful unlink, or
 * a failure with errno == ENOENT (the file was never there), is silent.
 * Any other failure is reported at LOG level and otherwise ignored.
 */
static void
unlink_initfile(const char *initfilename)
{
	/* Removed successfully: nothing to report */
	if (!(unlink(initfilename) < 0))
		return;

	/* It might not be there; a missing file is not worth a log entry */
	if (errno == ENOENT)
		return;

	elog(LOG, "could not remove cache file \"%s\": %m", initfilename);
}
......@@ -23,7 +23,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/utils/init/flatfiles.c,v 1.36 2009/01/01 17:23:51 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/init/flatfiles.c,v 1.37 2009/08/12 20:53:30 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -37,7 +37,6 @@
#include "access/twophase_rmgr.h"
#include "access/xact.h"
#include "access/xlogutils.h"
#include "catalog/catalog.h"
#include "catalog/pg_auth_members.h"
#include "catalog/pg_authid.h"
#include "catalog/pg_database.h"
......@@ -51,7 +50,6 @@
#include "storage/pmsignal.h"
#include "utils/builtins.h"
#include "utils/flatfiles.h"
#include "utils/relcache.h"
#include "utils/resowner.h"
#include "utils/tqual.h"
......@@ -171,14 +169,9 @@ name_okay(const char *str)
*
* A side effect is to determine the oldest database's datfrozenxid
* so we can set or update the XID wrap limit.
*
* Also, if "startup" is true, we tell relcache.c to clear out the relcache
* init file in each database. That's a bit nonmodular, but scanning
* pg_database twice during system startup seems too high a price for keeping
* things better separated.
*/
static void
write_database_file(Relation drel, bool startup)
write_database_file(Relation drel)
{
char *filename,
*tempname;
......@@ -259,17 +252,6 @@ write_database_file(Relation drel, bool startup)
fputs_quote(datname, fp);
fprintf(fp, " %u %u %u\n",
datoid, dattablespace, datfrozenxid);
/*
* Also clear relcache init file for each DB if starting up.
*/
if (startup)
{
char *dbpath = GetDatabasePath(datoid, dattablespace);
RelationCacheInitFileRemove(dbpath);
pfree(dbpath);
}
}
heap_endscan(scan);
......@@ -688,9 +670,6 @@ write_auth_file(Relation rel_authid, Relation rel_authmem)
* policy means we need not force initdb to change the format of the
* flat files.
*
* We also cause relcache init files to be flushed, for largely the same
* reasons.
*
* In a standalone backend we pass database_only = true to skip processing
* the auth file. We won't need it, and building it could fail if there's
* something corrupt in the authid/authmem catalogs.
......@@ -720,7 +699,7 @@ BuildFlatFiles(bool database_only)
* No locking is needed because no one else is alive yet.
*/
rel_db = CreateFakeRelcacheEntry(rnode);
write_database_file(rel_db, true);
write_database_file(rel_db);
FreeFakeRelcacheEntry(rel_db);
if (!database_only)
......@@ -833,7 +812,7 @@ AtEOXact_UpdateFlatFiles(bool isCommit)
if (database_file_update_subid != InvalidSubTransactionId)
{
database_file_update_subid = InvalidSubTransactionId;
write_database_file(drel, false);
write_database_file(drel);
heap_close(drel, NoLock);
}
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/init/miscinit.c,v 1.175 2009/06/11 14:49:05 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/init/miscinit.c,v 1.176 2009/08/12 20:53:30 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -40,6 +40,7 @@
#include "storage/procarray.h"
#include "utils/builtins.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/syscache.h"
......@@ -128,17 +129,9 @@ ResetReindexProcessing(void)
/*
 * Record the database directory path in the global DatabasePath.
 *
 * NOTE(review): as rendered here the body holds two incompatible
 * implementations back to back -- a free()/strdup() sequence followed by an
 * Assert/MemoryContextStrdup() sequence.  The preceding hunk header lists
 * 17 old lines against 9 new ones, so the first sequence is presumably the
 * removed version -- confirm against the real file.
 */
void
SetDatabasePath(const char *path)
{
/* First sequence: drop any previous value, then copy path if non-NULL */
if (DatabasePath)
{
free(DatabasePath);
DatabasePath = NULL;
}
/* use strdup since this is done before memory contexts are set up */
if (path)
{
DatabasePath = strdup(path);
AssertState(DatabasePath);
}
/* This should happen only once per process */
Assert(!DatabasePath);
/*
 * Second sequence: the copy is allocated in TopMemoryContext.  Unlike the
 * "if (path)" test above, path is used unconditionally here, so it
 * presumably must not be NULL -- verify against callers.
 */
DatabasePath = MemoryContextStrdup(TopMemoryContext, path);
}
/*
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/init/postinit.c,v 1.193 2009/07/31 20:26:23 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/init/postinit.c,v 1.194 2009/08/12 20:53:30 tgl Exp $
*
*
*-------------------------------------------------------------------------
......@@ -19,20 +19,20 @@
#include <unistd.h>
#include "access/heapam.h"
#include "access/sysattr.h"
#include "access/xact.h"
#include "catalog/catalog.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
#include "catalog/pg_authid.h"
#include "catalog/pg_database.h"
#include "catalog/pg_tablespace.h"
#include "libpq/hba.h"
#include "libpq/libpq-be.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "postmaster/postmaster.h"
#include "storage/backendid.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
......@@ -43,19 +43,17 @@
#include "storage/sinvaladt.h"
#include "storage/smgr.h"
#include "utils/acl.h"
#include "utils/flatfiles.h"
#include "utils/fmgroids.h"
#include "utils/guc.h"
#include "utils/pg_locale.h"
#include "utils/plancache.h"
#include "utils/portal.h"
#include "utils/relcache.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
#include "utils/tqual.h"
static bool FindMyDatabase(const char *name, Oid *db_id, Oid *db_tablespace);
static bool FindMyDatabaseByOid(Oid dbid, char *dbname, Oid *db_tablespace);
static HeapTuple GetDatabaseTuple(const char *dbname);
static HeapTuple GetDatabaseTupleByOid(Oid dboid);
static void CheckMyDatabase(const char *name, bool am_superuser);
static void InitCommunication(void);
static void ShutdownPostgres(int code, Datum arg);
......@@ -66,90 +64,97 @@ static bool ThereIsAtLeastOneRole(void);
/*
* FindMyDatabase -- get the critical info needed to locate my database
* GetDatabaseTuple -- fetch the pg_database row for a database
*
* Find the named database in pg_database, return its database OID and the
* OID of its default tablespace. Return TRUE if found, FALSE if not.
*
* Since we are not yet up and running as a backend, we cannot look directly
* at pg_database (we can't obtain locks nor participate in transactions).
* So to get the info we need before starting up, we must look at the "flat
* file" copy of pg_database that is helpfully maintained by flatfiles.c.
* This is subject to various race conditions, so after we have the
* transaction infrastructure started, we have to recheck the information;
* see InitPostgres.
* This is used during backend startup when we don't yet have any access to
* system catalogs in general. In the worst case, we can seqscan pg_database
* using nothing but the hard-wired descriptor that relcache.c creates for
* pg_database. In more typical cases, relcache.c was able to load
* descriptors for both pg_database and its indexes from the shared relcache
* cache file, and so we can do an indexscan. criticalSharedRelcachesBuilt
* tells whether we got the cached descriptors.
*/
/*
 * NOTE(review): this span shows lines of FindMyDatabase (flat-file reader)
 * interleaved with lines of GetDatabaseTuple (catalog scan); it appears to
 * be a diff rendering without +/- markers -- confirm against the real file.
 * The comments added below describe the GetDatabaseTuple statements only.
 */
static bool
FindMyDatabase(const char *name, Oid *db_id, Oid *db_tablespace)
static HeapTuple
GetDatabaseTuple(const char *dbname)
{
bool result = false;
char *filename;
FILE *db_file;
char thisname[NAMEDATALEN];
TransactionId db_frozenxid;
filename = database_getflatfilename();
db_file = AllocateFile(filename, "r");
if (db_file == NULL)
ereport(FATAL,
(errcode_for_file_access(),
errmsg("could not open file \"%s\": %m", filename)));
HeapTuple tuple;
Relation relation;
SysScanDesc scan;
ScanKeyData key[1];
while (read_pg_database_line(db_file, thisname, db_id,
db_tablespace, &db_frozenxid))
{
if (strcmp(thisname, name) == 0)
{
result = true;
break;
}
}
/*
 * form a scan key: a single equality condition (F_NAMEEQ) comparing the
 * datname column with dbname
 */
ScanKeyInit(&key[0],
Anum_pg_database_datname,
BTEqualStrategyNumber, F_NAMEEQ,
CStringGetDatum(dbname));
FreeFile(db_file);
pfree(filename);
/*
 * Open pg_database and fetch a tuple. Force heap scan if we haven't yet
 * built the critical shared relcache entries (i.e., we're starting up
 * without a shared relcache cache file).
 */
relation = heap_open(DatabaseRelationId, AccessShareLock);
scan = systable_beginscan(relation, DatabaseNameIndexId,
criticalSharedRelcachesBuilt,
SnapshotNow,
1, key);
return result;
/* Only the first row returned by the scan is examined */
tuple = systable_getnext(scan);
/* Must copy tuple before releasing buffer */
if (HeapTupleIsValid(tuple))
tuple = heap_copytuple(tuple);
/* all done */
systable_endscan(scan);
heap_close(relation, AccessShareLock);
/*
 * Result is either the heap_copytuple() copy made above or, when no row
 * matched, the not-valid value systable_getnext() returned; callers are
 * expected to test it with HeapTupleIsValid().
 */
return tuple;
}
/*
* FindMyDatabaseByOid
*
* As above, but the actual database Id is known. Return its name and the
* tablespace OID. Return TRUE if found, FALSE if not. The same restrictions
* as FindMyDatabase apply.
* GetDatabaseTupleByOid -- as above, but search by database OID
*/
/*
 * NOTE(review): this span shows lines of FindMyDatabaseByOid (flat-file
 * reader) interleaved with lines of GetDatabaseTupleByOid (catalog scan);
 * it appears to be a diff rendering without +/- markers -- confirm against
 * the real file.  The comments added below describe the
 * GetDatabaseTupleByOid statements only.
 */
static bool
FindMyDatabaseByOid(Oid dbid, char *dbname, Oid *db_tablespace)
static HeapTuple
GetDatabaseTupleByOid(Oid dboid)
{
bool result = false;
char *filename;
FILE *db_file;
Oid db_id;
char thisname[NAMEDATALEN];
TransactionId db_frozenxid;
filename = database_getflatfilename();
db_file = AllocateFile(filename, "r");
if (db_file == NULL)
ereport(FATAL,
(errcode_for_file_access(),
errmsg("could not open file \"%s\": %m", filename)));
HeapTuple tuple;
Relation relation;
SysScanDesc scan;
ScanKeyData key[1];
while (read_pg_database_line(db_file, thisname, &db_id,
db_tablespace, &db_frozenxid))
{
if (dbid == db_id)
{
result = true;
strlcpy(dbname, thisname, NAMEDATALEN);
break;
}
}
/*
 * form a scan key: a single equality condition (F_OIDEQ) comparing the
 * row's OID attribute with dboid
 */
ScanKeyInit(&key[0],
ObjectIdAttributeNumber,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(dboid));
/*
 * Open pg_database and fetch a tuple. Force heap scan if we haven't yet
 * built the critical shared relcache entries (i.e., we're starting up
 * without a shared relcache cache file).
 */
relation = heap_open(DatabaseRelationId, AccessShareLock);
scan = systable_beginscan(relation, DatabaseOidIndexId,
criticalSharedRelcachesBuilt,
SnapshotNow,
1, key);
FreeFile(db_file);
pfree(filename);
/* Only the first row returned by the scan is examined */
tuple = systable_getnext(scan);
return result;
/* Must copy tuple before releasing buffer */
if (HeapTupleIsValid(tuple))
tuple = heap_copytuple(tuple);
/* all done */
systable_endscan(scan);
heap_close(relation, AccessShareLock);
/*
 * Result is either the heap_copytuple() copy made above or, when no row
 * matched, the not-valid value systable_getnext() returned; callers are
 * expected to test it with HeapTupleIsValid().
 */
return tuple;
}
......@@ -164,7 +169,7 @@ CheckMyDatabase(const char *name, bool am_superuser)
char *collate;
char *ctype;
/* Fetch our real pg_database row */
/* Fetch our pg_database row normally, via syscache */
tup = SearchSysCache(DATABASEOID,
ObjectIdGetDatum(MyDatabaseId),
0, 0, 0);
......@@ -356,8 +361,9 @@ BaseInit(void)
* Initialize POSTGRES.
*
* The database can be specified by name, using the in_dbname parameter, or by
* OID, using the dboid parameter. In the latter case, the computed database
* name is passed out to the caller as a palloc'ed string in out_dbname.
* OID, using the dboid parameter. In the latter case, the actual database
* name can be returned to the caller in out_dbname. If out_dbname isn't
* NULL, it must point to a buffer of size NAMEDATALEN.
*
* In bootstrap mode no parameters are used.
*
......@@ -366,7 +372,7 @@ BaseInit(void)
* the startup transaction rather than doing a separate one in postgres.c.)
*
* As of PostgreSQL 8.2, we expect InitProcess() was already called, so we
* already have a PGPROC struct ... but it's not filled in yet.
* already have a PGPROC struct ... but it's not completely filled in yet.
*
* Note:
* Be very careful with the order of calls in the InitPostgres function.
......@@ -374,7 +380,7 @@ BaseInit(void)
*/
bool
InitPostgres(const char *in_dbname, Oid dboid, const char *username,
char **out_dbname)
char *out_dbname)
{
bool bootstrap = IsBootstrapProcessingMode();
bool autovacuum = IsAutoVacuumWorkerProcess();
......@@ -383,57 +389,7 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username,
char dbname[NAMEDATALEN];
/*
* Set up the global variables holding database id and path. But note we
* won't actually try to touch the database just yet.
*
* We take a shortcut in the bootstrap case, otherwise we have to look up
* the db name in pg_database.
*/
if (bootstrap)
{
MyDatabaseId = TemplateDbOid;
MyDatabaseTableSpace = DEFAULTTABLESPACE_OID;
}
else
{
/*
* Find tablespace of the database we're about to open. Since we're
* not yet up and running we have to use one of the hackish
* FindMyDatabase variants, which look in the flat-file copy of
* pg_database.
*
* If the in_dbname param is NULL, lookup database by OID.
*/
if (in_dbname == NULL)
{
if (!FindMyDatabaseByOid(dboid, dbname, &MyDatabaseTableSpace))
ereport(FATAL,
(errcode(ERRCODE_UNDEFINED_DATABASE),
errmsg("database %u does not exist", dboid)));
MyDatabaseId = dboid;
/* pass the database name to the caller */
*out_dbname = pstrdup(dbname);
}
else
{
if (!FindMyDatabase(in_dbname, &MyDatabaseId, &MyDatabaseTableSpace))
ereport(FATAL,
(errcode(ERRCODE_UNDEFINED_DATABASE),
errmsg("database \"%s\" does not exist",
in_dbname)));
/* our database name is gotten from the caller */
strlcpy(dbname, in_dbname, NAMEDATALEN);
}
}
fullpath = GetDatabasePath(MyDatabaseId, MyDatabaseTableSpace);
SetDatabasePath(fullpath);
/*
* Finish filling in the PGPROC struct, and add it to the ProcArray. (We
* need to know MyDatabaseId before we can do this, since it's entered
* into the PGPROC struct.)
* Add my PGPROC struct to the ProcArray.
*
* Once I have done this, I am visible to other backends!
*/
......@@ -507,10 +463,69 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username,
}
/*
* Now that we have a transaction, we can take locks. Take a writer's
* lock on the database we are trying to connect to. If there is a
* concurrently running DROP DATABASE on that database, this will block us
* until it finishes (and has updated the flat file copy of pg_database).
* Load relcache entries for the shared system catalogs. This must
* create at least an entry for pg_database.
*/
RelationCacheInitializePhase2();
/*
* Set up the global variables holding database id and default tablespace.
* But note we won't actually try to touch the database just yet.
*
* We take a shortcut in the bootstrap case, otherwise we have to look up
* the db's entry in pg_database.
*/
if (bootstrap)
{
MyDatabaseId = TemplateDbOid;
MyDatabaseTableSpace = DEFAULTTABLESPACE_OID;
}
else if (in_dbname != NULL)
{
HeapTuple tuple;
Form_pg_database dbform;
tuple = GetDatabaseTuple(in_dbname);
if (!HeapTupleIsValid(tuple))
ereport(FATAL,
(errcode(ERRCODE_UNDEFINED_DATABASE),
errmsg("database \"%s\" does not exist", in_dbname)));
dbform = (Form_pg_database) GETSTRUCT(tuple);
MyDatabaseId = HeapTupleGetOid(tuple);
MyDatabaseTableSpace = dbform->dattablespace;
/* take database name from the caller, just for paranoia */
strlcpy(dbname, in_dbname, sizeof(dbname));
}
else
{
/* caller specified database by OID */
HeapTuple tuple;
Form_pg_database dbform;
tuple = GetDatabaseTupleByOid(dboid);
if (!HeapTupleIsValid(tuple))
ereport(FATAL,
(errcode(ERRCODE_UNDEFINED_DATABASE),
errmsg("database %u does not exist", dboid)));
dbform = (Form_pg_database) GETSTRUCT(tuple);
MyDatabaseId = HeapTupleGetOid(tuple);
MyDatabaseTableSpace = dbform->dattablespace;
Assert(MyDatabaseId == dboid);
strlcpy(dbname, NameStr(dbform->datname), sizeof(dbname));
/* pass the database name back to the caller */
if (out_dbname)
strcpy(out_dbname, dbname);
}
/* Now we can mark our PGPROC entry with the database ID */
/* (We assume this is an atomic store so no lock is needed) */
MyProc->databaseId = MyDatabaseId;
/*
* Now, take a writer's lock on the database we are trying to connect to.
* If there is a concurrently running DROP DATABASE on that database,
* this will block us until it finishes (and has committed its update of
* pg_database).
*
* Note that the lock is not held long, only until the end of this startup
* transaction. This is OK since we are already advertising our use of
......@@ -528,21 +543,21 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username,
RowExclusiveLock);
/*
* Recheck the flat file copy of pg_database to make sure the target
* database hasn't gone away. If there was a concurrent DROP DATABASE,
* this ensures we will die cleanly without creating a mess.
* Recheck pg_database to make sure the target database hasn't gone away.
* If there was a concurrent DROP DATABASE, this ensures we will die
* cleanly without creating a mess.
*/
if (!bootstrap)
{
Oid dbid2;
Oid tsid2;
HeapTuple tuple;
if (!FindMyDatabase(dbname, &dbid2, &tsid2) ||
dbid2 != MyDatabaseId || tsid2 != MyDatabaseTableSpace)
tuple = GetDatabaseTuple(dbname);
if (!HeapTupleIsValid(tuple) ||
MyDatabaseId != HeapTupleGetOid(tuple) ||
MyDatabaseTableSpace != ((Form_pg_database) GETSTRUCT(tuple))->dattablespace)
ereport(FATAL,
(errcode(ERRCODE_UNDEFINED_DATABASE),
errmsg("database \"%s\" does not exist",
dbname),
errmsg("database \"%s\" does not exist", dbname),
errdetail("It seems to have just been dropped or renamed.")));
}
......@@ -550,6 +565,8 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username,
* Now we should be able to access the database directory safely. Verify
* it's there and looks reasonable.
*/
fullpath = GetDatabasePath(MyDatabaseId, MyDatabaseTableSpace);
if (!bootstrap)
{
if (access(fullpath, F_OK) == -1)
......@@ -571,13 +588,15 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username,
ValidatePgVersion(fullpath);
}
SetDatabasePath(fullpath);
/*
* It's now possible to do real access to the system catalogs.
*
* Load relcache entries for the system catalogs. This must create at
* least the minimum set of "nailed-in" cache entries.
*/
RelationCacheInitializePhase2();
RelationCacheInitializePhase3();
/*
* Figure out our postgres user id, and see if we are a superuser.
......@@ -612,7 +631,7 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username,
initialize_acl();
/*
* Read the real pg_database row for our database, check permissions and
* Re-read the pg_database row for our database, check permissions and
* set up database-specific GUC settings. We can't do this until all the
* database-access infrastructure is up. (Also, it wants to know if the
* user is a superuser, so the above stuff has to happen first.)
......
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/catalog/pg_attribute.h,v 1.151 2009/08/04 04:04:11 tgl Exp $
* $PostgreSQL: pgsql/src/include/catalog/pg_attribute.h,v 1.152 2009/08/12 20:53:30 tgl Exp $
*
* NOTES
* the genbki.sh script reads this file and generates .bki
......@@ -465,6 +465,29 @@ DATA(insert ( 1259 xmax 28 0 0 4 -5 0 -1 -1 t p i t f f t 0 _null_));
DATA(insert ( 1259 cmax 29 0 0 4 -6 0 -1 -1 t p i t f f t 0 _null_));
DATA(insert ( 1259 tableoid 26 0 0 4 -7 0 -1 -1 t p i t f f t 0 _null_));
/* ----------------
 * pg_database
 *
 * pg_database is not bootstrapped in the same way as the other relations that
 * have hardwired pg_attribute entries in this file. However, we do need
 * a "Schema_xxx" macro for it --- see relcache.c.
 *
 * The macro expands to a comma-separated list of 13 brace-enclosed
 * initializers, one per pg_database column, from datname through datacl.
 * Every entry begins with 1262 (presumably pg_database's relation OID --
 * verify against pg_database.h) followed by the column name; the seventh
 * field counts 1..13 and so looks like the attribute number -- confirm
 * against the pg_attribute struct layout.
 *
 * NOTE(review): nothing here is generated, so this list presumably has to
 * be edited by hand whenever pg_database's columns change -- confirm.
 * ----------------
 */
#define Schema_pg_database \
{ 1262, {"datname"}, 19, -1, 0, NAMEDATALEN, 1, 0, -1, -1, false, 'p', 'c', true, false, false, true, 0, { 0 } }, \
{ 1262, {"datdba"}, 26, -1, 0, 4, 2, 0, -1, -1, true, 'p', 'i', true, false, false, true, 0, { 0 } }, \
{ 1262, {"encoding"}, 23, -1, 0, 4, 3, 0, -1, -1, true, 'p', 'i', true, false, false, true, 0, { 0 } }, \
{ 1262, {"datcollate"}, 19, -1, 0, NAMEDATALEN, 4, 0, -1, -1, false, 'p', 'c', true, false, false, true, 0, { 0 } }, \
{ 1262, {"datctype"}, 19, -1, 0, NAMEDATALEN, 5, 0, -1, -1, false, 'p', 'c', true, false, false, true, 0, { 0 } }, \
{ 1262, {"datistemplate"}, 16, -1, 0, 1, 6, 0, -1, -1, true, 'p', 'c', true, false, false, true, 0, { 0 } }, \
{ 1262, {"datallowconn"}, 16, -1, 0, 1, 7, 0, -1, -1, true, 'p', 'c', true, false, false, true, 0, { 0 } }, \
{ 1262, {"datconnlimit"}, 23, -1, 0, 4, 8, 0, -1, -1, true, 'p', 'i', true, false, false, true, 0, { 0 } }, \
{ 1262, {"datlastsysoid"}, 26, -1, 0, 4, 9, 0, -1, -1, true, 'p', 'i', true, false, false, true, 0, { 0 } }, \
{ 1262, {"datfrozenxid"}, 28, -1, 0, 4, 10, 0, -1, -1, true, 'p', 'i', true, false, false, true, 0, { 0 } }, \
{ 1262, {"dattablespace"}, 26, -1, 0, 4, 11, 0, -1, -1, true, 'p', 'i', true, false, false, true, 0, { 0 } }, \
{ 1262, {"datconfig"}, 1009, -1, 0, -1, 12, 1, -1, -1, false, 'x', 'i', false, false, false, true, 0, { 0 } }, \
{ 1262, {"datacl"}, 1034, -1, 0, -1, 13, 1, -1, -1, false, 'x', 'i', false, false, false, true, 0, { 0 } }
/* ----------------
* pg_index
*
......
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/catalog/pg_type.h,v 1.207 2009/07/11 21:15:32 petere Exp $
* $PostgreSQL: pgsql/src/include/catalog/pg_type.h,v 1.208 2009/08/12 20:53:30 tgl Exp $
*
* NOTES
* the genbki.sh script reads this file and generates .bki
......@@ -333,13 +333,9 @@ DESCR("array of oids, used in system tables");
/* hand-built rowtype entries for bootstrapped catalogs: */
DATA(insert OID = 71 ( pg_type PGNSP PGUID -1 f c C f t \054 1247 0 0 record_in record_out record_recv record_send - - - d x f 0 -1 0 _null_ _null_ ));
#define PG_TYPE_RELTYPE_OID 71
DATA(insert OID = 75 ( pg_attribute PGNSP PGUID -1 f c C f t \054 1249 0 0 record_in record_out record_recv record_send - - - d x f 0 -1 0 _null_ _null_ ));
#define PG_ATTRIBUTE_RELTYPE_OID 75
DATA(insert OID = 81 ( pg_proc PGNSP PGUID -1 f c C f t \054 1255 0 0 record_in record_out record_recv record_send - - - d x f 0 -1 0 _null_ _null_ ));
#define PG_PROC_RELTYPE_OID 81
DATA(insert OID = 83 ( pg_class PGNSP PGUID -1 f c C f t \054 1259 0 0 record_in record_out record_recv record_send - - - d x f 0 -1 0 _null_ _null_ ));
#define PG_CLASS_RELTYPE_OID 83
/* OIDS 100 - 199 */
DATA(insert OID = 142 ( xml PGNSP PGUID -1 f b U f t \054 0 0 143 xml_in xml_out xml_recv xml_send - - - i x f 0 -1 0 _null_ _null_ ));
......
......@@ -13,7 +13,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/miscadmin.h,v 1.211 2009/06/11 14:49:08 momjian Exp $
* $PostgreSQL: pgsql/src/include/miscadmin.h,v 1.212 2009/08/12 20:53:30 tgl Exp $
*
* NOTES
* some of the information in this file should be moved to other files.
......@@ -324,7 +324,7 @@ extern ProcessingMode Mode;
/* in utils/init/postinit.c */
extern bool InitPostgres(const char *in_dbname, Oid dboid, const char *username,
char **out_dbname);
char *out_dbname);
extern void BaseInit(void);
/* in utils/init/miscinit.c */
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.112 2009/02/23 09:28:50 heikki Exp $
* $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.113 2009/08/12 20:53:30 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -84,7 +84,9 @@ struct PGPROC
* vacuum must not remove tuples deleted by
* xid >= xmin ! */
int pid; /* This backend's process id, or 0 */
int pid; /* Backend's process ID; 0 if prepared xact */
/* These fields are zero while a backend is still starting up: */
BackendId backendId; /* This backend's backend ID (if assigned) */
Oid databaseId; /* OID of database this backend is using */
Oid roleId; /* OID of role using this backend */
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/utils/relcache.h,v 1.63 2009/01/01 17:24:02 momjian Exp $
* $PostgreSQL: pgsql/src/include/utils/relcache.h,v 1.64 2009/08/12 20:53:31 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -54,6 +54,7 @@ extern void RelationInitIndexAccessInfo(Relation relation);
*/
extern void RelationCacheInitialize(void);
extern void RelationCacheInitializePhase2(void);
extern void RelationCacheInitializePhase3(void);
/*
* Routine to create a relcache entry for an about-to-be-created relation
......@@ -81,13 +82,15 @@ extern void AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
extern void RelationCacheMarkNewRelfilenode(Relation rel);
/*
* Routines to help manage rebuilding of relcache init file
* Routines to help manage rebuilding of relcache init files
*/
extern bool RelationIdIsInInitFile(Oid relationId);
extern void RelationCacheInitFileInvalidate(bool beforeSend);
extern void RelationCacheInitFileRemove(const char *dbPath);
extern void RelationCacheInitFileRemove(void);
/* should be used only by relcache.c and catcache.c */
extern bool criticalRelcachesBuilt;
/* should be used only by relcache.c and postinit.c */
extern bool criticalSharedRelcachesBuilt;
#endif /* RELCACHE_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment