Commit 24ee8af5 authored by Tom Lane

Rework temp_tablespaces patch so that temp tablespaces are assigned separately
for each temp file, rather than once per sort or hashjoin; this allows
spreading the data of a large sort or join across multiple tablespaces.
(I remain dubious that this will make any difference in practice, but certain
people insisted.)  Arrange to cache the results of parsing the GUC variable
instead of recomputing from scratch on every demand, and push usage of the
cache down to the bottommost fd.c level.
parent 2d9d7a6b
<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.125 2007/06/03 17:05:29 tgl Exp $ --> <!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.126 2007/06/07 19:19:56 tgl Exp $ -->
<chapter Id="runtime-config"> <chapter Id="runtime-config">
<title>Server Configuration</title> <title>Server Configuration</title>
...@@ -3510,16 +3510,22 @@ SELECT * FROM parent WHERE key = 2400; ...@@ -3510,16 +3510,22 @@ SELECT * FROM parent WHERE key = 2400;
<para> <para>
The value is a list of names of tablespaces. When there is more than The value is a list of names of tablespaces. When there is more than
one name in the list, <productname>PostgreSQL</> chooses a random one name in the list, <productname>PostgreSQL</> chooses a random
member of the list each time a temporary object is to be created. member of the list each time a temporary object is to be created;
except that within a transaction, successively created temporary
objects are placed in successive tablespaces from the list.
If any element of the list is an empty string,
<productname>PostgreSQL</> will automatically use the default
tablespace of the current database instead.
</para> </para>
<para> <para>
If any element of the list is an empty string or does not match the When <varname>temp_tablespaces</> is set interactively, specifying a
name of any existing tablespace, <productname>PostgreSQL</> will nonexistent tablespace is an error, as is specifying a tablespace for
automatically use the default tablespace of the current database which the user does not have <literal>CREATE</> privilege. However,
instead. If a nondefault tablespace when using a previously set value, nonexistent tablespaces are
is specified, the user must have <literal>CREATE</> privilege ignored, as are tablespaces for which the user lacks
for it, or creation attempts will fail. <literal>CREATE</> privilege. In particular, this rule applies when
using a value set in <filename>postgresql.conf</>.
</para> </para>
<para> <para>
......
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/tablespace.c,v 1.47 2007/06/03 17:06:59 tgl Exp $ * $PostgreSQL: pgsql/src/backend/commands/tablespace.c,v 1.48 2007/06/07 19:19:56 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -63,6 +63,7 @@ ...@@ -63,6 +63,7 @@
#include "utils/fmgroids.h" #include "utils/fmgroids.h"
#include "utils/guc.h" #include "utils/guc.h"
#include "utils/lsyscache.h" #include "utils/lsyscache.h"
#include "utils/memutils.h"
/* GUC variables */ /* GUC variables */
...@@ -72,7 +73,6 @@ char *temp_tablespaces = NULL; ...@@ -72,7 +73,6 @@ char *temp_tablespaces = NULL;
static bool remove_tablespace_directories(Oid tablespaceoid, bool redo); static bool remove_tablespace_directories(Oid tablespaceoid, bool redo);
static void set_short_version(const char *path); static void set_short_version(const char *path);
static Oid getTempTablespace(void);
/* /*
...@@ -921,9 +921,12 @@ GetDefaultTablespace(bool forTemp) ...@@ -921,9 +921,12 @@ GetDefaultTablespace(bool forTemp)
{ {
Oid result; Oid result;
/* The temp-table case is handled by getTempTablespace() */ /* The temp-table case is handled elsewhere */
if (forTemp) if (forTemp)
return getTempTablespace(); {
PrepareTempTablespaces();
return GetNextTempTableSpace();
}
/* Fast path for default_tablespace == "" */ /* Fast path for default_tablespace == "" */
if (default_tablespace == NULL || default_tablespace[0] == '\0') if (default_tablespace == NULL || default_tablespace[0] == '\0')
...@@ -958,7 +961,6 @@ assign_temp_tablespaces(const char *newval, bool doit, GucSource source) ...@@ -958,7 +961,6 @@ assign_temp_tablespaces(const char *newval, bool doit, GucSource source)
{ {
char *rawname; char *rawname;
List *namelist; List *namelist;
ListCell *l;
/* Need a modifiable copy of string */ /* Need a modifiable copy of string */
rawname = pstrdup(newval); rawname = pstrdup(newval);
...@@ -975,24 +977,79 @@ assign_temp_tablespaces(const char *newval, bool doit, GucSource source) ...@@ -975,24 +977,79 @@ assign_temp_tablespaces(const char *newval, bool doit, GucSource source)
/* /*
* If we aren't inside a transaction, we cannot do database access so * If we aren't inside a transaction, we cannot do database access so
* cannot verify the individual names. Must accept the list on faith. * cannot verify the individual names. Must accept the list on faith.
* Fortunately, there's then also no need to pass the data to fd.c.
*/ */
if (source >= PGC_S_INTERACTIVE && IsTransactionState()) if (IsTransactionState())
{ {
/*
* If we error out below, or if we are called multiple times in one
* transaction, we'll leak a bit of TopTransactionContext memory.
* Doesn't seem worth worrying about.
*/
Oid *tblSpcs;
int numSpcs;
ListCell *l;
tblSpcs = (Oid *) MemoryContextAlloc(TopTransactionContext,
list_length(namelist) * sizeof(Oid));
numSpcs = 0;
foreach(l, namelist) foreach(l, namelist)
{ {
char *curname = (char *) lfirst(l); char *curname = (char *) lfirst(l);
Oid curoid;
AclResult aclresult;
/* Allow an empty string (signifying database default) */ /* Allow an empty string (signifying database default) */
if (curname[0] == '\0') if (curname[0] == '\0')
{
tblSpcs[numSpcs++] = InvalidOid;
continue; continue;
}
/* Else verify that name is a valid tablespace name */ /* Else verify that name is a valid tablespace name */
if (get_tablespace_oid(curname) == InvalidOid) curoid = get_tablespace_oid(curname);
ereport(ERROR, if (curoid == InvalidOid)
(errcode(ERRCODE_UNDEFINED_OBJECT), {
errmsg("tablespace \"%s\" does not exist", /*
curname))); * In an interactive SET command, we ereport for bad info.
* Otherwise, silently ignore any bad list elements.
*/
if (source >= PGC_S_INTERACTIVE)
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_OBJECT),
errmsg("tablespace \"%s\" does not exist",
curname)));
continue;
}
/*
* Allow explicit specification of database's default tablespace
* in temp_tablespaces without triggering permissions checks.
*/
if (curoid == MyDatabaseTableSpace)
{
tblSpcs[numSpcs++] = InvalidOid;
continue;
}
/* Check permissions similarly */
aclresult = pg_tablespace_aclcheck(curoid, GetUserId(),
ACL_CREATE);
if (aclresult != ACLCHECK_OK)
{
if (source >= PGC_S_INTERACTIVE)
aclcheck_error(aclresult, ACL_KIND_TABLESPACE, curname);
continue;
}
tblSpcs[numSpcs++] = curoid;
} }
/* If actively "doing it", give the new list to fd.c */
if (doit)
SetTempTablespaces(tblSpcs, numSpcs);
else
pfree(tblSpcs);
} }
pfree(rawname); pfree(rawname);
...@@ -1002,69 +1059,34 @@ assign_temp_tablespaces(const char *newval, bool doit, GucSource source) ...@@ -1002,69 +1059,34 @@ assign_temp_tablespaces(const char *newval, bool doit, GucSource source)
} }
/* /*
* GetTempTablespace -- get the OID of the next temp tablespace to use * PrepareTempTablespaces -- prepare to use temp tablespaces
*
* May return InvalidOid to indicate "use the database's default tablespace".
* *
* This is different from GetDefaultTablespace(true) in just two ways: * If we have not already done so in the current transaction, parse the
* 1. We check privileges here instead of leaving it to the caller. * temp_tablespaces GUC variable and tell fd.c which tablespace(s) to use
* 2. It's safe to call this outside a transaction (we just return InvalidOid). * for temp files.
* The transaction state check is used so that this can be called from
* low-level places that might conceivably run outside a transaction.
*/ */
Oid void
GetTempTablespace(void) PrepareTempTablespaces(void)
{
Oid result;
/* Can't do catalog access unless within a transaction */
if (!IsTransactionState())
return InvalidOid;
/* OK, select a temp tablespace */
result = getTempTablespace();
/* Check permissions except when using database's default */
if (OidIsValid(result))
{
AclResult aclresult;
aclresult = pg_tablespace_aclcheck(result, GetUserId(),
ACL_CREATE);
if (aclresult != ACLCHECK_OK)
aclcheck_error(aclresult, ACL_KIND_TABLESPACE,
get_tablespace_name(result));
}
return result;
}
/*
* getTempTablespace -- get the OID of the next temp tablespace to use
*
* This has exactly the API defined for GetDefaultTablespace(true),
* in particular that caller is responsible for permissions checks.
*
* This exists to hide (and possibly optimize the use of) the
* temp_tablespaces GUC variable.
*/
static Oid
getTempTablespace(void)
{ {
Oid result;
char *rawname; char *rawname;
List *namelist; List *namelist;
int nnames; Oid *tblSpcs;
char *curname; int numSpcs;
ListCell *l;
if (temp_tablespaces == NULL) /* No work if already done in current transaction */
return InvalidOid; if (TempTablespacesAreSet())
return;
/* /*
* We re-parse the string on each call; this is a bit expensive, but * Can't do catalog access unless within a transaction. This is just
* we don't expect this function will be called many times per query, * a safety check in case this function is called by low-level code that
* so it's probably not worth being tenser. * could conceivably execute outside a transaction. Note that in such
* a scenario, fd.c will fall back to using the current database's default
* tablespace, which should always be OK.
*/ */
if (!IsTransactionState())
return;
/* Need a modifiable copy of string */ /* Need a modifiable copy of string */
rawname = pstrdup(temp_tablespaces); rawname = pstrdup(temp_tablespaces);
...@@ -1073,51 +1095,60 @@ getTempTablespace(void) ...@@ -1073,51 +1095,60 @@ getTempTablespace(void)
if (!SplitIdentifierString(rawname, ',', &namelist)) if (!SplitIdentifierString(rawname, ',', &namelist))
{ {
/* syntax error in name list */ /* syntax error in name list */
SetTempTablespaces(NULL, 0);
pfree(rawname); pfree(rawname);
list_free(namelist); list_free(namelist);
return InvalidOid; return;
} }
nnames = list_length(namelist);
/* Fast path for temp_tablespaces == "" */ /* Store tablespace OIDs in an array in TopTransactionContext */
if (nnames == 0) tblSpcs = (Oid *) MemoryContextAlloc(TopTransactionContext,
list_length(namelist) * sizeof(Oid));
numSpcs = 0;
foreach(l, namelist)
{ {
pfree(rawname); char *curname = (char *) lfirst(l);
list_free(namelist); Oid curoid;
return InvalidOid; AclResult aclresult;
}
/* Select a random element */ /* Allow an empty string (signifying database default) */
if (nnames == 1) /* no need for a random() call */ if (curname[0] == '\0')
curname = (char *) linitial(namelist); {
else tblSpcs[numSpcs++] = InvalidOid;
curname = (char *) list_nth(namelist, random() % nnames); continue;
}
/* /* Else verify that name is a valid tablespace name */
* Empty string means "database's default", else look up the tablespace. curoid = get_tablespace_oid(curname);
* if (curoid == InvalidOid)
* It is tempting to cache this lookup for more speed, but then we would {
* fail to detect the case where the tablespace was dropped since the GUC /* Silently ignore any bad list elements */
* variable was set. Note also that we don't complain if the value fails continue;
* to refer to an existing tablespace; we just silently return InvalidOid, }
* causing the new object to be created in the database's tablespace.
*/
if (curname[0] == '\0')
result = InvalidOid;
else
result = get_tablespace_oid(curname);
/* /*
* Allow explicit specification of database's default tablespace in * Allow explicit specification of database's default tablespace
* temp_tablespaces without triggering permissions checks. * in temp_tablespaces without triggering permissions checks.
*/ */
if (result == MyDatabaseTableSpace) if (curoid == MyDatabaseTableSpace)
result = InvalidOid; {
tblSpcs[numSpcs++] = InvalidOid;
continue;
}
/* Check permissions similarly */
aclresult = pg_tablespace_aclcheck(curoid, GetUserId(),
ACL_CREATE);
if (aclresult != ACLCHECK_OK)
continue;
tblSpcs[numSpcs++] = curoid;
}
SetTempTablespaces(tblSpcs, numSpcs);
pfree(rawname); pfree(rawname);
list_free(namelist); list_free(namelist);
return result;
} }
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.113 2007/06/03 17:07:14 tgl Exp $ * $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.114 2007/06/07 19:19:57 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -267,7 +267,6 @@ ExecHashTableCreate(Hash *node, List *hashOperators) ...@@ -267,7 +267,6 @@ ExecHashTableCreate(Hash *node, List *hashOperators)
hashtable->totalTuples = 0; hashtable->totalTuples = 0;
hashtable->innerBatchFile = NULL; hashtable->innerBatchFile = NULL;
hashtable->outerBatchFile = NULL; hashtable->outerBatchFile = NULL;
hashtable->hashTblSpc = InvalidOid;
hashtable->spaceUsed = 0; hashtable->spaceUsed = 0;
hashtable->spaceAllowed = work_mem * 1024L; hashtable->spaceAllowed = work_mem * 1024L;
...@@ -327,8 +326,8 @@ ExecHashTableCreate(Hash *node, List *hashOperators) ...@@ -327,8 +326,8 @@ ExecHashTableCreate(Hash *node, List *hashOperators)
hashtable->outerBatchFile = (BufFile **) hashtable->outerBatchFile = (BufFile **)
palloc0(nbatch * sizeof(BufFile *)); palloc0(nbatch * sizeof(BufFile *));
/* The files will not be opened until needed... */ /* The files will not be opened until needed... */
/* ... but we want to choose the tablespace only once */ /* ... but make sure we have temp tablespaces established for them */
hashtable->hashTblSpc = GetTempTablespace(); PrepareTempTablespaces();
} }
/* /*
...@@ -510,8 +509,8 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable) ...@@ -510,8 +509,8 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
palloc0(nbatch * sizeof(BufFile *)); palloc0(nbatch * sizeof(BufFile *));
hashtable->outerBatchFile = (BufFile **) hashtable->outerBatchFile = (BufFile **)
palloc0(nbatch * sizeof(BufFile *)); palloc0(nbatch * sizeof(BufFile *));
/* time to choose the tablespace, too */ /* time to establish the temp tablespaces, too */
hashtable->hashTblSpc = GetTempTablespace(); PrepareTempTablespaces();
} }
else else
{ {
...@@ -564,8 +563,7 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable) ...@@ -564,8 +563,7 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
{ {
/* dump it out */ /* dump it out */
Assert(batchno > curbatch); Assert(batchno > curbatch);
ExecHashJoinSaveTuple(hashtable, ExecHashJoinSaveTuple(HJTUPLE_MINTUPLE(tuple),
HJTUPLE_MINTUPLE(tuple),
tuple->hashvalue, tuple->hashvalue,
&hashtable->innerBatchFile[batchno]); &hashtable->innerBatchFile[batchno]);
/* and remove from hash table */ /* and remove from hash table */
...@@ -657,8 +655,7 @@ ExecHashTableInsert(HashJoinTable hashtable, ...@@ -657,8 +655,7 @@ ExecHashTableInsert(HashJoinTable hashtable,
* put the tuple into a temp file for later batches * put the tuple into a temp file for later batches
*/ */
Assert(batchno > hashtable->curbatch); Assert(batchno > hashtable->curbatch);
ExecHashJoinSaveTuple(hashtable, ExecHashJoinSaveTuple(tuple,
tuple,
hashvalue, hashvalue,
&hashtable->innerBatchFile[batchno]); &hashtable->innerBatchFile[batchno]);
} }
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.90 2007/06/03 17:07:26 tgl Exp $ * $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.91 2007/06/07 19:19:57 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -223,8 +223,7 @@ ExecHashJoin(HashJoinState *node) ...@@ -223,8 +223,7 @@ ExecHashJoin(HashJoinState *node)
* in the corresponding outer-batch file. * in the corresponding outer-batch file.
*/ */
Assert(batchno > hashtable->curbatch); Assert(batchno > hashtable->curbatch);
ExecHashJoinSaveTuple(hashtable, ExecHashJoinSaveTuple(ExecFetchSlotMinimalTuple(outerTupleSlot),
ExecFetchSlotMinimalTuple(outerTupleSlot),
hashvalue, hashvalue,
&hashtable->outerBatchFile[batchno]); &hashtable->outerBatchFile[batchno]);
node->hj_NeedNewOuter = true; node->hj_NeedNewOuter = true;
...@@ -755,8 +754,7 @@ start_over: ...@@ -755,8 +754,7 @@ start_over:
* will get messed up. * will get messed up.
*/ */
void void
ExecHashJoinSaveTuple(HashJoinTable hashtable, ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue,
MinimalTuple tuple, uint32 hashvalue,
BufFile **fileptr) BufFile **fileptr)
{ {
BufFile *file = *fileptr; BufFile *file = *fileptr;
...@@ -765,7 +763,7 @@ ExecHashJoinSaveTuple(HashJoinTable hashtable, ...@@ -765,7 +763,7 @@ ExecHashJoinSaveTuple(HashJoinTable hashtable,
if (file == NULL) if (file == NULL)
{ {
/* First write to this batch file, so open it. */ /* First write to this batch file, so open it. */
file = BufFileCreateTemp(false, hashtable->hashTblSpc); file = BufFileCreateTemp(false);
*fileptr = file; *fileptr = file;
} }
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/file/buffile.c,v 1.27 2007/06/03 17:07:30 tgl Exp $ * $PostgreSQL: pgsql/src/backend/storage/file/buffile.c,v 1.28 2007/06/07 19:19:57 tgl Exp $
* *
* NOTES: * NOTES:
* *
...@@ -41,6 +41,8 @@ ...@@ -41,6 +41,8 @@
* The maximum safe file size is presumed to be RELSEG_SIZE * BLCKSZ. * The maximum safe file size is presumed to be RELSEG_SIZE * BLCKSZ.
* Note we adhere to this limit whether or not LET_OS_MANAGE_FILESIZE * Note we adhere to this limit whether or not LET_OS_MANAGE_FILESIZE
* is defined, although md.c ignores it when that symbol is defined. * is defined, although md.c ignores it when that symbol is defined.
* The reason for doing this is that we'd like large temporary BufFiles
* to be spread across multiple tablespaces when available.
*/ */
#define MAX_PHYSICAL_FILESIZE (RELSEG_SIZE * BLCKSZ) #define MAX_PHYSICAL_FILESIZE (RELSEG_SIZE * BLCKSZ)
...@@ -60,7 +62,6 @@ struct BufFile ...@@ -60,7 +62,6 @@ struct BufFile
* offsets[i] is the current seek position of files[i]. We use this to * offsets[i] is the current seek position of files[i]. We use this to
* avoid making redundant FileSeek calls. * avoid making redundant FileSeek calls.
*/ */
Oid tblspcOid; /* tablespace to use (InvalidOid = default) */
bool isTemp; /* can only add files if this is TRUE */ bool isTemp; /* can only add files if this is TRUE */
bool isInterXact; /* keep open over transactions? */ bool isInterXact; /* keep open over transactions? */
...@@ -86,7 +87,7 @@ static int BufFileFlush(BufFile *file); ...@@ -86,7 +87,7 @@ static int BufFileFlush(BufFile *file);
/* /*
* Create a BufFile given the first underlying physical file. * Create a BufFile given the first underlying physical file.
* NOTE: caller must set tblspcOid, isTemp, isInterXact if appropriate. * NOTE: caller must set isTemp and isInterXact if appropriate.
*/ */
static BufFile * static BufFile *
makeBufFile(File firstfile) makeBufFile(File firstfile)
...@@ -98,7 +99,6 @@ makeBufFile(File firstfile) ...@@ -98,7 +99,6 @@ makeBufFile(File firstfile)
file->files[0] = firstfile; file->files[0] = firstfile;
file->offsets = (long *) palloc(sizeof(long)); file->offsets = (long *) palloc(sizeof(long));
file->offsets[0] = 0L; file->offsets[0] = 0L;
file->tblspcOid = InvalidOid;
file->isTemp = false; file->isTemp = false;
file->isInterXact = false; file->isInterXact = false;
file->dirty = false; file->dirty = false;
...@@ -119,7 +119,7 @@ extendBufFile(BufFile *file) ...@@ -119,7 +119,7 @@ extendBufFile(BufFile *file)
File pfile; File pfile;
Assert(file->isTemp); Assert(file->isTemp);
pfile = OpenTemporaryFile(file->isInterXact, file->tblspcOid); pfile = OpenTemporaryFile(file->isInterXact);
Assert(pfile >= 0); Assert(pfile >= 0);
file->files = (File *) repalloc(file->files, file->files = (File *) repalloc(file->files,
...@@ -137,23 +137,21 @@ extendBufFile(BufFile *file) ...@@ -137,23 +137,21 @@ extendBufFile(BufFile *file)
* written to it). * written to it).
* *
* If interXact is true, the temp file will not be automatically deleted * If interXact is true, the temp file will not be automatically deleted
* at end of transaction. If tblspcOid is not InvalidOid, the temp file * at end of transaction.
* is created in the specified tablespace instead of the default one.
* *
* Note: if interXact is true, the caller had better be calling us in a * Note: if interXact is true, the caller had better be calling us in a
* memory context that will survive across transaction boundaries. * memory context that will survive across transaction boundaries.
*/ */
BufFile * BufFile *
BufFileCreateTemp(bool interXact, Oid tblspcOid) BufFileCreateTemp(bool interXact)
{ {
BufFile *file; BufFile *file;
File pfile; File pfile;
pfile = OpenTemporaryFile(interXact, tblspcOid); pfile = OpenTemporaryFile(interXact);
Assert(pfile >= 0); Assert(pfile >= 0);
file = makeBufFile(pfile); file = makeBufFile(pfile);
file->tblspcOid = tblspcOid;
file->isTemp = true; file->isTemp = true;
file->isInterXact = interXact; file->isInterXact = interXact;
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.138 2007/06/03 17:07:31 tgl Exp $ * $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.139 2007/06/07 19:19:57 tgl Exp $
* *
* NOTES: * NOTES:
* *
...@@ -185,6 +185,14 @@ static AllocateDesc allocatedDescs[MAX_ALLOCATED_DESCS]; ...@@ -185,6 +185,14 @@ static AllocateDesc allocatedDescs[MAX_ALLOCATED_DESCS];
*/ */
static long tempFileCounter = 0; static long tempFileCounter = 0;
/*
 * Per-transaction list of tablespaces to use for temporary files.
 *
 * tempTableSpaces points at an array of tablespace OIDs handed to us by
 * SetTempTablespaces(); fd.c does not copy the array, it only keeps the
 * pointer.  numTempTableSpaces is the number of entries in that array.
 * When numTempTableSpaces is -1, the list has not been set in the current
 * transaction (AtEOXact_Files resets it to that state).
 *
 * nextTempTableSpace is an index into the array; GetNextTempTableSpace()
 * advances it (with wraparound) before fetching an entry.
 */
static Oid *tempTableSpaces = NULL;
static int numTempTableSpaces = -1;
static int nextTempTableSpace = 0;
/*-------------------- /*--------------------
* *
...@@ -840,21 +848,28 @@ PathNameOpenFile(FileName fileName, int fileFlags, int fileMode) ...@@ -840,21 +848,28 @@ PathNameOpenFile(FileName fileName, int fileFlags, int fileMode)
* that created them, so this should be false -- but if you need * that created them, so this should be false -- but if you need
* "somewhat" temporary storage, this might be useful. In either case, * "somewhat" temporary storage, this might be useful. In either case,
* the file is removed when the File is explicitly closed. * the file is removed when the File is explicitly closed.
*
* tblspcOid: the Oid of the tablespace where the temp file should be created.
* If InvalidOid, or if the tablespace can't be found, we silently fall back
* to the database's default tablespace.
*/ */
File File
OpenTemporaryFile(bool interXact, Oid tblspcOid) OpenTemporaryFile(bool interXact)
{ {
File file = 0; File file = 0;
/* /*
* If caller specified a tablespace, try to create there. * If some temp tablespace(s) have been given to us, try to use the next
* one. If a given tablespace can't be found, we silently fall back
* to the database's default tablespace.
*
* BUT: if the temp file is slated to outlive the current transaction,
* force it into the database's default tablespace, so that it will
* not pose a threat to possible tablespace drop attempts.
*/ */
if (OidIsValid(tblspcOid)) if (numTempTableSpaces > 0 && !interXact)
file = OpenTemporaryFileInTablespace(tblspcOid, false); {
Oid tblspcOid = GetNextTempTableSpace();
if (OidIsValid(tblspcOid))
file = OpenTemporaryFileInTablespace(tblspcOid, false);
}
/* /*
* If not, or if tablespace is bad, create in database's default * If not, or if tablespace is bad, create in database's default
...@@ -1530,6 +1545,69 @@ closeAllVfds(void) ...@@ -1530,6 +1545,69 @@ closeAllVfds(void)
} }
} }
/*
 * SetTempTablespaces
 *
 * Install the array of tablespace OIDs over which temporary files are to
 * be spread.  The array remains in effect until end of transaction, or
 * until this function is called again, whichever comes first.  The array
 * is not copied here: the caller must ensure it lives long enough
 * (allocating it in TopTransactionContext is the typical choice).
 */
void
SetTempTablespaces(Oid *tableSpaces, int numSpaces)
{
	Assert(numSpaces >= 0);

	/*
	 * Begin at a random position in the list, so that backends sharing the
	 * same temp tablespace list are unlikely to all hit the same tablespace
	 * first.  Later temp files created in this transaction step circularly
	 * through the list from that position, which spreads the files of a
	 * large sort across every available tablespace.  With zero or one
	 * entries there is nothing to choose, so just start at slot 0.
	 */
	nextTempTableSpace = (numSpaces > 1) ? (random() % numSpaces) : 0;

	/* Remember the caller's array (by reference) and its length */
	tempTableSpaces = tableSpaces;
	numTempTableSpaces = numSpaces;
}
/*
 * TempTablespacesAreSet
 *
 * Report whether SetTempTablespaces has been called in the current
 * transaction; a negative numTempTableSpaces marks the "not yet set"
 * state.  (Exposing this lets tablespace.c avoid keeping per-transaction
 * state of its own.)
 */
bool
TempTablespacesAreSet(void)
{
	return !(numTempTableSpaces < 0);
}
/*
 * GetNextTempTableSpace
 *
 * Hand out the next temp tablespace from the current list.  Returning
 * InvalidOid tells the caller to use the current database's default
 * tablespace.
 */
Oid
GetNextTempTableSpace(void)
{
	/* No list entries available: fall back to the database default */
	if (numTempTableSpaces <= 0)
		return InvalidOid;

	/* Step to the following slot, wrapping to the start past the end */
	nextTempTableSpace++;
	if (nextTempTableSpace >= numTempTableSpaces)
		nextTempTableSpace = 0;

	return tempTableSpaces[nextTempTableSpace];
}
/* /*
* AtEOSubXact_Files * AtEOSubXact_Files
* *
...@@ -1583,11 +1661,14 @@ AtEOSubXact_Files(bool isCommit, SubTransactionId mySubid, ...@@ -1583,11 +1661,14 @@ AtEOSubXact_Files(bool isCommit, SubTransactionId mySubid,
* particularly care which). All still-open per-transaction temporary file * particularly care which). All still-open per-transaction temporary file
* VFDs are closed, which also causes the underlying files to be * VFDs are closed, which also causes the underlying files to be
* deleted. Furthermore, all "allocated" stdio files are closed. * deleted. Furthermore, all "allocated" stdio files are closed.
* We also forget any transaction-local temp tablespace list.
*/ */
void void
AtEOXact_Files(void) AtEOXact_Files(void)
{ {
CleanupTempFiles(false); CleanupTempFiles(false);
tempTableSpaces = NULL;
numTempTableSpaces = -1;
} }
/* /*
......
...@@ -70,14 +70,13 @@ ...@@ -70,14 +70,13 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/sort/logtape.c,v 1.24 2007/06/03 17:08:23 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/sort/logtape.c,v 1.25 2007/06/07 19:19:57 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
#include "postgres.h" #include "postgres.h"
#include "commands/tablespace.h"
#include "storage/buffile.h" #include "storage/buffile.h"
#include "utils/logtape.h" #include "utils/logtape.h"
...@@ -529,7 +528,7 @@ LogicalTapeSetCreate(int ntapes) ...@@ -529,7 +528,7 @@ LogicalTapeSetCreate(int ntapes)
Assert(ntapes > 0); Assert(ntapes > 0);
lts = (LogicalTapeSet *) palloc(sizeof(LogicalTapeSet) + lts = (LogicalTapeSet *) palloc(sizeof(LogicalTapeSet) +
(ntapes - 1) *sizeof(LogicalTape)); (ntapes - 1) *sizeof(LogicalTape));
lts->pfile = BufFileCreateTemp(false, GetTempTablespace()); lts->pfile = BufFileCreateTemp(false);
lts->nFileBlocks = 0L; lts->nFileBlocks = 0L;
lts->forgetFreeSpace = false; lts->forgetFreeSpace = false;
lts->blocksSorted = true; /* a zero-length array is sorted ... */ lts->blocksSorted = true; /* a zero-length array is sorted ... */
......
...@@ -91,7 +91,7 @@ ...@@ -91,7 +91,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/sort/tuplesort.c,v 1.76 2007/05/04 21:29:53 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/sort/tuplesort.c,v 1.77 2007/06/07 19:19:57 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -104,6 +104,7 @@ ...@@ -104,6 +104,7 @@
#include "access/nbtree.h" #include "access/nbtree.h"
#include "catalog/pg_amop.h" #include "catalog/pg_amop.h"
#include "catalog/pg_operator.h" #include "catalog/pg_operator.h"
#include "commands/tablespace.h"
#include "miscadmin.h" #include "miscadmin.h"
#include "utils/datum.h" #include "utils/datum.h"
#include "utils/logtape.h" #include "utils/logtape.h"
...@@ -1479,6 +1480,12 @@ inittapes(Tuplesortstate *state) ...@@ -1479,6 +1480,12 @@ inittapes(Tuplesortstate *state)
if (tapeSpace + GetMemoryChunkSpace(state->memtuples) < state->allowedMem) if (tapeSpace + GetMemoryChunkSpace(state->memtuples) < state->allowedMem)
USEMEM(state, tapeSpace); USEMEM(state, tapeSpace);
/*
* Make sure that the temp file(s) underlying the tape set are created in
* suitable temp tablespaces.
*/
PrepareTempTablespaces();
/* /*
* Create the tape set and allocate the per-tape data arrays. * Create the tape set and allocate the per-tape data arrays.
*/ */
......
...@@ -38,7 +38,7 @@ ...@@ -38,7 +38,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/sort/tuplestore.c,v 1.32 2007/06/03 17:08:26 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/sort/tuplestore.c,v 1.33 2007/06/07 19:19:57 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -424,15 +424,11 @@ tuplestore_puttuple_common(Tuplestorestate *state, void *tuple) ...@@ -424,15 +424,11 @@ tuplestore_puttuple_common(Tuplestorestate *state, void *tuple)
return; return;
/* /*
* Nope; time to switch to tape-based operation. * Nope; time to switch to tape-based operation. Make sure that
* * the temp file(s) are created in suitable temp tablespaces.
* If the temp table is slated to outlive the current transaction,
* force it into my database's default tablespace, so that it will
* not pose a threat to possible tablespace drop attempts.
*/ */
state->myfile = BufFileCreateTemp(state->interXact, PrepareTempTablespaces();
state->interXact ? InvalidOid : state->myfile = BufFileCreateTemp(state->interXact);
GetTempTablespace());
state->status = TSS_WRITEFILE; state->status = TSS_WRITEFILE;
dumptuples(state); dumptuples(state);
break; break;
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/commands/tablespace.h,v 1.17 2007/06/03 17:08:29 tgl Exp $ * $PostgreSQL: pgsql/src/include/commands/tablespace.h,v 1.18 2007/06/07 19:19:57 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -41,7 +41,8 @@ extern void AlterTableSpaceOwner(const char *name, Oid newOwnerId); ...@@ -41,7 +41,8 @@ extern void AlterTableSpaceOwner(const char *name, Oid newOwnerId);
extern void TablespaceCreateDbspace(Oid spcNode, Oid dbNode, bool isRedo); extern void TablespaceCreateDbspace(Oid spcNode, Oid dbNode, bool isRedo);
extern Oid GetDefaultTablespace(bool forTemp); extern Oid GetDefaultTablespace(bool forTemp);
extern Oid GetTempTablespace(void);
extern void PrepareTempTablespaces(void);
extern Oid get_tablespace_oid(const char *tablespacename); extern Oid get_tablespace_oid(const char *tablespacename);
extern char *get_tablespace_name(Oid spc_oid); extern char *get_tablespace_name(Oid spc_oid);
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.46 2007/06/03 17:08:32 tgl Exp $ * $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.47 2007/06/07 19:19:57 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -102,8 +102,6 @@ typedef struct HashJoinTableData ...@@ -102,8 +102,6 @@ typedef struct HashJoinTableData
BufFile **innerBatchFile; /* buffered virtual temp file per batch */ BufFile **innerBatchFile; /* buffered virtual temp file per batch */
BufFile **outerBatchFile; /* buffered virtual temp file per batch */ BufFile **outerBatchFile; /* buffered virtual temp file per batch */
Oid hashTblSpc; /* tablespace to put temp files in */
/* /*
* Info about the datatype-specific hash functions for the datatypes being * Info about the datatype-specific hash functions for the datatypes being
* hashed. These are arrays of the same length as the number of hash join * hashed. These are arrays of the same length as the number of hash join
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/executor/nodeHashjoin.h,v 1.35 2007/06/03 17:08:32 tgl Exp $ * $PostgreSQL: pgsql/src/include/executor/nodeHashjoin.h,v 1.36 2007/06/07 19:19:57 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -23,8 +23,7 @@ extern TupleTableSlot *ExecHashJoin(HashJoinState *node); ...@@ -23,8 +23,7 @@ extern TupleTableSlot *ExecHashJoin(HashJoinState *node);
extern void ExecEndHashJoin(HashJoinState *node); extern void ExecEndHashJoin(HashJoinState *node);
extern void ExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt); extern void ExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt);
extern void ExecHashJoinSaveTuple(HashJoinTable hashtable, extern void ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue,
MinimalTuple tuple, uint32 hashvalue, BufFile **fileptr);
BufFile **fileptr);
#endif /* NODEHASHJOIN_H */ #endif /* NODEHASHJOIN_H */
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/storage/buffile.h,v 1.21 2007/06/03 17:08:32 tgl Exp $ * $PostgreSQL: pgsql/src/include/storage/buffile.h,v 1.22 2007/06/07 19:19:57 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -34,7 +34,7 @@ typedef struct BufFile BufFile; ...@@ -34,7 +34,7 @@ typedef struct BufFile BufFile;
* prototypes for functions in buffile.c * prototypes for functions in buffile.c
*/ */
extern BufFile *BufFileCreateTemp(bool interXact, Oid tblspcOid); extern BufFile *BufFileCreateTemp(bool interXact);
extern void BufFileClose(BufFile *file); extern void BufFileClose(BufFile *file);
extern size_t BufFileRead(BufFile *file, void *ptr, size_t size); extern size_t BufFileRead(BufFile *file, void *ptr, size_t size);
extern size_t BufFileWrite(BufFile *file, void *ptr, size_t size); extern size_t BufFileWrite(BufFile *file, void *ptr, size_t size);
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/storage/fd.h,v 1.58 2007/06/03 17:08:33 tgl Exp $ * $PostgreSQL: pgsql/src/include/storage/fd.h,v 1.59 2007/06/07 19:19:57 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -60,7 +60,7 @@ extern int max_files_per_process; ...@@ -60,7 +60,7 @@ extern int max_files_per_process;
/* Operations on virtual Files --- equivalent to Unix kernel file ops */ /* Operations on virtual Files --- equivalent to Unix kernel file ops */
extern File PathNameOpenFile(FileName fileName, int fileFlags, int fileMode); extern File PathNameOpenFile(FileName fileName, int fileFlags, int fileMode);
extern File OpenTemporaryFile(bool interXact, Oid tblspcOid); extern File OpenTemporaryFile(bool interXact);
extern void FileClose(File file); extern void FileClose(File file);
extern void FileUnlink(File file); extern void FileUnlink(File file);
extern int FileRead(File file, char *buffer, int amount); extern int FileRead(File file, char *buffer, int amount);
...@@ -85,10 +85,14 @@ extern int BasicOpenFile(FileName fileName, int fileFlags, int fileMode); ...@@ -85,10 +85,14 @@ extern int BasicOpenFile(FileName fileName, int fileFlags, int fileMode);
extern void InitFileAccess(void); extern void InitFileAccess(void);
extern void set_max_safe_fds(void); extern void set_max_safe_fds(void);
extern void closeAllVfds(void); extern void closeAllVfds(void);
extern void SetTempTablespaces(Oid *tableSpaces, int numSpaces);
extern bool TempTablespacesAreSet(void);
extern Oid GetNextTempTableSpace(void);
extern void AtEOXact_Files(void); extern void AtEOXact_Files(void);
extern void AtEOSubXact_Files(bool isCommit, SubTransactionId mySubid, extern void AtEOSubXact_Files(bool isCommit, SubTransactionId mySubid,
SubTransactionId parentSubid); SubTransactionId parentSubid);
extern void RemovePgTempFiles(void); extern void RemovePgTempFiles(void);
extern int pg_fsync(int fd); extern int pg_fsync(int fd);
extern int pg_fsync_no_writethrough(int fd); extern int pg_fsync_no_writethrough(int fd);
extern int pg_fsync_writethrough(int fd); extern int pg_fsync_writethrough(int fd);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment