Commit e5adcb78 authored by Peter Geoghegan

Refactor nbtree insertion scankeys.

Use dedicated struct to represent nbtree insertion scan keys.  Having a
dedicated struct makes the difference between search type scankeys and
insertion scankeys a lot clearer, and simplifies the signature of
several related functions.  This is based on a suggestion by Andrey
Lepikhov.

Streamline how unique index insertions cache binary search progress.
Cache the state of in-progress binary searches within _bt_check_unique()
for later instead of having callers avoid repeating the binary search in
an ad-hoc manner.  This makes it easy to add a new optimization:
_bt_check_unique() now falls out of its loop immediately in the common
case where it's already clear that there couldn't possibly be a
duplicate.

The new _bt_check_unique() scheme makes it a lot easier to manage cached
binary search effort afterwards, from within _bt_findinsertloc().  This
is needed for the upcoming patch to make nbtree tuples unique by
treating heap TID as a final tiebreaker column.  Unique key binary
searches need to restore lower and upper bounds.  They cannot simply
continue to use the >= lower bound as the offset to insert at, because
the heap TID tiebreaker column must be used in comparisons for the
restored binary search (unlike the original _bt_check_unique() binary
search, where scankey's heap TID column must be omitted).

Author: Peter Geoghegan, Heikki Linnakangas
Reviewed-By: Heikki Linnakangas, Andrey Lepikhov
Discussion: https://postgr.es/m/CAH2-WzmE6AhUdk9NdWBf4K3HjWXZBX3+umC7mH7+WDrKcRtsOw@mail.gmail.com
parent 550b9d26
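The binary-search caching described in the commit message is easiest to picture as an ordinary binary search that remembers its bounds. The sketch below is a self-contained toy model, not the nbtinsert.c code from this commit (that diff is collapsed further down); only the field names bounds_valid, low and stricthigh are borrowed from the BTInsertStateData struct added to nbtree.h, while InsertState, binsrch_insert and the plain int "page" are invented for illustration.

/*
 * Toy model of the cached-bounds idea -- NOT PostgreSQL code.  A first
 * binary search (the "uniqueness check" step) records the [low, stricthigh)
 * bounds it established; a later search for the insert location restarts
 * within those cached bounds instead of over the whole page.
 */
#include <stdbool.h>
#include <stdio.h>

typedef struct InsertState
{
	int		key;			/* value being inserted */
	bool	bounds_valid;	/* are low/stricthigh usable? */
	int		low;			/* first offset not known to be < key */
	int		stricthigh;		/* upper bound for the insert location */
} InsertState;

/* Find the first element >= state->key within [lo, hi); cache the bounds. */
static int
binsrch_insert(const int *page, int lo, int hi, InsertState *state)
{
	int		stricthigh = hi;

	while (lo < hi)
	{
		int		mid = lo + (hi - lo) / 2;

		if (page[mid] < state->key)
			lo = mid + 1;
		else
		{
			hi = mid;
			if (page[mid] > state->key)
				stricthigh = mid;	/* strictly greater: bounds insert from above */
		}
	}
	state->bounds_valid = true;
	state->low = lo;
	state->stricthigh = stricthigh;
	return lo;
}

int
main(void)
{
	int			page[] = {10, 20, 20, 20, 30, 40};
	InsertState state = {.key = 20, .bounds_valid = false};

	/* "uniqueness check" pass: one search over the whole page */
	int			off = binsrch_insert(page, 0, 6, &state);

	printf("first >= 20 at offset %d, cached bounds [%d, %d)\n",
		   off, state.low, state.stricthigh);

	/*
	 * "find insert location" pass: as long as nothing invalidated the page
	 * (no split, no concurrent insert), restart within the cached bounds
	 * instead of searching the whole page again.
	 */
	if (state.bounds_valid)
		off = binsrch_insert(page, state.low, state.stricthigh, &state);
	printf("insert at offset %d\n", off);
	return 0;
}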
@@ -127,9 +127,9 @@ static void bt_check_every_level(Relation rel, Relation heaprel,
 static BtreeLevel bt_check_level_from_leftmost(BtreeCheckState *state,
 						   BtreeLevel level);
 static void bt_target_page_check(BtreeCheckState *state);
-static ScanKey bt_right_page_check_scankey(BtreeCheckState *state);
-static void bt_downlink_check(BtreeCheckState *state, BlockNumber childblock,
-				  ScanKey targetkey);
+static BTScanInsert bt_right_page_check_scankey(BtreeCheckState *state);
+static void bt_downlink_check(BtreeCheckState *state, BTScanInsert targetkey,
+				  BlockNumber childblock);
 static void bt_downlink_missing_check(BtreeCheckState *state);
 static void bt_tuple_present_callback(Relation index, HeapTuple htup,
 						   Datum *values, bool *isnull,
@@ -139,14 +139,14 @@ static IndexTuple bt_normalize_tuple(BtreeCheckState *state,
 static inline bool offset_is_negative_infinity(BTPageOpaque opaque,
 						   OffsetNumber offset);
 static inline bool invariant_leq_offset(BtreeCheckState *state,
-					ScanKey key,
+					BTScanInsert key,
 					OffsetNumber upperbound);
 static inline bool invariant_geq_offset(BtreeCheckState *state,
-					ScanKey key,
+					BTScanInsert key,
 					OffsetNumber lowerbound);
 static inline bool invariant_leq_nontarget_offset(BtreeCheckState *state,
-						   Page other,
-						   ScanKey key,
+						   BTScanInsert key,
+						   Page nontarget,
 						   OffsetNumber upperbound);
 static Page palloc_btree_page(BtreeCheckState *state, BlockNumber blocknum);
@@ -838,8 +838,8 @@ bt_target_page_check(BtreeCheckState *state)
 	{
 		ItemId		itemid;
 		IndexTuple	itup;
-		ScanKey		skey;
 		size_t		tupsize;
+		BTScanInsert skey;
 
 		CHECK_FOR_INTERRUPTS();
@@ -1030,7 +1030,7 @@ bt_target_page_check(BtreeCheckState *state)
 		 */
 		else if (offset == max)
 		{
-			ScanKey		rightkey;
+			BTScanInsert rightkey;
 
 			/* Get item in next/right page */
 			rightkey = bt_right_page_check_scankey(state);
@@ -1082,7 +1082,7 @@ bt_target_page_check(BtreeCheckState *state)
 		{
 			BlockNumber childblock = BTreeInnerTupleGetDownLink(itup);
 
-			bt_downlink_check(state, childblock, skey);
+			bt_downlink_check(state, skey, childblock);
 		}
 	}
@@ -1111,11 +1111,12 @@ bt_target_page_check(BtreeCheckState *state)
  * Note that !readonly callers must reverify that target page has not
  * been concurrently deleted.
  */
-static ScanKey
+static BTScanInsert
 bt_right_page_check_scankey(BtreeCheckState *state)
 {
 	BTPageOpaque opaque;
 	ItemId		rightitem;
+	IndexTuple	firstitup;
 	BlockNumber targetnext;
 	Page		rightpage;
 	OffsetNumber nline;
@@ -1303,8 +1304,8 @@ bt_right_page_check_scankey(BtreeCheckState *state)
 	 * Return first real item scankey.  Note that this relies on right page
 	 * memory remaining allocated.
 	 */
-	return _bt_mkscankey(state->rel,
-						 (IndexTuple) PageGetItem(rightpage, rightitem));
+	firstitup = (IndexTuple) PageGetItem(rightpage, rightitem);
+	return _bt_mkscankey(state->rel, firstitup);
 }
 
 /*
@@ -1317,8 +1318,8 @@ bt_right_page_check_scankey(BtreeCheckState *state)
  * verification this way around is much more practical.
  */
 static void
-bt_downlink_check(BtreeCheckState *state, BlockNumber childblock,
-				  ScanKey targetkey)
+bt_downlink_check(BtreeCheckState *state, BTScanInsert targetkey,
+				  BlockNumber childblock)
 {
 	OffsetNumber offset;
 	OffsetNumber maxoffset;
@@ -1423,8 +1424,7 @@ bt_downlink_check(BtreeCheckState *state, BlockNumber childblock,
 		if (offset_is_negative_infinity(copaque, offset))
 			continue;
 
-		if (!invariant_leq_nontarget_offset(state, child,
-											targetkey, offset))
+		if (!invariant_leq_nontarget_offset(state, targetkey, child, offset))
 			ereport(ERROR,
 					(errcode(ERRCODE_INDEX_CORRUPTED),
 					 errmsg("down-link lower bound invariant violated for index \"%s\"",
@@ -1864,13 +1864,12 @@ offset_is_negative_infinity(BTPageOpaque opaque, OffsetNumber offset)
  * to corruption.
  */
 static inline bool
-invariant_leq_offset(BtreeCheckState *state, ScanKey key,
+invariant_leq_offset(BtreeCheckState *state, BTScanInsert key,
 					 OffsetNumber upperbound)
 {
-	int16		nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
 	int32		cmp;
 
-	cmp = _bt_compare(state->rel, nkeyatts, key, state->target, upperbound);
+	cmp = _bt_compare(state->rel, key, state->target, upperbound);
 
 	return cmp <= 0;
 }
@@ -1883,13 +1882,12 @@ invariant_leq_offset(BtreeCheckState *state, ScanKey key,
  * to corruption.
 */
 static inline bool
-invariant_geq_offset(BtreeCheckState *state, ScanKey key,
+invariant_geq_offset(BtreeCheckState *state, BTScanInsert key,
 					 OffsetNumber lowerbound)
 {
-	int16		nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
 	int32		cmp;
 
-	cmp = _bt_compare(state->rel, nkeyatts, key, state->target, lowerbound);
+	cmp = _bt_compare(state->rel, key, state->target, lowerbound);
 
 	return cmp >= 0;
 }
@@ -1905,14 +1903,12 @@ invariant_geq_offset(BtreeCheckState *state, ScanKey key,
  * to corruption.
 */
 static inline bool
-invariant_leq_nontarget_offset(BtreeCheckState *state,
-							   Page nontarget, ScanKey key,
-							   OffsetNumber upperbound)
+invariant_leq_nontarget_offset(BtreeCheckState *state, BTScanInsert key,
+							   Page nontarget, OffsetNumber upperbound)
 {
-	int16		nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
 	int32		cmp;
 
-	cmp = _bt_compare(state->rel, nkeyatts, key, nontarget, upperbound);
+	cmp = _bt_compare(state->rel, key, nontarget, upperbound);
 
 	return cmp <= 0;
 }
......
@@ -598,19 +598,22 @@ scankey point to comparison functions that return boolean, such as int4lt.
 There might be more than one scankey entry for a given index column, or
 none at all.  (We require the keys to appear in index column order, but
 the order of multiple keys for a given column is unspecified.)  An
-insertion scankey uses the same array-of-ScanKey data structure, but the
-sk_func pointers point to btree comparison support functions (ie, 3-way
-comparators that return int4 values interpreted as <0, =0, >0).  In an
-insertion scankey there is exactly one entry per index column.  Insertion
-scankeys are built within the btree code (eg, by _bt_mkscankey()) and are
-used to locate the starting point of a scan, as well as for locating the
-place to insert a new index tuple.  (Note: in the case of an insertion
-scankey built from a search scankey, there might be fewer keys than
-index columns, indicating that we have no constraints for the remaining
-index columns.)  After we have located the starting point of a scan, the
-original search scankey is consulted as each index entry is sequentially
-scanned to decide whether to return the entry and whether the scan can
-stop (see _bt_checkkeys()).
+insertion scankey ("BTScanInsert" data structure) uses a similar
+array-of-ScanKey data structure, but the sk_func pointers point to btree
+comparison support functions (ie, 3-way comparators that return int4 values
+interpreted as <0, =0, >0).  In an insertion scankey there is at most one
+entry per index column.  There is also other data about the rules used to
+locate where to begin the scan, such as whether or not the scan is a
+"nextkey" scan.  Insertion scankeys are built within the btree code (eg, by
+_bt_mkscankey()) and are used to locate the starting point of a scan, as
+well as for locating the place to insert a new index tuple.  (Note: in the
+case of an insertion scankey built from a search scankey or built from a
+truncated pivot tuple, there might be fewer keys than index columns,
+indicating that we have no constraints for the remaining index columns.)
+After we have located the starting point of a scan, the original search
+scankey is consulted as each index entry is sequentially scanned to decide
+whether to return the entry and whether the scan can stop (see
+_bt_checkkeys()).
 
 We use term "pivot" index tuples to distinguish tuples which don't point
 to heap tuples, but rather used for tree navigation.  Pivot tuples includes
......
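The distinction the README draws here, boolean operator procedures for search scankeys versus 3-way comparison support functions for insertion scankeys, can be illustrated with a trivial standalone example. This is not PostgreSQL code; int4lt_like and btint4cmp_like are made-up stand-ins for the int4lt operator procedure and the btree int4 support function it contrasts with.

#include <stdio.h>

/* search-scankey style: boolean operator procedure, like int4lt */
static int
int4lt_like(int a, int b)
{
	return a < b;
}

/* insertion-scankey style: 3-way comparator returning <0, =0, >0 */
static int
btint4cmp_like(int a, int b)
{
	return (a > b) - (a < b);
}

int
main(void)
{
	printf("search style:    5 < 7     -> %d\n", int4lt_like(5, 7));
	printf("insertion style: cmp(5, 7) -> %d\n", btint4cmp_like(5, 7));
	printf("insertion style: cmp(7, 7) -> %d\n", btint4cmp_like(7, 7));
	printf("insertion style: cmp(9, 7) -> %d\n", btint4cmp_like(9, 7));
	return 0;
}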
This diff is collapsed.
@@ -1371,7 +1371,7 @@ _bt_pagedel(Relation rel, Buffer buf)
 	 */
 	if (!stack)
 	{
-		ScanKey		itup_scankey;
+		BTScanInsert itup_key;
 		ItemId		itemid;
 		IndexTuple	targetkey;
 		Buffer		lbuf;
@@ -1421,12 +1421,10 @@ _bt_pagedel(Relation rel, Buffer buf)
 		}
 
 		/* we need an insertion scan key for the search, so build one */
-		itup_scankey = _bt_mkscankey(rel, targetkey);
-		/* find the leftmost leaf page containing this key */
-		stack = _bt_search(rel,
-						   IndexRelationGetNumberOfKeyAttributes(rel),
-						   itup_scankey, false, &lbuf, BT_READ, NULL);
-		/* don't need a pin on the page */
+		itup_key = _bt_mkscankey(rel, targetkey);
+		/* get stack to leaf page by searching index */
+		stack = _bt_search(rel, itup_key, &lbuf, BT_READ, NULL);
+		/* don't need a lock or second pin on the page */
 		_bt_relbuf(rel, lbuf);
 
 		/*
......
This diff is collapsed.
@@ -263,6 +263,7 @@ typedef struct BTWriteState
 {
 	Relation	heap;
 	Relation	index;
+	BTScanInsert inskey;		/* generic insertion scankey */
 	bool		btws_use_wal;	/* dump pages to WAL? */
 	BlockNumber btws_pages_alloced; /* # pages allocated */
 	BlockNumber btws_pages_written; /* # pages written out */
@@ -540,6 +541,7 @@ _bt_leafbuild(BTSpool *btspool, BTSpool *btspool2)
 
 	wstate.heap = btspool->heap;
 	wstate.index = btspool->index;
+	wstate.inskey = _bt_mkscankey(wstate.index, NULL);
 
 	/*
 	 * We need to log index creation in WAL iff WAL archiving/streaming is
@@ -1085,7 +1087,6 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
 	TupleDesc	tupdes = RelationGetDescr(wstate->index);
 	int			i,
 				keysz = IndexRelationGetNumberOfKeyAttributes(wstate->index);
-	ScanKey		indexScanKey = NULL;
 	SortSupport sortKeys;
 
 	if (merge)
@@ -1098,7 +1099,6 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
 		/* the preparation of merge */
 		itup = tuplesort_getindextuple(btspool->sortstate, true);
 		itup2 = tuplesort_getindextuple(btspool2->sortstate, true);
-		indexScanKey = _bt_mkscankey_nodata(wstate->index);
 
 		/* Prepare SortSupport data for each column */
 		sortKeys = (SortSupport) palloc0(keysz * sizeof(SortSupportData));
@@ -1106,7 +1106,7 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
 		for (i = 0; i < keysz; i++)
 		{
 			SortSupport sortKey = sortKeys + i;
-			ScanKey		scanKey = indexScanKey + i;
+			ScanKey		scanKey = wstate->inskey->scankeys + i;
 			int16		strategy;
 
 			sortKey->ssup_cxt = CurrentMemoryContext;
@@ -1125,8 +1125,6 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
 			PrepareSortSupportFromIndexRel(wstate->index, strategy, sortKey);
 		}
 
-		_bt_freeskey(indexScanKey);
-
 		for (;;)
 		{
 			load1 = true;		/* load BTSpool next ? */
......
@@ -56,34 +56,37 @@ static bool _bt_check_rowcompare(ScanKey skey,
  *		Build an insertion scan key that contains comparison data from itup
  *		as well as comparator routines appropriate to the key datatypes.
  *
- *		The result is intended for use with _bt_compare().
+ *		Result is intended for use with _bt_compare().  Callers that don't
+ *		need to fill out the insertion scankey arguments (e.g. they use an
+ *		ad-hoc comparison routine) can pass a NULL index tuple.
  */
-ScanKey
+BTScanInsert
 _bt_mkscankey(Relation rel, IndexTuple itup)
 {
+	BTScanInsert key;
 	ScanKey		skey;
 	TupleDesc	itupdesc;
-	int			indnatts PG_USED_FOR_ASSERTS_ONLY;
 	int			indnkeyatts;
 	int16	   *indoption;
+	int			tupnatts;
 	int			i;
 
 	itupdesc = RelationGetDescr(rel);
-	indnatts = IndexRelationGetNumberOfAttributes(rel);
 	indnkeyatts = IndexRelationGetNumberOfKeyAttributes(rel);
 	indoption = rel->rd_indoption;
+	tupnatts = itup ? BTreeTupleGetNAtts(itup, rel) : 0;
 
-	Assert(indnkeyatts > 0);
-	Assert(indnkeyatts <= indnatts);
-	Assert(BTreeTupleGetNAtts(itup, rel) == indnatts ||
-		   BTreeTupleGetNAtts(itup, rel) == indnkeyatts);
+	Assert(tupnatts <= IndexRelationGetNumberOfAttributes(rel));
 
 	/*
 	 * We'll execute search using scan key constructed on key columns. Non-key
 	 * (INCLUDE index) columns are always omitted from scan keys.
 	 */
-	skey = (ScanKey) palloc(indnkeyatts * sizeof(ScanKeyData));
-
+	key = palloc(offsetof(BTScanInsertData, scankeys) +
+				 sizeof(ScanKeyData) * indnkeyatts);
+	key->nextkey = false;
+	key->keysz = Min(indnkeyatts, tupnatts);
+	skey = key->scankeys;
 	for (i = 0; i < indnkeyatts; i++)
 	{
 		FmgrInfo   *procinfo;
@@ -96,56 +99,20 @@ _bt_mkscankey(Relation rel, IndexTuple itup)
 		 * comparison can be needed.
 		 */
 		procinfo = index_getprocinfo(rel, i + 1, BTORDER_PROC);
-		arg = index_getattr(itup, i + 1, itupdesc, &null);
-		flags = (null ? SK_ISNULL : 0) | (indoption[i] << SK_BT_INDOPTION_SHIFT);
-		ScanKeyEntryInitializeWithInfo(&skey[i],
-									   flags,
-									   (AttrNumber) (i + 1),
-									   InvalidStrategy,
-									   InvalidOid,
-									   rel->rd_indcollation[i],
-									   procinfo,
-									   arg);
-	}
-
-	return skey;
-}
-
-/*
- * _bt_mkscankey_nodata
- *		Build an insertion scan key that contains 3-way comparator routines
- *		appropriate to the key datatypes, but no comparison data.  The
- *		comparison data ultimately used must match the key datatypes.
- *
- *		The result cannot be used with _bt_compare(), unless comparison
- *		data is first stored into the key entries.  Currently this
- *		routine is only called by nbtsort.c and tuplesort.c, which have
- *		their own comparison routines.
- */
-ScanKey
-_bt_mkscankey_nodata(Relation rel)
-{
-	ScanKey		skey;
-	int			indnkeyatts;
-	int16	   *indoption;
-	int			i;
-
-	indnkeyatts = IndexRelationGetNumberOfKeyAttributes(rel);
-	indoption = rel->rd_indoption;
-
-	skey = (ScanKey) palloc(indnkeyatts * sizeof(ScanKeyData));
-
-	for (i = 0; i < indnkeyatts; i++)
-	{
-		FmgrInfo   *procinfo;
-		int			flags;
 
 		/*
-		 * We can use the cached (default) support procs since no cross-type
-		 * comparison can be needed.
+		 * Key arguments built when caller provides no tuple are
+		 * defensively represented as NULL values.  They should never be
+		 * used.
 		 */
-		procinfo = index_getprocinfo(rel, i + 1, BTORDER_PROC);
-		flags = SK_ISNULL | (indoption[i] << SK_BT_INDOPTION_SHIFT);
+		if (i < tupnatts)
+			arg = index_getattr(itup, i + 1, itupdesc, &null);
+		else
+		{
+			arg = (Datum) 0;
+			null = true;
+		}
+		flags = (null ? SK_ISNULL : 0) | (indoption[i] << SK_BT_INDOPTION_SHIFT);
 		ScanKeyEntryInitializeWithInfo(&skey[i],
 									   flags,
 									   (AttrNumber) (i + 1),
@@ -153,19 +120,10 @@ _bt_mkscankey_nodata(Relation rel)
 									   InvalidOid,
 									   rel->rd_indcollation[i],
 									   procinfo,
-									   (Datum) 0);
+									   arg);
 	}
 
-	return skey;
-}
-
-/*
- * free a scan key made by either _bt_mkscankey or _bt_mkscankey_nodata.
- */
-void
-_bt_freeskey(ScanKey skey)
-{
-	pfree(skey);
+	return key;
 }
 
 /*
......
@@ -884,7 +884,7 @@ tuplesort_begin_cluster(TupleDesc tupDesc,
 {
 	Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
 												   randomAccess);
-	ScanKey		indexScanKey;
+	BTScanInsert indexScanKey;
 	MemoryContext oldcontext;
 	int			i;
 
@@ -919,7 +919,7 @@ tuplesort_begin_cluster(TupleDesc tupDesc,
 
 	state->tupDesc = tupDesc;	/* assume we need not copy tupDesc */
 
-	indexScanKey = _bt_mkscankey_nodata(indexRel);
+	indexScanKey = _bt_mkscankey(indexRel, NULL);
 
 	if (state->indexInfo->ii_Expressions != NULL)
 	{
@@ -945,7 +945,7 @@ tuplesort_begin_cluster(TupleDesc tupDesc,
 	for (i = 0; i < state->nKeys; i++)
 	{
 		SortSupport sortKey = state->sortKeys + i;
-		ScanKey		scanKey = indexScanKey + i;
+		ScanKey		scanKey = indexScanKey->scankeys + i;
 		int16		strategy;
 
 		sortKey->ssup_cxt = CurrentMemoryContext;
@@ -964,7 +964,7 @@ tuplesort_begin_cluster(TupleDesc tupDesc,
 		PrepareSortSupportFromIndexRel(indexRel, strategy, sortKey);
 	}
 
-	_bt_freeskey(indexScanKey);
+	pfree(indexScanKey);
 
 	MemoryContextSwitchTo(oldcontext);
 
@@ -981,7 +981,7 @@ tuplesort_begin_index_btree(Relation heapRel,
 {
 	Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
 												   randomAccess);
-	ScanKey		indexScanKey;
+	BTScanInsert indexScanKey;
 	MemoryContext oldcontext;
 	int			i;
 
@@ -1014,7 +1014,7 @@ tuplesort_begin_index_btree(Relation heapRel,
 	state->indexRel = indexRel;
 	state->enforceUnique = enforceUnique;
 
-	indexScanKey = _bt_mkscankey_nodata(indexRel);
+	indexScanKey = _bt_mkscankey(indexRel, NULL);
 
 	/* Prepare SortSupport data for each column */
 	state->sortKeys = (SortSupport) palloc0(state->nKeys *
@@ -1023,7 +1023,7 @@ tuplesort_begin_index_btree(Relation heapRel,
 	for (i = 0; i < state->nKeys; i++)
 	{
 		SortSupport sortKey = state->sortKeys + i;
-		ScanKey		scanKey = indexScanKey + i;
+		ScanKey		scanKey = indexScanKey->scankeys + i;
 		int16		strategy;
 
 		sortKey->ssup_cxt = CurrentMemoryContext;
@@ -1042,7 +1042,7 @@ tuplesort_begin_index_btree(Relation heapRel,
 		PrepareSortSupportFromIndexRel(indexRel, strategy, sortKey);
 	}
 
-	_bt_freeskey(indexScanKey);
+	pfree(indexScanKey);
 
 	MemoryContextSwitchTo(oldcontext);
 
......
@@ -319,6 +319,64 @@ typedef struct BTStackData
 
 typedef BTStackData *BTStack;
 
+/*
+ * BTScanInsert is the btree-private state needed to find an initial position
+ * for an indexscan, or to insert new tuples -- an "insertion scankey" (not to
+ * be confused with a search scankey).  It's used to descend a B-Tree using
+ * _bt_search.
+ *
+ * When nextkey is false (the usual case), _bt_search and _bt_binsrch will
+ * locate the first item >= scankey.  When nextkey is true, they will locate
+ * the first item > scan key.
+ *
+ * scankeys is an array of scan key entries for attributes that are compared.
+ * keysz is the size of the array.  During insertion, there must be a scan key
+ * for every attribute, but when starting a regular index scan some can be
+ * omitted.  The array is used as a flexible array member, though it's sized
+ * in a way that makes it possible to use stack allocations.  See
+ * nbtree/README for full details.
+ */
+typedef struct BTScanInsertData
+{
+	bool		nextkey;
+	int			keysz;			/* Size of scankeys array */
+	ScanKeyData scankeys[INDEX_MAX_KEYS];	/* Must appear last */
+} BTScanInsertData;
+
+typedef BTScanInsertData *BTScanInsert;
+
+/*
+ * BTInsertStateData is a working area used during insertion.
+ *
+ * This is filled in after descending the tree to the first leaf page the new
+ * tuple might belong on.  Tracks the current position while performing
+ * uniqueness check, before we have determined which exact page to insert
+ * to.
+ *
+ * (This should be private to nbtinsert.c, but it's also used by
+ * _bt_binsrch_insert)
+ */
+typedef struct BTInsertStateData
+{
+	IndexTuple	itup;			/* Item we're inserting */
+	Size		itemsz;			/* Size of itup -- should be MAXALIGN()'d */
+	BTScanInsert itup_key;		/* Insertion scankey */
+
+	/* Buffer containing leaf page we're likely to insert itup on */
+	Buffer		buf;
+
+	/*
+	 * Cache of bounds within the current buffer.  Only used for insertions
+	 * where _bt_check_unique is called.  See _bt_binsrch_insert and
+	 * _bt_findinsertloc for details.
+	 */
+	bool		bounds_valid;
+	OffsetNumber low;
+	OffsetNumber stricthigh;
+} BTInsertStateData;
+
+typedef BTInsertStateData *BTInsertState;
+
 /*
  * BTScanOpaqueData is the btree-private state needed for an indexscan.
  * This consists of preprocessed scan keys (see _bt_preprocess_keys() for
@@ -558,16 +616,12 @@ extern int	_bt_pagedel(Relation rel, Buffer buf);
 /*
  * prototypes for functions in nbtsearch.c
  */
-extern BTStack _bt_search(Relation rel,
-		   int keysz, ScanKey scankey, bool nextkey,
-		   Buffer *bufP, int access, Snapshot snapshot);
-extern Buffer _bt_moveright(Relation rel, Buffer buf, int keysz,
-			  ScanKey scankey, bool nextkey, bool forupdate, BTStack stack,
-			  int access, Snapshot snapshot);
-extern OffsetNumber _bt_binsrch(Relation rel, Buffer buf, int keysz,
-			ScanKey scankey, bool nextkey);
-extern int32 _bt_compare(Relation rel, int keysz, ScanKey scankey,
-			Page page, OffsetNumber offnum);
+extern BTStack _bt_search(Relation rel, BTScanInsert key, Buffer *bufP,
+		   int access, Snapshot snapshot);
+extern Buffer _bt_moveright(Relation rel, BTScanInsert key, Buffer buf,
+			  bool forupdate, BTStack stack, int access, Snapshot snapshot);
+extern OffsetNumber _bt_binsrch_insert(Relation rel, BTInsertState insertstate);
+extern int32 _bt_compare(Relation rel, BTScanInsert key, Page page, OffsetNumber offnum);
 extern bool _bt_first(IndexScanDesc scan, ScanDirection dir);
 extern bool _bt_next(IndexScanDesc scan, ScanDirection dir);
 extern Buffer _bt_get_endpoint(Relation rel, uint32 level, bool rightmost,
@@ -576,9 +630,7 @@ extern Buffer _bt_get_endpoint(Relation rel, uint32 level, bool rightmost,
 /*
  * prototypes for functions in nbtutils.c
  */
-extern ScanKey _bt_mkscankey(Relation rel, IndexTuple itup);
-extern ScanKey _bt_mkscankey_nodata(Relation rel);
-extern void _bt_freeskey(ScanKey skey);
+extern BTScanInsert _bt_mkscankey(Relation rel, IndexTuple itup);
 extern void _bt_freestack(BTStack stack);
 extern void _bt_preprocess_array_keys(IndexScanDesc scan);
 extern void _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir);
......
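The nextkey rule documented in the BTScanInsertData comment above ("first item >= scankey" versus "first item > scan key") is easy to model outside the server. The toy below is not PostgreSQL code: ToyKey, TOY_MAX_KEYS and toy_binsrch are invented names, and the fixed-size array member only mirrors how the real struct is sized so that an instance can live on the stack without a palloc.

#include <stdbool.h>
#include <stdio.h>

#define TOY_MAX_KEYS 32			/* stands in for INDEX_MAX_KEYS */

typedef struct ToyKey
{
	bool	nextkey;			/* false: first item >= key; true: first item > key */
	int		keysz;				/* number of valid entries in scankeys[] */
	int		scankeys[TOY_MAX_KEYS]; /* fixed size => stack allocatable */
} ToyKey;

/* Return the offset the key would land on under the nextkey rule. */
static int
toy_binsrch(const int *page, int nitems, const ToyKey *key)
{
	int		lo = 0;
	int		hi = nitems;
	int		val = key->scankeys[0];	/* single-column toy index */

	while (lo < hi)
	{
		int		mid = lo + (hi - lo) / 2;
		int		cmp = (page[mid] > val) - (page[mid] < val);

		/*
		 * nextkey=false: move right only while item < key.
		 * nextkey=true:  move right while item <= key.
		 */
		if (cmp < 0 || (key->nextkey && cmp == 0))
			lo = mid + 1;
		else
			hi = mid;
	}
	return lo;
}

int
main(void)
{
	int		page[] = {10, 20, 20, 30};
	ToyKey	key = {.nextkey = false, .keysz = 1, .scankeys = {20}};

	printf("nextkey=false -> offset %d\n", toy_binsrch(page, 4, &key));
	key.nextkey = true;
	printf("nextkey=true  -> offset %d\n", toy_binsrch(page, 4, &key));
	return 0;
}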