Commit 93ee38ea authored by Peter Geoghegan

Teach pageinspect about nbtree deduplication.

Add a new bt_metap() column to display the metapage's allequalimage
field.  Also add three new columns to contrib/pageinspect's
bt_page_items() function:

* Add a boolean column ("dead") that displays the LP_DEAD bit value for
each non-pivot tuple.

* Add a TID column ("htid") that displays a single heap TID value for
each tuple.  This is the TID that is returned by BTreeTupleGetHeapTID(),
so comparable values are shown for pivot tuples, plain non-pivot tuples,
and posting list tuples.

* Add a TID array column ("tids") that displays TIDs from each tuple's
posting list, if any.  This works just like the "tids" column from
pageinspect's gin_leafpage_items() function.

No version bump for the pageinspect extension, since there hasn't been a
stable Postgres release since the last version bump (the last bump was
part of commit 58b4cb30).

Author: Peter Geoghegan
Discussion: https://postgr.es/m/CAH2-WzmSMmU2eNvY9+a4MNP+z02h6sa-uxZvN3un6jY02ZVBSw@mail.gmail.com
parent 58c47ccf
...@@ -31,9 +31,11 @@ ...@@ -31,9 +31,11 @@
#include "access/relation.h" #include "access/relation.h"
#include "catalog/namespace.h" #include "catalog/namespace.h"
#include "catalog/pg_am.h" #include "catalog/pg_am.h"
#include "catalog/pg_type.h"
#include "funcapi.h" #include "funcapi.h"
#include "miscadmin.h" #include "miscadmin.h"
#include "pageinspect.h" #include "pageinspect.h"
#include "utils/array.h"
#include "utils/builtins.h" #include "utils/builtins.h"
#include "utils/rel.h" #include "utils/rel.h"
#include "utils/varlena.h" #include "utils/varlena.h"
...@@ -45,6 +47,8 @@ PG_FUNCTION_INFO_V1(bt_page_stats); ...@@ -45,6 +47,8 @@ PG_FUNCTION_INFO_V1(bt_page_stats);
#define IS_INDEX(r) ((r)->rd_rel->relkind == RELKIND_INDEX) #define IS_INDEX(r) ((r)->rd_rel->relkind == RELKIND_INDEX)
#define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID) #define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID)
#define DatumGetItemPointer(X) ((ItemPointer) DatumGetPointer(X))
#define ItemPointerGetDatum(X) PointerGetDatum(X)
/* note: BlockNumber is unsigned, hence can't be negative */ /* note: BlockNumber is unsigned, hence can't be negative */
#define CHECK_RELATION_BLOCK_RANGE(rel, blkno) { \ #define CHECK_RELATION_BLOCK_RANGE(rel, blkno) { \
...@@ -243,6 +247,9 @@ struct user_args ...@@ -243,6 +247,9 @@ struct user_args
{ {
Page page; Page page;
OffsetNumber offset; OffsetNumber offset;
bool leafpage;
bool rightmost;
TupleDesc tupd;
}; };
/*------------------------------------------------------- /*-------------------------------------------------------
...@@ -252,17 +259,25 @@ struct user_args ...@@ -252,17 +259,25 @@ struct user_args
* ------------------------------------------------------ * ------------------------------------------------------
*/ */
static Datum static Datum
bt_page_print_tuples(FuncCallContext *fctx, Page page, OffsetNumber offset) bt_page_print_tuples(FuncCallContext *fctx, struct user_args *uargs)
{ {
char *values[6]; Page page = uargs->page;
OffsetNumber offset = uargs->offset;
bool leafpage = uargs->leafpage;
bool rightmost = uargs->rightmost;
bool ispivottuple;
Datum values[9];
bool nulls[9];
HeapTuple tuple; HeapTuple tuple;
ItemId id; ItemId id;
IndexTuple itup; IndexTuple itup;
int j; int j;
int off; int off;
int dlen; int dlen;
char *dump; char *dump,
*datacstring;
char *ptr; char *ptr;
ItemPointer htid;
id = PageGetItemId(page, offset); id = PageGetItemId(page, offset);
...@@ -272,18 +287,49 @@ bt_page_print_tuples(FuncCallContext *fctx, Page page, OffsetNumber offset) ...@@ -272,18 +287,49 @@ bt_page_print_tuples(FuncCallContext *fctx, Page page, OffsetNumber offset)
itup = (IndexTuple) PageGetItem(page, id); itup = (IndexTuple) PageGetItem(page, id);
j = 0; j = 0;
values[j++] = psprintf("%d", offset); memset(nulls, 0, sizeof(nulls));
values[j++] = psprintf("(%u,%u)", values[j++] = DatumGetInt16(offset);
ItemPointerGetBlockNumberNoCheck(&itup->t_tid), values[j++] = ItemPointerGetDatum(&itup->t_tid);
ItemPointerGetOffsetNumberNoCheck(&itup->t_tid)); values[j++] = Int32GetDatum((int) IndexTupleSize(itup));
values[j++] = psprintf("%d", (int) IndexTupleSize(itup)); values[j++] = BoolGetDatum(IndexTupleHasNulls(itup));
values[j++] = psprintf("%c", IndexTupleHasNulls(itup) ? 't' : 'f'); values[j++] = BoolGetDatum(IndexTupleHasVarwidths(itup));
values[j++] = psprintf("%c", IndexTupleHasVarwidths(itup) ? 't' : 'f');
ptr = (char *) itup + IndexInfoFindDataOffset(itup->t_info); ptr = (char *) itup + IndexInfoFindDataOffset(itup->t_info);
dlen = IndexTupleSize(itup) - IndexInfoFindDataOffset(itup->t_info); dlen = IndexTupleSize(itup) - IndexInfoFindDataOffset(itup->t_info);
/*
* Make sure that "data" column does not include posting list or pivot
* tuple representation of heap TID(s).
*
* Note: BTreeTupleIsPivot() won't work reliably on !heapkeyspace indexes
* (those built before BTREE_VERSION 4), but we have no way of determining
* if this page came from a !heapkeyspace index. We may only have a bytea
* nbtree page image to go on, so in general there is no metapage that we
* can check.
*
* That's okay here because BTreeTupleIsPivot() can only return false for
* a !heapkeyspace pivot, never true for a !heapkeyspace non-pivot. Since
* heap TID isn't part of the keyspace in a !heapkeyspace index anyway,
* there cannot possibly be a pivot tuple heap TID representation that we
* fail to make an adjustment for. A !heapkeyspace index can have
* BTreeTupleIsPivot() return true (due to things like suffix truncation
* for INCLUDE indexes in Postgres v11), but when that happens
* BTreeTupleGetHeapTID() can be trusted to work reliably (i.e. return
* NULL).
*
* Note: BTreeTupleIsPosting() always works reliably, even with
* !heapkeyspace indexes.
*/
if (BTreeTupleIsPosting(itup))
dlen -= IndexTupleSize(itup) - BTreeTupleGetPostingOffset(itup);
else if (BTreeTupleIsPivot(itup) && BTreeTupleGetHeapTID(itup) != NULL)
dlen -= MAXALIGN(sizeof(ItemPointerData));
if (dlen < 0 || dlen > INDEX_SIZE_MASK)
elog(ERROR, "invalid tuple length %d for tuple at offset number %u",
dlen, offset);
dump = palloc0(dlen * 3 + 1); dump = palloc0(dlen * 3 + 1);
values[j] = dump; datacstring = dump;
for (off = 0; off < dlen; off++) for (off = 0; off < dlen; off++)
{ {
if (off > 0) if (off > 0)
...@@ -291,8 +337,62 @@ bt_page_print_tuples(FuncCallContext *fctx, Page page, OffsetNumber offset) ...@@ -291,8 +337,62 @@ bt_page_print_tuples(FuncCallContext *fctx, Page page, OffsetNumber offset)
sprintf(dump, "%02x", *(ptr + off) & 0xff); sprintf(dump, "%02x", *(ptr + off) & 0xff);
dump += 2; dump += 2;
} }
values[j++] = CStringGetTextDatum(datacstring);
pfree(datacstring);
/*
* We need to work around the BTreeTupleIsPivot() !heapkeyspace limitation
* again. Deduce whether or not tuple must be a pivot tuple based on
* whether or not the page is a leaf page, as well as the page offset
* number of the tuple.
*/
ispivottuple = (!leafpage || (!rightmost && offset == P_HIKEY));
/* LP_DEAD bit can never be set for pivot tuples, so show a NULL there */
if (!ispivottuple)
values[j++] = BoolGetDatum(ItemIdIsDead(id));
else
{
Assert(!ItemIdIsDead(id));
nulls[j++] = true;
}
htid = BTreeTupleGetHeapTID(itup);
if (ispivottuple && !BTreeTupleIsPivot(itup))
{
/* Don't show bogus heap TID in !heapkeyspace pivot tuple */
htid = NULL;
}
if (htid)
values[j++] = ItemPointerGetDatum(htid);
else
nulls[j++] = true;
if (BTreeTupleIsPosting(itup))
{
/* Build an array of item pointers */
ItemPointer tids;
Datum *tids_datum;
int nposting;
tids = BTreeTupleGetPosting(itup);
nposting = BTreeTupleGetNPosting(itup);
tids_datum = (Datum *) palloc(nposting * sizeof(Datum));
for (int i = 0; i < nposting; i++)
tids_datum[i] = ItemPointerGetDatum(&tids[i]);
values[j++] = PointerGetDatum(construct_array(tids_datum,
nposting,
TIDOID,
sizeof(ItemPointerData),
false, 's'));
pfree(tids_datum);
}
else
nulls[j++] = true;
tuple = BuildTupleFromCStrings(fctx->attinmeta, values); /* Build and return the result tuple */
tuple = heap_form_tuple(uargs->tupd, values, nulls);
return HeapTupleGetDatum(tuple); return HeapTupleGetDatum(tuple);
} }
...@@ -378,12 +478,15 @@ bt_page_items(PG_FUNCTION_ARGS) ...@@ -378,12 +478,15 @@ bt_page_items(PG_FUNCTION_ARGS)
elog(NOTICE, "page is deleted"); elog(NOTICE, "page is deleted");
fctx->max_calls = PageGetMaxOffsetNumber(uargs->page); fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
uargs->leafpage = P_ISLEAF(opaque);
uargs->rightmost = P_RIGHTMOST(opaque);
/* Build a tuple descriptor for our result type */ /* Build a tuple descriptor for our result type */
if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE) if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
elog(ERROR, "return type must be a row type"); elog(ERROR, "return type must be a row type");
tupleDesc = BlessTupleDesc(tupleDesc);
fctx->attinmeta = TupleDescGetAttInMetadata(tupleDesc); uargs->tupd = tupleDesc;
fctx->user_fctx = uargs; fctx->user_fctx = uargs;
...@@ -395,7 +498,7 @@ bt_page_items(PG_FUNCTION_ARGS) ...@@ -395,7 +498,7 @@ bt_page_items(PG_FUNCTION_ARGS)
if (fctx->call_cntr < fctx->max_calls) if (fctx->call_cntr < fctx->max_calls)
{ {
result = bt_page_print_tuples(fctx, uargs->page, uargs->offset); result = bt_page_print_tuples(fctx, uargs);
uargs->offset++; uargs->offset++;
SRF_RETURN_NEXT(fctx, result); SRF_RETURN_NEXT(fctx, result);
} }
...@@ -463,12 +566,15 @@ bt_page_items_bytea(PG_FUNCTION_ARGS) ...@@ -463,12 +566,15 @@ bt_page_items_bytea(PG_FUNCTION_ARGS)
elog(NOTICE, "page is deleted"); elog(NOTICE, "page is deleted");
fctx->max_calls = PageGetMaxOffsetNumber(uargs->page); fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
uargs->leafpage = P_ISLEAF(opaque);
uargs->rightmost = P_RIGHTMOST(opaque);
/* Build a tuple descriptor for our result type */ /* Build a tuple descriptor for our result type */
if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE) if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
elog(ERROR, "return type must be a row type"); elog(ERROR, "return type must be a row type");
tupleDesc = BlessTupleDesc(tupleDesc);
fctx->attinmeta = TupleDescGetAttInMetadata(tupleDesc); uargs->tupd = tupleDesc;
fctx->user_fctx = uargs; fctx->user_fctx = uargs;
...@@ -480,7 +586,7 @@ bt_page_items_bytea(PG_FUNCTION_ARGS) ...@@ -480,7 +586,7 @@ bt_page_items_bytea(PG_FUNCTION_ARGS)
if (fctx->call_cntr < fctx->max_calls) if (fctx->call_cntr < fctx->max_calls)
{ {
result = bt_page_print_tuples(fctx, uargs->page, uargs->offset); result = bt_page_print_tuples(fctx, uargs);
uargs->offset++; uargs->offset++;
SRF_RETURN_NEXT(fctx, result); SRF_RETURN_NEXT(fctx, result);
} }
...@@ -510,7 +616,7 @@ bt_metap(PG_FUNCTION_ARGS) ...@@ -510,7 +616,7 @@ bt_metap(PG_FUNCTION_ARGS)
BTMetaPageData *metad; BTMetaPageData *metad;
TupleDesc tupleDesc; TupleDesc tupleDesc;
int j; int j;
char *values[8]; char *values[9];
Buffer buffer; Buffer buffer;
Page page; Page page;
HeapTuple tuple; HeapTuple tuple;
...@@ -557,17 +663,21 @@ bt_metap(PG_FUNCTION_ARGS) ...@@ -557,17 +663,21 @@ bt_metap(PG_FUNCTION_ARGS)
/* /*
* Get values of extended metadata if available, use default values * Get values of extended metadata if available, use default values
* otherwise. * otherwise. Note that we rely on the assumption that btm_allequalimage
* is initialized to zero with indexes that were built on versions prior
* to Postgres 13 (just like _bt_metaversion()).
*/ */
if (metad->btm_version >= BTREE_NOVAC_VERSION) if (metad->btm_version >= BTREE_NOVAC_VERSION)
{ {
values[j++] = psprintf("%u", metad->btm_oldest_btpo_xact); values[j++] = psprintf("%u", metad->btm_oldest_btpo_xact);
values[j++] = psprintf("%f", metad->btm_last_cleanup_num_heap_tuples); values[j++] = psprintf("%f", metad->btm_last_cleanup_num_heap_tuples);
values[j++] = metad->btm_allequalimage ? "t" : "f";
} }
else else
{ {
values[j++] = "0"; values[j++] = "0";
values[j++] = "-1"; values[j++] = "-1";
values[j++] = "f";
} }
tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc), tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
......
...@@ -12,6 +12,7 @@ fastroot | 1 ...@@ -12,6 +12,7 @@ fastroot | 1
fastlevel | 0 fastlevel | 0
oldest_xact | 0 oldest_xact | 0
last_cleanup_num_tuples | -1 last_cleanup_num_tuples | -1
allequalimage | t
SELECT * FROM bt_page_stats('test1_a_idx', 0); SELECT * FROM bt_page_stats('test1_a_idx', 0);
ERROR: block 0 is a meta page ERROR: block 0 is a meta page
...@@ -41,6 +42,9 @@ itemlen | 16 ...@@ -41,6 +42,9 @@ itemlen | 16
nulls | f nulls | f
vars | f vars | f
data | 01 00 00 00 00 00 00 01 data | 01 00 00 00 00 00 00 01
dead | f
htid | (0,1)
tids |
SELECT * FROM bt_page_items('test1_a_idx', 2); SELECT * FROM bt_page_items('test1_a_idx', 2);
ERROR: block number out of range ERROR: block number out of range
...@@ -54,6 +58,9 @@ itemlen | 16 ...@@ -54,6 +58,9 @@ itemlen | 16
nulls | f nulls | f
vars | f vars | f
data | 01 00 00 00 00 00 00 01 data | 01 00 00 00 00 00 00 01
dead | f
htid | (0,1)
tids |
SELECT * FROM bt_page_items(get_raw_page('test1_a_idx', 2)); SELECT * FROM bt_page_items(get_raw_page('test1_a_idx', 2));
ERROR: block number 2 is out of range for relation "test1_a_idx" ERROR: block number 2 is out of range for relation "test1_a_idx"
......
...@@ -14,3 +14,56 @@ CREATE FUNCTION heap_tuple_infomask_flags( ...@@ -14,3 +14,56 @@ CREATE FUNCTION heap_tuple_infomask_flags(
RETURNS record RETURNS record
AS 'MODULE_PATHNAME', 'heap_tuple_infomask_flags' AS 'MODULE_PATHNAME', 'heap_tuple_infomask_flags'
LANGUAGE C STRICT PARALLEL SAFE; LANGUAGE C STRICT PARALLEL SAFE;
--
-- bt_metap()
--
-- Redefine bt_metap(text) so that its result row carries a ninth column,
-- allequalimage, reporting the B-tree metapage's allequalimage field.
-- The C implementation reports 'f' for metapages whose version predates
-- the extended metadata fields.
--
-- The existing function is dropped and created afresh rather than
-- replaced in place: PostgreSQL does not allow CREATE OR REPLACE FUNCTION
-- to change the set of OUT parameters of an existing function.
--
DROP FUNCTION bt_metap(text);
CREATE FUNCTION bt_metap(IN relname text,
OUT magic int4,
OUT version int4,
OUT root int4,
OUT level int4,
OUT fastroot int4,
OUT fastlevel int4,
OUT oldest_xact int4,
OUT last_cleanup_num_tuples real,
-- New column: the metapage's allequalimage flag.
OUT allequalimage boolean)
AS 'MODULE_PATHNAME', 'bt_metap'
LANGUAGE C STRICT PARALLEL SAFE;
--
-- bt_page_items(text, int4)
--
-- Redefine the relation-name/block-number variant of bt_page_items() with
-- three additional result columns:
--   dead - LP_DEAD bit of the item; NULL for pivot tuples
--   htid - a single heap TID for the tuple (NULL when there is none)
--   tids - TIDs from the tuple's posting list; NULL when the tuple is not
--          a posting list tuple
--
-- The existing function is dropped and created afresh rather than
-- replaced in place: PostgreSQL does not allow CREATE OR REPLACE FUNCTION
-- to change the set of OUT parameters of an existing function.
--
-- NOTE(review): itemoffset and itemlen are declared smallint here; confirm
-- that the C implementation builds both with a 16-bit Datum constructor
-- (Int16GetDatum), not DatumGetInt16() or Int32GetDatum().
--
DROP FUNCTION bt_page_items(text, int4);
CREATE FUNCTION bt_page_items(IN relname text, IN blkno int4,
OUT itemoffset smallint,
OUT ctid tid,
OUT itemlen smallint,
OUT nulls bool,
OUT vars bool,
OUT data text,
-- Columns added for nbtree deduplication support.
OUT dead boolean,
OUT htid tid,
OUT tids tid[])
RETURNS SETOF record
AS 'MODULE_PATHNAME', 'bt_page_items'
LANGUAGE C STRICT PARALLEL SAFE;
--
-- bt_page_items(bytea)
--
-- Redefine the raw-page-image variant of bt_page_items() (C symbol
-- bt_page_items_bytea) with three additional result columns:
--   dead - LP_DEAD bit of the item; NULL for pivot tuples
--   htid - a single heap TID for the tuple (NULL when there is none)
--   tids - TIDs from the tuple's posting list; NULL when the tuple is not
--          a posting list tuple
--
-- The existing function is dropped and created afresh rather than
-- replaced in place: PostgreSQL does not allow CREATE OR REPLACE FUNCTION
-- to change the set of OUT parameters of an existing function.
--
DROP FUNCTION bt_page_items(bytea);
CREATE FUNCTION bt_page_items(IN page bytea,
OUT itemoffset smallint,
OUT ctid tid,
OUT itemlen smallint,
OUT nulls bool,
OUT vars bool,
OUT data text,
-- Columns added for nbtree deduplication support.
OUT dead boolean,
OUT htid tid,
OUT tids tid[])
RETURNS SETOF record
AS 'MODULE_PATHNAME', 'bt_page_items_bytea'
LANGUAGE C STRICT PARALLEL SAFE;
...@@ -300,13 +300,14 @@ test=# SELECT t_ctid, raw_flags, combined_flags ...@@ -300,13 +300,14 @@ test=# SELECT t_ctid, raw_flags, combined_flags
test=# SELECT * FROM bt_metap('pg_cast_oid_index'); test=# SELECT * FROM bt_metap('pg_cast_oid_index');
-[ RECORD 1 ]-----------+------- -[ RECORD 1 ]-----------+-------
magic | 340322 magic | 340322
version | 3 version | 4
root | 1 root | 1
level | 0 level | 0
fastroot | 1 fastroot | 1
fastlevel | 0 fastlevel | 0
oldest_xact | 582 oldest_xact | 582
last_cleanup_num_tuples | 1000 last_cleanup_num_tuples | 1000
allequalimage | f
</screen> </screen>
</para> </para>
</listitem> </listitem>
...@@ -329,11 +330,11 @@ test=# SELECT * FROM bt_page_stats('pg_cast_oid_index', 1); ...@@ -329,11 +330,11 @@ test=# SELECT * FROM bt_page_stats('pg_cast_oid_index', 1);
-[ RECORD 1 ]-+----- -[ RECORD 1 ]-+-----
blkno | 1 blkno | 1
type | l type | l
live_items | 256 live_items | 224
dead_items | 0 dead_items | 0
avg_item_size | 12 avg_item_size | 16
page_size | 8192 page_size | 8192
free_size | 4056 free_size | 3668
btpo_prev | 0 btpo_prev | 0
btpo_next | 0 btpo_next | 0
btpo | 0 btpo | 0
...@@ -356,33 +357,75 @@ btpo_flags | 3 ...@@ -356,33 +357,75 @@ btpo_flags | 3
<function>bt_page_items</function> returns detailed information about <function>bt_page_items</function> returns detailed information about
all of the items on a B-tree index page. For example: all of the items on a B-tree index page. For example:
<screen> <screen>
test=# SELECT * FROM bt_page_items('pg_cast_oid_index', 1); test=# SELECT itemoffset, ctid, itemlen, nulls, vars, data, dead, htid, tids[0:2] AS some_tids
itemoffset | ctid | itemlen | nulls | vars | data FROM bt_page_items(get_raw_page('tenk2_hundred', 5));
------------+---------+---------+-------+------+------------- itemoffset | ctid | itemlen | nulls | vars | data | dead | htid | some_tids
1 | (0,1) | 12 | f | f | 23 27 00 00 ------------+-----------+---------+-------+------+-------------------------+------+--------+---------------------
2 | (0,2) | 12 | f | f | 24 27 00 00 1 | (16,1) | 16 | f | f | 30 00 00 00 00 00 00 00 | | |
3 | (0,3) | 12 | f | f | 25 27 00 00 2 | (16,8292) | 616 | f | f | 24 00 00 00 00 00 00 00 | f | (1,6) | {"(1,6)","(10,22)"}
4 | (0,4) | 12 | f | f | 26 27 00 00 3 | (16,8292) | 616 | f | f | 25 00 00 00 00 00 00 00 | f | (1,18) | {"(1,18)","(4,22)"}
5 | (0,5) | 12 | f | f | 27 27 00 00 4 | (16,8292) | 616 | f | f | 26 00 00 00 00 00 00 00 | f | (4,18) | {"(4,18)","(6,17)"}
6 | (0,6) | 12 | f | f | 28 27 00 00 5 | (16,8292) | 616 | f | f | 27 00 00 00 00 00 00 00 | f | (1,2) | {"(1,2)","(1,19)"}
7 | (0,7) | 12 | f | f | 29 27 00 00 6 | (16,8292) | 616 | f | f | 28 00 00 00 00 00 00 00 | f | (2,24) | {"(2,24)","(4,11)"}
8 | (0,8) | 12 | f | f | 2a 27 00 00 7 | (16,8292) | 616 | f | f | 29 00 00 00 00 00 00 00 | f | (2,17) | {"(2,17)","(11,2)"}
8 | (16,8292) | 616 | f | f | 2a 00 00 00 00 00 00 00 | f | (0,25) | {"(0,25)","(3,20)"}
9 | (16,8292) | 616 | f | f | 2b 00 00 00 00 00 00 00 | f | (0,10) | {"(0,10)","(0,14)"}
10 | (16,8292) | 616 | f | f | 2c 00 00 00 00 00 00 00 | f | (1,3) | {"(1,3)","(3,9)"}
11 | (16,8292) | 616 | f | f | 2d 00 00 00 00 00 00 00 | f | (6,28) | {"(6,28)","(11,1)"}
12 | (16,8292) | 616 | f | f | 2e 00 00 00 00 00 00 00 | f | (0,27) | {"(0,27)","(1,13)"}
13 | (16,8292) | 616 | f | f | 2f 00 00 00 00 00 00 00 | f | (4,17) | {"(4,17)","(4,21)"}
(13 rows)
</screen> </screen>
In a B-tree leaf page, <structfield>ctid</structfield> points to a heap tuple. This is a B-tree leaf page. All tuples that point to the table
In an internal page, the block number part of <structfield>ctid</structfield> happen to be posting list tuples (all of which store a total of
points to another page in the index itself, while the offset part 100 6 byte TIDs). There is also a <quote>high key</quote> tuple
(the second number) is ignored and is usually 1. at <literal>itemoffset</literal> number 1.
<structfield>ctid</structfield> is used to store encoded
information about each tuple in this example, though leaf page
tuples often store a heap TID directly in the
<structfield>ctid</structfield> field instead.
<structfield>tids</structfield> is the list of TIDs stored as a
posting list.
</para>
<para>
In an internal page (not shown), the block number part of
<structfield>ctid</structfield> is a <quote>downlink</quote>,
which is a block number of another page in the index itself.
The offset part (the second number) of
<structfield>ctid</structfield> stores encoded information about
the tuple, such as the number of columns present (suffix
truncation may have removed unneeded suffix columns). Truncated
columns are treated as having the value <quote>minus
infinity</quote>.
</para>
<para>
<structfield>htid</structfield> shows a heap TID for the tuple,
regardless of the underlying tuple representation. This value
may match <structfield>ctid</structfield>, or may be decoded
from the alternative representations used by posting list tuples
and tuples from internal pages. Tuples in internal pages
usually have the implementation-level heap TID column truncated
away, which is represented as a NULL
<structfield>htid</structfield> value.
</para> </para>
<para> <para>
Note that the first item on any non-rightmost page (any page with Note that the first item on any non-rightmost page (any page with
a non-zero value in the <structfield>btpo_next</structfield> field) is the a non-zero value in the <structfield>btpo_next</structfield> field) is the
page's <quote>high key</quote>, meaning its <structfield>data</structfield> page's <quote>high key</quote>, meaning its <structfield>data</structfield>
serves as an upper bound on all items appearing on the page, while serves as an upper bound on all items appearing on the page, while
its <structfield>ctid</structfield> field is meaningless. Also, on non-leaf its <structfield>ctid</structfield> field does not point to
pages, the first real data item (the first item that is not a high another block. Also, on internal pages, the first real data
key) is a <quote>minus infinity</quote> item, with no actual value item (the first item that is not a high key) reliably has every
in its <structfield>data</structfield> field. Such an item does have a valid column truncated away, leaving no actual value in its
downlink in its <structfield>ctid</structfield> field, however. <structfield>data</structfield> field. Such an item does have a
valid downlink in its <structfield>ctid</structfield> field,
however.
</para>
<para>
For more details about the structure of B-tree indexes, see
<xref linkend="btree-structure"/>. For more details about
deduplication and posting lists, see <xref
linkend="btree-deduplication"/>.
</para> </para>
</listitem> </listitem>
</varlistentry> </varlistentry>
...@@ -402,17 +445,24 @@ test=# SELECT * FROM bt_page_items('pg_cast_oid_index', 1); ...@@ -402,17 +445,24 @@ test=# SELECT * FROM bt_page_items('pg_cast_oid_index', 1);
with <function>get_raw_page</function> should be passed as argument. So with <function>get_raw_page</function> should be passed as argument. So
the last example could also be rewritten like this: the last example could also be rewritten like this:
<screen> <screen>
test=# SELECT * FROM bt_page_items(get_raw_page('pg_cast_oid_index', 1)); test=# SELECT itemoffset, ctid, itemlen, nulls, vars, data, dead, htid, tids[0:2] AS some_tids
itemoffset | ctid | itemlen | nulls | vars | data FROM bt_page_items(get_raw_page('tenk2_hundred', 5));
------------+---------+---------+-------+------+------------- itemoffset | ctid | itemlen | nulls | vars | data | dead | htid | some_tids
1 | (0,1) | 12 | f | f | 23 27 00 00 ------------+-----------+---------+-------+------+-------------------------+------+--------+---------------------
2 | (0,2) | 12 | f | f | 24 27 00 00 1 | (16,1) | 16 | f | f | 30 00 00 00 00 00 00 00 | | |
3 | (0,3) | 12 | f | f | 25 27 00 00 2 | (16,8292) | 616 | f | f | 24 00 00 00 00 00 00 00 | f | (1,6) | {"(1,6)","(10,22)"}
4 | (0,4) | 12 | f | f | 26 27 00 00 3 | (16,8292) | 616 | f | f | 25 00 00 00 00 00 00 00 | f | (1,18) | {"(1,18)","(4,22)"}
5 | (0,5) | 12 | f | f | 27 27 00 00 4 | (16,8292) | 616 | f | f | 26 00 00 00 00 00 00 00 | f | (4,18) | {"(4,18)","(6,17)"}
6 | (0,6) | 12 | f | f | 28 27 00 00 5 | (16,8292) | 616 | f | f | 27 00 00 00 00 00 00 00 | f | (1,2) | {"(1,2)","(1,19)"}
7 | (0,7) | 12 | f | f | 29 27 00 00 6 | (16,8292) | 616 | f | f | 28 00 00 00 00 00 00 00 | f | (2,24) | {"(2,24)","(4,11)"}
8 | (0,8) | 12 | f | f | 2a 27 00 00 7 | (16,8292) | 616 | f | f | 29 00 00 00 00 00 00 00 | f | (2,17) | {"(2,17)","(11,2)"}
8 | (16,8292) | 616 | f | f | 2a 00 00 00 00 00 00 00 | f | (0,25) | {"(0,25)","(3,20)"}
9 | (16,8292) | 616 | f | f | 2b 00 00 00 00 00 00 00 | f | (0,10) | {"(0,10)","(0,14)"}
10 | (16,8292) | 616 | f | f | 2c 00 00 00 00 00 00 00 | f | (1,3) | {"(1,3)","(3,9)"}
11 | (16,8292) | 616 | f | f | 2d 00 00 00 00 00 00 00 | f | (6,28) | {"(6,28)","(11,1)"}
12 | (16,8292) | 616 | f | f | 2e 00 00 00 00 00 00 00 | f | (0,27) | {"(0,27)","(1,13)"}
13 | (16,8292) | 616 | f | f | 2f 00 00 00 00 00 00 00 | f | (4,17) | {"(4,17)","(4,21)"}
(13 rows)
</screen> </screen>
All the other details are the same as explained in the previous item. All the other details are the same as explained in the previous item.
</para> </para>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment