Commit 2ab57e08 authored by Tom Lane's avatar Tom Lane

Rewrite the key-combination logic in GIN's keyGetItem() and scanGetItem()

routines to make them behave better in the presence of "lossy" index pointers.
The previous coding was outright incorrect for some cases, as recently
reported by Artur Dabrowski: scanGetItem would fail to return index entries in
cases where one index key had multiple exact pointers on the same page as
another key had a lossy pointer.  Also, keyGetItem was extremely inefficient
for cases where a single index key generates multiple "entry" streams, such as
an @@ operator with a multiple-clause tsquery.  The presence of a lossy page
pointer in any one stream defeated its ability to use the opclass
consistentFn, resulting in probing many heap pages that didn't really need to
be visited.  In Artur's example case, a query like
	WHERE tsvector @@ to_tsquery('a & b')
was about 50X slower than the theoretically equivalent
	WHERE tsvector @@ to_tsquery('a') AND tsvector @@ to_tsquery('b')
The way that I chose to fix this was to have GIN call the consistentFn
twice with both TRUE and FALSE values for the in-doubt entry stream,
returning a hit if either call produces TRUE, but not if they both return
FALSE.  The code handles this for the case of a single in-doubt entry stream,
but punts (falling back to the stupid behavior) if there's more than one lossy
reference to the same page.  The idea could be scaled up to deal with multiple
lossy references, but I think that would probably be wasted complexity.  At
least to judge by Artur's example, such cases don't occur often enough to be
worth trying to optimize.

Back-patch to 8.4.  8.3 did not have lossy GIN index pointers, so not
subject to these problems.
parent 8a4dc94c
This diff is collapsed.
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.26 2010/01/18 11:50:43 teodor Exp $ * $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.27 2010/07/31 00:30:54 tgl Exp $
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -51,7 +51,7 @@ fillScanKey(GinState *ginstate, GinScanKey key, OffsetNumber attnum, Datum query ...@@ -51,7 +51,7 @@ fillScanKey(GinState *ginstate, GinScanKey key, OffsetNumber attnum, Datum query
key->extra_data = extra_data; key->extra_data = extra_data;
key->query = query; key->query = query;
key->firstCall = TRUE; key->firstCall = TRUE;
ItemPointerSet(&(key->curItem), InvalidBlockNumber, InvalidOffsetNumber); ItemPointerSetMin(&key->curItem);
for (i = 0; i < nEntryValues; i++) for (i = 0; i < nEntryValues; i++)
{ {
...@@ -59,7 +59,8 @@ fillScanKey(GinState *ginstate, GinScanKey key, OffsetNumber attnum, Datum query ...@@ -59,7 +59,8 @@ fillScanKey(GinState *ginstate, GinScanKey key, OffsetNumber attnum, Datum query
key->scanEntry[i].entry = entryValues[i]; key->scanEntry[i].entry = entryValues[i];
key->scanEntry[i].attnum = attnum; key->scanEntry[i].attnum = attnum;
key->scanEntry[i].extra_data = (extra_data) ? extra_data[i] : NULL; key->scanEntry[i].extra_data = (extra_data) ? extra_data[i] : NULL;
ItemPointerSet(&(key->scanEntry[i].curItem), InvalidBlockNumber, InvalidOffsetNumber); ItemPointerSetMin(&key->scanEntry[i].curItem);
key->scanEntry[i].isFinished = FALSE;
key->scanEntry[i].offset = InvalidOffsetNumber; key->scanEntry[i].offset = InvalidOffsetNumber;
key->scanEntry[i].buffer = InvalidBuffer; key->scanEntry[i].buffer = InvalidBuffer;
key->scanEntry[i].partialMatch = NULL; key->scanEntry[i].partialMatch = NULL;
...@@ -100,14 +101,15 @@ resetScanKeys(GinScanKey keys, uint32 nkeys) ...@@ -100,14 +101,15 @@ resetScanKeys(GinScanKey keys, uint32 nkeys)
GinScanKey key = keys + i; GinScanKey key = keys + i;
key->firstCall = TRUE; key->firstCall = TRUE;
ItemPointerSet(&(key->curItem), InvalidBlockNumber, InvalidOffsetNumber); ItemPointerSetMin(&key->curItem);
for (j = 0; j < key->nentries; j++) for (j = 0; j < key->nentries; j++)
{ {
if (key->scanEntry[j].buffer != InvalidBuffer) if (key->scanEntry[j].buffer != InvalidBuffer)
ReleaseBuffer(key->scanEntry[i].buffer); ReleaseBuffer(key->scanEntry[i].buffer);
ItemPointerSet(&(key->scanEntry[j].curItem), InvalidBlockNumber, InvalidOffsetNumber); ItemPointerSetMin(&key->scanEntry[j].curItem);
key->scanEntry[j].isFinished = FALSE;
key->scanEntry[j].offset = InvalidOffsetNumber; key->scanEntry[j].offset = InvalidOffsetNumber;
key->scanEntry[j].buffer = InvalidBuffer; key->scanEntry[j].buffer = InvalidBuffer;
key->scanEntry[j].list = NULL; key->scanEntry[j].list = NULL;
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
* *
* Copyright (c) 2006-2010, PostgreSQL Global Development Group * Copyright (c) 2006-2010, PostgreSQL Global Development Group
* *
* $PostgreSQL: pgsql/src/include/access/gin.h,v 1.38 2010/02/26 02:01:20 momjian Exp $ * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.39 2010/07/31 00:30:54 tgl Exp $
*-------------------------------------------------------------------------- *--------------------------------------------------------------------------
*/ */
#ifndef GIN_H #ifndef GIN_H
...@@ -520,6 +520,8 @@ typedef struct GinScanKeyData ...@@ -520,6 +520,8 @@ typedef struct GinScanKeyData
OffsetNumber attnum; OffsetNumber attnum;
ItemPointerData curItem; ItemPointerData curItem;
bool recheckCurItem;
bool firstCall; bool firstCall;
bool isFinished; bool isFinished;
} GinScanKeyData; } GinScanKeyData;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment