Commit 9563d5b5 authored by Tom Lane's avatar Tom Lane

Add regression test case exercising the sorting path for hash index build.

We've broken this code path at least twice in the past, so it's prudent
to have a test case that covers it.  To allow exercising the code path
without creating a very large (and slow to run) test case, redefine the
sort threshold to be bounded by maintenance_work_mem as well as the number
of available buffers.  While at it, fix an ancient oversight that when
building a temp index, the number of available buffers is not NBuffers but
NLocBuffer.  Also, if assertions are enabled, apply a direct test that the
sort actually does return the tuples in the expected order.

Peter Geoghegan

Patch: <CAM3SWZTBAo4hjbBd780+MrOKiKp_TMo1N3A0Rw9_im8gbD7fQA@mail.gmail.com>
parent 27814890
......@@ -22,6 +22,7 @@
#include "access/relscan.h"
#include "catalog/index.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "optimizer/plancat.h"
#include "utils/index_selfuncs.h"
#include "utils/rel.h"
......@@ -97,6 +98,7 @@ hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
double reltuples;
double allvisfrac;
uint32 num_buckets;
long sort_threshold;
HashBuildState buildstate;
/*
......@@ -120,12 +122,24 @@ hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
* then we'll thrash horribly. To prevent that scenario, we can sort the
* tuples by (expected) bucket number. However, such a sort is useless
* overhead when the index does fit in RAM. We choose to sort if the
* initial index size exceeds NBuffers.
* initial index size exceeds maintenance_work_mem, or the number of
* buffers usable for the index, whichever is less. (Limiting by the
* number of buffers should reduce thrashing between PG buffers and kernel
* buffers, which seems useful even if no physical I/O results. Limiting
* by maintenance_work_mem is useful to allow easy testing of the sort
* code path, and may be useful to DBAs as an additional control knob.)
*
* NOTE: this test will need adjustment if a bucket is ever different from
* one page.
* one page. Also, "initial index size" accounting does not include the
* metapage, nor the first bitmap page.
*/
if (num_buckets >= (uint32) NBuffers)
sort_threshold = (maintenance_work_mem * 1024L) / BLCKSZ;
if (index->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
sort_threshold = Min(sort_threshold, NBuffers);
else
sort_threshold = Min(sort_threshold, NLocBuffer);
if (num_buckets >= (uint32) sort_threshold)
buildstate.spool = _h_spoolinit(heap, index, num_buckets);
else
buildstate.spool = NULL;
......
......@@ -37,6 +37,7 @@ struct HSpool
{
Tuplesortstate *sortstate; /* state data for tuplesort.c */
Relation index;
uint32 hash_mask; /* bitmask for hash codes */
};
......@@ -47,7 +48,6 @@ HSpool *
_h_spoolinit(Relation heap, Relation index, uint32 num_buckets)
{
HSpool *hspool = (HSpool *) palloc0(sizeof(HSpool));
uint32 hash_mask;
hspool->index = index;
......@@ -60,7 +60,7 @@ _h_spoolinit(Relation heap, Relation index, uint32 num_buckets)
* we could just compute num_buckets - 1. We prefer not to assume that
* here, though.
*/
hash_mask = (((uint32) 1) << _hash_log2(num_buckets)) - 1;
hspool->hash_mask = (((uint32) 1) << _hash_log2(num_buckets)) - 1;
/*
* We size the sort area as maintenance_work_mem rather than work_mem to
......@@ -69,7 +69,7 @@ _h_spoolinit(Relation heap, Relation index, uint32 num_buckets)
*/
hspool->sortstate = tuplesort_begin_index_hash(heap,
index,
hash_mask,
hspool->hash_mask,
maintenance_work_mem,
false);
......@@ -105,12 +105,29 @@ _h_indexbuild(HSpool *hspool)
{
IndexTuple itup;
bool should_free;
#ifdef USE_ASSERT_CHECKING
uint32 hashkey = 0;
#endif
tuplesort_performsort(hspool->sortstate);
while ((itup = tuplesort_getindextuple(hspool->sortstate,
true, &should_free)) != NULL)
{
/*
* Technically, it isn't critical that hash keys be found in sorted
* order, since this sorting is only used to increase locality of
* access as a performance optimization. It still seems like a good
* idea to test tuplesort.c's handling of hash index tuple sorts
* through an assertion, though.
*/
#ifdef USE_ASSERT_CHECKING
uint32 lasthashkey = hashkey;
hashkey = _hash_get_indextuple_hashkey(itup) & hspool->hash_mask;
Assert(hashkey >= lasthashkey);
#endif
_hash_doinsert(hspool->index, itup);
if (should_free)
pfree(itup);
......
......@@ -2346,6 +2346,13 @@ CREATE UNLOGGED TABLE unlogged_hash_table (id int4);
CREATE INDEX unlogged_hash_index ON unlogged_hash_table USING hash (id int4_ops);
DROP TABLE unlogged_hash_table;
-- CREATE INDEX hash_ovfl_index ON hash_ovfl_heap USING hash (x int4_ops);
-- Test hash index build tuplesorting. Force hash tuplesort using low
-- maintenance_work_mem setting and fillfactor:
SET maintenance_work_mem = '1MB';
CREATE INDEX hash_tuplesort_idx ON tenk1 USING hash (stringu1 name_ops) WITH (fillfactor = 10);
WARNING: hash indexes are not WAL-logged and their use is discouraged
DROP INDEX hash_tuplesort_idx;
RESET maintenance_work_mem;
--
-- Test functional index
--
......
......@@ -690,6 +690,13 @@ DROP TABLE unlogged_hash_table;
-- CREATE INDEX hash_ovfl_index ON hash_ovfl_heap USING hash (x int4_ops);
-- Test hash index build tuplesorting. Force hash tuplesort using low
-- maintenance_work_mem setting and fillfactor:
SET maintenance_work_mem = '1MB';
CREATE INDEX hash_tuplesort_idx ON tenk1 USING hash (stringu1 name_ops) WITH (fillfactor = 10);
DROP INDEX hash_tuplesort_idx;
RESET maintenance_work_mem;
--
-- Test functional index
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment