Commit df816f6a authored by Heikki Linnakangas

Add IntegerSet, to hold large sets of 64-bit ints efficiently.

The set is implemented as a B-tree, with a compact representation at leaf
items, using Simple-8b algorithm, so that clusters of nearby values use
less memory.

The IntegerSet isn't used for anything yet, aside from the test code, but
we have two patches in the works that would benefit from this: A patch to
allow GiST vacuum to delete empty pages, and a patch to reduce heap
VACUUM's memory usage, by storing the list of dead TIDs more efficiently
and lifting the 1 GB limit on its size.

This includes a unit test module, in src/test/modules/test_integerset.
It can be used to verify correctness, as a regression test, but if you run
it manually, it can also print memory usage and execution time of some of
the tests.

Author: Heikki Linnakangas, Andrey Borodin
Reviewed-by: Julien Rouhaud
Discussion: https://www.postgresql.org/message-id/b5e82599-1966-5783-733c-1a947ddb729f@iki.fi
parent 5e1963fb
......@@ -13,6 +13,6 @@ top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
OBJS = binaryheap.o bipartite_match.o bloomfilter.o dshash.o hyperloglog.o \
ilist.o knapsack.o pairingheap.o rbtree.o stringinfo.o
ilist.o integerset.o knapsack.o pairingheap.o rbtree.o stringinfo.o
include $(top_srcdir)/src/backend/common.mk
......@@ -13,6 +13,8 @@ hyperloglog.c - a streaming cardinality estimator
ilist.c - single and double-linked lists
integerset.c - a data structure for holding a large set of integers
knapsack.c - knapsack problem solver
pairingheap.c - a pairing heap
......
This diff is collapsed.
/*
* integerset.h
* In-memory data structure to hold a large set of integers efficiently
*
* Portions Copyright (c) 2012-2019, PostgreSQL Global Development Group
*
* src/include/lib/integerset.h
*/
#ifndef INTEGERSET_H
#define INTEGERSET_H
/*
 * Public interface of the integer set.
 *
 * No #include appears in this header as shown, so the uint64 and bool types
 * used below must already be defined by whatever the including file pulled
 * in first.
 */
/*
 * Opaque set type: only the struct tag is declared here, so callers can hold
 * and pass pointers to an IntegerSet but cannot access its fields.
 */
typedef struct IntegerSet IntegerSet;
/*
 * Create a set and return a pointer to it.
 * NOTE(review): presumably the new set is empty; how and where its memory is
 * allocated is not visible from this header -- confirm in integerset.c.
 */
extern IntegerSet *intset_create(void);
/*
 * Add the 64-bit unsigned value x to the set.
 * NOTE(review): any ordering requirement on added values, and the handling
 * of duplicates, are not visible here -- confirm in integerset.c.
 */
extern void intset_add_member(IntegerSet *intset, uint64 x);
/*
 * Membership test for x; the result is a bool.
 * NOTE(review): presumably true means x is in the set -- confirm.
 */
extern bool intset_is_member(IntegerSet *intset, uint64 x);
/*
 * Report a uint64 count for the set.
 * NOTE(review): presumably the number of integers currently stored -- confirm.
 */
extern uint64 intset_num_entries(IntegerSet *intset);
/*
 * Report a uint64 memory figure for the set.
 * NOTE(review): presumably in bytes; what exactly is counted is not visible
 * here -- confirm in integerset.c.
 */
extern uint64 intset_memory_usage(IntegerSet *intset);
/*
 * Iteration interface.  Both functions take only the set pointer, so this
 * interface exposes no separate iterator object.
 * NOTE(review): presumably intset_begin_iterate() starts a scan and
 * intset_iterate_next() stores the next member in *next, returning false
 * once the scan is exhausted; iteration order is not visible here -- confirm.
 */
extern void intset_begin_iterate(IntegerSet *intset);
extern bool intset_iterate_next(IntegerSet *intset, uint64 *next);
#endif /* INTEGERSET_H */
......@@ -12,6 +12,7 @@ SUBDIRS = \
test_bloomfilter \
test_ddl_deparse \
test_extensions \
test_integerset \
test_parser \
test_pg_dump \
test_predtest \
......
# Generated subdirectories
/log/
/results/
/tmp_check/
# src/test/modules/test_integerset/Makefile
#
# Build description for the test_integerset module.  Per PGFILEDESC below, the
# module is test code for src/backend/lib/integerset.c.
MODULE_big = test_integerset
OBJS = test_integerset.o $(WIN32RES)
PGFILEDESC = "test_integerset - test code for src/backend/lib/integerset.c"
# Extension name, its SQL script, and the regression test to run.
EXTENSION = test_integerset
DATA = test_integerset--1.0.sql
REGRESS = test_integerset
# When USE_PGXS is defined, ask pg_config for the location of the PGXS
# makefile and include it.
ifdef USE_PGXS
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
# Otherwise, include Makefile.global and contrib-global.mk from the
# directories named by top_builddir and top_srcdir.
else
subdir = src/test/modules/test_integerset
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif
test_integerset contains unit tests for testing the integer set implementation,
in src/backend/lib/integerset.c
The tests verify the correctness of the implementation, but they can also be
used as a micro-benchmark: If you set the 'intset_tests_stats' flag in
test_integerset.c, the tests will print extra information about execution time
and memory usage.
CREATE EXTENSION test_integerset;
--
-- These tests don't produce any interesting output. We're checking that
-- the operations complete without crashing or hanging and that none of their
-- internal sanity tests fail. They print progress information as INFOs,
-- which are not interesting for automated tests, so suppress those.
--
SET client_min_messages = 'warning';
SELECT test_integerset();
test_integerset
-----------------
(1 row)
CREATE EXTENSION test_integerset;
--
-- These tests don't produce any interesting output. We're checking that
-- the operations complete without crashing or hanging and that none of their
-- internal sanity tests fail. They print progress information as INFOs,
-- which are not interesting for automated tests, so suppress those.
--
SET client_min_messages = 'warning';
SELECT test_integerset();
/* src/test/modules/test_integerset/test_integerset--1.0.sql */
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION test_integerset" to load this file. \quit
-- SQL-callable entry point for the tests: takes no arguments, returns void,
-- and is implemented in C in the library substituted for MODULE_PATHNAME.
CREATE FUNCTION test_integerset()
RETURNS pg_catalog.void STRICT
AS 'MODULE_PATHNAME' LANGUAGE C;
This diff is collapsed.
comment = 'Test code for integerset'
default_version = '1.0'
module_pathname = '$libdir/test_integerset'
relocatable = true
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment