Commit 40bede54 authored by Fujii Masao's avatar Fujii Masao

Move pg_lzcompress.c to src/common.

The metadata of PGLZ, represented by PGLZ_Header, is removed to make
the compression and decompression code independent of the backend-only
varlena facility. PGLZ_Header was used to store some metadata related to
the data being compressed, such as the raw length of the uncompressed
record and some varlena-related data. This made it unusable once PGLZ is
stored in src/common, because it contains some backend-only code paths for
the management of varlena structures. The APIs of PGLZ are reworked
at the same time to do only compression and decompression of buffers
without the metadata layer, simplifying their use for more general purposes.

On-disk format is preserved as well, so there is no incompatibility with
previous major versions of PostgreSQL for TOAST entries.

Exposing the compression and decompression APIs of pglz makes it possible
for extensions and contrib modules to use them. In particular, this commit
is required for the upcoming WAL compression feature, so that the WAL reader
facility can decompress the WAL data by using pglz_decompress.

Michael Paquier, reviewed by me.
parent 237795a7
...@@ -35,9 +35,9 @@ ...@@ -35,9 +35,9 @@
#include "access/tuptoaster.h" #include "access/tuptoaster.h"
#include "access/xact.h" #include "access/xact.h"
#include "catalog/catalog.h" #include "catalog/catalog.h"
#include "common/pg_lzcompress.h"
#include "miscadmin.h" #include "miscadmin.h"
#include "utils/fmgroids.h" #include "utils/fmgroids.h"
#include "utils/pg_lzcompress.h"
#include "utils/rel.h" #include "utils/rel.h"
#include "utils/typcache.h" #include "utils/typcache.h"
#include "utils/tqual.h" #include "utils/tqual.h"
...@@ -45,6 +45,26 @@ ...@@ -45,6 +45,26 @@
#undef TOAST_DEBUG #undef TOAST_DEBUG
/*
 * The information at the start of the compressed toast data.
 *
 * The compressed payload begins TOAST_COMPRESS_HDRSZ bytes after the start
 * of this structure (see TOAST_COMPRESS_RAWDATA).
 */
typedef struct toast_compress_header
{
int32 vl_len_; /* varlena header (do not touch directly!) */
int32 rawsize; /* size of the data before compression, excluding any
				 * varlena header; written by toast_compress_datum() and
				 * read back by toast_decompress_datum() */
} toast_compress_header;
/*
 * Utilities for manipulation of header information for compressed
 * toast entries.
 *
 * TOAST_COMPRESS_HDRSZ              size in bytes of toast_compress_header
 * TOAST_COMPRESS_RAWSIZE(ptr)       uncompressed size stored in the header
 * TOAST_COMPRESS_RAWDATA(ptr)       address of the first byte after the header
 * TOAST_COMPRESS_SET_RAWSIZE(ptr, len)  store len as the uncompressed size
 *
 * ptr must point at the start of the header.  Every macro argument is
 * parenthesized in the expansion: a cast binds tighter than binary
 * operators, so without the parentheses an argument such as "base + off"
 * would be cast first and then offset in units of the header struct.
 */
#define TOAST_COMPRESS_HDRSZ ((int32) sizeof(toast_compress_header))
#define TOAST_COMPRESS_RAWSIZE(ptr) \
	(((toast_compress_header *) (ptr))->rawsize)
#define TOAST_COMPRESS_RAWDATA(ptr) \
	(((char *) (ptr)) + TOAST_COMPRESS_HDRSZ)
#define TOAST_COMPRESS_SET_RAWSIZE(ptr, len) \
	(((toast_compress_header *) (ptr))->rawsize = (len))
static void toast_delete_datum(Relation rel, Datum value); static void toast_delete_datum(Relation rel, Datum value);
static Datum toast_save_datum(Relation rel, Datum value, static Datum toast_save_datum(Relation rel, Datum value,
struct varlena * oldexternal, int options); struct varlena * oldexternal, int options);
...@@ -53,6 +73,7 @@ static bool toastid_valueid_exists(Oid toastrelid, Oid valueid); ...@@ -53,6 +73,7 @@ static bool toastid_valueid_exists(Oid toastrelid, Oid valueid);
static struct varlena *toast_fetch_datum(struct varlena * attr); static struct varlena *toast_fetch_datum(struct varlena * attr);
static struct varlena *toast_fetch_datum_slice(struct varlena * attr, static struct varlena *toast_fetch_datum_slice(struct varlena * attr,
int32 sliceoffset, int32 length); int32 sliceoffset, int32 length);
static struct varlena *toast_decompress_datum(struct varlena * attr);
static int toast_open_indexes(Relation toastrel, static int toast_open_indexes(Relation toastrel,
LOCKMODE lock, LOCKMODE lock,
Relation **toastidxs, Relation **toastidxs,
...@@ -138,11 +159,8 @@ heap_tuple_untoast_attr(struct varlena * attr) ...@@ -138,11 +159,8 @@ heap_tuple_untoast_attr(struct varlena * attr)
/* If it's compressed, decompress it */ /* If it's compressed, decompress it */
if (VARATT_IS_COMPRESSED(attr)) if (VARATT_IS_COMPRESSED(attr))
{ {
PGLZ_Header *tmp = (PGLZ_Header *) attr; struct varlena *tmp = attr;
attr = toast_decompress_datum(tmp);
attr = (struct varlena *) palloc(PGLZ_RAW_SIZE(tmp) + VARHDRSZ);
SET_VARSIZE(attr, PGLZ_RAW_SIZE(tmp) + VARHDRSZ);
pglz_decompress(tmp, VARDATA(attr));
pfree(tmp); pfree(tmp);
} }
} }
...@@ -163,11 +181,7 @@ heap_tuple_untoast_attr(struct varlena * attr) ...@@ -163,11 +181,7 @@ heap_tuple_untoast_attr(struct varlena * attr)
/* /*
* This is a compressed value inside of the main tuple * This is a compressed value inside of the main tuple
*/ */
PGLZ_Header *tmp = (PGLZ_Header *) attr; attr = toast_decompress_datum(attr);
attr = (struct varlena *) palloc(PGLZ_RAW_SIZE(tmp) + VARHDRSZ);
SET_VARSIZE(attr, PGLZ_RAW_SIZE(tmp) + VARHDRSZ);
pglz_decompress(tmp, VARDATA(attr));
} }
else if (VARATT_IS_SHORT(attr)) else if (VARATT_IS_SHORT(attr))
{ {
...@@ -234,14 +248,10 @@ heap_tuple_untoast_attr_slice(struct varlena * attr, ...@@ -234,14 +248,10 @@ heap_tuple_untoast_attr_slice(struct varlena * attr,
if (VARATT_IS_COMPRESSED(preslice)) if (VARATT_IS_COMPRESSED(preslice))
{ {
PGLZ_Header *tmp = (PGLZ_Header *) preslice; struct varlena *tmp = preslice;
Size size = PGLZ_RAW_SIZE(tmp) + VARHDRSZ; preslice = toast_decompress_datum(tmp);
preslice = (struct varlena *) palloc(size);
SET_VARSIZE(preslice, size);
pglz_decompress(tmp, VARDATA(preslice));
if (tmp != (PGLZ_Header *) attr) if (tmp != attr)
pfree(tmp); pfree(tmp);
} }
...@@ -1228,6 +1238,7 @@ toast_compress_datum(Datum value) ...@@ -1228,6 +1238,7 @@ toast_compress_datum(Datum value)
{ {
struct varlena *tmp; struct varlena *tmp;
int32 valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value)); int32 valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
int32 len;
Assert(!VARATT_IS_EXTERNAL(DatumGetPointer(value))); Assert(!VARATT_IS_EXTERNAL(DatumGetPointer(value)));
Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value))); Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value)));
...@@ -1240,7 +1251,8 @@ toast_compress_datum(Datum value) ...@@ -1240,7 +1251,8 @@ toast_compress_datum(Datum value)
valsize > PGLZ_strategy_default->max_input_size) valsize > PGLZ_strategy_default->max_input_size)
return PointerGetDatum(NULL); return PointerGetDatum(NULL);
tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize)); tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize) +
TOAST_COMPRESS_HDRSZ);
/* /*
* We recheck the actual size even if pglz_compress() reports success, * We recheck the actual size even if pglz_compress() reports success,
...@@ -1252,10 +1264,15 @@ toast_compress_datum(Datum value) ...@@ -1252,10 +1264,15 @@ toast_compress_datum(Datum value)
* only one header byte and no padding if the value is short enough. So * only one header byte and no padding if the value is short enough. So
* we insist on a savings of more than 2 bytes to ensure we have a gain. * we insist on a savings of more than 2 bytes to ensure we have a gain.
*/ */
if (pglz_compress(VARDATA_ANY(DatumGetPointer(value)), valsize, len = pglz_compress(VARDATA_ANY(DatumGetPointer(value)),
(PGLZ_Header *) tmp, PGLZ_strategy_default) && valsize,
VARSIZE(tmp) < valsize - 2) TOAST_COMPRESS_RAWDATA(tmp),
PGLZ_strategy_default);
if (len >= 0 &&
len + TOAST_COMPRESS_HDRSZ < valsize - 2)
{ {
TOAST_COMPRESS_SET_RAWSIZE(tmp, valsize);
SET_VARSIZE_COMPRESSED(tmp, len + TOAST_COMPRESS_HDRSZ);
/* successful compression */ /* successful compression */
return PointerGetDatum(tmp); return PointerGetDatum(tmp);
} }
...@@ -2100,6 +2117,32 @@ toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length) ...@@ -2100,6 +2117,32 @@ toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length)
return result; return result;
} }
/* ----------
* toast_decompress_datum -
*
* Decompress a compressed version of a varlena datum
*/
static struct varlena *
toast_decompress_datum(struct varlena * attr)
{
struct varlena *result;
Assert(VARATT_IS_COMPRESSED(attr));
result = (struct varlena *)
palloc(TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ);
SET_VARSIZE(result, TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ);
if (pglz_decompress(TOAST_COMPRESS_RAWDATA(attr),
VARSIZE(attr) - TOAST_COMPRESS_HDRSZ,
VARDATA(result),
TOAST_COMPRESS_RAWSIZE(attr)) < 0)
elog(ERROR, "compressed data is corrupted");
return result;
}
/* ---------- /* ----------
* toast_open_indexes * toast_open_indexes
* *
......
...@@ -25,8 +25,8 @@ OBJS = acl.o arrayfuncs.o array_selfuncs.o array_typanalyze.o \ ...@@ -25,8 +25,8 @@ OBJS = acl.o arrayfuncs.o array_selfuncs.o array_typanalyze.o \
jsonfuncs.o like.o lockfuncs.o mac.o misc.o nabstime.o name.o \ jsonfuncs.o like.o lockfuncs.o mac.o misc.o nabstime.o name.o \
network.o network_gist.o network_selfuncs.o \ network.o network_gist.o network_selfuncs.o \
numeric.o numutils.o oid.o oracle_compat.o \ numeric.o numutils.o oid.o oracle_compat.o \
orderedsetaggs.o pg_lzcompress.o pg_locale.o pg_lsn.o \ orderedsetaggs.o pg_locale.o pg_lsn.o pgstatfuncs.o \
pgstatfuncs.o pseudotypes.o quote.o rangetypes.o rangetypes_gist.o \ pseudotypes.o quote.o rangetypes.o rangetypes_gist.o \
rangetypes_selfuncs.o rangetypes_spgist.o rangetypes_typanalyze.o \ rangetypes_selfuncs.o rangetypes_spgist.o rangetypes_typanalyze.o \
regexp.o regproc.o ri_triggers.o rowtypes.o ruleutils.o \ regexp.o regproc.o ri_triggers.o rowtypes.o ruleutils.o \
selfuncs.o tid.o timestamp.o trigfuncs.o \ selfuncs.o tid.o timestamp.o trigfuncs.o \
......
...@@ -23,7 +23,8 @@ include $(top_builddir)/src/Makefile.global ...@@ -23,7 +23,8 @@ include $(top_builddir)/src/Makefile.global
override CPPFLAGS := -DFRONTEND $(CPPFLAGS) override CPPFLAGS := -DFRONTEND $(CPPFLAGS)
LIBS += $(PTHREAD_LIBS) LIBS += $(PTHREAD_LIBS)
OBJS_COMMON = exec.o pgfnames.o psprintf.o relpath.o rmtree.o string.o username.o wait_error.o OBJS_COMMON = exec.o pg_lzcompress.o pgfnames.o psprintf.o relpath.o \
rmtree.o string.o username.o wait_error.o
OBJS_FRONTEND = $(OBJS_COMMON) fe_memutils.o OBJS_FRONTEND = $(OBJS_COMMON) fe_memutils.o
......
...@@ -8,8 +8,8 @@ ...@@ -8,8 +8,8 @@
* *
* Entry routines: * Entry routines:
* *
* bool * int32
* pglz_compress(const char *source, int32 slen, PGLZ_Header *dest, * pglz_compress(const char *source, int32 slen, char *dest,
* const PGLZ_Strategy *strategy); * const PGLZ_Strategy *strategy);
* *
* source is the input data to be compressed. * source is the input data to be compressed.
...@@ -23,44 +23,43 @@ ...@@ -23,44 +23,43 @@
* the compression algorithm. If NULL, the compiled * the compression algorithm. If NULL, the compiled
* in default strategy is used. * in default strategy is used.
* *
* The return value is TRUE if compression succeeded, * The return value is the number of bytes written in the
* FALSE if not; in the latter case the contents of dest * buffer dest, or -1 if compression fails; in the latter
* are undefined. * case the contents of dest are undefined.
* *
* void * int32
* pglz_decompress(const PGLZ_Header *source, char *dest) * pglz_decompress(const char *source, int32 slen, char *dest,
* int32 rawsize)
* *
* source is the compressed input. * source is the compressed input.
* *
* slen is the length of the compressed input.
*
* dest is the area where the uncompressed data will be * dest is the area where the uncompressed data will be
* written to. It is the callers responsibility to * written to. It is the callers responsibility to
* provide enough space. The required amount can be * provide enough space.
* obtained with the macro PGLZ_RAW_SIZE(source).
* *
* The data is written to buff exactly as it was handed * The data is written to buff exactly as it was handed
* to pglz_compress(). No terminating zero byte is added. * to pglz_compress(). No terminating zero byte is added.
* *
* The decompression algorithm and internal data format: * rawsize is the length of the uncompressed data.
* *
* PGLZ_Header is defined as * The return value is the number of bytes written in the
* buffer dest, or -1 if decompression fails.
* *
* typedef struct PGLZ_Header { * The decompression algorithm and internal data format:
* int32 vl_len_;
* int32 rawsize;
* }
* *
* The header is followed by the compressed data itself. * It is made with the compressed data itself.
* *
* The data representation is easiest explained by describing * The data representation is easiest explained by describing
* the process of decompression. * the process of decompression.
* *
* If VARSIZE(x) == rawsize + sizeof(PGLZ_Header), then the data * If compressed_size == rawsize, then the data
* is stored uncompressed as plain bytes. Thus, the decompressor * is stored uncompressed as plain bytes. Thus, the decompressor
* simply copies rawsize bytes from the location after the * simply copies rawsize bytes to the destination.
* header to the destination.
* *
* Otherwise the first byte after the header tells what to do * Otherwise the first byte tells what to do the next 8 times.
* the next 8 times. We call this the control byte. * We call this the control byte.
* *
* An unset bit in the control byte means, that one uncompressed * An unset bit in the control byte means, that one uncompressed
* byte follows, which is copied from input to output. * byte follows, which is copied from input to output.
...@@ -169,14 +168,18 @@ ...@@ -169,14 +168,18 @@
* *
* Copyright (c) 1999-2015, PostgreSQL Global Development Group * Copyright (c) 1999-2015, PostgreSQL Global Development Group
* *
* src/backend/utils/adt/pg_lzcompress.c * src/common/pg_lzcompress.c
* ---------- * ----------
*/ */
#ifndef FRONTEND
#include "postgres.h" #include "postgres.h"
#else
#include "postgres_fe.h"
#endif
#include <limits.h> #include <limits.h>
#include "utils/pg_lzcompress.h" #include "common/pg_lzcompress.h"
/* ---------- /* ----------
...@@ -492,14 +495,15 @@ pglz_find_match(int16 *hstart, const char *input, const char *end, ...@@ -492,14 +495,15 @@ pglz_find_match(int16 *hstart, const char *input, const char *end,
/* ---------- /* ----------
* pglz_compress - * pglz_compress -
* *
* Compresses source into dest using strategy. * Compresses source into dest using strategy. Returns the number of
* bytes written in buffer dest, or -1 if compression fails.
* ---------- * ----------
*/ */
bool int32
pglz_compress(const char *source, int32 slen, PGLZ_Header *dest, pglz_compress(const char *source, int32 slen, char *dest,
const PGLZ_Strategy *strategy) const PGLZ_Strategy *strategy)
{ {
unsigned char *bp = ((unsigned char *) dest) + sizeof(PGLZ_Header); unsigned char *bp = (unsigned char *) dest;
unsigned char *bstart = bp; unsigned char *bstart = bp;
int hist_next = 1; int hist_next = 1;
bool hist_recycle = false; bool hist_recycle = false;
...@@ -533,12 +537,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest, ...@@ -533,12 +537,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
if (strategy->match_size_good <= 0 || if (strategy->match_size_good <= 0 ||
slen < strategy->min_input_size || slen < strategy->min_input_size ||
slen > strategy->max_input_size) slen > strategy->max_input_size)
return false; return -1;
/*
* Save the original source size in the header.
*/
dest->rawsize = slen;
/* /*
* Limit the match parameters to the supported range. * Limit the match parameters to the supported range.
...@@ -611,7 +610,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest, ...@@ -611,7 +610,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
* allow 4 slop bytes. * allow 4 slop bytes.
*/ */
if (bp - bstart >= result_max) if (bp - bstart >= result_max)
return false; return -1;
/* /*
* If we've emitted more than first_success_by bytes without finding * If we've emitted more than first_success_by bytes without finding
...@@ -620,7 +619,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest, ...@@ -620,7 +619,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
* pre-compressed data). * pre-compressed data).
*/ */
if (!found_match && bp - bstart >= strategy->first_success_by) if (!found_match && bp - bstart >= strategy->first_success_by)
return false; return -1;
/* /*
* Try to find a match in the history * Try to find a match in the history
...@@ -664,35 +663,34 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest, ...@@ -664,35 +663,34 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
*ctrlp = ctrlb; *ctrlp = ctrlb;
result_size = bp - bstart; result_size = bp - bstart;
if (result_size >= result_max) if (result_size >= result_max)
return false; return -1;
/*
* Success - need only fill in the actual length of the compressed datum.
*/
SET_VARSIZE_COMPRESSED(dest, result_size + sizeof(PGLZ_Header));
return true; /* success */
return result_size;
} }
/* ---------- /* ----------
* pglz_decompress - * pglz_decompress -
* *
* Decompresses source into dest. * Decompresses source into dest. Returns the number of bytes
* decompressed in the destination buffer, or -1 if decompression
* fails.
* ---------- * ----------
*/ */
void int32
pglz_decompress(const PGLZ_Header *source, char *dest) pglz_decompress(const char *source, int32 slen, char *dest,
int32 rawsize)
{ {
const unsigned char *sp; const unsigned char *sp;
const unsigned char *srcend; const unsigned char *srcend;
unsigned char *dp; unsigned char *dp;
unsigned char *destend; unsigned char *destend;
sp = ((const unsigned char *) source) + sizeof(PGLZ_Header); sp = (const unsigned char *) source;
srcend = ((const unsigned char *) source) + VARSIZE(source); srcend = ((const unsigned char *) source) + slen;
dp = (unsigned char *) dest; dp = (unsigned char *) dest;
destend = dp + source->rawsize; destend = dp + rawsize;
while (sp < srcend && dp < destend) while (sp < srcend && dp < destend)
{ {
...@@ -771,9 +769,10 @@ pglz_decompress(const PGLZ_Header *source, char *dest) ...@@ -771,9 +769,10 @@ pglz_decompress(const PGLZ_Header *source, char *dest)
* Check we decompressed the right amount. * Check we decompressed the right amount.
*/ */
if (dp != destend || sp != srcend) if (dp != destend || sp != srcend)
elog(ERROR, "compressed data is corrupt"); return -1;
/* /*
* That's it. * That's it.
*/ */
return rawsize;
} }
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
* *
* Definitions for the builtin LZ compressor * Definitions for the builtin LZ compressor
* *
* src/include/utils/pg_lzcompress.h * src/include/common/pg_lzcompress.h
* ---------- * ----------
*/ */
...@@ -11,19 +11,6 @@ ...@@ -11,19 +11,6 @@
#define _PG_LZCOMPRESS_H_ #define _PG_LZCOMPRESS_H_
/* ----------
* PGLZ_Header -
*
* The information at the start of the compressed data.
* ----------
*/
typedef struct PGLZ_Header
{
int32 vl_len_; /* varlena header (do not touch directly!) */
int32 rawsize;
} PGLZ_Header;
/* ---------- /* ----------
* PGLZ_MAX_OUTPUT - * PGLZ_MAX_OUTPUT -
* *
...@@ -31,16 +18,7 @@ typedef struct PGLZ_Header ...@@ -31,16 +18,7 @@ typedef struct PGLZ_Header
* We allow 4 bytes for overrun before detecting compression failure. * We allow 4 bytes for overrun before detecting compression failure.
* ---------- * ----------
*/ */
#define PGLZ_MAX_OUTPUT(_dlen) ((_dlen) + 4 + sizeof(PGLZ_Header)) #define PGLZ_MAX_OUTPUT(_dlen) ((_dlen) + 4)
/* ----------
* PGLZ_RAW_SIZE -
*
* Macro to determine the uncompressed data size contained
* in the entry.
* ----------
*/
#define PGLZ_RAW_SIZE(_lzdata) ((_lzdata)->rawsize)
/* ---------- /* ----------
...@@ -105,8 +83,9 @@ extern const PGLZ_Strategy *const PGLZ_strategy_always; ...@@ -105,8 +83,9 @@ extern const PGLZ_Strategy *const PGLZ_strategy_always;
* Global function declarations * Global function declarations
* ---------- * ----------
*/ */
extern bool pglz_compress(const char *source, int32 slen, PGLZ_Header *dest, extern int32 pglz_compress(const char *source, int32 slen, char *dest,
const PGLZ_Strategy *strategy); const PGLZ_Strategy *strategy);
extern void pglz_decompress(const PGLZ_Header *source, char *dest); extern int32 pglz_decompress(const char *source, int32 slen, char *dest,
int32 rawsize);
#endif /* _PG_LZCOMPRESS_H_ */ #endif /* _PG_LZCOMPRESS_H_ */
...@@ -76,7 +76,8 @@ sub mkvcbuild ...@@ -76,7 +76,8 @@ sub mkvcbuild
push(@pgportfiles, 'rint.c') if ($vsVersion < '12.00'); push(@pgportfiles, 'rint.c') if ($vsVersion < '12.00');
our @pgcommonallfiles = qw( our @pgcommonallfiles = qw(
exec.c pgfnames.c psprintf.c relpath.c rmtree.c string.c username.c wait_error.c); exec.c pg_lzcompress.c pgfnames.c psprintf.c relpath.c rmtree.c
string.c username.c wait_error.c);
our @pgcommonfrontendfiles = (@pgcommonallfiles, qw(fe_memutils.c)); our @pgcommonfrontendfiles = (@pgcommonallfiles, qw(fe_memutils.c));
......
...@@ -1125,7 +1125,6 @@ PGEventResultCreate ...@@ -1125,7 +1125,6 @@ PGEventResultCreate
PGEventResultDestroy PGEventResultDestroy
PGFInfoFunction PGFInfoFunction
PGFunction PGFunction
PGLZ_Header
PGLZ_HistEntry PGLZ_HistEntry
PGLZ_Strategy PGLZ_Strategy
PGMessageField PGMessageField
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment