Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
Postgres FD Implementation
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Abuhujair Javed
Postgres FD Implementation
Commits
79c3b71c
Commit
79c3b71c
authored
Nov 17, 1999
by
Jan Wieck
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
The new LZ compression and an lztext data type based on it.
Jan
parent
ddc33529
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
1123 additions
and
5 deletions
+1123
-5
src/backend/utils/adt/Makefile
src/backend/utils/adt/Makefile
+2
-2
src/backend/utils/adt/lztext.c
src/backend/utils/adt/lztext.c
+266
-0
src/backend/utils/adt/pg_lzcompress.c
src/backend/utils/adt/pg_lzcompress.c
+669
-0
src/include/catalog/pg_proc.h
src/include/catalog/pg_proc.h
+23
-1
src/include/catalog/pg_type.h
src/include/catalog/pg_type.h
+6
-1
src/include/utils/builtins.h
src/include/utils/builtins.h
+10
-1
src/include/utils/lztext.h
src/include/utils/lztext.h
+22
-0
src/include/utils/pg_lzcompress.h
src/include/utils/pg_lzcompress.h
+125
-0
No files found.
src/backend/utils/adt/Makefile
View file @
79c3b71c
...
...
@@ -4,7 +4,7 @@
# Makefile for utils/adt
#
# IDENTIFICATION
# $Header: /cvsroot/pgsql/src/backend/utils/adt/Makefile,v 1.2
6 1999/09/30 14:54:22
wieck Exp $
# $Header: /cvsroot/pgsql/src/backend/utils/adt/Makefile,v 1.2
7 1999/11/17 21:21:50
wieck Exp $
#
#-------------------------------------------------------------------------
...
...
@@ -35,7 +35,7 @@ OBJS = acl.o arrayfuncs.o arrayutils.o bool.o cash.o char.o chunk.o \
regexp.o regproc.o ruleutils.o selfuncs.o sets.o
\
tid.o timestamp.o varchar.o varlena.o version.o
\
network.o mac.o inet_net_ntop.o inet_net_pton.o
\
ri_triggers.o
ri_triggers.o
pg_lzcompress.o lztext.o
all
:
SUBSYS.o
...
...
src/backend/utils/adt/lztext.c
0 → 100644
View file @
79c3b71c
/* ----------
* lztext.c -
*
* $Header: /cvsroot/pgsql/src/backend/utils/adt/Attic/lztext.c,v 1.1 1999/11/17 21:21:50 wieck Exp $
*
* Text type with internal LZ compressed representation. Uses the
* standard PostgreSQL compression method.
* ----------
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <errno.h>
#include "postgres.h"
#include "utils/builtins.h"
#include "utils/palloc.h"
#include "utils/pg_lzcompress.h"
/* ----------
 * lztextin -
 *
 *		Input function for datatype lztext.
 *
 *		Runs the NUL-terminated string str (terminator excluded) through
 *		pglz_compress() with a NULL strategy and returns the palloc()'d
 *		result.  A NULL input yields a NULL result.
 * ----------
 */
lztext *
lztextin(char *str)
{
	lztext	   *scratch;
	lztext	   *packed;
	int32		nbytes;
	int			scratch_len;
	int			unused;

	if (str == NULL)
		return NULL;

	/* Size the scratch buffer for the largest output the compressor needs. */
	nbytes = strlen(str);
	scratch_len = PGLZ_MAX_OUTPUT(nbytes);
	scratch = (lztext *) palloc(scratch_len);

	pglz_compress(str, nbytes, scratch, NULL);

	/*
	 * When only a small tail of the scratch buffer is unused (fewer than
	 * 256 bytes, or less than a quarter of it), return the scratch buffer
	 * itself and save the copy.
	 */
	unused = scratch_len - scratch->varsize;
	if (unused < 256 || unused < scratch_len / 4)
		return scratch;

	/* Otherwise move the value into an exactly sized allocation. */
	packed = (lztext *) palloc(scratch->varsize);
	memcpy(packed, scratch, scratch->varsize);
	pfree(scratch);

	return packed;
}
/* ----------
 * lztextout -
 *
 *		Output function for data type lztext.
 *
 *		Returns a palloc()'d, NUL-terminated copy of the decompressed
 *		value.  A NULL input is rendered as the string "-".
 * ----------
 */
char *
lztextout(lztext *lz)
{
	char	   *text_out;
	int32		rawlen;

	if (lz == NULL)
	{
		text_out = (char *) palloc(2);
		text_out[0] = '-';
		text_out[1] = '\0';
		return text_out;
	}

	/*
	 * The header remembers the uncompressed size, so the output buffer can
	 * be allocated up front; one extra byte holds the terminator, which
	 * pglz_decompress() does not write.
	 */
	rawlen = PGLZ_RAW_SIZE(lz);
	text_out = (char *) palloc(rawlen + 1);

	pglz_decompress(lz, text_out);
	text_out[rawlen] = '\0';

	return text_out;
}
/* ----------
 * lztextlen -
 *
 *		Logical length of an lztext value: the uncompressed size
 *		remembered in its header (no multibyte handling).  Returns 0
 *		for a NULL input.
 * ----------
 */
int32
lztextlen(lztext *lz)
{
	return (lz != NULL) ? lz->rawsize : 0;
}
/* ----------
 * lztextoctetlen -
 *
 *		Physical length of an lztext value: its varsize minus the
 *		size of the varsize field itself, i.e. the rawsize field plus
 *		the stored data.  Returns 0 for a NULL input.
 * ----------
 */
int32
lztextoctetlen(lztext *lz)
{
	int32		octets = 0;

	if (lz != NULL)
		octets = lz->varsize - sizeof(int32);

	return octets;
}
/* ----------
 * text_lztext -
 *
 *		Convert text to lztext.
 *
 *		Compresses the data area of txt (VARSIZE minus VARHDRSZ bytes)
 *		via pglz_compress() with a NULL strategy and returns the
 *		palloc()'d result.  A NULL input yields a NULL result.
 * ----------
 */
lztext *
text_lztext(text *txt)
{
	lztext	   *scratch;
	lztext	   *packed;
	char	   *payload;
	int32		nbytes;
	int			scratch_len;
	int			unused;

	if (txt == NULL)
		return NULL;

	/* Locate the payload and size the scratch buffer for it. */
	nbytes = VARSIZE(txt) - VARHDRSZ;
	payload = VARDATA(txt);
	scratch_len = PGLZ_MAX_OUTPUT(nbytes);
	scratch = (lztext *) palloc(scratch_len);

	pglz_compress(payload, nbytes, scratch, NULL);

	/*
	 * When only a small tail of the scratch buffer is unused (fewer than
	 * 256 bytes, or less than a quarter of it), return the scratch buffer
	 * itself and save the copy.
	 */
	unused = scratch_len - scratch->varsize;
	if (unused < 256 || unused < scratch_len / 4)
		return scratch;

	/* Otherwise move the value into an exactly sized allocation. */
	packed = (lztext *) palloc(scratch->varsize);
	memcpy(packed, scratch, scratch->varsize);
	pfree(scratch);

	return packed;
}
/* ----------
 * lztext_text -
 *
 *		Convert lztext to text.
 *
 *		Returns a palloc()'d text value holding the decompressed data.
 *		One byte beyond the text payload is allocated and set to zero.
 *		A NULL input yields a NULL result.
 * ----------
 */
text *
lztext_text(lztext *lz)
{
	text	   *out;
	int32		rawlen;

	if (lz == NULL)
		return NULL;

	rawlen = lz->rawsize;

	/* Header + payload + one spare byte for the trailing zero. */
	out = (text *) palloc(rawlen + VARHDRSZ + 1);
	VARSIZE(out) = rawlen + VARHDRSZ;

	/* Decompress straight into the text data area, then terminate it. */
	pglz_decompress(lz, VARDATA(out));
	VARDATA(out)[rawlen] = 0;

	return out;
}
src/backend/utils/adt/pg_lzcompress.c
0 → 100644
View file @
79c3b71c
/* ----------
* pg_lzcompress.c -
*
* $Header: /cvsroot/pgsql/src/backend/utils/adt/pg_lzcompress.c,v 1.1 1999/11/17 21:21:50 wieck Exp $
*
* This is an implementation of LZ compression for PostgreSQL.
* It uses a simple history table and generates 2-3 byte tags
* capable of backward copy information for 3-273 bytes with
* an offset of max. 4095.
*
* Entry routines:
*
* int
* pglz_compress(char *source, int slen, PGLZ_Header *dest,
* PGLZ_Strategy *strategy);
*
* source is the input data to be compressed.
*
* slen is the length of the input data.
*
* dest is the output area for the compressed result.
* It must be big enough to hold the worst case of
* compression failure and can be computed by the
* macro PGLZ_MAX_OUTPUT(slen). Don't be surprised,
* it is larger than the input data size.
*
* strategy is a pointer to some information controlling
* the compression algorithm. If NULL, the compiled
* in default strategy is used.
*
* The return value is the number of bytes written to dest,
* including the PGLZ_Header.
*
* int
* pglz_decompress(PGLZ_Header *source, char *dest)
*
* source is the compressed input.
*
* dest is the area where the uncompressed data will be
* written to. It is the callers responsibility to
* provide enough space. The required amount can be
* obtained with the macro PGLZ_RAW_SIZE(source).
*
* The data is written to dest exactly as it was handed
* to pglz_compress(). No terminating zero byte is added.
*
* The return value is the number of bytes written to dest.
* Obviously the same as PGLZ_RAW_SIZE() returns.
*
* The compression algorithm and internal data format:
*
* PGLZ_Header is defined as
*
* typedef struct PGLZ_Header {
* int32 varsize;
* int32 rawsize;
* }
*
* The header is followed by the compressed data itself.
*
* The algorithm is easiest explained by describing the process
* of decompression.
*
* If varsize == rawsize + sizeof(PGLZ_Header), then the data
* is stored uncompressed as plain bytes. Thus, the decompressor
* simply copies rawsize bytes from the location after the
* header to the destination.
*
* Otherwise the first byte after the header tells what to do
* the next 8 times. We call this the control byte.
*
* An unset bit in the control byte means, that one uncompressed
* byte follows, which is copied from input to output.
*
* A set bit in the control byte means, that a tag of 2-3 bytes
* follows. A tag contains information to copy some bytes, that
* are already in the output buffer, to the current location in
* the output. Let's call the three tag bytes T1, T2 and T3. The
* position of the data to copy is coded as an offset from the
* actual output position.
*
* The offset is in the upper nibble of T1 and in T2.
* The length is in the lower nibble of T1.
*
* So the 16 bits of a 2 byte tag are coded as
*
* 7---T1--0 7---T2--0
* OOOO LLLL OOOO OOOO
*
* This limits the offset to 1-4095 (12 bits) and the length
* to 3-18 (4 bits) because 3 is always added to it. Emitting
* a 2 byte tag for a match of length 2 would only save one
* control bit, but we would lose one byte of the possible tag length.
*
* In the actual implementation, the 2 byte tag's length is
* limited to 3-17, because the value 0xF in the length nibble
* has special meaning. It means, that the next following
* byte (T3) has to be added to the length value of 18. That
* makes total limits of 1-4095 for offset and 3-273 for length.
*
* Now that we have successfully decoded a tag, we simply copy
* the output that occurred <offset> bytes back to the current
* output location in the specified <length>. Thus, a
* sequence of 200 spaces (think about bpchar fields) could be
* coded in 4 bytes. One literal space and a three byte tag to
* copy 199 bytes with a -1 offset. Wow - that's a compression
* rate of 98%! Well, the implementation needs to save the
* original data size too, so we need another 4 bytes for it
* and end up with a total compression rate of 96%, which is still
* worth a Wow.
*
* Acknowledgements:
*
* Many thanks to Adisak Pochanayon, whose article about SLZ
* inspired me to write the PostgreSQL compression this way.
*
* Jan Wieck
* ----------
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <errno.h>
#include "postgres.h"
#include "utils/palloc.h"
#include "utils/pg_lzcompress.h"
/* ----------
* Local definitions
*
* PGLZ_HISTORY_SIZE is the number of slots in the history hash table
* (hist_start[] in pglz_compress) and PGLZ_HISTORY_MASK is the bit mask
* pglz_hist_idx() applies to reduce a hash value to a slot number.
*
* PGLZ_HISTORY_PREALLOC is the number of history entries kept in a
* local array of pglz_compress; larger inputs get a palloc()'d array.
*
* PGLZ_MAX_MATCH is the longest match a tag can express: 18 plus the
* 0-255 carried in the optional third tag byte.
* ----------
*/
#define PGLZ_HISTORY_SIZE 8192
#define PGLZ_HISTORY_MASK 0x1fff
#define PGLZ_HISTORY_PREALLOC 8192
#define PGLZ_MAX_MATCH 273
/* ----------
* PGLZ_HistEntry -
*
* One node of the singly linked list hanging off a history table
* slot. pglz_hist_add() pushes new nodes at the head, so following
* next walks from the most recently added position to older ones.
* ----------
*/
typedef
struct
PGLZ_HistEntry
{
/* previously added entry in the same slot, NULL at the end of the chain */
struct
PGLZ_HistEntry
*
next
;
/* position in the input buffer this entry was recorded for */
char
*
pos
;
}
PGLZ_HistEntry
;
/* ----------
* The provided standard strategies
*
* Each initializer lists, in order: min_input_size, force_input_size,
* min_comp_rate, match_size_good, match_size_drop.
* ----------
*/
static
PGLZ_Strategy
strategy_default_data
=
{
256
,
/* min_input_size: data chunks smaller than 256 bytes are not compressed */
6144
,
/* force_input_size: data chunks of 6K or more force compression, */
/* unless the compressed result would not be smaller than the input */
20
,
/* min_comp_rate: compression rates below 20% mean fallback to */
/* uncompressed storage, unless forced by the previous parameter */
128
,
/* match_size_good: stop history lookup if a match of 128 bytes is found */
10
/* match_size_drop: lower good match size by 10% at every lookup iteration */
};
PGLZ_Strategy
*
PGLZ_strategy_default
=
&
strategy_default_data
;
/* Strategy that attempts compression for input of any size. */
static
PGLZ_Strategy
strategy_allways_data
=
{
0
,
/* min_input_size: chunks of any size are compressed */
0
,
/* force_input_size: every input counts as "forced", so the only */
/* limit is that the result must be smaller than the input */
0
,
/* min_comp_rate: we want to save at least one single byte */
128
,
/* match_size_good: stop history lookup if a match of 128 bytes is found */
6
/* match_size_drop: look harder for a good match (6% per iteration) */
};
PGLZ_Strategy
*
PGLZ_strategy_allways
=
&
strategy_allways_data
;
/* Strategy that makes pglz_compress() store the input uncompressed. */
static
PGLZ_Strategy
strategy_never_data
=
{
0
,
/* min_input_size: not consulted, see match_size_good */
0
,
/* force_input_size: not consulted, see match_size_good */
0
,
/* min_comp_rate: not consulted, see match_size_good */
0
,
/* match_size_good: zero indicates "always store uncompressed" */
0
/* match_size_drop: not consulted, see match_size_good */
};
PGLZ_Strategy
*
PGLZ_strategy_never
=
&
strategy_never_data
;
/* ----------
* pglz_hist_idx -
*
* Computes the history table slot for the lookup by the next 4
* characters in the input. _s is the current input position and
* _e the end of the input; if fewer than 4 bytes remain, slot 0
* is used. The result is masked with PGLZ_HISTORY_MASK.
*
* Both arguments are evaluated more than once, so they must not
* have side effects.
*
* The #else branch is a disabled variant hashing only 2 characters.
*
* NOTE(review): the callers in this file pass plain char pointers.
* Where char is signed, bytes >= 0x80 are negative and left-shifting
* them is undefined in ISO C - consider casting to unsigned char.
* ----------
*/
#if 1
#define pglz_hist_idx(_s,_e) ( \
(((_e) - (_s)) < 4) ? 0 : \
((((_s)[0] << 9) ^ ((_s)[1] << 6) ^ \
((_s)[2] << 3) ^ (_s)[3]) & (PGLZ_HISTORY_MASK)) \
)
#else
#define pglz_hist_idx(_s,_e) ( \
(((_e) - (_s)) < 2) ? 0 : \
((((_s)[0] << 8) ^ (_s)[1]) & (PGLZ_HISTORY_MASK)) \
)
#endif
/* ----------
 * pglz_hist_add -
 *
 *		Adds a new entry to the history table.
 *
 *		_hs is the history table (array of chain heads), _hn points to
 *		the next unused PGLZ_HistEntry and is advanced by one, _s is the
 *		input position to remember and _e the end of the input.
 *
 *		The new entry is pushed at the head of the chain of the slot
 *		computed by pglz_hist_idx(_s, _e).
 *
 *		_s and _e are evaluated exactly once.  _hs and _hn are still
 *		evaluated several times and _hn must be a modifiable lvalue.
 *		The macro expands to a single statement.
 * ----------
 */
#define pglz_hist_add(_hs,_hn,_s,_e) \
do { \
	char	   *pglz_ha_pos = (_s); \
	char	   *pglz_ha_end = (_e); \
	int			pglz_ha_idx = pglz_hist_idx(pglz_ha_pos, pglz_ha_end); \
	(_hn)->next = (_hs)[pglz_ha_idx]; \
	(_hn)->pos = pglz_ha_pos; \
	(_hs)[pglz_ha_idx] = (_hn)++; \
} while (0)
/* ----------
 * pglz_out_ctrl -
 *
 *		Outputs the last and allocates a new control byte if needed.
 *
 *		_ctrl is the mask of the next free control bit.  When its low
 *		8 bits are zero (initially, and after eight items), the bits
 *		collected in _ctrlb are stored through _ctrlp, the next output
 *		byte at _buf is reserved as the new control byte, and _ctrlb
 *		and _ctrl are reset.
 *
 *		All four arguments must be modifiable lvalues.  The macro
 *		expands to a single statement.
 * ----------
 */
#define pglz_out_ctrl(_ctrlp,_ctrlb,_ctrl,_buf) \
do { \
	if (((_ctrl) & 0xff) == 0) \
	{ \
		*(_ctrlp) = (_ctrlb); \
		(_ctrlp) = (_buf)++; \
		(_ctrlb) = 0; \
		(_ctrl) = 1; \
	} \
} while (0)

/* ----------
 * pglz_out_literal -
 *
 *		Outputs a literal byte to the destination buffer including the
 *		appropriate control bit (an unset bit, so only the bit mask is
 *		advanced).
 * ----------
 */
#define pglz_out_literal(_ctrlp,_ctrlb,_ctrl,_buf,_byte) \
do { \
	pglz_out_ctrl(_ctrlp,_ctrlb,_ctrl,_buf); \
	*(_buf)++ = (unsigned char)(_byte); \
	(_ctrl) <<= 1; \
} while (0)

/* ----------
 * pglz_out_tag -
 *
 *		Outputs a backward reference tag of 2-3 bytes (depending on
 *		the length) to the destination buffer including the
 *		appropriate control bit.
 *
 *		Lengths above 17 use the 3 byte form: the length nibble is
 *		0x0f and the third byte carries the length minus 18.  Shorter
 *		lengths store the length minus 3 in the nibble.  The upper 4
 *		bits of the offset share the first byte, the lower 8 bits
 *		form the second byte.
 *
 *		_len and _off are evaluated more than once and must not have
 *		side effects.
 * ----------
 */
#define pglz_out_tag(_ctrlp,_ctrlb,_ctrl,_buf,_len,_off) \
do { \
	pglz_out_ctrl(_ctrlp,_ctrlb,_ctrl,_buf); \
	(_ctrlb) |= (_ctrl); \
	(_ctrl) <<= 1; \
	if ((_len) > 17) \
	{ \
		(_buf)[0] = (unsigned char)((((_off) & 0xf00) >> 4) | 0x0f); \
		(_buf)[1] = (unsigned char)((_off) & 0xff); \
		(_buf)[2] = (unsigned char)((_len) - 18); \
		(_buf) += 3; \
	} else { \
		(_buf)[0] = (unsigned char)((((_off) & 0xf00) >> 4) | ((_len) - 3)); \
		(_buf)[1] = (unsigned char)((_off) & 0xff); \
		(_buf) += 2; \
	} \
} while (0)
/* ----------
 * pglz_find_match -
 *
 *		Looks up the history table to see whether the bytes at input
 *		repeat a sequence that started earlier in the input buffer.
 *
 *		hstart is the history table, end the end of the input.
 *		good_match is the match length considered good enough to stop
 *		searching; it is lowered by good_drop percent for every chain
 *		entry visited.
 *
 *		Returns 1 and stores length and backward offset of the best
 *		match in *lenp / *offp if that match is longer than 2 bytes;
 *		returns 0 (leaving both untouched) otherwise.
 * ----------
 */
static inline int
pglz_find_match(PGLZ_HistEntry **hstart, char *input, char *end,
				int *lenp, int *offp, int good_match, int good_drop)
{
	PGLZ_HistEntry *cand;
	int32		best_len = 0;
	int32		best_off = 0;

	/*
	 * Walk the chain of the slot the input hashes to, newest entry first,
	 * until it ends or the best match so far is good enough.
	 */
	for (cand = hstart[pglz_hist_idx(input, end)];
		 cand && best_len < good_match;
		 cand = cand->next)
	{
		char	   *cur = input;
		char	   *old = cand->pos;
		int32		dist;
		int32		run;

		/* The more entries we visited, the less we ask for. */
		good_match -= (good_match * good_drop) / 100;

		/* Entries further back cannot be addressed by a tag; give up. */
		dist = cur - old;
		if (dist >= 0x0fff)
			break;

		/*
		 * A candidate is only interesting if it beats the best match.
		 * Once that is 16 bytes or longer, one memcmp() over that length
		 * rejects losers cheaply; the byte loop below then only has to
		 * find where a surviving candidate stops matching.
		 */
		if (best_len >= 16)
		{
			if (memcmp(cur, old, best_len) != 0)
				continue;

			run = best_len;
			cur += best_len;
			old += best_len;
		}
		else
			run = 0;

		while (cur < end && *cur == *old && run < PGLZ_MAX_MATCH)
		{
			run++;
			cur++;
			old++;
		}

		/* Keep it if it is the longest so far. */
		if (run > best_len)
		{
			best_len = run;
			best_off = dist;
		}
	}

	/* Matches of 2 bytes or fewer are not reported. */
	if (best_len <= 2)
		return 0;

	*lenp = best_len;
	*offp = best_off;
	return 1;
}
/* ----------
 * pglz_compress -
 *
 *		Compresses slen bytes at source into dest according to
 *		strategy (NULL selects PGLZ_strategy_default).
 *
 *		dest must provide PGLZ_MAX_OUTPUT(slen) bytes.  Its header is
 *		filled in: rawsize is set to slen, varsize to the total number
 *		of bytes written including the header.
 *
 *		If the strategy forbids compression, the input is smaller than
 *		the strategy's min_input_size, or the compressed result does
 *		not stay below the size limit derived from the strategy, the
 *		input is stored uncompressed behind the header instead.
 *
 *		Returns the resulting varsize.
 * ----------
 */
int
pglz_compress(char *source, int slen, PGLZ_Header *dest, PGLZ_Strategy *strategy)
{
	PGLZ_HistEntry *hist_start[PGLZ_HISTORY_SIZE];
	PGLZ_HistEntry *hist_alloc;
	PGLZ_HistEntry hist_prealloc[PGLZ_HISTORY_PREALLOC];
	PGLZ_HistEntry *hist_next;

	unsigned char *bp = ((unsigned char *) dest) + sizeof(PGLZ_Header);
	unsigned char *bstart = bp;
	char	   *dp = source;
	char	   *dend = source + slen;
	unsigned char ctrl_dummy = 0;
	unsigned char *ctrlp = &ctrl_dummy;
	unsigned char ctrlb = 0;
	unsigned char ctrl = 0;
	int32		match_len;
	int32		match_off;
	int32		good_match;
	int32		good_drop;
	int32		do_compress = 1;
	int32		result_size = -1;
	int32		result_max;
	int32		need_rate;

	/* ----------
	 * Our fallback strategy is the default.
	 * ----------
	 */
	if (strategy == NULL)
		strategy = PGLZ_strategy_default;

	/* ----------
	 * Save the original source size in the header.
	 * ----------
	 */
	dest->rawsize = slen;

	/* ----------
	 * If the strategy forbids compression (at all or if source chunk too
	 * small), copy input to output without compression.
	 * ----------
	 */
	if (strategy->match_size_good == 0 || slen < strategy->min_input_size)
	{
		memcpy(bstart, source, slen);
		return (dest->varsize = slen + sizeof(PGLZ_Header));
	}

	/* ----------
	 * Limit the match size to the range the implementation supports
	 * and the drop percentage to 0-100.
	 * ----------
	 */
	if ((good_match = strategy->match_size_good) > PGLZ_MAX_MATCH)
		good_match = PGLZ_MAX_MATCH;
	if (good_match < 17)
		good_match = 17;

	if ((good_drop = strategy->match_size_drop) < 0)
		good_drop = 0;
	if (good_drop > 100)
		good_drop = 100;

	/* ----------
	 * Initialize the history tables. For inputs smaller than
	 * PGLZ_HISTORY_PREALLOC, we already have a big enough history
	 * table on the stack frame.
	 * ----------
	 */
	memset((void *) hist_start, 0, sizeof(hist_start));
	if (slen + 1 <= PGLZ_HISTORY_PREALLOC)
		hist_alloc = hist_prealloc;
	else
		hist_alloc = (PGLZ_HistEntry *)
			palloc(sizeof(PGLZ_HistEntry) * (slen + 1));
	hist_next = hist_alloc;

	/* ----------
	 * Compute the maximum result size allowed by the strategy.
	 * If the input size exceeds force_input_size, the max result size
	 * is the input size itself.
	 * Otherwise, it is the input size minus the minimum wanted
	 * compression rate.
	 * ----------
	 */
	if (slen >= strategy->force_input_size)
	{
		result_max = slen;
	}
	else
	{
		need_rate = strategy->min_comp_rate;
		if (need_rate < 0)
			need_rate = 0;
		else if (need_rate > 99)
			need_rate = 99;

		/*
		 * Same value as slen - (slen * need_rate) / 100, but split into
		 * hundreds and remainder so the product cannot overflow for
		 * large inputs.
		 */
		result_max = slen - ((slen / 100) * need_rate
							 + ((slen % 100) * need_rate) / 100);
	}

	/* ----------
	 * Compress the source directly into the output buffer.
	 * ----------
	 */
	while (dp < dend)
	{
		/* ----------
		 * If we already exceeded the maximum result size, set no
		 * compression flag and stop this.
		 * ----------
		 */
		if (bp - bstart >= result_max)
		{
			do_compress = 0;
			break;
		}

		/* ----------
		 * Try to find a match in the history
		 * ----------
		 */
		if (pglz_find_match(hist_start, dp, dend, &match_len,
							&match_off, good_match, good_drop))
		{
			/* ----------
			 * Create the tag and add history entries for
			 * all matched characters.
			 * ----------
			 */
			pglz_out_tag(ctrlp, ctrlb, ctrl, bp, match_len, match_off);
			while (match_len--)
			{
				pglz_hist_add(hist_start, hist_next, dp, dend);
				dp++;			/* Do not do this ++ in the line above!		*/
				/* The macro may evaluate its argument more than once.	*/
			}
		}
		else
		{
			/* ----------
			 * No match found. Copy one literal byte.
			 * ----------
			 */
			pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp);
			pglz_hist_add(hist_start, hist_next, dp, dend);
			dp++;				/* Do not do this ++ in the line above!		*/
			/* The macro may evaluate its argument more than once.	*/
		}
	}

	/* ----------
	 * Get rid of the history (if allocated)
	 * ----------
	 */
	if (hist_alloc != hist_prealloc)
		pfree((void *) hist_alloc);

	/* ----------
	 * If we are still in compressing mode, write out the last
	 * control byte and determine if the compression gained the
	 * rate requested by the strategy.
	 * ----------
	 */
	if (do_compress)
	{
		*ctrlp = ctrlb;

		result_size = bp - bstart;
		if (result_size >= result_max)
			do_compress = 0;
	}

	/* ----------
	 * Done - if we successfully compressed and matched the
	 * strategy's constraints, return the compressed result.
	 * Otherwise copy the original source over it and return
	 * the original length.
	 * ----------
	 */
	if (do_compress)
		return (dest->varsize = result_size + sizeof(PGLZ_Header));

	memcpy(((char *) dest) + sizeof(PGLZ_Header), source, slen);
	return (dest->varsize = slen + sizeof(PGLZ_Header));
}
/* ----------
 * pglz_decompress -
 *
 *		Decompresses source into dest.
 *
 *		dest must provide source->rawsize bytes; no terminating zero
 *		byte is added.  Returns the number of bytes written, which is
 *		source->rawsize for well-formed input.
 *
 *		Decoding never reads beyond source->varsize bytes of input and
 *		never writes beyond source->rawsize bytes of output.  If the
 *		input is damaged (truncated tag, or a tag whose offset points
 *		before the start of the output) decoding stops early and the
 *		number of bytes written so far is returned.
 * ----------
 */
int
pglz_decompress(PGLZ_Header *source, char *dest)
{
	unsigned char *dp;
	unsigned char *dend;
	unsigned char *bp;
	unsigned char *bend;
	unsigned char ctrl;
	int32		ctrlc;
	int32		len;
	int32		off;

	dp = ((unsigned char *) source) + sizeof(PGLZ_Header);
	dend = ((unsigned char *) source) + source->varsize;
	bp = (unsigned char *) dest;
	bend = bp + source->rawsize;

	/* ----------
	 * A varsize of exactly header plus rawsize marks data that was
	 * stored uncompressed; it is copied verbatim.
	 * ----------
	 */
	if (source->varsize == source->rawsize + sizeof(PGLZ_Header))
	{
		memcpy(dest, dp, source->rawsize);
		return source->rawsize;
	}

	while (dp < dend && bp < bend)
	{
		/* ----------
		 * Read one control byte and process the next 8 items.
		 * ----------
		 */
		ctrl = *dp++;
		for (ctrlc = 0; ctrlc < 8 && dp < dend && bp < bend; ctrlc++)
		{
			if (ctrl & 1)
			{
				/* ----------
				 * A set control bit means TAG. Its first byte contains
				 * the match length minus 3 in the lower nibble and the
				 * upper 4 bits of the offset in the upper nibble. The
				 * next byte contains the lower 8 bits of the offset. If
				 * the length is coded as 18, another extension tag byte
				 * tells how much longer the match really was (0-255).
				 * ----------
				 */
				if (dend - dp < 2)
					return (char *) bp - dest;	/* truncated tag */

				len = (dp[0] & 0x0f) + 3;
				off = ((dp[0] & 0xf0) << 4) | dp[1];
				dp += 2;
				if (len == 18)
				{
					if (dp >= dend)
						return (char *) bp - dest;	/* truncated tag */
					len += *dp++;
				}

				/* The copy source must lie inside the output so far. */
				if (off == 0 || off > bp - (unsigned char *) dest)
					return (char *) bp - dest;

				/* Never write more than rawsize bytes. */
				if (len > bend - bp)
					len = bend - bp;

				/* ----------
				 * Now we copy the bytes specified by the tag from
				 * OUTPUT to OUTPUT. It is dangerous and platform
				 * dependent to use memcpy() here, because the copied
				 * areas could overlap extremely!
				 * ----------
				 */
				while (len--)
				{
					*bp = bp[-off];
					bp++;
				}
			}
			else
			{
				/* ----------
				 * An unset control bit means LITERAL BYTE. So we
				 * just copy one from INPUT to OUTPUT.
				 * ----------
				 */
				*bp++ = *dp++;
			}

			/* ----------
			 * Advance the control bit
			 * ----------
			 */
			ctrl >>= 1;
		}
	}

	/* ----------
	 * That's it.
	 * ----------
	 */
	return (char *) bp - dest;
}
src/include/catalog/pg_proc.h
View file @
79c3b71c
...
...
@@ -6,7 +6,7 @@
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: pg_proc.h,v 1.10
5 1999/10/11 06:28:28 inoue
Exp $
* $Id: pg_proc.h,v 1.10
6 1999/11/17 21:21:50 wieck
Exp $
*
* NOTES
* The script catalog/genbki.sh reads this file and generates .bki
...
...
@@ -2338,6 +2338,28 @@ DESCR("larger of two numbers");
DATA
(
insert
OID
=
1769
(
numeric_cmp
PGUID
11
f
t
t
2
f
23
"1700 1700"
100
0
0
100
numeric_cmp
-
));
DESCR
(
"compare two numbers"
);
/* OID's 1625 - 1639 LZTEXT data type */
/* lztext input and output functions */
DATA(insert OID = 1626 (  lztextin		  PGUID 11 f t t 1 f 1625 "0" 100 0 0 100  lztextin - ));
DESCR("(internal)");
DATA(insert OID = 1627 (  lztextout		  PGUID 11 f t t 1 f 23 "0" 100 0 0 100  lztextout - ));
DESCR("(internal)");
/* lztext to text conversion, registered under two names */
DATA(insert OID = 1628 (  lztext_text	  PGUID 11 f t t 1 f 25 "1625" 100 0 0 100	lztext_text - ));
DESCR("convert lztext to text");
DATA(insert OID = 1629 (  text			  PGUID 11 f t t 1 f 25 "1625" 100 0 0 100	lztext_text - ));
DESCR("convert lztext to text");
/* text to lztext conversion, registered under two names */
DATA(insert OID = 1630 (  text_lztext	  PGUID 11 f t t 1 f 1625 "25" 100 0 0 100	text_lztext - ));
DESCR("convert text to lztext");
DATA(insert OID = 1631 (  lztext		  PGUID 11 f t t 1 f 1625 "25" 100 0 0 100	text_lztext - ));
DESCR("convert text to lztext");
/* logical length, registered under two names */
DATA(insert OID = 1632 (  lztextlen		  PGUID 11 f t t 1 f 23 "1625" 100 0 1 0  lztextlen - ));
DESCR("length");
DATA(insert OID = 1633 (  length		  PGUID 11 f t t 1 f 23 "1625" 100 0 1 0  lztextlen - ));
DESCR("length");
/* physical length, registered under two names */
DATA(insert OID = 1634 (  lztextoctetlen  PGUID 11 f t t 1 f 23 "1625" 100 0 1 0  lztextoctetlen - ));
DESCR("octet length");
DATA(insert OID = 1635 (  octet_length	  PGUID 11 f t t 1 f 23 "1625" 100 0 1 0  lztextoctetlen - ));
DESCR("octet length");
/*
* prototypes for functions pg_proc.c
...
...
src/include/catalog/pg_type.h
View file @
79c3b71c
...
...
@@ -7,7 +7,7 @@
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: pg_type.h,v 1.7
0 1999/10/18 14:14:04 momjian
Exp $
* $Id: pg_type.h,v 1.7
1 1999/11/17 21:21:51 wieck
Exp $
*
* NOTES
* the genbki.sh script reads this file and generates .bki
...
...
@@ -382,6 +382,11 @@ DATA(insert OID = 1296 ( timestamp PGUID 4 19 t b t \054 0 0 timestamp_in time
DESCR
(
"date time timezone, limited-range ISO-formated date and time"
);
#define TIMESTAMPOID 1296
/* OIDS 1625 - 1639 */
DATA
(
insert
OID
=
1625
(
lztext
PGUID
-
1
-
1
f
b
t
\
054
0
0
lztextin
lztextout
lztextin
lztextout
i
_null_
));
DESCR
(
"variable-length string, stored compressed"
);
#define LZTEXTOID 1625
/* OIDS 1700 - 1799 */
DATA
(
insert
OID
=
1700
(
numeric
PGUID
-
1
-
1
f
b
t
\
054
0
0
numeric_in
numeric_out
numeric_in
numeric_out
i
_null_
));
DESCR
(
"numeric(precision, decimal), arbitrary precision number"
);
...
...
src/include/utils/builtins.h
View file @
79c3b71c
...
...
@@ -6,7 +6,7 @@
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: builtins.h,v 1.
89 1999/10/11 06:28:28 inoue
Exp $
* $Id: builtins.h,v 1.
90 1999/11/17 21:21:51 wieck
Exp $
*
* NOTES
* This should normally only be included by fmgr.h.
...
...
@@ -30,6 +30,7 @@
#include "utils/int8.h"
#include "utils/nabstime.h"
#include "utils/numeric.h"
#include "utils/lztext.h"
#include "access/heapam.h"
/* for HeapTuple */
/*
...
...
@@ -627,4 +628,12 @@ HeapTuple RI_FKey_setnull_upd(FmgrInfo *proinfo);
HeapTuple
RI_FKey_setdefault_del
(
FmgrInfo
*
proinfo
);
HeapTuple
RI_FKey_setdefault_upd
(
FmgrInfo
*
proinfo
);
/* lztext.c */

/* input / output functions of the lztext data type */
lztext	   *lztextin(char *str);
char	   *lztextout(lztext *lz);

/* conversions between lztext and text */
text	   *lztext_text(lztext *lz);
lztext	   *text_lztext(text *txt);

/* logical (uncompressed) and physical (stored) length */
int32		lztextlen(lztext *lz);
int32		lztextoctetlen(lztext *lz);
#endif
/* BUILTINS_H */
src/include/utils/lztext.h
0 → 100644
View file @
79c3b71c
/* ----------
* lztext.h
*
* $Header: /cvsroot/pgsql/src/include/utils/Attic/lztext.h,v 1.1 1999/11/17 21:21:51 wieck Exp $
*
* Definitions for the lztext compressed data type
* ----------
*/
#ifndef _LZTEXT_H_
#define _LZTEXT_H_
#include "utils/pg_lzcompress.h"
/* ----------
* The internal storage format of an LZ compressed text field.
*
* An lztext value is simply the compressor's output: a PGLZ_Header
* followed by the data pglz_compress() stored behind it.
* ----------
*/
typedef
PGLZ_Header
lztext
;
#endif
/* _LZTEXT_H_ */
src/include/utils/pg_lzcompress.h
0 → 100644
View file @
79c3b71c
/* ----------
* pg_lzcompress.h -
*
* $Header: /cvsroot/pgsql/src/include/utils/pg_lzcompress.h,v 1.1 1999/11/17 21:21:51 wieck Exp $
*
* Definitions for the builtin LZ compressor
* ----------
*/
#ifndef _PG_LZCOMPRESS_H_
#define _PG_LZCOMPRESS_H_
/* ----------
* PGLZ_Header -
*
* The information at the top of the compressed data.
* The varsize must be kept the same data type as the value
* in front of all variable size data types in PostgreSQL.
* ----------
*/
typedef
struct
PGLZ_Header
{
/* total size in bytes of header plus stored data, as set by pglz_compress() */
int32
varsize
;
/* size in bytes of the original, uncompressed input */
int32
rawsize
;
}
PGLZ_Header
;
/* ----------
 * PGLZ_MAX_OUTPUT -
 *
 *		Macro to compute the maximum buffer required for the
 *		compression output. It is larger than the input, because
 *		in the worst case, we cannot write out one single tag but
 *		need one control byte per 8 literal data bytes.
 *
 *		The compressor checks its size limit only before it emits
 *		an item, and a single item can take up to 4 bytes (a new
 *		control byte plus a 3 byte tag). The extra 4 bytes keep
 *		that last item inside the buffer even for very short inputs.
 *
 * PGLZ_RAW_SIZE -
 *
 *		Size of the original data, as remembered in the header.
 *
 * PGLZ_IS_COMPRESSED -
 *
 *		True unless the data was stored uncompressed, which is
 *		recognized by varsize being exactly header plus rawsize.
 *
 *		_lzdata is a pointer to a PGLZ_Header; PGLZ_IS_COMPRESSED
 *		evaluates it twice.
 * ----------
 */
#define PGLZ_MAX_OUTPUT(_dlen)			((_dlen) + (((_dlen) | 0x07) >> 3)	\
										 + 4 + sizeof(PGLZ_Header))
#define PGLZ_RAW_SIZE(_lzdata)			((_lzdata)->rawsize)
#define PGLZ_IS_COMPRESSED(_lzdata)		((_lzdata)->varsize !=				\
										 (_lzdata)->rawsize + sizeof(PGLZ_Header))
/* ----------
* PGLZ_Strategy -
*
* Some values that control the compression algorithm.
*
* min_input_size Minimum input data size to start compression.
*
* force_input_size Input data size at which compressed storage is
* forced even if the compression rate drops below
* min_comp_rate (but not below 0).
*
* min_comp_rate Minimum compression rate (0-99%), the output
* must be smaller than the input. If that isn't
* the case, the compressor will throw away its
* output and copy the original, uncompressed data
* to the output buffer.
*
* match_size_good The initial GOOD match size when starting history
* lookup. When looking up the history to find a
* match that could be expressed as a tag, the
* algorithm does not always walk back entirely.
* A good match fast is usually better than the
* best possible one very late. For each iteration
* in the lookup, this value is lowered so the
* longer the lookup takes, the smaller matches
* are considered good. A value of 0 makes
* pglz_compress() store the data uncompressed.
*
* match_size_drop The percentage by which match_size_good is
* lowered at each history check. Allowed values
* are 0 (no change until end) to 100 (only check
* latest history entry at all).
* ----------
*/
typedef
struct
PGLZ_Strategy
{
int32
min_input_size
;
int32
force_input_size
;
int32
min_comp_rate
;
int32
match_size_good
;
int32
match_size_drop
;
}
PGLZ_Strategy
;
/* ----------
* The standard strategies
*
* PGLZ_strategy_default Starts compression only if input is
* at least 256 bytes large. Stores output
* uncompressed if compression does not
* gain at least 20% size reduction but
* input does not exceed 6K. Stops history
* lookup if at least a 128 byte long
* match has been found.
*
* This is the default strategy if none
* is given to pglz_compress().
*
* PGLZ_strategy_allways Starts compression on any input, no
* matter how small, and falls back to
* uncompressed storage only if the output
* would not be smaller than the input.
*
* PGLZ_strategy_never Force pglz_compress to act as a custom
* interface for memcpy(). Only useful
* for generic interfacing.
* ----------
*/
extern
PGLZ_Strategy
*
PGLZ_strategy_default
;
extern
PGLZ_Strategy
*
PGLZ_strategy_allways
;
extern
PGLZ_Strategy
*
PGLZ_strategy_never
;
/* ----------
 * Global function declarations
 *
 * pglz_compress	compresses slen bytes at source into dest, which
 *					must provide PGLZ_MAX_OUTPUT(slen) bytes. A NULL
 *					strategy selects PGLZ_strategy_default.
 *
 * pglz_decompress	expands source into dest, which must provide
 *					PGLZ_RAW_SIZE(source) bytes.
 * ----------
 */
int			pglz_compress(char *source, int32 slen, PGLZ_Header *dest,
						  PGLZ_Strategy *strategy);
int			pglz_decompress(PGLZ_Header *source, char *dest);
#endif
/* _PG_LZCOMPRESS_H_ */
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment