Commit aef8948f authored by Michael Paquier's avatar Michael Paquier

Rework refactoring of hex and encoding routines

This commit addresses some issues with c3826f83 that moved the hex
decoding routine to src/common/:
- The decoding function lacked overflow checks, so when used for
security-related features it was an open door to out-of-bound writes if
not carefully used that could remain undetected.  Like the base64
routines already in src/common/ used by SCRAM, this routine is reworked
to check for overflows by having the size of the destination buffer
passed as argument, with overflows checked before doing any writes.
- The encoding routine was missing.  This is moved to src/common/ and
it gains the same overflow checks as the decoding part.

On failure, the hex routines of src/common/ issue an error as per the
discussion done to make them usable by frontend tools, but not by shared
libraries.  Note that this is why ECPG is left out of this commit, and
it still includes a duplicated logic doing hex encoding and decoding.

While on it, this commit uses better variable names for the source and
destination buffers in the existing escape and base64 routines in
encode.c and it makes them more robust to overflow detection.  The
previous core code issued a FATAL after doing out-of-bound writes if
going through the SQL functions, which would be enough to detect
problems when working on changes that impacted this area of the
code.  Instead, an error is issued before doing an out-of-bound write.
The hex routines were being directly called for bytea conversions and
backup manifests without such sanity checks.  The current calls happen
to not have any problems, but careless uses of such APIs could easily
lead to CVE-class bugs.

Author: Bruce Momjian, Michael Paquier
Reviewed-by: Sehrope Sarkuni
Discussion: https://postgr.es/m/20201231003557.GB22199@momjian.us
parent 0d56acfb
......@@ -13,11 +13,11 @@
#include "postgres.h"
#include "access/timeline.h"
#include "common/hex.h"
#include "libpq/libpq.h"
#include "libpq/pqformat.h"
#include "mb/pg_wchar.h"
#include "replication/backup_manifest.h"
#include "utils/builtins.h"
#include "utils/json.h"
static void AppendStringToManifest(backup_manifest_info *manifest, char *s);
......@@ -150,10 +150,12 @@ AddFileToBackupManifest(backup_manifest_info *manifest, const char *spcoid,
}
else
{
uint64 dstlen = pg_hex_enc_len(pathlen);
appendStringInfoString(&buf, "{ \"Encoded-Path\": \"");
enlargeStringInfo(&buf, 2 * pathlen);
buf.len += hex_encode(pathname, pathlen,
&buf.data[buf.len]);
enlargeStringInfo(&buf, dstlen);
buf.len += pg_hex_encode(pathname, pathlen,
&buf.data[buf.len], dstlen);
appendStringInfoString(&buf, "\", ");
}
......@@ -176,6 +178,7 @@ AddFileToBackupManifest(backup_manifest_info *manifest, const char *spcoid,
{
uint8 checksumbuf[PG_CHECKSUM_MAX_LENGTH];
int checksumlen;
uint64 dstlen;
checksumlen = pg_checksum_final(checksum_ctx, checksumbuf);
if (checksumlen < 0)
......@@ -185,9 +188,10 @@ AddFileToBackupManifest(backup_manifest_info *manifest, const char *spcoid,
appendStringInfo(&buf,
", \"Checksum-Algorithm\": \"%s\", \"Checksum\": \"",
pg_checksum_type_name(checksum_ctx->type));
enlargeStringInfo(&buf, 2 * checksumlen);
buf.len += hex_encode((char *) checksumbuf, checksumlen,
&buf.data[buf.len]);
dstlen = pg_hex_enc_len(checksumlen);
enlargeStringInfo(&buf, dstlen);
buf.len += pg_hex_encode((char *) checksumbuf, checksumlen,
&buf.data[buf.len], dstlen);
appendStringInfoChar(&buf, '"');
}
......@@ -307,8 +311,9 @@ SendBackupManifest(backup_manifest_info *manifest)
{
StringInfoData protobuf;
uint8 checksumbuf[PG_SHA256_DIGEST_LENGTH];
char checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH];
char *checksumstringbuf;
size_t manifest_bytes_done = 0;
uint64 dstlen;
if (!IsManifestEnabled(manifest))
return;
......@@ -328,8 +333,11 @@ SendBackupManifest(backup_manifest_info *manifest)
if (pg_cryptohash_final(manifest->manifest_ctx, checksumbuf) < 0)
elog(ERROR, "failed to finalize checksum of backup manifest");
AppendStringToManifest(manifest, "\"Manifest-Checksum\": \"");
hex_encode((char *) checksumbuf, sizeof checksumbuf, checksumstringbuf);
checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH - 1] = '\0';
dstlen = pg_hex_enc_len(PG_SHA256_DIGEST_LENGTH);
checksumstringbuf = palloc0(dstlen + 1); /* includes \0 */
pg_hex_encode((char *) checksumbuf, sizeof checksumbuf,
checksumstringbuf, dstlen);
checksumstringbuf[dstlen] = '\0';
AppendStringToManifest(manifest, checksumstringbuf);
AppendStringToManifest(manifest, "\"}\n");
......
......@@ -15,7 +15,7 @@
#include <ctype.h>
#include "common/hex_decode.h"
#include "common/hex.h"
#include "mb/pg_wchar.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
......@@ -32,10 +32,12 @@
*/
struct pg_encoding
{
uint64 (*encode_len) (const char *data, size_t dlen);
uint64 (*decode_len) (const char *data, size_t dlen);
uint64 (*encode) (const char *data, size_t dlen, char *res);
uint64 (*decode) (const char *data, size_t dlen, char *res);
uint64 (*encode_len) (const char *src, size_t srclen);
uint64 (*decode_len) (const char *src, size_t srclen);
uint64 (*encode) (const char *src, size_t srclen,
char *dst, size_t dstlen);
uint64 (*decode) (const char *src, size_t srclen,
char *dst, size_t dstlen);
};
static const struct pg_encoding *pg_find_encoding(const char *name);
......@@ -81,11 +83,7 @@ binary_encode(PG_FUNCTION_ARGS)
result = palloc(VARHDRSZ + resultlen);
res = enc->encode(dataptr, datalen, VARDATA(result));
/* Make this FATAL 'cause we've trodden on memory ... */
if (res > resultlen)
elog(FATAL, "overflow - encode estimate too small");
res = enc->encode(dataptr, datalen, VARDATA(result), resultlen);
SET_VARSIZE(result, VARHDRSZ + res);
......@@ -129,11 +127,7 @@ binary_decode(PG_FUNCTION_ARGS)
result = palloc(VARHDRSZ + resultlen);
res = enc->decode(dataptr, datalen, VARDATA(result));
/* Make this FATAL 'cause we've trodden on memory ... */
if (res > resultlen)
elog(FATAL, "overflow - decode estimate too small");
res = enc->decode(dataptr, datalen, VARDATA(result), resultlen);
SET_VARSIZE(result, VARHDRSZ + res);
......@@ -145,32 +139,20 @@ binary_decode(PG_FUNCTION_ARGS)
* HEX
*/
static const char hextbl[] = "0123456789abcdef";
uint64
hex_encode(const char *src, size_t len, char *dst)
{
const char *end = src + len;
while (src < end)
{
*dst++ = hextbl[(*src >> 4) & 0xF];
*dst++ = hextbl[*src & 0xF];
src++;
}
return (uint64) len * 2;
}
/*
* Those two wrappers are still needed to match with the layer of
* src/common/.
*/
static uint64
hex_enc_len(const char *src, size_t srclen)
{
return (uint64) srclen << 1;
return pg_hex_enc_len(srclen);
}
static uint64
hex_dec_len(const char *src, size_t srclen)
{
return (uint64) srclen >> 1;
return pg_hex_dec_len(srclen);
}
/*
......@@ -192,12 +174,12 @@ static const int8 b64lookup[128] = {
};
static uint64
pg_base64_encode(const char *src, size_t len, char *dst)
pg_base64_encode(const char *src, size_t srclen, char *dst, size_t dstlen)
{
char *p,
*lend = dst + 76;
const char *s,
*end = src + len;
*end = src + srclen;
int pos = 2;
uint32 buf = 0;
......@@ -213,6 +195,8 @@ pg_base64_encode(const char *src, size_t len, char *dst)
/* write it out */
if (pos < 0)
{
if ((p - dst + 4) > dstlen)
elog(ERROR, "overflow of destination buffer in base64 encoding");
*p++ = _base64[(buf >> 18) & 0x3f];
*p++ = _base64[(buf >> 12) & 0x3f];
*p++ = _base64[(buf >> 6) & 0x3f];
......@@ -223,25 +207,30 @@ pg_base64_encode(const char *src, size_t len, char *dst)
}
if (p >= lend)
{
if ((p - dst + 1) > dstlen)
elog(ERROR, "overflow of destination buffer in base64 encoding");
*p++ = '\n';
lend = p + 76;
}
}
if (pos != 2)
{
if ((p - dst + 4) > dstlen)
elog(ERROR, "overflow of destination buffer in base64 encoding");
*p++ = _base64[(buf >> 18) & 0x3f];
*p++ = _base64[(buf >> 12) & 0x3f];
*p++ = (pos == 0) ? _base64[(buf >> 6) & 0x3f] : '=';
*p++ = '=';
}
Assert((p - dst) <= dstlen);
return p - dst;
}
static uint64
pg_base64_decode(const char *src, size_t len, char *dst)
pg_base64_decode(const char *src, size_t srclen, char *dst, size_t dstlen)
{
const char *srcend = src + len,
const char *srcend = src + srclen,
*s = src;
char *p = dst;
char c;
......@@ -289,11 +278,21 @@ pg_base64_decode(const char *src, size_t len, char *dst)
pos++;
if (pos == 4)
{
if ((p - dst + 1) > dstlen)
elog(ERROR, "overflow of destination buffer in base64 decoding");
*p++ = (buf >> 16) & 255;
if (end == 0 || end > 1)
{
if ((p - dst + 1) > dstlen)
elog(ERROR, "overflow of destination buffer in base64 decoding");
*p++ = (buf >> 8) & 255;
}
if (end == 0 || end > 2)
{
if ((p - dst + 1) > dstlen)
elog(ERROR, "overflow of destination buffer in base64 decoding");
*p++ = buf & 255;
}
buf = 0;
pos = 0;
}
......@@ -305,6 +304,7 @@ pg_base64_decode(const char *src, size_t len, char *dst)
errmsg("invalid base64 end sequence"),
errhint("Input data is missing padding, is truncated, or is otherwise corrupted.")));
Assert((p - dst) <= dstlen);
return p - dst;
}
......@@ -340,7 +340,7 @@ pg_base64_dec_len(const char *src, size_t srclen)
#define DIG(VAL) ((VAL) + '0')
static uint64
esc_encode(const char *src, size_t srclen, char *dst)
esc_encode(const char *src, size_t srclen, char *dst, size_t dstlen)
{
const char *end = src + srclen;
char *rp = dst;
......@@ -352,6 +352,8 @@ esc_encode(const char *src, size_t srclen, char *dst)
if (c == '\0' || IS_HIGHBIT_SET(c))
{
if ((rp - dst + 4) > dstlen)
elog(ERROR, "overflow of destination buffer in escape encoding");
rp[0] = '\\';
rp[1] = DIG(c >> 6);
rp[2] = DIG((c >> 3) & 7);
......@@ -361,6 +363,8 @@ esc_encode(const char *src, size_t srclen, char *dst)
}
else if (c == '\\')
{
if ((rp - dst + 2) > dstlen)
elog(ERROR, "overflow of destination buffer in escape encoding");
rp[0] = '\\';
rp[1] = '\\';
rp += 2;
......@@ -368,6 +372,8 @@ esc_encode(const char *src, size_t srclen, char *dst)
}
else
{
if ((rp - dst + 1) > dstlen)
elog(ERROR, "overflow of destination buffer in escape encoding");
*rp++ = c;
len++;
}
......@@ -375,11 +381,12 @@ esc_encode(const char *src, size_t srclen, char *dst)
src++;
}
Assert((rp - dst) <= dstlen);
return len;
}
static uint64
esc_decode(const char *src, size_t srclen, char *dst)
esc_decode(const char *src, size_t srclen, char *dst, size_t dstlen)
{
const char *end = src + srclen;
char *rp = dst;
......@@ -388,7 +395,11 @@ esc_decode(const char *src, size_t srclen, char *dst)
while (src < end)
{
if (src[0] != '\\')
{
if ((rp - dst + 1) > dstlen)
elog(ERROR, "overflow of destination buffer in escape decoding");
*rp++ = *src++;
}
else if (src + 3 < end &&
(src[1] >= '0' && src[1] <= '3') &&
(src[2] >= '0' && src[2] <= '7') &&
......@@ -400,12 +411,16 @@ esc_decode(const char *src, size_t srclen, char *dst)
val <<= 3;
val += VAL(src[2]);
val <<= 3;
if ((rp - dst + 1) > dstlen)
elog(ERROR, "overflow of destination buffer in escape decoding");
*rp++ = val + VAL(src[3]);
src += 4;
}
else if (src + 1 < end &&
(src[1] == '\\'))
{
if ((rp - dst + 1) > dstlen)
elog(ERROR, "overflow of destination buffer in escape decoding");
*rp++ = '\\';
src += 2;
}
......@@ -423,6 +438,7 @@ esc_decode(const char *src, size_t srclen, char *dst)
len++;
}
Assert((rp - dst) <= dstlen);
return len;
}
......@@ -504,7 +520,7 @@ static const struct
{
"hex",
{
hex_enc_len, hex_dec_len, hex_encode, hex_decode
hex_enc_len, hex_dec_len, pg_hex_encode, pg_hex_decode
}
},
{
......
......@@ -21,8 +21,8 @@
#include "catalog/pg_collation.h"
#include "catalog/pg_type.h"
#include "common/hashfn.h"
#include "common/hex.h"
#include "common/int.h"
#include "common/hex_decode.h"
#include "common/unicode_norm.h"
#include "lib/hyperloglog.h"
#include "libpq/pqformat.h"
......@@ -304,10 +304,12 @@ byteain(PG_FUNCTION_ARGS)
if (inputText[0] == '\\' && inputText[1] == 'x')
{
size_t len = strlen(inputText);
uint64 dstlen = pg_hex_dec_len(len - 2);
bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */
bc = dstlen + VARHDRSZ; /* maximum possible length */
result = palloc(bc);
bc = hex_decode(inputText + 2, len - 2, VARDATA(result));
bc = pg_hex_decode(inputText + 2, len - 2, VARDATA(result), dstlen);
SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
PG_RETURN_BYTEA_P(result);
......@@ -396,11 +398,15 @@ byteaout(PG_FUNCTION_ARGS)
if (bytea_output == BYTEA_OUTPUT_HEX)
{
uint64 dstlen = pg_hex_enc_len(VARSIZE_ANY_EXHDR(vlena));
/* Print hex format */
rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
rp = result = palloc(dstlen + 2 + 1);
*rp++ = '\\';
*rp++ = 'x';
rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
rp += pg_hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp,
dstlen);
}
else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
{
......
......@@ -58,7 +58,7 @@ OBJS_COMMON = \
file_perm.o \
file_utils.o \
hashfn.o \
hex_decode.o \
hex.o \
ip.o \
jsonapi.o \
keywords.o \
......
/*-------------------------------------------------------------------------
*
* hex_decode.c
* hex decoding
* hex.c
* Encoding and decoding routines for hex.
*
*
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/common/hex_decode.c
* src/common/hex.c
*
*-------------------------------------------------------------------------
*/
......@@ -21,12 +19,11 @@
#include "postgres_fe.h"
#endif
#include "common/hex.h"
#ifdef FRONTEND
#include "common/logging.h"
#else
#include "mb/pg_wchar.h"
#endif
#include "common/hex_decode.h"
#include "mb/pg_wchar.h"
static const int8 hexlookup[128] = {
......@@ -40,6 +37,8 @@ static const int8 hexlookup[128] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
static const char hextbl[] = "0123456789abcdef";
static inline char
get_hex(const char *cp)
{
......@@ -65,8 +64,52 @@ get_hex(const char *cp)
return (char) res;
}
/*
* pg_hex_encode
*
* Encode into hex the given string. Returns the length of the encoded
* string.
*/
uint64
pg_hex_encode(const char *src, size_t srclen, char *dst, size_t dstlen)
{
const char *end = src + srclen;
char *p;
p = dst;
while (src < end)
{
/*
* Leave if there is an overflow in the area allocated for the encoded
* string.
*/
if ((p - dst + 2) > dstlen)
{
#ifdef FRONTEND
pg_log_fatal("overflow of destination buffer in hex encoding");
exit(EXIT_FAILURE);
#else
elog(ERROR, "overflow of destination buffer in hex encoding");
#endif
}
*p++ = hextbl[(*src >> 4) & 0xF];
*p++ = hextbl[*src & 0xF];
src++;
}
Assert((p - dst) <= dstlen);
return p - dst;
}
/*
* pg_hex_decode
*
* Decode the given hex string. Returns the length of the decoded string.
*/
uint64
hex_decode(const char *src, size_t len, char *dst)
pg_hex_decode(const char *src, size_t srclen, char *dst, size_t dstlen)
{
const char *s,
*srcend;
......@@ -74,7 +117,7 @@ hex_decode(const char *src, size_t len, char *dst)
v2,
*p;
srcend = src + len;
srcend = src + srclen;
s = src;
p = dst;
while (s < srcend)
......@@ -86,6 +129,7 @@ hex_decode(const char *src, size_t len, char *dst)
}
v1 = get_hex(s) << 4;
s++;
if (s >= srcend)
{
#ifdef FRONTEND
......@@ -97,10 +141,52 @@ hex_decode(const char *src, size_t len, char *dst)
errmsg("invalid hexadecimal data: odd number of digits")));
#endif
}
v2 = get_hex(s);
s++;
/* overflow check */
if ((p - dst + 1) > dstlen)
{
#ifdef FRONTEND
pg_log_fatal("overflow of destination buffer in hex decoding");
exit(EXIT_FAILURE);
#else
elog(ERROR, "overflow of destination buffer in hex decoding");
#endif
}
*p++ = v1 | v2;
}
Assert((p - dst) <= dstlen);
return p - dst;
}
/*
* pg_hex_enc_len
*
* Returns to caller the length of the string if it were encoded with
* hex based on the length provided by caller. This is useful to estimate
* how large a buffer allocation needs to be done before doing the actual
* encoding.
*/
uint64
pg_hex_enc_len(size_t srclen)
{
return (uint64) srclen << 1;
}
/*
* pg_hex_dec_len
*
* Returns to caller the length of the string if it were to be decoded
* with hex, based on the length given by caller. This is useful to
* estimate how large a buffer allocation needs to be done before doing
* the actual decoding.
*/
uint64
pg_hex_dec_len(size_t srclen)
{
return (uint64) srclen >> 1;
}
/*------------------------------------------------------------------------
*
* hex.h
* Encoding and decoding routines for hex strings.
*
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/include/common/hex.h
*
*------------------------------------------------------------------------
*/
#ifndef COMMON_HEX_H
#define COMMON_HEX_H
extern uint64 pg_hex_decode(const char *src, size_t srclen,
char *dst, size_t dstlen);
extern uint64 pg_hex_encode(const char *src, size_t srclen,
char *dst, size_t dstlen);
extern uint64 pg_hex_enc_len(size_t srclen);
extern uint64 pg_hex_dec_len(size_t srclen);
#endif /* COMMON_HEX_H */
/*
* hex_decode.h
* hex decoding
*
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/common/hex_decode.h
*/
#ifndef COMMON_HEX_DECODE_H
#define COMMON_HEX_DECODE_H
extern uint64 hex_decode(const char *src, size_t len, char *dst);
#endif /* COMMON_HEX_DECODE_H */
......@@ -31,9 +31,6 @@ extern void domain_check(Datum value, bool isnull, Oid domainType,
extern int errdatatype(Oid datatypeOid);
extern int errdomainconstraint(Oid datatypeOid, const char *conname);
/* encode.c */
extern uint64 hex_encode(const char *src, size_t len, char *dst);
/* int.c */
extern int2vector *buildint2vector(const int16 *int2s, int n);
......
......@@ -121,7 +121,7 @@ sub mkvcbuild
our @pgcommonallfiles = qw(
archive.c base64.c checksum_helper.c
config_info.c controldata_utils.c d2s.c encnames.c exec.c
f2s.c file_perm.c file_utils.c hashfn.c hex_decode.c ip.c jsonapi.c
f2s.c file_perm.c file_utils.c hashfn.c hex.c ip.c jsonapi.c
keywords.c kwlookup.c link-canary.c md5_common.c
pg_get_line.c pg_lzcompress.c pgfnames.c psprintf.c relpath.c rmtree.c
saslprep.c scram-common.c string.c stringinfo.c unicode_norm.c username.c
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment