Commit 1a950f37 authored by Alexander Korotkov

Implement standard datetime parsing mode

SQL Standard 2016 defines rules for handling separators in datetime template
strings, which differ from the to_date()/to_timestamp() rules.  The standard
allows only a small set of separators and requires strict matching for them.

The standard applies to the jsonpath .datetime() method and the
CAST (... FORMAT ...) SQL clause.  We're not going to change the handling of
separators in the existing to_date()/to_timestamp() functions, because their
current behavior is familiar to users.  The standard behavior is now available
via a special flag, which will be used in the upcoming .datetime() jsonpath
method.

Discussion: https://postgr.es/m/CAPpHfdsZgYEra_PeCLGNoXOWYx6iU-S3wF8aX0ObQUcZU%2B4XTw%40mail.gmail.com
Author: Alexander Korotkov
parent bd29cc19
...@@ -99,11 +99,12 @@ ...@@ -99,11 +99,12 @@
#include "utils/pg_locale.h" #include "utils/pg_locale.h"
/* ---------- /* ----------
* Routines type * Routines flags
* ---------- * ----------
*/ */
#define DCH_TYPE 1 /* DATE-TIME version */ #define DCH_FLAG 0x1 /* DATE-TIME flag */
#define NUM_TYPE 2 /* NUMBER version */ #define NUM_FLAG 0x2 /* NUMBER flag */
#define STD_FLAG 0x4 /* STANDARD flag */
/* ---------- /* ----------
* KeyWord Index (ascii from position 32 (' ') to 126 (~)) * KeyWord Index (ascii from position 32 (' ') to 126 (~))
...@@ -384,6 +385,7 @@ typedef struct ...@@ -384,6 +385,7 @@ typedef struct
{ {
FormatNode format[DCH_CACHE_SIZE + 1]; FormatNode format[DCH_CACHE_SIZE + 1];
char str[DCH_CACHE_SIZE + 1]; char str[DCH_CACHE_SIZE + 1];
bool std;
bool valid; bool valid;
int age; int age;
} DCHCacheEntry; } DCHCacheEntry;
...@@ -1000,11 +1002,12 @@ static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int t ...@@ -1000,11 +1002,12 @@ static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int t
static bool is_separator_char(const char *str); static bool is_separator_char(const char *str);
static void NUMDesc_prepare(NUMDesc *num, FormatNode *n); static void NUMDesc_prepare(NUMDesc *num, FormatNode *n);
static void parse_format(FormatNode *node, const char *str, const KeyWord *kw, static void parse_format(FormatNode *node, const char *str, const KeyWord *kw,
const KeySuffix *suf, const int *index, int ver, NUMDesc *Num); const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num);
static void DCH_to_char(FormatNode *node, bool is_interval, static void DCH_to_char(FormatNode *node, bool is_interval,
TmToChar *in, char *out, Oid collid); TmToChar *in, char *out, Oid collid);
static void DCH_from_char(FormatNode *node, char *in, TmFromChar *out); static void DCH_from_char(FormatNode *node, char *in, TmFromChar *out,
bool std);
#ifdef DEBUG_TO_FROM_CHAR #ifdef DEBUG_TO_FROM_CHAR
static void dump_index(const KeyWord *k, const int *index); static void dump_index(const KeyWord *k, const int *index);
...@@ -1021,7 +1024,7 @@ static int from_char_parse_int_len(int *dest, char **src, const int len, FormatN ...@@ -1021,7 +1024,7 @@ static int from_char_parse_int_len(int *dest, char **src, const int len, FormatN
static int from_char_parse_int(int *dest, char **src, FormatNode *node); static int from_char_parse_int(int *dest, char **src, FormatNode *node);
static int seq_search(char *name, const char *const *array, int type, int max, int *len); static int seq_search(char *name, const char *const *array, int type, int max, int *len);
static int from_char_seq_search(int *dest, char **src, const char *const *array, int type, int max, FormatNode *node); static int from_char_seq_search(int *dest, char **src, const char *const *array, int type, int max, FormatNode *node);
static void do_to_timestamp(text *date_txt, text *fmt, static void do_to_timestamp(text *date_txt, text *fmt, bool std,
struct pg_tm *tm, fsec_t *fsec, int *fprec); struct pg_tm *tm, fsec_t *fsec, int *fprec);
static char *fill_str(char *str, int c, int max); static char *fill_str(char *str, int c, int max);
static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree); static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree);
...@@ -1033,9 +1036,9 @@ static void NUM_numpart_to_char(NUMProc *Np, int id); ...@@ -1033,9 +1036,9 @@ static void NUM_numpart_to_char(NUMProc *Np, int id);
static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
char *number, int input_len, int to_char_out_pre_spaces, char *number, int input_len, int to_char_out_pre_spaces,
int sign, bool is_to_char, Oid collid); int sign, bool is_to_char, Oid collid);
static DCHCacheEntry *DCH_cache_getnew(const char *str); static DCHCacheEntry *DCH_cache_getnew(const char *str, bool std);
static DCHCacheEntry *DCH_cache_search(const char *str); static DCHCacheEntry *DCH_cache_search(const char *str, bool std);
static DCHCacheEntry *DCH_cache_fetch(const char *str); static DCHCacheEntry *DCH_cache_fetch(const char *str, bool std);
static NUMCacheEntry *NUM_cache_getnew(const char *str); static NUMCacheEntry *NUM_cache_getnew(const char *str);
static NUMCacheEntry *NUM_cache_search(const char *str); static NUMCacheEntry *NUM_cache_search(const char *str);
static NUMCacheEntry *NUM_cache_fetch(const char *str); static NUMCacheEntry *NUM_cache_fetch(const char *str);
...@@ -1278,7 +1281,7 @@ NUMDesc_prepare(NUMDesc *num, FormatNode *n) ...@@ -1278,7 +1281,7 @@ NUMDesc_prepare(NUMDesc *num, FormatNode *n)
*/ */
static void static void
parse_format(FormatNode *node, const char *str, const KeyWord *kw, parse_format(FormatNode *node, const char *str, const KeyWord *kw,
const KeySuffix *suf, const int *index, int ver, NUMDesc *Num) const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num)
{ {
FormatNode *n; FormatNode *n;
...@@ -1296,7 +1299,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw, ...@@ -1296,7 +1299,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
/* /*
* Prefix * Prefix
*/ */
if (ver == DCH_TYPE && if ((flags & DCH_FLAG) &&
(s = suff_search(str, suf, SUFFTYPE_PREFIX)) != NULL) (s = suff_search(str, suf, SUFFTYPE_PREFIX)) != NULL)
{ {
suffix |= s->id; suffix |= s->id;
...@@ -1317,13 +1320,13 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw, ...@@ -1317,13 +1320,13 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
/* /*
* NUM version: Prepare global NUMDesc struct * NUM version: Prepare global NUMDesc struct
*/ */
if (ver == NUM_TYPE) if (flags & NUM_FLAG)
NUMDesc_prepare(Num, n); NUMDesc_prepare(Num, n);
/* /*
* Postfix * Postfix
*/ */
if (ver == DCH_TYPE && *str && if ((flags & DCH_FLAG) && *str &&
(s = suff_search(str, suf, SUFFTYPE_POSTFIX)) != NULL) (s = suff_search(str, suf, SUFFTYPE_POSTFIX)) != NULL)
{ {
n->suffix |= s->id; n->suffix |= s->id;
...@@ -1337,11 +1340,34 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw, ...@@ -1337,11 +1340,34 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
{ {
int chlen; int chlen;
/* if (flags & STD_FLAG)
* Process double-quoted literal string, if any {
*/ /*
if (*str == '"') * Standard mode, allow only following separators: "-./,':; "
*/
if (strchr("-./,':; ", *str) == NULL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
errmsg("invalid datetime format separator: \"%s\"",
pnstrdup(str, pg_mblen(str)))));
if (*str == ' ')
n->type = NODE_TYPE_SPACE;
else
n->type = NODE_TYPE_SEPARATOR;
n->character[0] = *str;
n->character[1] = '\0';
n->key = NULL;
n->suffix = 0;
n++;
str++;
}
else if (*str == '"')
{ {
/*
* Process double-quoted literal string, if any
*/
str++; str++;
while (*str) while (*str)
{ {
...@@ -1373,7 +1399,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw, ...@@ -1373,7 +1399,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
str++; str++;
chlen = pg_mblen(str); chlen = pg_mblen(str);
if (ver == DCH_TYPE && is_separator_char(str)) if ((flags & DCH_FLAG) && is_separator_char(str))
n->type = NODE_TYPE_SEPARATOR; n->type = NODE_TYPE_SEPARATOR;
else if (isspace((unsigned char) *str)) else if (isspace((unsigned char) *str))
n->type = NODE_TYPE_SPACE; n->type = NODE_TYPE_SPACE;
...@@ -3060,13 +3086,13 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col ...@@ -3060,13 +3086,13 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
* ---------- * ----------
*/ */
static void static void
DCH_from_char(FormatNode *node, char *in, TmFromChar *out) DCH_from_char(FormatNode *node, char *in, TmFromChar *out, bool std)
{ {
FormatNode *n; FormatNode *n;
char *s; char *s;
int len, int len,
value; value;
bool fx_mode = false; bool fx_mode = std;
/* number of extra skipped characters (more than given in format string) */ /* number of extra skipped characters (more than given in format string) */
int extra_skip = 0; int extra_skip = 0;
...@@ -3089,7 +3115,23 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) ...@@ -3089,7 +3115,23 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR) if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR)
{ {
if (!fx_mode) if (std)
{
/*
* Standard mode requires strict matching between format
* string separators/spaces and input string.
*/
Assert(n->character[0] && !n->character[1]);
if (*s == n->character[0])
s++;
else
ereport(ERROR,
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
errmsg("unmatched format separator \"%c\"",
n->character[0])));
}
else if (!fx_mode)
{ {
/* /*
* In non FX (fixed format) mode one format string space or * In non FX (fixed format) mode one format string space or
...@@ -3434,6 +3476,27 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) ...@@ -3434,6 +3476,27 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
} }
} }
} }
/*
* Standard parsing mode doesn't allow unmatched format patterns or
* trailing characters in the input string.
*/
if (std)
{
if (n->type != NODE_TYPE_END)
ereport(ERROR,
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
errmsg("input string is too short for datetime format")));
while (*s != '\0' && isspace((unsigned char) *s))
s++;
if (*s != '\0')
ereport(ERROR,
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
errmsg("trailing characters remain in input string after "
"datetime format")));
}
} }
/* /*
...@@ -3456,7 +3519,7 @@ DCH_prevent_counter_overflow(void) ...@@ -3456,7 +3519,7 @@ DCH_prevent_counter_overflow(void)
/* select a DCHCacheEntry to hold the given format picture */ /* select a DCHCacheEntry to hold the given format picture */
static DCHCacheEntry * static DCHCacheEntry *
DCH_cache_getnew(const char *str) DCH_cache_getnew(const char *str, bool std)
{ {
DCHCacheEntry *ent; DCHCacheEntry *ent;
...@@ -3506,6 +3569,7 @@ DCH_cache_getnew(const char *str) ...@@ -3506,6 +3569,7 @@ DCH_cache_getnew(const char *str)
MemoryContextAllocZero(TopMemoryContext, sizeof(DCHCacheEntry)); MemoryContextAllocZero(TopMemoryContext, sizeof(DCHCacheEntry));
ent->valid = false; ent->valid = false;
StrNCpy(ent->str, str, DCH_CACHE_SIZE + 1); StrNCpy(ent->str, str, DCH_CACHE_SIZE + 1);
ent->std = std;
ent->age = (++DCHCounter); ent->age = (++DCHCounter);
/* caller is expected to fill format, then set valid */ /* caller is expected to fill format, then set valid */
++n_DCHCache; ++n_DCHCache;
...@@ -3515,7 +3579,7 @@ DCH_cache_getnew(const char *str) ...@@ -3515,7 +3579,7 @@ DCH_cache_getnew(const char *str)
/* look for an existing DCHCacheEntry matching the given format picture */ /* look for an existing DCHCacheEntry matching the given format picture */
static DCHCacheEntry * static DCHCacheEntry *
DCH_cache_search(const char *str) DCH_cache_search(const char *str, bool std)
{ {
/* Ensure we can advance DCHCounter below */ /* Ensure we can advance DCHCounter below */
DCH_prevent_counter_overflow(); DCH_prevent_counter_overflow();
...@@ -3524,7 +3588,7 @@ DCH_cache_search(const char *str) ...@@ -3524,7 +3588,7 @@ DCH_cache_search(const char *str)
{ {
DCHCacheEntry *ent = DCHCache[i]; DCHCacheEntry *ent = DCHCache[i];
if (ent->valid && strcmp(ent->str, str) == 0) if (ent->valid && strcmp(ent->str, str) == 0 && ent->std == std)
{ {
ent->age = (++DCHCounter); ent->age = (++DCHCounter);
return ent; return ent;
...@@ -3536,21 +3600,21 @@ DCH_cache_search(const char *str) ...@@ -3536,21 +3600,21 @@ DCH_cache_search(const char *str)
/* Find or create a DCHCacheEntry for the given format picture */ /* Find or create a DCHCacheEntry for the given format picture */
static DCHCacheEntry * static DCHCacheEntry *
DCH_cache_fetch(const char *str) DCH_cache_fetch(const char *str, bool std)
{ {
DCHCacheEntry *ent; DCHCacheEntry *ent;
if ((ent = DCH_cache_search(str)) == NULL) if ((ent = DCH_cache_search(str, std)) == NULL)
{ {
/* /*
* Not in the cache, must run parser and save a new format-picture to * Not in the cache, must run parser and save a new format-picture to
* the cache. Do not mark the cache entry valid until parsing * the cache. Do not mark the cache entry valid until parsing
* succeeds. * succeeds.
*/ */
ent = DCH_cache_getnew(str); ent = DCH_cache_getnew(str, std);
parse_format(ent->format, str, DCH_keywords, parse_format(ent->format, str, DCH_keywords, DCH_suff, DCH_index,
DCH_suff, DCH_index, DCH_TYPE, NULL); DCH_FLAG | (std ? STD_FLAG : 0), NULL);
ent->valid = true; ent->valid = true;
} }
...@@ -3595,14 +3659,14 @@ datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid) ...@@ -3595,14 +3659,14 @@ datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid)
format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode)); format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
parse_format(format, fmt_str, DCH_keywords, parse_format(format, fmt_str, DCH_keywords,
DCH_suff, DCH_index, DCH_TYPE, NULL); DCH_suff, DCH_index, DCH_FLAG, NULL);
} }
else else
{ {
/* /*
* Use cache buffers * Use cache buffers
*/ */
DCHCacheEntry *ent = DCH_cache_fetch(fmt_str); DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, false);
incache = true; incache = true;
format = ent->format; format = ent->format;
...@@ -3744,7 +3808,7 @@ to_timestamp(PG_FUNCTION_ARGS) ...@@ -3744,7 +3808,7 @@ to_timestamp(PG_FUNCTION_ARGS)
fsec_t fsec; fsec_t fsec;
int fprec; int fprec;
do_to_timestamp(date_txt, fmt, &tm, &fsec, &fprec); do_to_timestamp(date_txt, fmt, false, &tm, &fsec, &fprec);
/* Use the specified time zone, if any. */ /* Use the specified time zone, if any. */
if (tm.tm_zone) if (tm.tm_zone)
...@@ -3783,7 +3847,7 @@ to_date(PG_FUNCTION_ARGS) ...@@ -3783,7 +3847,7 @@ to_date(PG_FUNCTION_ARGS)
struct pg_tm tm; struct pg_tm tm;
fsec_t fsec; fsec_t fsec;
do_to_timestamp(date_txt, fmt, &tm, &fsec, NULL); do_to_timestamp(date_txt, fmt, false, &tm, &fsec, NULL);
/* Prevent overflow in Julian-day routines */ /* Prevent overflow in Julian-day routines */
if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday)) if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
...@@ -3818,7 +3882,7 @@ to_date(PG_FUNCTION_ARGS) ...@@ -3818,7 +3882,7 @@ to_date(PG_FUNCTION_ARGS)
* struct 'tm' and 'fsec'. * struct 'tm' and 'fsec'.
*/ */
static void static void
do_to_timestamp(text *date_txt, text *fmt, do_to_timestamp(text *date_txt, text *fmt, bool std,
struct pg_tm *tm, fsec_t *fsec, int *fprec) struct pg_tm *tm, fsec_t *fsec, int *fprec)
{ {
FormatNode *format; FormatNode *format;
...@@ -3853,15 +3917,15 @@ do_to_timestamp(text *date_txt, text *fmt, ...@@ -3853,15 +3917,15 @@ do_to_timestamp(text *date_txt, text *fmt,
format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode)); format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
parse_format(format, fmt_str, DCH_keywords, parse_format(format, fmt_str, DCH_keywords, DCH_suff, DCH_index,
DCH_suff, DCH_index, DCH_TYPE, NULL); DCH_FLAG | (std ? STD_FLAG : 0), NULL);
} }
else else
{ {
/* /*
* Use cache buffers * Use cache buffers
*/ */
DCHCacheEntry *ent = DCH_cache_fetch(fmt_str); DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, std);
incache = true; incache = true;
format = ent->format; format = ent->format;
...@@ -3872,7 +3936,7 @@ do_to_timestamp(text *date_txt, text *fmt, ...@@ -3872,7 +3936,7 @@ do_to_timestamp(text *date_txt, text *fmt,
/* dump_index(DCH_keywords, DCH_index); */ /* dump_index(DCH_keywords, DCH_index); */
#endif #endif
DCH_from_char(format, date_str, &tmfc); DCH_from_char(format, date_str, &tmfc, std);
pfree(fmt_str); pfree(fmt_str);
...@@ -4241,7 +4305,7 @@ NUM_cache_fetch(const char *str) ...@@ -4241,7 +4305,7 @@ NUM_cache_fetch(const char *str)
zeroize_NUM(&ent->Num); zeroize_NUM(&ent->Num);
parse_format(ent->format, str, NUM_keywords, parse_format(ent->format, str, NUM_keywords,
NULL, NUM_index, NUM_TYPE, &ent->Num); NULL, NUM_index, NUM_FLAG, &ent->Num);
ent->valid = true; ent->valid = true;
} }
...@@ -4273,7 +4337,7 @@ NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree) ...@@ -4273,7 +4337,7 @@ NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree)
zeroize_NUM(Num); zeroize_NUM(Num);
parse_format(format, str, NUM_keywords, parse_format(format, str, NUM_keywords,
NULL, NUM_index, NUM_TYPE, Num); NULL, NUM_index, NUM_FLAG, Num);
} }
else else
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment