Commit d6775504 authored by Tom Lane's avatar Tom Lane

Allow to_date/to_timestamp to recognize non-English month/day names.

to_char() has long allowed the TM (translation mode) prefix to
specify output of translated month or day names; but that prefix
had no effect in input format strings.  Now it does.  to_date()
and to_timestamp() will now recognize the same month or day names
that to_char() would output for the same format code.  Matching is
case-insensitive (per the active collation's notion of what that
means), just as it has always been for English month/day names
without the TM prefix.

(As per the discussion thread, there are lots of cases that this
feature will not handle, such as alternate day names.  But being
able to accept what to_char() will output seems useful enough.)

In passing, fix some shaky English and violations of message
style guidelines in jsonpath errors for the .datetime() method,
which depends on this code.

Juan José Santamaría Flecha, reviewed and modified by me,
with other commentary from Alvaro Herrera, Tomas Vondra,
Arthur Zakirov, Peter Eisentraut, Mark Dilger.

Discussion: https://postgr.es/m/CAC+AXB3u1jTngJcoC1nAHBf=M3v-jrEfo86UFtCqCjzbWS9QhA@mail.gmail.com
parent 1810ca18
......@@ -5968,7 +5968,7 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
</row>
<row>
<entry><literal>TM</literal> prefix</entry>
<entry>translation mode (print localized day and month names based on
<entry>translation mode (use localized day and month names based on
<xref linkend="guc-lc-time"/>)</entry>
<entry><literal>TMMonth</literal></entry>
</row>
......@@ -5999,9 +5999,20 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
<listitem>
<para>
<literal>TM</literal> does not include trailing blanks.
<function>to_timestamp</function> and <function>to_date</function> ignore
the <literal>TM</literal> modifier.
<literal>TM</literal> suppresses trailing blanks whether or
not <literal>FM</literal> is specified.
</para>
</listitem>
<listitem>
<para>
<function>to_timestamp</function> and <function>to_date</function>
ignore letter case in the input; so for
example <literal>MON</literal>, <literal>Mon</literal>,
and <literal>mon</literal> all accept the same strings. When using
the <literal>TM</literal> modifier, case-folding is done according to
the rules of the function's input collation (see
<xref linkend="collation"/>).
</para>
</listitem>
......@@ -12824,12 +12835,12 @@ table2-mapping
<para>
The <literal>datetime()</literal> and
<literal>datetime(<replaceable>template</replaceable>)</literal> methods
use the same parsing rules as <literal>to_timestamp</literal> SQL
function does (see <xref linkend="functions-formatting"/>) with three
exceptions. At first, these methods doesn't allow unmatched template
patterns. At second, only following separators are allowed in the
template string: minus sign, period, solidus, comma, apostrophe,
semicolon, colon and space. At third, separators in the template string
use the same parsing rules as the <literal>to_timestamp</literal> SQL
function does (see <xref linkend="functions-formatting"/>), with three
exceptions. First, these methods don't allow unmatched template
patterns. Second, only the following separators are allowed in the
template string: minus sign, period, solidus (slash), comma, apostrophe,
semicolon, colon and space. Third, separators in the template string
must exactly match the input string.
</para>
</note>
......
This diff is collapsed.
......@@ -1781,6 +1781,7 @@ executeDateTimeMethod(JsonPathExecContext *cxt, JsonPathItem *jsp,
JsonbValue jbvbuf;
Datum value;
text *datetime;
Oid collid;
Oid typid;
int32 typmod = -1;
int tz = 0;
......@@ -1797,6 +1798,13 @@ executeDateTimeMethod(JsonPathExecContext *cxt, JsonPathItem *jsp,
datetime = cstring_to_text_with_len(jb->val.string.val,
jb->val.string.len);
/*
* At some point we might wish to have callers supply the collation to
* use, but right now it's unclear that they'd be able to do better than
* DEFAULT_COLLATION_OID anyway.
*/
collid = DEFAULT_COLLATION_OID;
if (jsp->content.arg)
{
text *template;
......@@ -1814,7 +1822,7 @@ executeDateTimeMethod(JsonPathExecContext *cxt, JsonPathItem *jsp,
template = cstring_to_text_with_len(template_str,
template_len);
value = parse_datetime(datetime, template, true,
value = parse_datetime(datetime, template, collid, true,
&typid, &typmod, &tz,
jspThrowErrors(cxt) ? NULL : &have_error);
......@@ -1858,7 +1866,7 @@ executeDateTimeMethod(JsonPathExecContext *cxt, JsonPathItem *jsp,
MemoryContextSwitchTo(oldcxt);
}
value = parse_datetime(datetime, fmt_txt[i], true,
value = parse_datetime(datetime, fmt_txt[i], collid, true,
&typid, &typmod, &tz,
&have_error);
......@@ -1872,8 +1880,9 @@ executeDateTimeMethod(JsonPathExecContext *cxt, JsonPathItem *jsp,
if (res == jperNotFound)
RETURN_ERROR(ereport(ERROR,
(errcode(ERRCODE_INVALID_ARGUMENT_FOR_JSON_DATETIME_FUNCTION),
errmsg("datetime format is not unrecognized"),
errhint("use datetime template argument for explicit format specification"))));
errmsg("datetime format is not recognized: \"%s\"",
text_to_cstring(datetime)),
errhint("Use a datetime template argument to specify the input data format."))));
}
pfree(datetime);
......
......@@ -96,11 +96,17 @@ char *locale_monetary;
char *locale_numeric;
char *locale_time;
/* lc_time localization cache */
char *localized_abbrev_days[7];
char *localized_full_days[7];
char *localized_abbrev_months[12];
char *localized_full_months[12];
/*
* lc_time localization cache.
*
* We use only the first 7 or 12 entries of these arrays. The last array
* element is left as NULL for the convenience of outside code that wants
* to sequentially scan these arrays.
*/
char *localized_abbrev_days[7 + 1];
char *localized_full_days[7 + 1];
char *localized_abbrev_months[12 + 1];
char *localized_full_months[12 + 1];
/* indicates whether locale information cache is valid */
static bool CurrentLocaleConvValid = false;
......@@ -922,6 +928,8 @@ cache_locale_time(void)
cache_single_string(&localized_full_days[i], bufptr, encoding);
bufptr += MAX_L10N_DATA;
}
localized_abbrev_days[7] = NULL;
localized_full_days[7] = NULL;
/* localized months */
for (i = 0; i < 12; i++)
......@@ -931,6 +939,8 @@ cache_locale_time(void)
cache_single_string(&localized_full_months[i], bufptr, encoding);
bufptr += MAX_L10N_DATA;
}
localized_abbrev_months[12] = NULL;
localized_full_months[12] = NULL;
CurrentLCTimeValid = true;
}
......
......@@ -26,7 +26,7 @@ extern char *asc_tolower(const char *buff, size_t nbytes);
extern char *asc_toupper(const char *buff, size_t nbytes);
extern char *asc_initcap(const char *buff, size_t nbytes);
extern Datum parse_datetime(text *date_txt, text *fmt, bool std,
extern Datum parse_datetime(text *date_txt, text *fmt, Oid collid, bool strict,
Oid *typid, int32 *typmod, int *tz,
bool *have_error);
......
......@@ -461,6 +461,22 @@ SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr_TR");
01 NİS 2010
(1 row)
-- to_date
SELECT to_date('01 ŞUB 2010', 'DD TMMON YYYY');
to_date
------------
02-01-2010
(1 row)
SELECT to_date('01 Şub 2010', 'DD TMMON YYYY');
to_date
------------
02-01-2010
(1 row)
SELECT to_date('1234567890ab 2010', 'TMMONTH YYYY'); -- fail
ERROR: invalid value "1234567890ab" for "MONTH"
DETAIL: The given value did not match any of the allowed values for this field.
-- backwards parsing
CREATE VIEW collview1 AS SELECT * FROM collate_test1 WHERE b COLLATE "C" >= 'bbc';
CREATE VIEW collview2 AS SELECT a, b FROM collate_test1 ORDER BY b COLLATE "C";
......
......@@ -1674,9 +1674,9 @@ select jsonb_path_query('[]', 'strict $.datetime()');
ERROR: jsonpath item method .datetime() can only be applied to a string
select jsonb_path_query('{}', '$.datetime()');
ERROR: jsonpath item method .datetime() can only be applied to a string
select jsonb_path_query('""', '$.datetime()');
ERROR: datetime format is not unrecognized
HINT: use datetime template argument for explicit format specification
select jsonb_path_query('"bogus"', '$.datetime()');
ERROR: datetime format is not recognized: "bogus"
HINT: Use a datetime template argument to specify the input data format.
select jsonb_path_query('"12:34"', '$.datetime("aaa")');
ERROR: invalid datetime format separator: "a"
select jsonb_path_query('"aaaa"', '$.datetime("HH24")');
......
......@@ -182,6 +182,12 @@ SELECT to_char(date '2010-02-01', 'DD TMMON YYYY' COLLATE "tr_TR");
SELECT to_char(date '2010-04-01', 'DD TMMON YYYY');
SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr_TR");
-- to_date
SELECT to_date('01 ŞUB 2010', 'DD TMMON YYYY');
SELECT to_date('01 Şub 2010', 'DD TMMON YYYY');
SELECT to_date('1234567890ab 2010', 'TMMONTH YYYY'); -- fail
-- backwards parsing
......
......@@ -352,7 +352,7 @@ select jsonb_path_query('1', '$.datetime()');
select jsonb_path_query('[]', '$.datetime()');
select jsonb_path_query('[]', 'strict $.datetime()');
select jsonb_path_query('{}', '$.datetime()');
select jsonb_path_query('""', '$.datetime()');
select jsonb_path_query('"bogus"', '$.datetime()');
select jsonb_path_query('"12:34"', '$.datetime("aaa")');
select jsonb_path_query('"aaaa"', '$.datetime("HH24")');
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment