Commit 1f3a0217 authored by Robert Haas

Adjust pg_parse_json() so that it does not directly ereport().

Instead, it now returns a value indicating either success or the
type of error which occurred. The old behavior is still available
by calling pg_parse_json_or_ereport(). If the new interface is
used, an error can be thrown by passing the return value of
pg_parse_json() to json_ereport_error().

pg_parse_json() can still elog() in can't-happen cases, but it
seems like that issue is best handled separately.

Adjust json_lex() and json_count_array_elements() to return an
error code, too.

This is all in preparation for making the backend's json parser
available to frontend code.

Reviewed and/or tested by Mark Dilger and Andrew Dunstan.

Discussion: http://postgr.es/m/CA+TgmoYfOXhd27MUDGioVh6QtpD0C1K-f6ObSA10AWiHBAL5bA@mail.gmail.com
parent 3e4818e9
...@@ -81,7 +81,7 @@ json_in(PG_FUNCTION_ARGS) ...@@ -81,7 +81,7 @@ json_in(PG_FUNCTION_ARGS)
/* validate it */ /* validate it */
lex = makeJsonLexContext(result, false); lex = makeJsonLexContext(result, false);
pg_parse_json(lex, &nullSemAction); pg_parse_json_or_ereport(lex, &nullSemAction);
/* Internal representation is the same as text, for now */ /* Internal representation is the same as text, for now */
PG_RETURN_TEXT_P(result); PG_RETURN_TEXT_P(result);
...@@ -128,7 +128,7 @@ json_recv(PG_FUNCTION_ARGS) ...@@ -128,7 +128,7 @@ json_recv(PG_FUNCTION_ARGS)
/* Validate it. */ /* Validate it. */
lex = makeJsonLexContextCstringLen(str, nbytes, false); lex = makeJsonLexContextCstringLen(str, nbytes, false);
pg_parse_json(lex, &nullSemAction); pg_parse_json_or_ereport(lex, &nullSemAction);
PG_RETURN_TEXT_P(cstring_to_text_with_len(str, nbytes)); PG_RETURN_TEXT_P(cstring_to_text_with_len(str, nbytes));
} }
...@@ -1337,12 +1337,15 @@ json_typeof(PG_FUNCTION_ARGS) ...@@ -1337,12 +1337,15 @@ json_typeof(PG_FUNCTION_ARGS)
JsonLexContext *lex; JsonLexContext *lex;
JsonTokenType tok; JsonTokenType tok;
char *type; char *type;
JsonParseErrorType result;
json = PG_GETARG_TEXT_PP(0); json = PG_GETARG_TEXT_PP(0);
lex = makeJsonLexContext(json, false); lex = makeJsonLexContext(json, false);
/* Lex exactly one token from the input and check its type. */ /* Lex exactly one token from the input and check its type. */
json_lex(lex); result = json_lex(lex);
if (result != JSON_SUCCESS)
json_ereport_error(result, lex);
tok = lex->token_type; tok = lex->token_type;
switch (tok) switch (tok)
{ {
......
...@@ -35,18 +35,17 @@ typedef enum /* contexts of JSON parser */ ...@@ -35,18 +35,17 @@ typedef enum /* contexts of JSON parser */
JSON_PARSE_END /* saw the end of a document, expect nothing */ JSON_PARSE_END /* saw the end of a document, expect nothing */
} JsonParseContext; } JsonParseContext;
static inline void json_lex_string(JsonLexContext *lex); static inline JsonParseErrorType json_lex_string(JsonLexContext *lex);
static inline void json_lex_number(JsonLexContext *lex, char *s, static inline JsonParseErrorType json_lex_number(JsonLexContext *lex, char *s,
bool *num_err, int *total_len); bool *num_err, int *total_len);
static inline void parse_scalar(JsonLexContext *lex, JsonSemAction *sem); static inline JsonParseErrorType parse_scalar(JsonLexContext *lex, JsonSemAction *sem);
static void parse_object_field(JsonLexContext *lex, JsonSemAction *sem); static JsonParseErrorType parse_object_field(JsonLexContext *lex, JsonSemAction *sem);
static void parse_object(JsonLexContext *lex, JsonSemAction *sem); static JsonParseErrorType parse_object(JsonLexContext *lex, JsonSemAction *sem);
static void parse_array_element(JsonLexContext *lex, JsonSemAction *sem); static JsonParseErrorType parse_array_element(JsonLexContext *lex, JsonSemAction *sem);
static void parse_array(JsonLexContext *lex, JsonSemAction *sem); static JsonParseErrorType parse_array(JsonLexContext *lex, JsonSemAction *sem);
static void report_parse_error(JsonParseContext ctx, JsonLexContext *lex) pg_attribute_noreturn(); static JsonParseErrorType report_parse_error(JsonParseContext ctx, JsonLexContext *lex);
static void report_invalid_token(JsonLexContext *lex) pg_attribute_noreturn();
static int report_json_context(JsonLexContext *lex); static int report_json_context(JsonLexContext *lex);
static char *extract_mb_char(char *s); static char *extract_token(JsonLexContext *lex);
/* the null action object used for pure validation */ /* the null action object used for pure validation */
JsonSemAction nullSemAction = JsonSemAction nullSemAction =
...@@ -74,13 +73,13 @@ lex_peek(JsonLexContext *lex) ...@@ -74,13 +73,13 @@ lex_peek(JsonLexContext *lex)
* move the lexer to the next token if the current look_ahead token matches * move the lexer to the next token if the current look_ahead token matches
* the parameter token. Otherwise, report an error. * the parameter token. Otherwise, report an error.
*/ */
static inline void static inline JsonParseErrorType
lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token) lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token)
{ {
if (lex_peek(lex) == token) if (lex_peek(lex) == token)
json_lex(lex); return json_lex(lex);
else else
report_parse_error(ctx, lex); return report_parse_error(ctx, lex);
} }
/* chars to consider as part of an alphanumeric token */ /* chars to consider as part of an alphanumeric token */
...@@ -171,13 +170,16 @@ makeJsonLexContextCstringLen(char *json, int len, bool need_escapes) ...@@ -171,13 +170,16 @@ makeJsonLexContextCstringLen(char *json, int len, bool need_escapes)
* action routines to be called at appropriate spots during parsing, and a * action routines to be called at appropriate spots during parsing, and a
* pointer to a state object to be passed to those routines. * pointer to a state object to be passed to those routines.
*/ */
void JsonParseErrorType
pg_parse_json(JsonLexContext *lex, JsonSemAction *sem) pg_parse_json(JsonLexContext *lex, JsonSemAction *sem)
{ {
JsonTokenType tok; JsonTokenType tok;
JsonParseErrorType result;
/* get the initial token */ /* get the initial token */
json_lex(lex); result = json_lex(lex);
if (result != JSON_SUCCESS)
return result;
tok = lex_peek(lex); tok = lex_peek(lex);
...@@ -185,17 +187,36 @@ pg_parse_json(JsonLexContext *lex, JsonSemAction *sem) ...@@ -185,17 +187,36 @@ pg_parse_json(JsonLexContext *lex, JsonSemAction *sem)
switch (tok) switch (tok)
{ {
case JSON_TOKEN_OBJECT_START: case JSON_TOKEN_OBJECT_START:
parse_object(lex, sem); result = parse_object(lex, sem);
break; break;
case JSON_TOKEN_ARRAY_START: case JSON_TOKEN_ARRAY_START:
parse_array(lex, sem); result = parse_array(lex, sem);
break; break;
default: default:
parse_scalar(lex, sem); /* json can be a bare scalar */ result = parse_scalar(lex, sem); /* json can be a bare scalar */
} }
lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END); if (result == JSON_SUCCESS)
result = lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END);
return result;
}
/*
* pg_parse_json_or_ereport
*
* This function is like pg_parse_json, except that it does not return a
* JsonParseErrorType. Instead, in case of any failure, this function will
* ereport(ERROR).
*/
void
pg_parse_json_or_ereport(JsonLexContext *lex, JsonSemAction *sem)
{
JsonParseErrorType result;
result = pg_parse_json(lex, sem);
if (result != JSON_SUCCESS)
json_ereport_error(result, lex);
} }
/* /*
...@@ -206,11 +227,12 @@ pg_parse_json(JsonLexContext *lex, JsonSemAction *sem) ...@@ -206,11 +227,12 @@ pg_parse_json(JsonLexContext *lex, JsonSemAction *sem)
* *
* Designed to be called from array_start routines. * Designed to be called from array_start routines.
*/ */
int JsonParseErrorType
json_count_array_elements(JsonLexContext *lex) json_count_array_elements(JsonLexContext *lex, int *elements)
{ {
JsonLexContext copylex; JsonLexContext copylex;
int count; int count;
JsonParseErrorType result;
/* /*
* It's safe to do this with a shallow copy because the lexical routines * It's safe to do this with a shallow copy because the lexical routines
...@@ -222,21 +244,32 @@ json_count_array_elements(JsonLexContext *lex) ...@@ -222,21 +244,32 @@ json_count_array_elements(JsonLexContext *lex)
copylex.lex_level++; copylex.lex_level++;
count = 0; count = 0;
lex_expect(JSON_PARSE_ARRAY_START, &copylex, JSON_TOKEN_ARRAY_START); result = lex_expect(JSON_PARSE_ARRAY_START, &copylex,
JSON_TOKEN_ARRAY_START);
if (result != JSON_SUCCESS)
return result;
if (lex_peek(&copylex) != JSON_TOKEN_ARRAY_END) if (lex_peek(&copylex) != JSON_TOKEN_ARRAY_END)
{ {
while (1) while (1)
{ {
count++; count++;
parse_array_element(&copylex, &nullSemAction); result = parse_array_element(&copylex, &nullSemAction);
if (result != JSON_SUCCESS)
return result;
if (copylex.token_type != JSON_TOKEN_COMMA) if (copylex.token_type != JSON_TOKEN_COMMA)
break; break;
json_lex(&copylex); result = json_lex(&copylex);
if (result != JSON_SUCCESS)
return result;
} }
} }
lex_expect(JSON_PARSE_ARRAY_NEXT, &copylex, JSON_TOKEN_ARRAY_END); result = lex_expect(JSON_PARSE_ARRAY_NEXT, &copylex,
JSON_TOKEN_ARRAY_END);
if (result != JSON_SUCCESS)
return result;
return count; *elements = count;
return JSON_SUCCESS;
} }
/* /*
...@@ -248,25 +281,23 @@ json_count_array_elements(JsonLexContext *lex) ...@@ -248,25 +281,23 @@ json_count_array_elements(JsonLexContext *lex)
* - object ( { } ) * - object ( { } )
* - object field * - object field
*/ */
static inline void static inline JsonParseErrorType
parse_scalar(JsonLexContext *lex, JsonSemAction *sem) parse_scalar(JsonLexContext *lex, JsonSemAction *sem)
{ {
char *val = NULL; char *val = NULL;
json_scalar_action sfunc = sem->scalar; json_scalar_action sfunc = sem->scalar;
JsonTokenType tok = lex_peek(lex); JsonTokenType tok = lex_peek(lex);
JsonParseErrorType result;
/* a scalar must be a string, a number, true, false, or null */ /* a scalar must be a string, a number, true, false, or null */
if (tok != JSON_TOKEN_STRING && tok != JSON_TOKEN_NUMBER && if (tok != JSON_TOKEN_STRING && tok != JSON_TOKEN_NUMBER &&
tok != JSON_TOKEN_TRUE && tok != JSON_TOKEN_FALSE && tok != JSON_TOKEN_TRUE && tok != JSON_TOKEN_FALSE &&
tok != JSON_TOKEN_NULL) tok != JSON_TOKEN_NULL)
report_parse_error(JSON_PARSE_VALUE, lex); return report_parse_error(JSON_PARSE_VALUE, lex);
/* if no semantic function, just consume the token */ /* if no semantic function, just consume the token */
if (sfunc == NULL) if (sfunc == NULL)
{ return json_lex(lex);
json_lex(lex);
return;
}
/* extract the de-escaped string value, or the raw lexeme */ /* extract the de-escaped string value, or the raw lexeme */
if (lex_peek(lex) == JSON_TOKEN_STRING) if (lex_peek(lex) == JSON_TOKEN_STRING)
...@@ -284,13 +315,17 @@ parse_scalar(JsonLexContext *lex, JsonSemAction *sem) ...@@ -284,13 +315,17 @@ parse_scalar(JsonLexContext *lex, JsonSemAction *sem)
} }
/* consume the token */ /* consume the token */
json_lex(lex); result = json_lex(lex);
if (result != JSON_SUCCESS)
return result;
/* invoke the callback */ /* invoke the callback */
(*sfunc) (sem->semstate, val, tok); (*sfunc) (sem->semstate, val, tok);
return JSON_SUCCESS;
} }
static void static JsonParseErrorType
parse_object_field(JsonLexContext *lex, JsonSemAction *sem) parse_object_field(JsonLexContext *lex, JsonSemAction *sem)
{ {
/* /*
...@@ -304,14 +339,19 @@ parse_object_field(JsonLexContext *lex, JsonSemAction *sem) ...@@ -304,14 +339,19 @@ parse_object_field(JsonLexContext *lex, JsonSemAction *sem)
json_ofield_action oend = sem->object_field_end; json_ofield_action oend = sem->object_field_end;
bool isnull; bool isnull;
JsonTokenType tok; JsonTokenType tok;
JsonParseErrorType result;
if (lex_peek(lex) != JSON_TOKEN_STRING) if (lex_peek(lex) != JSON_TOKEN_STRING)
report_parse_error(JSON_PARSE_STRING, lex); return report_parse_error(JSON_PARSE_STRING, lex);
if ((ostart != NULL || oend != NULL) && lex->strval != NULL) if ((ostart != NULL || oend != NULL) && lex->strval != NULL)
fname = pstrdup(lex->strval->data); fname = pstrdup(lex->strval->data);
json_lex(lex); result = json_lex(lex);
if (result != JSON_SUCCESS)
return result;
lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON); result = lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON);
if (result != JSON_SUCCESS)
return result;
tok = lex_peek(lex); tok = lex_peek(lex);
isnull = tok == JSON_TOKEN_NULL; isnull = tok == JSON_TOKEN_NULL;
...@@ -322,20 +362,23 @@ parse_object_field(JsonLexContext *lex, JsonSemAction *sem) ...@@ -322,20 +362,23 @@ parse_object_field(JsonLexContext *lex, JsonSemAction *sem)
switch (tok) switch (tok)
{ {
case JSON_TOKEN_OBJECT_START: case JSON_TOKEN_OBJECT_START:
parse_object(lex, sem); result = parse_object(lex, sem);
break; break;
case JSON_TOKEN_ARRAY_START: case JSON_TOKEN_ARRAY_START:
parse_array(lex, sem); result = parse_array(lex, sem);
break; break;
default: default:
parse_scalar(lex, sem); result = parse_scalar(lex, sem);
} }
if (result != JSON_SUCCESS)
return result;
if (oend != NULL) if (oend != NULL)
(*oend) (sem->semstate, fname, isnull); (*oend) (sem->semstate, fname, isnull);
return JSON_SUCCESS;
} }
static void static JsonParseErrorType
parse_object(JsonLexContext *lex, JsonSemAction *sem) parse_object(JsonLexContext *lex, JsonSemAction *sem)
{ {
/* /*
...@@ -345,6 +388,7 @@ parse_object(JsonLexContext *lex, JsonSemAction *sem) ...@@ -345,6 +388,7 @@ parse_object(JsonLexContext *lex, JsonSemAction *sem)
json_struct_action ostart = sem->object_start; json_struct_action ostart = sem->object_start;
json_struct_action oend = sem->object_end; json_struct_action oend = sem->object_end;
JsonTokenType tok; JsonTokenType tok;
JsonParseErrorType result;
check_stack_depth(); check_stack_depth();
...@@ -360,40 +404,51 @@ parse_object(JsonLexContext *lex, JsonSemAction *sem) ...@@ -360,40 +404,51 @@ parse_object(JsonLexContext *lex, JsonSemAction *sem)
lex->lex_level++; lex->lex_level++;
Assert(lex_peek(lex) == JSON_TOKEN_OBJECT_START); Assert(lex_peek(lex) == JSON_TOKEN_OBJECT_START);
json_lex(lex); result = json_lex(lex);
if (result != JSON_SUCCESS)
return result;
tok = lex_peek(lex); tok = lex_peek(lex);
switch (tok) switch (tok)
{ {
case JSON_TOKEN_STRING: case JSON_TOKEN_STRING:
parse_object_field(lex, sem); result = parse_object_field(lex, sem);
while (lex_peek(lex) == JSON_TOKEN_COMMA) while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
{ {
json_lex(lex); result = json_lex(lex);
parse_object_field(lex, sem); if (result != JSON_SUCCESS)
break;
result = parse_object_field(lex, sem);
} }
break; break;
case JSON_TOKEN_OBJECT_END: case JSON_TOKEN_OBJECT_END:
break; break;
default: default:
/* case of an invalid initial token inside the object */ /* case of an invalid initial token inside the object */
report_parse_error(JSON_PARSE_OBJECT_START, lex); result = report_parse_error(JSON_PARSE_OBJECT_START, lex);
} }
if (result != JSON_SUCCESS)
return result;
lex_expect(JSON_PARSE_OBJECT_NEXT, lex, JSON_TOKEN_OBJECT_END); result = lex_expect(JSON_PARSE_OBJECT_NEXT, lex, JSON_TOKEN_OBJECT_END);
if (result != JSON_SUCCESS)
return result;
lex->lex_level--; lex->lex_level--;
if (oend != NULL) if (oend != NULL)
(*oend) (sem->semstate); (*oend) (sem->semstate);
return JSON_SUCCESS;
} }
static void static JsonParseErrorType
parse_array_element(JsonLexContext *lex, JsonSemAction *sem) parse_array_element(JsonLexContext *lex, JsonSemAction *sem)
{ {
json_aelem_action astart = sem->array_element_start; json_aelem_action astart = sem->array_element_start;
json_aelem_action aend = sem->array_element_end; json_aelem_action aend = sem->array_element_end;
JsonTokenType tok = lex_peek(lex); JsonTokenType tok = lex_peek(lex);
JsonParseErrorType result;
bool isnull; bool isnull;
...@@ -406,20 +461,25 @@ parse_array_element(JsonLexContext *lex, JsonSemAction *sem) ...@@ -406,20 +461,25 @@ parse_array_element(JsonLexContext *lex, JsonSemAction *sem)
switch (tok) switch (tok)
{ {
case JSON_TOKEN_OBJECT_START: case JSON_TOKEN_OBJECT_START:
parse_object(lex, sem); result = parse_object(lex, sem);
break; break;
case JSON_TOKEN_ARRAY_START: case JSON_TOKEN_ARRAY_START:
parse_array(lex, sem); result = parse_array(lex, sem);
break; break;
default: default:
parse_scalar(lex, sem); result = parse_scalar(lex, sem);
} }
if (result != JSON_SUCCESS)
return result;
if (aend != NULL) if (aend != NULL)
(*aend) (sem->semstate, isnull); (*aend) (sem->semstate, isnull);
return JSON_SUCCESS;
} }
static void static JsonParseErrorType
parse_array(JsonLexContext *lex, JsonSemAction *sem) parse_array(JsonLexContext *lex, JsonSemAction *sem)
{ {
/* /*
...@@ -428,6 +488,7 @@ parse_array(JsonLexContext *lex, JsonSemAction *sem) ...@@ -428,6 +488,7 @@ parse_array(JsonLexContext *lex, JsonSemAction *sem)
*/ */
json_struct_action astart = sem->array_start; json_struct_action astart = sem->array_start;
json_struct_action aend = sem->array_end; json_struct_action aend = sem->array_end;
JsonParseErrorType result;
check_stack_depth(); check_stack_depth();
...@@ -442,35 +503,43 @@ parse_array(JsonLexContext *lex, JsonSemAction *sem) ...@@ -442,35 +503,43 @@ parse_array(JsonLexContext *lex, JsonSemAction *sem)
*/ */
lex->lex_level++; lex->lex_level++;
lex_expect(JSON_PARSE_ARRAY_START, lex, JSON_TOKEN_ARRAY_START); result = lex_expect(JSON_PARSE_ARRAY_START, lex, JSON_TOKEN_ARRAY_START);
if (lex_peek(lex) != JSON_TOKEN_ARRAY_END) if (result == JSON_SUCCESS && lex_peek(lex) != JSON_TOKEN_ARRAY_END)
{ {
result = parse_array_element(lex, sem);
parse_array_element(lex, sem); while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
while (lex_peek(lex) == JSON_TOKEN_COMMA)
{ {
json_lex(lex); result = json_lex(lex);
parse_array_element(lex, sem); if (result != JSON_SUCCESS)
break;
result = parse_array_element(lex, sem);
} }
} }
if (result != JSON_SUCCESS)
return result;
lex_expect(JSON_PARSE_ARRAY_NEXT, lex, JSON_TOKEN_ARRAY_END); result = lex_expect(JSON_PARSE_ARRAY_NEXT, lex, JSON_TOKEN_ARRAY_END);
if (result != JSON_SUCCESS)
return result;
lex->lex_level--; lex->lex_level--;
if (aend != NULL) if (aend != NULL)
(*aend) (sem->semstate); (*aend) (sem->semstate);
return JSON_SUCCESS;
} }
/* /*
* Lex one token from the input stream. * Lex one token from the input stream.
*/ */
void JsonParseErrorType
json_lex(JsonLexContext *lex) json_lex(JsonLexContext *lex)
{ {
char *s; char *s;
int len; int len;
JsonParseErrorType result;
/* Skip leading whitespace. */ /* Skip leading whitespace. */
s = lex->token_terminator; s = lex->token_terminator;
...@@ -494,6 +563,7 @@ json_lex(JsonLexContext *lex) ...@@ -494,6 +563,7 @@ json_lex(JsonLexContext *lex)
lex->token_type = JSON_TOKEN_END; lex->token_type = JSON_TOKEN_END;
} }
else else
{
switch (*s) switch (*s)
{ {
/* Single-character token, some kind of punctuation mark. */ /* Single-character token, some kind of punctuation mark. */
...@@ -529,12 +599,16 @@ json_lex(JsonLexContext *lex) ...@@ -529,12 +599,16 @@ json_lex(JsonLexContext *lex)
break; break;
case '"': case '"':
/* string */ /* string */
json_lex_string(lex); result = json_lex_string(lex);
if (result != JSON_SUCCESS)
return result;
lex->token_type = JSON_TOKEN_STRING; lex->token_type = JSON_TOKEN_STRING;
break; break;
case '-': case '-':
/* Negative number. */ /* Negative number. */
json_lex_number(lex, s + 1, NULL, NULL); result = json_lex_number(lex, s + 1, NULL, NULL);
if (result != JSON_SUCCESS)
return result;
lex->token_type = JSON_TOKEN_NUMBER; lex->token_type = JSON_TOKEN_NUMBER;
break; break;
case '0': case '0':
...@@ -548,7 +622,9 @@ json_lex(JsonLexContext *lex) ...@@ -548,7 +622,9 @@ json_lex(JsonLexContext *lex)
case '8': case '8':
case '9': case '9':
/* Positive number. */ /* Positive number. */
json_lex_number(lex, s, NULL, NULL); result = json_lex_number(lex, s, NULL, NULL);
if (result != JSON_SUCCESS)
return result;
lex->token_type = JSON_TOKEN_NUMBER; lex->token_type = JSON_TOKEN_NUMBER;
break; break;
default: default:
...@@ -576,7 +652,7 @@ json_lex(JsonLexContext *lex) ...@@ -576,7 +652,7 @@ json_lex(JsonLexContext *lex)
{ {
lex->prev_token_terminator = lex->token_terminator; lex->prev_token_terminator = lex->token_terminator;
lex->token_terminator = s + 1; lex->token_terminator = s + 1;
report_invalid_token(lex); return JSON_INVALID_TOKEN;
} }
/* /*
...@@ -593,21 +669,24 @@ json_lex(JsonLexContext *lex) ...@@ -593,21 +669,24 @@ json_lex(JsonLexContext *lex)
else if (memcmp(s, "null", 4) == 0) else if (memcmp(s, "null", 4) == 0)
lex->token_type = JSON_TOKEN_NULL; lex->token_type = JSON_TOKEN_NULL;
else else
report_invalid_token(lex); return JSON_INVALID_TOKEN;
} }
else if (p - s == 5 && memcmp(s, "false", 5) == 0) else if (p - s == 5 && memcmp(s, "false", 5) == 0)
lex->token_type = JSON_TOKEN_FALSE; lex->token_type = JSON_TOKEN_FALSE;
else else
report_invalid_token(lex); return JSON_INVALID_TOKEN;
} }
} /* end of switch */ } /* end of switch */
}
return JSON_SUCCESS;
} }
/* /*
* The next token in the input stream is known to be a string; lex it. * The next token in the input stream is known to be a string; lex it.
*/ */
static inline void static inline JsonParseErrorType
json_lex_string(JsonLexContext *lex) json_lex_string(JsonLexContext *lex)
{ {
char *s; char *s;
...@@ -628,7 +707,7 @@ json_lex_string(JsonLexContext *lex) ...@@ -628,7 +707,7 @@ json_lex_string(JsonLexContext *lex)
if (len >= lex->input_length) if (len >= lex->input_length)
{ {
lex->token_terminator = s; lex->token_terminator = s;
report_invalid_token(lex); return JSON_INVALID_TOKEN;
} }
else if (*s == '"') else if (*s == '"')
break; break;
...@@ -637,12 +716,7 @@ json_lex_string(JsonLexContext *lex) ...@@ -637,12 +716,7 @@ json_lex_string(JsonLexContext *lex)
/* Per RFC4627, these characters MUST be escaped. */ /* Per RFC4627, these characters MUST be escaped. */
/* Since *s isn't printable, exclude it from the context string */ /* Since *s isn't printable, exclude it from the context string */
lex->token_terminator = s; lex->token_terminator = s;
ereport(ERROR, return JSON_ESCAPING_REQUIRED;
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "json"),
errdetail("Character with value 0x%02x must be escaped.",
(unsigned char) *s),
report_json_context(lex)));
} }
else if (*s == '\\') else if (*s == '\\')
{ {
...@@ -652,7 +726,7 @@ json_lex_string(JsonLexContext *lex) ...@@ -652,7 +726,7 @@ json_lex_string(JsonLexContext *lex)
if (len >= lex->input_length) if (len >= lex->input_length)
{ {
lex->token_terminator = s; lex->token_terminator = s;
report_invalid_token(lex); return JSON_INVALID_TOKEN;
} }
else if (*s == 'u') else if (*s == 'u')
{ {
...@@ -666,7 +740,7 @@ json_lex_string(JsonLexContext *lex) ...@@ -666,7 +740,7 @@ json_lex_string(JsonLexContext *lex)
if (len >= lex->input_length) if (len >= lex->input_length)
{ {
lex->token_terminator = s; lex->token_terminator = s;
report_invalid_token(lex); return JSON_INVALID_TOKEN;
} }
else if (*s >= '0' && *s <= '9') else if (*s >= '0' && *s <= '9')
ch = (ch * 16) + (*s - '0'); ch = (ch * 16) + (*s - '0');
...@@ -677,12 +751,7 @@ json_lex_string(JsonLexContext *lex) ...@@ -677,12 +751,7 @@ json_lex_string(JsonLexContext *lex)
else else
{ {
lex->token_terminator = s + pg_mblen(s); lex->token_terminator = s + pg_mblen(s);
ereport(ERROR, return JSON_UNICODE_ESCAPE_FORMAT;
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s",
"json"),
errdetail("\"\\u\" must be followed by four hexadecimal digits."),
report_json_context(lex)));
} }
} }
if (lex->strval != NULL) if (lex->strval != NULL)
...@@ -693,33 +762,20 @@ json_lex_string(JsonLexContext *lex) ...@@ -693,33 +762,20 @@ json_lex_string(JsonLexContext *lex)
if (ch >= 0xd800 && ch <= 0xdbff) if (ch >= 0xd800 && ch <= 0xdbff)
{ {
if (hi_surrogate != -1) if (hi_surrogate != -1)
ereport(ERROR, return JSON_UNICODE_HIGH_SURROGATE;
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s",
"json"),
errdetail("Unicode high surrogate must not follow a high surrogate."),
report_json_context(lex)));
hi_surrogate = (ch & 0x3ff) << 10; hi_surrogate = (ch & 0x3ff) << 10;
continue; continue;
} }
else if (ch >= 0xdc00 && ch <= 0xdfff) else if (ch >= 0xdc00 && ch <= 0xdfff)
{ {
if (hi_surrogate == -1) if (hi_surrogate == -1)
ereport(ERROR, return JSON_UNICODE_LOW_SURROGATE;
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "json"),
errdetail("Unicode low surrogate must follow a high surrogate."),
report_json_context(lex)));
ch = 0x10000 + hi_surrogate + (ch & 0x3ff); ch = 0x10000 + hi_surrogate + (ch & 0x3ff);
hi_surrogate = -1; hi_surrogate = -1;
} }
if (hi_surrogate != -1) if (hi_surrogate != -1)
ereport(ERROR, return JSON_UNICODE_LOW_SURROGATE;
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "json"),
errdetail("Unicode low surrogate must follow a high surrogate."),
report_json_context(lex)));
/* /*
* For UTF8, replace the escape sequence by the actual * For UTF8, replace the escape sequence by the actual
...@@ -731,11 +787,7 @@ json_lex_string(JsonLexContext *lex) ...@@ -731,11 +787,7 @@ json_lex_string(JsonLexContext *lex)
if (ch == 0) if (ch == 0)
{ {
/* We can't allow this, since our TEXT type doesn't */ /* We can't allow this, since our TEXT type doesn't */
ereport(ERROR, return JSON_UNICODE_CODE_POINT_ZERO;
(errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
errmsg("unsupported Unicode escape sequence"),
errdetail("\\u0000 cannot be converted to text."),
report_json_context(lex)));
} }
else if (GetDatabaseEncoding() == PG_UTF8) else if (GetDatabaseEncoding() == PG_UTF8)
{ {
...@@ -753,25 +805,14 @@ json_lex_string(JsonLexContext *lex) ...@@ -753,25 +805,14 @@ json_lex_string(JsonLexContext *lex)
appendStringInfoChar(lex->strval, (char) ch); appendStringInfoChar(lex->strval, (char) ch);
} }
else else
{ return JSON_UNICODE_HIGH_ESCAPE;
ereport(ERROR,
(errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
errmsg("unsupported Unicode escape sequence"),
errdetail("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8."),
report_json_context(lex)));
}
} }
} }
else if (lex->strval != NULL) else if (lex->strval != NULL)
{ {
if (hi_surrogate != -1) if (hi_surrogate != -1)
ereport(ERROR, return JSON_UNICODE_LOW_SURROGATE;
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s",
"json"),
errdetail("Unicode low surrogate must follow a high surrogate."),
report_json_context(lex)));
switch (*s) switch (*s)
{ {
...@@ -796,15 +837,10 @@ json_lex_string(JsonLexContext *lex) ...@@ -796,15 +837,10 @@ json_lex_string(JsonLexContext *lex)
appendStringInfoChar(lex->strval, '\t'); appendStringInfoChar(lex->strval, '\t');
break; break;
default: default:
/* Not a valid string escape, so error out. */ /* Not a valid string escape, so signal error. */
lex->token_start = s;
lex->token_terminator = s + pg_mblen(s); lex->token_terminator = s + pg_mblen(s);
ereport(ERROR, return JSON_ESCAPING_INVALID;
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s",
"json"),
errdetail("Escape sequence \"\\%s\" is invalid.",
extract_mb_char(s)),
report_json_context(lex)));
} }
} }
else if (strchr("\"\\/bfnrt", *s) == NULL) else if (strchr("\"\\/bfnrt", *s) == NULL)
...@@ -816,24 +852,16 @@ json_lex_string(JsonLexContext *lex) ...@@ -816,24 +852,16 @@ json_lex_string(JsonLexContext *lex)
* replace it with a switch statement, but testing so far has * replace it with a switch statement, but testing so far has
* shown it's not a performance win. * shown it's not a performance win.
*/ */
lex->token_start = s;
lex->token_terminator = s + pg_mblen(s); lex->token_terminator = s + pg_mblen(s);
ereport(ERROR, return JSON_ESCAPING_INVALID;
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "json"),
errdetail("Escape sequence \"\\%s\" is invalid.",
extract_mb_char(s)),
report_json_context(lex)));
} }
} }
else if (lex->strval != NULL) else if (lex->strval != NULL)
{ {
if (hi_surrogate != -1) if (hi_surrogate != -1)
ereport(ERROR, return JSON_UNICODE_LOW_SURROGATE;
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "json"),
errdetail("Unicode low surrogate must follow a high surrogate."),
report_json_context(lex)));
appendStringInfoChar(lex->strval, *s); appendStringInfoChar(lex->strval, *s);
} }
...@@ -841,15 +869,12 @@ json_lex_string(JsonLexContext *lex) ...@@ -841,15 +869,12 @@ json_lex_string(JsonLexContext *lex)
} }
if (hi_surrogate != -1) if (hi_surrogate != -1)
ereport(ERROR, return JSON_UNICODE_LOW_SURROGATE;
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "json"),
errdetail("Unicode low surrogate must follow a high surrogate."),
report_json_context(lex)));
/* Hooray, we found the end of the string! */ /* Hooray, we found the end of the string! */
lex->prev_token_terminator = lex->token_terminator; lex->prev_token_terminator = lex->token_terminator;
lex->token_terminator = s + 1; lex->token_terminator = s + 1;
return JSON_SUCCESS;
} }
/* /*
...@@ -880,7 +905,7 @@ json_lex_string(JsonLexContext *lex) ...@@ -880,7 +905,7 @@ json_lex_string(JsonLexContext *lex)
* raising an error for a badly-formed number. Also, if total_len is not NULL * raising an error for a badly-formed number. Also, if total_len is not NULL
* the distance from lex->input to the token end+1 is returned to *total_len. * the distance from lex->input to the token end+1 is returned to *total_len.
*/ */
static inline void static inline JsonParseErrorType
json_lex_number(JsonLexContext *lex, char *s, json_lex_number(JsonLexContext *lex, char *s,
bool *num_err, int *total_len) bool *num_err, int *total_len)
{ {
...@@ -969,8 +994,10 @@ json_lex_number(JsonLexContext *lex, char *s, ...@@ -969,8 +994,10 @@ json_lex_number(JsonLexContext *lex, char *s,
lex->token_terminator = s; lex->token_terminator = s;
/* handle error if any */ /* handle error if any */
if (error) if (error)
report_invalid_token(lex); return JSON_INVALID_TOKEN;
} }
return JSON_SUCCESS;
} }
/* /*
...@@ -978,132 +1005,119 @@ json_lex_number(JsonLexContext *lex, char *s, ...@@ -978,132 +1005,119 @@ json_lex_number(JsonLexContext *lex, char *s,
* *
* lex->token_start and lex->token_terminator must identify the current token. * lex->token_start and lex->token_terminator must identify the current token.
*/ */
static void static JsonParseErrorType
report_parse_error(JsonParseContext ctx, JsonLexContext *lex) report_parse_error(JsonParseContext ctx, JsonLexContext *lex)
{ {
char *token;
int toklen;
/* Handle case where the input ended prematurely. */ /* Handle case where the input ended prematurely. */
if (lex->token_start == NULL || lex->token_type == JSON_TOKEN_END) if (lex->token_start == NULL || lex->token_type == JSON_TOKEN_END)
ereport(ERROR, return JSON_EXPECTED_MORE;
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "json"),
errdetail("The input string ended unexpectedly."),
report_json_context(lex)));
/* Separate out the current token. */
toklen = lex->token_terminator - lex->token_start;
token = palloc(toklen + 1);
memcpy(token, lex->token_start, toklen);
token[toklen] = '\0';
/* Complain, with the appropriate detail message. */ /* Otherwise choose the error type based on the parsing context. */
if (ctx == JSON_PARSE_END)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "json"),
errdetail("Expected end of input, but found \"%s\".",
token),
report_json_context(lex)));
else
{
switch (ctx) switch (ctx)
{ {
case JSON_PARSE_END:
return JSON_EXPECTED_END;
case JSON_PARSE_VALUE: case JSON_PARSE_VALUE:
ereport(ERROR, return JSON_EXPECTED_JSON;
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "json"),
errdetail("Expected JSON value, but found \"%s\".",
token),
report_json_context(lex)));
break;
case JSON_PARSE_STRING: case JSON_PARSE_STRING:
ereport(ERROR, return JSON_EXPECTED_STRING;
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "json"),
errdetail("Expected string, but found \"%s\".",
token),
report_json_context(lex)));
break;
case JSON_PARSE_ARRAY_START: case JSON_PARSE_ARRAY_START:
ereport(ERROR, return JSON_EXPECTED_ARRAY_FIRST;
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "json"),
errdetail("Expected array element or \"]\", but found \"%s\".",
token),
report_json_context(lex)));
break;
case JSON_PARSE_ARRAY_NEXT: case JSON_PARSE_ARRAY_NEXT:
ereport(ERROR, return JSON_EXPECTED_ARRAY_NEXT;
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "json"),
errdetail("Expected \",\" or \"]\", but found \"%s\".",
token),
report_json_context(lex)));
break;
case JSON_PARSE_OBJECT_START: case JSON_PARSE_OBJECT_START:
ereport(ERROR, return JSON_EXPECTED_OBJECT_FIRST;
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "json"),
errdetail("Expected string or \"}\", but found \"%s\".",
token),
report_json_context(lex)));
break;
case JSON_PARSE_OBJECT_LABEL: case JSON_PARSE_OBJECT_LABEL:
ereport(ERROR, return JSON_EXPECTED_COLON;
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "json"),
errdetail("Expected \":\", but found \"%s\".",
token),
report_json_context(lex)));
break;
case JSON_PARSE_OBJECT_NEXT: case JSON_PARSE_OBJECT_NEXT:
ereport(ERROR, return JSON_EXPECTED_OBJECT_NEXT;
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "json"),
errdetail("Expected \",\" or \"}\", but found \"%s\".",
token),
report_json_context(lex)));
break;
case JSON_PARSE_OBJECT_COMMA: case JSON_PARSE_OBJECT_COMMA:
ereport(ERROR, return JSON_EXPECTED_STRING;
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "json"),
errdetail("Expected string, but found \"%s\".",
token),
report_json_context(lex)));
break;
default: default:
elog(ERROR, "unexpected json parse state: %d", ctx); elog(ERROR, "unexpected json parse state: %d", ctx);
} }
}
} }
/* /*
* Report an invalid input token. * Report a JSON error.
*
* lex->token_start and lex->token_terminator must identify the token.
*/ */
static void void
report_invalid_token(JsonLexContext *lex) json_ereport_error(JsonParseErrorType error, JsonLexContext *lex)
{ {
char *token; if (error == JSON_UNICODE_HIGH_ESCAPE ||
int toklen; error == JSON_UNICODE_CODE_POINT_ZERO)
ereport(ERROR,
/* Separate out the offending token. */ (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
toklen = lex->token_terminator - lex->token_start; errmsg("unsupported Unicode escape sequence"),
token = palloc(toklen + 1); errdetail("%s", json_errdetail(error, lex)),
memcpy(token, lex->token_start, toklen); report_json_context(lex)));
token[toklen] = '\0'; else
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type %s", "json"), errmsg("invalid input syntax for type %s", "json"),
errdetail("Token \"%s\" is invalid.", token), errdetail("%s", json_errdetail(error, lex)),
report_json_context(lex))); report_json_context(lex)));
} }
/*
 * Construct a detail message for a JSON error.
 *
 * "error" selects the message; "lex" supplies the text it refers to.  Most
 * messages quote the current token, which extract_token() copies out of lex,
 * so lex->token_start and lex->token_terminator must identify that token.
 * JSON_ESCAPING_REQUIRED instead reports the single byte found at
 * lex->token_terminator.
 *
 * The result is either a string built by psprintf() or a translated constant
 * returned directly from _(); callers should treat it as read-only.
 *
 * JSON_SUCCESS is not an error and is rejected with elog(ERROR), as is any
 * value that is not a member of JsonParseErrorType.
 */
char *
json_errdetail(JsonParseErrorType error, JsonLexContext *lex)
{
	switch (error)
	{
		case JSON_SUCCESS:
			/* Asking for the detail of "no error" is a caller bug. */
			elog(ERROR, "internal error in json parser");
			break;
		case JSON_ESCAPING_INVALID:
			return psprintf(_("Escape sequence \"\\%s\" is invalid."),
							extract_token(lex));
		case JSON_ESCAPING_REQUIRED:
			/* Cast so that bytes >= 0x80 are not sign-extended by %02x. */
			return psprintf(_("Character with value 0x%02x must be escaped."),
							(unsigned char) *(lex->token_terminator));
		case JSON_EXPECTED_END:
			return psprintf(_("Expected end of input, but found \"%s\"."),
							extract_token(lex));
		case JSON_EXPECTED_ARRAY_FIRST:
			return psprintf(_("Expected array element or \"]\", but found \"%s\"."),
							extract_token(lex));
		case JSON_EXPECTED_ARRAY_NEXT:
			return psprintf(_("Expected \",\" or \"]\", but found \"%s\"."),
							extract_token(lex));
		case JSON_EXPECTED_COLON:
			return psprintf(_("Expected \":\", but found \"%s\"."),
							extract_token(lex));
		case JSON_EXPECTED_JSON:
			return psprintf(_("Expected JSON value, but found \"%s\"."),
							extract_token(lex));
		case JSON_EXPECTED_MORE:
			return _("The input string ended unexpectedly.");
		case JSON_EXPECTED_OBJECT_FIRST:
			return psprintf(_("Expected string or \"}\", but found \"%s\"."),
							extract_token(lex));
		case JSON_EXPECTED_OBJECT_NEXT:
			return psprintf(_("Expected \",\" or \"}\", but found \"%s\"."),
							extract_token(lex));
		case JSON_EXPECTED_STRING:
			return psprintf(_("Expected string, but found \"%s\"."),
							extract_token(lex));
		case JSON_INVALID_TOKEN:
			return psprintf(_("Token \"%s\" is invalid."),
							extract_token(lex));
		case JSON_UNICODE_CODE_POINT_ZERO:
			return _("\\u0000 cannot be converted to text.");
		case JSON_UNICODE_ESCAPE_FORMAT:
			return _("\"\\u\" must be followed by four hexadecimal digits.");
		case JSON_UNICODE_HIGH_ESCAPE:
			return _("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.");
		case JSON_UNICODE_HIGH_SURROGATE:
			return _("Unicode high surrogate must not follow a high surrogate.");
		case JSON_UNICODE_LOW_SURROGATE:
			return _("Unicode low surrogate must follow a high surrogate.");
	}

	/*
	 * The switch deliberately has no default: case, so that a compiler with
	 * switch-coverage warnings enabled can flag an enum value that was added
	 * without a message.  Control still must not run off the end of this
	 * non-void function, which would hand an indeterminate pointer to the
	 * caller if "error" held a value outside the enum.  elog(ERROR) is not
	 * expected to return; the explicit return keeps every path well-defined
	 * even for compilers that cannot see that.
	 */
	elog(ERROR, "unexpected json parse error type: %d", (int) error);
	return NULL;
}
/* /*
* Report a CONTEXT line for bogus JSON input. * Report a CONTEXT line for bogus JSON input.
* *
...@@ -1177,18 +1191,15 @@ report_json_context(JsonLexContext *lex) ...@@ -1177,18 +1191,15 @@ report_json_context(JsonLexContext *lex)
} }
/* /*
* Extract a single, possibly multi-byte char from the input string. * Extract the current token from a lexing context, for error reporting.
*/ */
static char * static char *
extract_mb_char(char *s) extract_token(JsonLexContext *lex)
{ {
char *res; int toklen = lex->token_terminator - lex->token_start;
int len; char *token = palloc(toklen + 1);
len = pg_mblen(s);
res = palloc(len + 1);
memcpy(res, s, len);
res[len] = '\0';
return res; memcpy(token, lex->token_start, toklen);
token[toklen] = '\0';
return token;
} }
...@@ -272,7 +272,7 @@ jsonb_from_cstring(char *json, int len) ...@@ -272,7 +272,7 @@ jsonb_from_cstring(char *json, int len)
sem.scalar = jsonb_in_scalar; sem.scalar = jsonb_in_scalar;
sem.object_field_start = jsonb_in_object_field_start; sem.object_field_start = jsonb_in_object_field_start;
pg_parse_json(lex, &sem); pg_parse_json_or_ereport(lex, &sem);
/* after parsing, the item member has the composed jsonb structure */ /* after parsing, the item member has the composed jsonb structure */
PG_RETURN_POINTER(JsonbValueToJsonb(state.res)); PG_RETURN_POINTER(JsonbValueToJsonb(state.res));
...@@ -860,7 +860,7 @@ datum_to_jsonb(Datum val, bool is_null, JsonbInState *result, ...@@ -860,7 +860,7 @@ datum_to_jsonb(Datum val, bool is_null, JsonbInState *result,
sem.scalar = jsonb_in_scalar; sem.scalar = jsonb_in_scalar;
sem.object_field_start = jsonb_in_object_field_start; sem.object_field_start = jsonb_in_object_field_start;
pg_parse_json(lex, &sem); pg_parse_json_or_ereport(lex, &sem);
} }
break; break;
......
...@@ -606,7 +606,7 @@ json_object_keys(PG_FUNCTION_ARGS) ...@@ -606,7 +606,7 @@ json_object_keys(PG_FUNCTION_ARGS)
sem->object_field_start = okeys_object_field_start; sem->object_field_start = okeys_object_field_start;
/* remainder are all NULL, courtesy of palloc0 above */ /* remainder are all NULL, courtesy of palloc0 above */
pg_parse_json(lex, sem); pg_parse_json_or_ereport(lex, sem);
/* keys are now in state->result */ /* keys are now in state->result */
pfree(lex->strval->data); pfree(lex->strval->data);
...@@ -1000,7 +1000,7 @@ get_worker(text *json, ...@@ -1000,7 +1000,7 @@ get_worker(text *json,
sem->array_element_end = get_array_element_end; sem->array_element_end = get_array_element_end;
} }
pg_parse_json(lex, sem); pg_parse_json_or_ereport(lex, sem);
return state->tresult; return state->tresult;
} }
...@@ -1148,7 +1148,12 @@ get_array_start(void *state) ...@@ -1148,7 +1148,12 @@ get_array_start(void *state)
_state->path_indexes[lex_level] != INT_MIN) _state->path_indexes[lex_level] != INT_MIN)
{ {
/* Negative subscript -- convert to positive-wise subscript */ /* Negative subscript -- convert to positive-wise subscript */
int nelements = json_count_array_elements(_state->lex); JsonParseErrorType error;
int nelements;
error = json_count_array_elements(_state->lex, &nelements);
if (error != JSON_SUCCESS)
json_ereport_error(error, _state->lex);
if (-_state->path_indexes[lex_level] <= nelements) if (-_state->path_indexes[lex_level] <= nelements)
_state->path_indexes[lex_level] += nelements; _state->path_indexes[lex_level] += nelements;
...@@ -1548,7 +1553,7 @@ json_array_length(PG_FUNCTION_ARGS) ...@@ -1548,7 +1553,7 @@ json_array_length(PG_FUNCTION_ARGS)
sem->scalar = alen_scalar; sem->scalar = alen_scalar;
sem->array_element_start = alen_array_element_start; sem->array_element_start = alen_array_element_start;
pg_parse_json(lex, sem); pg_parse_json_or_ereport(lex, sem);
PG_RETURN_INT32(state->count); PG_RETURN_INT32(state->count);
} }
...@@ -1814,7 +1819,7 @@ each_worker(FunctionCallInfo fcinfo, bool as_text) ...@@ -1814,7 +1819,7 @@ each_worker(FunctionCallInfo fcinfo, bool as_text)
"json_each temporary cxt", "json_each temporary cxt",
ALLOCSET_DEFAULT_SIZES); ALLOCSET_DEFAULT_SIZES);
pg_parse_json(lex, sem); pg_parse_json_or_ereport(lex, sem);
MemoryContextDelete(state->tmp_cxt); MemoryContextDelete(state->tmp_cxt);
...@@ -2113,7 +2118,7 @@ elements_worker(FunctionCallInfo fcinfo, const char *funcname, bool as_text) ...@@ -2113,7 +2118,7 @@ elements_worker(FunctionCallInfo fcinfo, const char *funcname, bool as_text)
"json_array_elements temporary cxt", "json_array_elements temporary cxt",
ALLOCSET_DEFAULT_SIZES); ALLOCSET_DEFAULT_SIZES);
pg_parse_json(lex, sem); pg_parse_json_or_ereport(lex, sem);
MemoryContextDelete(state->tmp_cxt); MemoryContextDelete(state->tmp_cxt);
...@@ -2485,7 +2490,7 @@ populate_array_json(PopulateArrayContext *ctx, char *json, int len) ...@@ -2485,7 +2490,7 @@ populate_array_json(PopulateArrayContext *ctx, char *json, int len)
sem.array_element_end = populate_array_element_end; sem.array_element_end = populate_array_element_end;
sem.scalar = populate_array_scalar; sem.scalar = populate_array_scalar;
pg_parse_json(state.lex, &sem); pg_parse_json_or_ereport(state.lex, &sem);
/* number of dimensions should be already known */ /* number of dimensions should be already known */
Assert(ctx->ndims > 0 && ctx->dims); Assert(ctx->ndims > 0 && ctx->dims);
...@@ -3342,7 +3347,7 @@ get_json_object_as_hash(char *json, int len, const char *funcname) ...@@ -3342,7 +3347,7 @@ get_json_object_as_hash(char *json, int len, const char *funcname)
sem->object_field_start = hash_object_field_start; sem->object_field_start = hash_object_field_start;
sem->object_field_end = hash_object_field_end; sem->object_field_end = hash_object_field_end;
pg_parse_json(lex, sem); pg_parse_json_or_ereport(lex, sem);
return tab; return tab;
} }
...@@ -3641,7 +3646,7 @@ populate_recordset_worker(FunctionCallInfo fcinfo, const char *funcname, ...@@ -3641,7 +3646,7 @@ populate_recordset_worker(FunctionCallInfo fcinfo, const char *funcname,
state->lex = lex; state->lex = lex;
pg_parse_json(lex, sem); pg_parse_json_or_ereport(lex, sem);
} }
else else
{ {
...@@ -3971,7 +3976,7 @@ json_strip_nulls(PG_FUNCTION_ARGS) ...@@ -3971,7 +3976,7 @@ json_strip_nulls(PG_FUNCTION_ARGS)
sem->array_element_start = sn_array_element_start; sem->array_element_start = sn_array_element_start;
sem->object_field_start = sn_object_field_start; sem->object_field_start = sn_object_field_start;
pg_parse_json(lex, sem); pg_parse_json_or_ereport(lex, sem);
PG_RETURN_TEXT_P(cstring_to_text_with_len(state->strval->data, PG_RETURN_TEXT_P(cstring_to_text_with_len(state->strval->data,
state->strval->len)); state->strval->len));
...@@ -5110,7 +5115,7 @@ iterate_json_values(text *json, uint32 flags, void *action_state, ...@@ -5110,7 +5115,7 @@ iterate_json_values(text *json, uint32 flags, void *action_state,
sem->scalar = iterate_values_scalar; sem->scalar = iterate_values_scalar;
sem->object_field_start = iterate_values_object_field_start; sem->object_field_start = iterate_values_object_field_start;
pg_parse_json(lex, sem); pg_parse_json_or_ereport(lex, sem);
} }
/* /*
...@@ -5230,7 +5235,7 @@ transform_json_string_values(text *json, void *action_state, ...@@ -5230,7 +5235,7 @@ transform_json_string_values(text *json, void *action_state,
sem->array_element_start = transform_string_values_array_element_start; sem->array_element_start = transform_string_values_array_element_start;
sem->object_field_start = transform_string_values_object_field_start; sem->object_field_start = transform_string_values_object_field_start;
pg_parse_json(lex, sem); pg_parse_json_or_ereport(lex, sem);
return cstring_to_text_with_len(state->strval->data, state->strval->len); return cstring_to_text_with_len(state->strval->data, state->strval->len);
} }
......
...@@ -33,6 +33,28 @@ typedef enum ...@@ -33,6 +33,28 @@ typedef enum
JSON_TOKEN_END JSON_TOKEN_END
} JsonTokenType; } JsonTokenType;
typedef enum
{
JSON_SUCCESS,
JSON_ESCAPING_INVALID,
JSON_ESCAPING_REQUIRED,
JSON_EXPECTED_ARRAY_FIRST,
JSON_EXPECTED_ARRAY_NEXT,
JSON_EXPECTED_COLON,
JSON_EXPECTED_END,
JSON_EXPECTED_JSON,
JSON_EXPECTED_MORE,
JSON_EXPECTED_OBJECT_FIRST,
JSON_EXPECTED_OBJECT_NEXT,
JSON_EXPECTED_STRING,
JSON_INVALID_TOKEN,
JSON_UNICODE_CODE_POINT_ZERO,
JSON_UNICODE_ESCAPE_FORMAT,
JSON_UNICODE_HIGH_ESCAPE,
JSON_UNICODE_HIGH_SURROGATE,
JSON_UNICODE_LOW_SURROGATE
} JsonParseErrorType;
/* /*
* All the fields in this structure should be treated as read-only. * All the fields in this structure should be treated as read-only.
...@@ -101,7 +123,14 @@ typedef struct JsonSemAction ...@@ -101,7 +123,14 @@ typedef struct JsonSemAction
* points to. If the action pointers are NULL the parser * points to. If the action pointers are NULL the parser
* does nothing and just continues. * does nothing and just continues.
*/ */
extern void pg_parse_json(JsonLexContext *lex, JsonSemAction *sem); extern JsonParseErrorType pg_parse_json(JsonLexContext *lex,
JsonSemAction *sem);
/*
* Same thing, but signal errors via ereport(ERROR) instead of returning
* a result code.
*/
extern void pg_parse_json_or_ereport(JsonLexContext *lex, JsonSemAction *sem);
/* the null action object used for pure validation */ /* the null action object used for pure validation */
extern JsonSemAction nullSemAction; extern JsonSemAction nullSemAction;
...@@ -110,8 +139,13 @@ extern JsonSemAction nullSemAction; ...@@ -110,8 +139,13 @@ extern JsonSemAction nullSemAction;
* json_count_array_elements performs a fast secondary parse to determine the * json_count_array_elements performs a fast secondary parse to determine the
* number of elements in passed array lex context. It should be called from an * number of elements in passed array lex context. It should be called from an
* array_start action. * array_start action.
*
* The return value indicates whether any error occurred, while the number
* of elements is stored into *elements (but only if the return value is
* JSON_SUCCESS).
*/ */
extern int json_count_array_elements(JsonLexContext *lex); extern JsonParseErrorType json_count_array_elements(JsonLexContext *lex,
int *elements);
/* /*
* constructors for JsonLexContext, with or without strval element. * constructors for JsonLexContext, with or without strval element.
...@@ -128,7 +162,13 @@ extern JsonLexContext *makeJsonLexContextCstringLen(char *json, ...@@ -128,7 +162,13 @@ extern JsonLexContext *makeJsonLexContextCstringLen(char *json,
bool need_escapes); bool need_escapes);
/* lex one token */ /* lex one token */
extern void json_lex(JsonLexContext *lex); extern JsonParseErrorType json_lex(JsonLexContext *lex);
/* report an error during json lexing or parsing */
extern void json_ereport_error(JsonParseErrorType error, JsonLexContext *lex);
/* construct an error detail string for a json error */
extern char *json_errdetail(JsonParseErrorType error, JsonLexContext *lex);
/* /*
* Utility function to check if a string is a valid JSON number. * Utility function to check if a string is a valid JSON number.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment