Commit e306df7f authored by Andrew Dunstan

Full Text Search support for json and jsonb

The new functions are json and jsonb variants of ts_headline() and to_tsvector().

Dmitry Dolgov, edited and documented by me.
parent c80b9920
......@@ -9564,6 +9564,15 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
<entry><literal>to_tsvector('english', 'The Fat Rats')</literal></entry>
<entry><literal>'fat':2 'rat':3</literal></entry>
</row>
<row>
<entry>
<literal><function>to_tsvector(<optional> <replaceable class="PARAMETER">config</> <type>regconfig</> , </optional> <replaceable class="PARAMETER">document</> <type>json(b)</type>)</function></literal>
</entry>
<entry><type>tsvector</type></entry>
<entry>reduce document text to <type>tsvector</></entry>
<entry><literal>to_tsvector('english', '{"a": "The Fat Rats"}'::json)</literal></entry>
<entry><literal>'fat':2 'rat':3</literal></entry>
</row>
<row>
<entry>
<indexterm>
......@@ -9610,6 +9619,15 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
<entry><literal>ts_headline('x y z', 'z'::tsquery)</literal></entry>
<entry><literal>x y &lt;b&gt;z&lt;/b&gt;</literal></entry>
</row>
<row>
<entry>
<literal><function>ts_headline(<optional> <replaceable class="PARAMETER">config</replaceable> <type>regconfig</>, </optional> <replaceable class="PARAMETER">document</replaceable> <type>json(b)</>, <replaceable class="PARAMETER">query</replaceable> <type>tsquery</> <optional>, <replaceable class="PARAMETER">options</replaceable> <type>text</> </optional>)</function></literal>
</entry>
<entry><type>text</type></entry>
<entry>display a query match</entry>
<entry><literal>ts_headline('{"a":"x y z"}'::json, 'z'::tsquery)</literal></entry>
<entry><literal>{"a":"x y &lt;b&gt;z&lt;/b&gt;"}</literal></entry>
</row>
<row>
<entry>
<indexterm>
......
......@@ -16,6 +16,7 @@
#include "tsearch/ts_cache.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
#include "utils/jsonapi.h"
typedef struct MorphOpaque
......@@ -24,6 +25,14 @@ typedef struct MorphOpaque
int qoperator; /* query operator */
} MorphOpaque;
/*
 * Working state shared between the json(b) to_tsvector entry points and the
 * add_to_tsvector() callback that is invoked once per string value.
 */
typedef struct TSVectorBuildState
{
/* parser scratch area; add_to_tsvector() re-initializes it for every value */
ParsedText *prs;
/* tsvector accumulated so far; stays NULL until some value yields a word */
TSVector result;
/* OID of the text search configuration handed to parsetext() */
Oid cfgId;
} TSVectorBuildState;
/* per-string-value callback for iterate_json(b)_string_values() */
static void add_to_tsvector(void *state, char *elem_value, int elem_len);
Datum
get_current_ts_config(PG_FUNCTION_ARGS)
......@@ -256,6 +265,135 @@ to_tsvector(PG_FUNCTION_ARGS)
PointerGetDatum(in)));
}
/*
 * to_tsvector(regconfig, jsonb)
 *
 * Builds a tsvector out of the string values of a jsonb document.
 *
 * Argument 0 is the OID of the text search configuration, argument 1 the
 * jsonb document.  Each string value is handed to add_to_tsvector(), which
 * accumulates the result in the build state.  If no value produced a word,
 * an empty tsvector is returned.
 */
Datum
jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
{
	Oid			cfgId = PG_GETARG_OID(0);
	Jsonb	   *jb = PG_GETARG_JSONB(1);
	TSVectorBuildState state;
	ParsedText	prs;

	/*
	 * The parse state only has to live for the duration of this call, so keep
	 * it on the stack rather than palloc'ing it and never freeing it.
	 * add_to_tsvector() initializes the remaining fields for every value.
	 */
	prs.words = NULL;

	state.result = NULL;
	state.cfgId = cfgId;
	state.prs = &prs;

	iterate_jsonb_string_values(jb, &state,
								(JsonIterateStringValuesAction) add_to_tsvector);

	PG_FREE_IF_COPY(jb, 1);

	if (state.result == NULL)
	{
		/*
		 * Either there weren't any string values in the jsonb, or none of
		 * them yielded a word, so we need to return an empty vector.
		 */
		if (prs.words != NULL)
			pfree(prs.words);

		state.result = palloc(CALCDATASIZE(0, 0));
		SET_VARSIZE(state.result, CALCDATASIZE(0, 0));
		state.result->size = 0;
	}

	PG_RETURN_TSVECTOR(state.result);
}
/*
 * to_tsvector(jsonb)
 *
 * Same as jsonb_to_tsvector_byid(), but uses the text search configuration
 * reported by getTSCurrentConfig().
 */
Datum
jsonb_to_tsvector(PG_FUNCTION_ARGS)
{
	Jsonb	   *document = PG_GETARG_JSONB(0);
	Datum		config = ObjectIdGetDatum(getTSCurrentConfig(true));

	PG_RETURN_DATUM(DirectFunctionCall2(jsonb_to_tsvector_byid,
										config,
										JsonbGetDatum(document)));
}
/*
 * to_tsvector(regconfig, json)
 *
 * Builds a tsvector out of the string values of a json document.
 *
 * Argument 0 is the OID of the text search configuration, argument 1 the
 * json document in its text representation.  Each string value is handed to
 * add_to_tsvector(), which accumulates the result in the build state.  If no
 * value produced a word, an empty tsvector is returned.
 */
Datum
json_to_tsvector_byid(PG_FUNCTION_ARGS)
{
	Oid			cfgId = PG_GETARG_OID(0);
	text	   *json = PG_GETARG_TEXT_P(1);
	TSVectorBuildState state;
	ParsedText	prs;

	/*
	 * The parse state only has to live for the duration of this call, so keep
	 * it on the stack rather than palloc'ing it and never freeing it.
	 * add_to_tsvector() initializes the remaining fields for every value.
	 */
	prs.words = NULL;

	state.result = NULL;
	state.cfgId = cfgId;
	state.prs = &prs;

	iterate_json_string_values(json, &state,
							   (JsonIterateStringValuesAction) add_to_tsvector);

	PG_FREE_IF_COPY(json, 1);

	if (state.result == NULL)
	{
		/*
		 * Either there weren't any string values in the json, or none of
		 * them yielded a word, so we need to return an empty vector.
		 */
		if (prs.words != NULL)
			pfree(prs.words);

		state.result = palloc(CALCDATASIZE(0, 0));
		SET_VARSIZE(state.result, CALCDATASIZE(0, 0));
		state.result->size = 0;
	}

	PG_RETURN_TSVECTOR(state.result);
}
/*
 * to_tsvector(json)
 *
 * Same as json_to_tsvector_byid(), but uses the text search configuration
 * reported by getTSCurrentConfig().
 */
Datum
json_to_tsvector(PG_FUNCTION_ARGS)
{
	text	   *document = PG_GETARG_TEXT_P(0);
	Datum		config = ObjectIdGetDatum(getTSCurrentConfig(true));

	PG_RETURN_DATUM(DirectFunctionCall2(json_to_tsvector_byid,
										config,
										PointerGetDatum(document)));
}
/*
* Extend current TSVector from _state with a new one,
* build over a json(b) element.
*/
static void
add_to_tsvector(void *_state, char *elem_value, int elem_len)
{
TSVectorBuildState *state = (TSVectorBuildState *) _state;
ParsedText *prs = state->prs;
TSVector item_vector;
int i;
prs->lenwords = elem_len / 6;
if (prs->lenwords == 0)
prs->lenwords = 2;
prs->words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs->lenwords);
prs->curwords = 0;
prs->pos = 0;
parsetext(state->cfgId, prs, elem_value, elem_len);
if (prs->curwords)
{
if (state->result != NULL)
{
for (i = 0; i < prs->curwords; i++)
prs->words[i].pos.pos = prs->words[i].pos.pos + TS_JUMP;
item_vector = make_tsvector(prs);
state->result = (TSVector) DirectFunctionCall2(tsvector_concat,
TSVectorGetDatum(state->result),
PointerGetDatum(item_vector));
}
else
state->result = make_tsvector(prs);
}
}
/*
* to_tsquery
*/
......
......@@ -20,6 +20,7 @@
#include "tsearch/ts_cache.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
#include "utils/jsonapi.h"
#include "utils/varlena.h"
......@@ -31,6 +32,19 @@ typedef struct
LexDescr *list;
} TSTokenTypeStorage;
/*
 * state for ts_headline_json_*
 *
 * Filled in once by the ts_headline_json(b)_byid_opt entry points and passed
 * to headline_json_value() for every string value of the document.
 */
typedef struct HeadlineJsonState
{
/* headline parse state; its curwords is reset for every string value */
HeadlineParsedText *prs;
/* cache entry of the text search configuration in use */
TSConfigCacheEntry *cfg;
/* cache entry of the configuration's parser; provides prsheadline */
TSParserCacheEntry *prsobj;
/* query whose matches are to be highlighted */
TSQuery query;
/* options list from deserialize_deflist(), or NIL when none were given */
List *prsoptions;
/* set to true once headline_json_value() has processed a value */
bool transformed;
} HeadlineJsonState;
/* per-string-value callback for transform_json(b)_string_values() */
static text * headline_json_value(void *_state, char *elem_value, int elem_len);
static void
tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid)
{
......@@ -363,3 +377,179 @@ ts_headline_opt(PG_FUNCTION_ARGS)
PG_GETARG_DATUM(1),
PG_GETARG_DATUM(2)));
}
Datum
ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS)
{
Jsonb *out, *jb = PG_GETARG_JSONB(1);
TSQuery query = PG_GETARG_TSQUERY(2);
text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
HeadlineParsedText prs;
HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
memset(&prs, 0, sizeof(HeadlineParsedText));
prs.lenwords = 32;
prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
state->prs = &prs;
state->cfg = lookup_ts_config_cache(PG_GETARG_OID(0));
state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
state->query = query;
if (opt)
state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
else
state->prsoptions = NIL;
if (!OidIsValid(state->prsobj->headlineOid))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("text search parser does not support headline creation")));
out = transform_jsonb_string_values(jb, state, action);
PG_FREE_IF_COPY(jb, 1);
PG_FREE_IF_COPY(query, 2);
if (opt)
PG_FREE_IF_COPY(opt, 3);
pfree(prs.words);
if (state->transformed)
{
pfree(prs.startsel);
pfree(prs.stopsel);
}
PG_RETURN_JSONB(out);
}
/*
 * ts_headline(jsonb, tsquery)
 *
 * Forwards to ts_headline_jsonb_byid_opt() with the configuration reported
 * by getTSCurrentConfig() and without an options argument.
 */
Datum
ts_headline_jsonb(PG_FUNCTION_ARGS)
{
	Datum		config = ObjectIdGetDatum(getTSCurrentConfig(true));
	Datum		document = PG_GETARG_DATUM(0);
	Datum		query = PG_GETARG_DATUM(1);

	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
										config,
										document,
										query));
}
/*
 * ts_headline(regconfig, jsonb, tsquery)
 *
 * Forwards its three arguments unchanged to ts_headline_jsonb_byid_opt(),
 * i.e. without an options argument.
 */
Datum
ts_headline_jsonb_byid(PG_FUNCTION_ARGS)
{
	Datum		config = PG_GETARG_DATUM(0);
	Datum		document = PG_GETARG_DATUM(1);
	Datum		query = PG_GETARG_DATUM(2);

	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
										config,
										document,
										query));
}
/*
 * ts_headline(jsonb, tsquery, text)
 *
 * Forwards to ts_headline_jsonb_byid_opt() with the configuration reported
 * by getTSCurrentConfig() and the caller's options argument.
 */
Datum
ts_headline_jsonb_opt(PG_FUNCTION_ARGS)
{
	Datum		config = ObjectIdGetDatum(getTSCurrentConfig(true));
	Datum		document = PG_GETARG_DATUM(0);
	Datum		query = PG_GETARG_DATUM(1);
	Datum		options = PG_GETARG_DATUM(2);

	PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_jsonb_byid_opt,
										config,
										document,
										query,
										options));
}
Datum
ts_headline_json_byid_opt(PG_FUNCTION_ARGS)
{
text *json = PG_GETARG_TEXT_P(1);
TSQuery query = PG_GETARG_TSQUERY(2);
text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
text *out;
JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
HeadlineParsedText prs;
HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
memset(&prs, 0, sizeof(HeadlineParsedText));
prs.lenwords = 32;
prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
state->prs = &prs;
state->cfg = lookup_ts_config_cache(PG_GETARG_OID(0));
state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
state->query = query;
if (opt)
state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
else
state->prsoptions = NIL;
if (!OidIsValid(state->prsobj->headlineOid))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("text search parser does not support headline creation")));
out = transform_json_string_values(json, state, action);
PG_FREE_IF_COPY(json, 1);
PG_FREE_IF_COPY(query, 2);
if (opt)
PG_FREE_IF_COPY(opt, 3);
pfree(prs.words);
if (state->transformed)
{
pfree(prs.startsel);
pfree(prs.stopsel);
}
PG_RETURN_TEXT_P(out);
}
/*
 * ts_headline(json, tsquery)
 *
 * Forwards to ts_headline_json_byid_opt() with the configuration reported by
 * getTSCurrentConfig() and without an options argument.
 */
Datum
ts_headline_json(PG_FUNCTION_ARGS)
{
	Datum		config = ObjectIdGetDatum(getTSCurrentConfig(true));
	Datum		document = PG_GETARG_DATUM(0);
	Datum		query = PG_GETARG_DATUM(1);

	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
										config,
										document,
										query));
}
/*
 * ts_headline(regconfig, json, tsquery)
 *
 * Forwards its three arguments unchanged to ts_headline_json_byid_opt(),
 * i.e. without an options argument.
 */
Datum
ts_headline_json_byid(PG_FUNCTION_ARGS)
{
	Datum		config = PG_GETARG_DATUM(0);
	Datum		document = PG_GETARG_DATUM(1);
	Datum		query = PG_GETARG_DATUM(2);

	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
										config,
										document,
										query));
}
/*
 * ts_headline(json, tsquery, text)
 *
 * Forwards to ts_headline_json_byid_opt() with the configuration reported by
 * getTSCurrentConfig() and the caller's options argument.
 */
Datum
ts_headline_json_opt(PG_FUNCTION_ARGS)
{
	Datum		config = ObjectIdGetDatum(getTSCurrentConfig(true));
	Datum		document = PG_GETARG_DATUM(0);
	Datum		query = PG_GETARG_DATUM(1);
	Datum		options = PG_GETARG_DATUM(2);

	PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_json_byid_opt,
										config,
										document,
										query,
										options));
}
/*
 * Return the headline, in text form, generated from one json(b) string value.
 *
 * _state is the HeadlineJsonState prepared by the caller; elem_value and
 * elem_len describe the string value.  The state's "transformed" flag is set
 * so the caller knows that at least one value has been processed.
 */
static text *
headline_json_value(void *_state, char *elem_value, int elem_len)
{
	HeadlineJsonState *hs = (HeadlineJsonState *) _state;

	/* start every value with an empty word list in the shared parse state */
	hs->prs->curwords = 0;
	hlparsetext(hs->cfg->cfgId, hs->prs, hs->query, elem_value, elem_len);

	/* let the parser's headline function mark up the parsed words */
	FunctionCall3(&(hs->prsobj->prsheadline),
				  PointerGetDatum(hs->prs),
				  PointerGetDatum(hs->prsoptions),
				  PointerGetDatum(hs->query));

	hs->transformed = true;

	return generateHeadline(hs->prs);
}
......@@ -4812,6 +4812,24 @@ DESCR("generate headline");
DATA(insert OID = 3755 ( ts_headline PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 25 "25 3615" _null_ _null_ _null_ _null_ _null_ ts_headline _null_ _null_ _null_ ));
DESCR("generate headline");
DATA(insert OID = 4201 ( ts_headline PGNSP PGUID 12 100 0 0 0 f f f f t f i s 4 0 3802 "3734 3802 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb_byid_opt _null_ _null_ _null_ ));
DESCR("generate headline from jsonb");
DATA(insert OID = 4202 ( ts_headline PGNSP PGUID 12 100 0 0 0 f f f f t f i s 3 0 3802 "3734 3802 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb_byid _null_ _null_ _null_ ));
DESCR("generate headline from jsonb");
DATA(insert OID = 4203 ( ts_headline PGNSP PGUID 12 100 0 0 0 f f f f t f s s 3 0 3802 "3802 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb_opt _null_ _null_ _null_ ));
DESCR("generate headline from jsonb");
DATA(insert OID = 4204 ( ts_headline PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 3802 "3802 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_jsonb _null_ _null_ _null_ ));
DESCR("generate headline from jsonb");
DATA(insert OID = 4205 ( ts_headline PGNSP PGUID 12 100 0 0 0 f f f f t f i s 4 0 114 "3734 114 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_json_byid_opt _null_ _null_ _null_ ));
DESCR("generate headline from json");
DATA(insert OID = 4206 ( ts_headline PGNSP PGUID 12 100 0 0 0 f f f f t f i s 3 0 114 "3734 114 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_json_byid _null_ _null_ _null_ ));
DESCR("generate headline from json");
DATA(insert OID = 4207 ( ts_headline PGNSP PGUID 12 100 0 0 0 f f f f t f s s 3 0 114 "114 3615 25" _null_ _null_ _null_ _null_ _null_ ts_headline_json_opt _null_ _null_ _null_ ));
DESCR("generate headline from json");
DATA(insert OID = 4208 ( ts_headline PGNSP PGUID 12 100 0 0 0 f f f f t f s s 2 0 114 "114 3615" _null_ _null_ _null_ _null_ _null_ ts_headline_json _null_ _null_ _null_ ));
DESCR("generate headline from json");
DATA(insert OID = 3745 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3614 "3734 25" _null_ _null_ _null_ _null_ _null_ to_tsvector_byid _null_ _null_ _null_ ));
DESCR("transform to tsvector");
DATA(insert OID = 3746 ( to_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ to_tsquery_byid _null_ _null_ _null_ ));
......@@ -4828,6 +4846,14 @@ DATA(insert OID = 3751 ( plainto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f s
DESCR("transform to tsquery");
DATA(insert OID = 5001 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery _null_ _null_ _null_ ));
DESCR("transform to tsquery");
/*
 * json(b) variants of to_tsvector.  The single-argument forms use the current
 * text search configuration and are therefore stable; the forms that take an
 * explicit configuration are immutable, like to_tsvector(regconfig, text), so
 * they can be used in expression indexes.
 */
DATA(insert OID = 4209 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3614 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector _null_ _null_ _null_ ));
DESCR("transform jsonb to tsvector");
DATA(insert OID = 4210 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3614 "114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector _null_ _null_ _null_ ));
DESCR("transform json to tsvector");
DATA(insert OID = 4211 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3614 "3734 3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector_byid _null_ _null_ _null_ ));
DESCR("transform jsonb to tsvector");
DATA(insert OID = 4212 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3614 "3734 114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector_byid _null_ _null_ _null_ ));
DESCR("transform json to tsvector");
DATA(insert OID = 3752 ( tsvector_update_trigger PGNSP PGUID 12 1 0 0 0 f f f f f f v s 0 0 2279 "" _null_ _null_ _null_ _null_ _null_ tsvector_update_trigger_byid _null_ _null_ _null_ ));
DESCR("trigger for automatic update of tsvector column");
......
......@@ -86,6 +86,15 @@ typedef struct
#define MAXNUMPOS (256)
#define LIMITPOS(x) ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
/*
 * When a TSVector is built from several parts that must be treated as
 * separate, an artificial increment has to be added to the position of each
 * lexeme of every subsequent part. This is required to avoid the situation
 * where a tsquery finds a phrase consisting of lexemes from two such parts.
 * TS_JUMP defines the value of this increment.
 */
#define TS_JUMP 1
/* This struct represents a complete tsvector datum */
typedef struct
{
......
......@@ -1674,3 +1674,93 @@ select json_strip_nulls('{"a": {"b": null, "c": null}, "d": {} }');
{"a":{},"d":{}}
(1 row)
-- json to tsvector
select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::json);
to_tsvector
---------------------------------------------------------------------------
'aaa':1 'bbb':2 'ccc':4 'ddd':3 'eee':6 'fff':7 'ggg':8 'hhh':10 'iii':11
(1 row)
-- json to tsvector with config
select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::json);
to_tsvector
---------------------------------------------------------------------------
'aaa':1 'bbb':2 'ccc':4 'ddd':3 'eee':6 'fff':7 'ggg':8 'hhh':10 'iii':11
(1 row)
-- json to tsvector with stop words
select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::json);
to_tsvector
----------------------------------------------------------------------------
'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13
(1 row)
-- ts_vector corner cases
select to_tsvector('""'::json);
to_tsvector
-------------
(1 row)
select to_tsvector('{}'::json);
to_tsvector
-------------
(1 row)
select to_tsvector('[]'::json);
to_tsvector
-------------
(1 row)
select to_tsvector('null'::json);
to_tsvector
-------------
(1 row)
-- ts_headline for json
select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
ts_headline
---------------------------------------------------------------------------------------------------------
{"a":"aaa <b>bbb</b>","b":{"c":"ccc <b>ddd</b> fff","c1":"ccc1 ddd1"},"d":["ggg <b>hhh</b>","iii jjj"]}
(1 row)
select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
ts_headline
----------------------------------------------------------------------------------------
{"a":"aaa <b>bbb</b>","b":{"c":"ccc <b>ddd</b> fff"},"d":["ggg <b>hhh</b>","iii jjj"]}
(1 row)
select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
ts_headline
------------------------------------------------------------------------------------------
{"a":"aaa <bbb>","b":{"c":"ccc <ddd> fff","c1":"ccc1 ddd1"},"d":["ggg <hhh>","iii jjj"]}
(1 row)
select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
ts_headline
------------------------------------------------------------------------------------------
{"a":"aaa <bbb>","b":{"c":"ccc <ddd> fff","c1":"ccc1 ddd1"},"d":["ggg <hhh>","iii jjj"]}
(1 row)
-- corner cases for ts_headline with json
select ts_headline('null'::json, tsquery('aaa & bbb'));
ts_headline
-------------
null
(1 row)
select ts_headline('{}'::json, tsquery('aaa & bbb'));
ts_headline
-------------
{}
(1 row)
select ts_headline('[]'::json, tsquery('aaa & bbb'));
ts_headline
-------------
[]
(1 row)
......@@ -3474,3 +3474,93 @@ HINT: Try using the function jsonb_set to replace key value.
select jsonb_insert('{"a": {"b": "value"}}', '{a, b}', '"new_value"', true);
ERROR: cannot replace existing key
HINT: Try using the function jsonb_set to replace key value.
-- jsonb to tsvector
select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb);
to_tsvector
---------------------------------------------------------------------------
'aaa':1 'bbb':2 'ccc':4 'ddd':3 'eee':6 'fff':7 'ggg':8 'hhh':10 'iii':11
(1 row)
-- jsonb to tsvector with config
select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb);
to_tsvector
---------------------------------------------------------------------------
'aaa':1 'bbb':2 'ccc':4 'ddd':3 'eee':6 'fff':7 'ggg':8 'hhh':10 'iii':11
(1 row)
-- jsonb to tsvector with stop words
select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::jsonb);
to_tsvector
----------------------------------------------------------------------------
'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13
(1 row)
-- ts_vector corner cases
select to_tsvector('""'::jsonb);
to_tsvector
-------------
(1 row)
select to_tsvector('{}'::jsonb);
to_tsvector
-------------
(1 row)
select to_tsvector('[]'::jsonb);
to_tsvector
-------------
(1 row)
select to_tsvector('null'::jsonb);
to_tsvector
-------------
(1 row)
-- ts_headline for jsonb
select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
ts_headline
------------------------------------------------------------------------------------------------------------------
{"a": "aaa <b>bbb</b>", "b": {"c": "ccc <b>ddd</b> fff", "c1": "ccc1 ddd1"}, "d": ["ggg <b>hhh</b>", "iii jjj"]}
(1 row)
select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
ts_headline
-----------------------------------------------------------------------------------------------
{"a": "aaa <b>bbb</b>", "b": {"c": "ccc <b>ddd</b> fff"}, "d": ["ggg <b>hhh</b>", "iii jjj"]}
(1 row)
select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
ts_headline
---------------------------------------------------------------------------------------------------
{"a": "aaa <bbb>", "b": {"c": "ccc <ddd> fff", "c1": "ccc1 ddd1"}, "d": ["ggg <hhh>", "iii jjj"]}
(1 row)
select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
ts_headline
---------------------------------------------------------------------------------------------------
{"a": "aaa <bbb>", "b": {"c": "ccc <ddd> fff", "c1": "ccc1 ddd1"}, "d": ["ggg <hhh>", "iii jjj"]}
(1 row)
-- corner cases for ts_headline with jsonb
select ts_headline('null'::jsonb, tsquery('aaa & bbb'));
ts_headline
-------------
null
(1 row)
select ts_headline('{}'::jsonb, tsquery('aaa & bbb'));
ts_headline
-------------
{}
(1 row)
select ts_headline('[]'::jsonb, tsquery('aaa & bbb'));
ts_headline
-------------
[]
(1 row)
......@@ -551,3 +551,29 @@ select json_strip_nulls('[1,{"a":1,"b":null,"c":2},3]');
-- an empty object is not null and should not be stripped
select json_strip_nulls('{"a": {"b": null, "c": null}, "d": {} }');
-- json to tsvector
select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::json);
-- json to tsvector with config
select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::json);
-- json to tsvector with stop words
select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::json);
-- ts_vector corner cases
select to_tsvector('""'::json);
select to_tsvector('{}'::json);
select to_tsvector('[]'::json);
select to_tsvector('null'::json);
-- ts_headline for json
select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'));
select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
-- corner cases for ts_headline with json
select ts_headline('null'::json, tsquery('aaa & bbb'));
select ts_headline('{}'::json, tsquery('aaa & bbb'));
select ts_headline('[]'::json, tsquery('aaa & bbb'));
......@@ -878,3 +878,29 @@ select jsonb_insert('{"a": {"b": "value"}}', '{a, c}', '"new_value"', true);
select jsonb_insert('{"a": {"b": "value"}}', '{a, b}', '"new_value"');
select jsonb_insert('{"a": {"b": "value"}}', '{a, b}', '"new_value"', true);
-- jsonb to tsvector
select to_tsvector('{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb);
-- jsonb to tsvector with config
select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c": {"d": "hhh iii"}}'::jsonb);
-- jsonb to tsvector with stop words
select to_tsvector('{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::jsonb);
-- ts_vector corner cases
select to_tsvector('""'::jsonb);
select to_tsvector('{}'::jsonb);
select to_tsvector('[]'::jsonb);
select to_tsvector('null'::jsonb);
-- ts_headline for jsonb
select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'));
select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
-- corner cases for ts_headline with jsonb
select ts_headline('null'::jsonb, tsquery('aaa & bbb'));
select ts_headline('{}'::jsonb, tsquery('aaa & bbb'));
select ts_headline('[]'::jsonb, tsquery('aaa & bbb'));
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment