Commit 41d17e04 authored by Teodor Sigaev

Fix URL generation in headlines. Only tag lexemes are now replaced by a space; URL and hyphenated-word lexemes are marked as skipped instead.

Per http://archives.postgresql.org/pgsql-bugs/2008-12/msg00013.php
parent 8fd07a35
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.10 2009/01/01 17:23:48 momjian Exp $ * $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.11 2009/01/15 16:33:59 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -625,7 +625,7 @@ generateHeadline(HeadlineParsedText *prs) ...@@ -625,7 +625,7 @@ generateHeadline(HeadlineParsedText *prs)
*ptr = ' '; *ptr = ' ';
ptr++; ptr++;
} }
else else if (!wrd->skip)
{ {
if (wrd->selected) if (wrd->selected)
{ {
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.19 2009/01/15 16:33:28 teodor Exp $ * $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.20 2009/01/15 16:33:59 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -1587,10 +1587,11 @@ prsd_end(PG_FUNCTION_ARGS) ...@@ -1587,10 +1587,11 @@ prsd_end(PG_FUNCTION_ARGS)
#define COMPLEXTOKEN(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD ) #define COMPLEXTOKEN(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
#define ENDPUNCTOKEN(x) ( (x)==SPACE ) #define ENDPUNCTOKEN(x) ( (x)==SPACE )
#define TS_IDIGNORE(x) ( (x)==TAG_T || (x)==PROTOCOL || (x)==SPACE || (x)==XMLENTITY ) #define TS_IDIGNORE(x) ( (x)==TAG_T || (x)==PROTOCOL || (x)==SPACE || (x)==XMLENTITY )
#define HLIDIGNORE(x) ( (x)==URL_T || (x)==TAG_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD ) #define HLIDREPLACE(x) ( (x)==TAG_T )
#define XMLHLIDIGNORE(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD ) #define HLIDSKIP(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
#define NONWORDTOKEN(x) ( (x)==SPACE || HLIDIGNORE(x) ) #define XMLHLIDSKIP(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
#define NONWORDTOKEN(x) ( (x)==SPACE || HLIDREPLACE(x) || HLIDSKIP(x) )
#define NOENDTOKEN(x) ( NONWORDTOKEN(x) || (x)==SCIENTIFIC || (x)==VERSIONNUMBER || (x)==DECIMAL || (x)==SIGNEDINT || (x)==UNSIGNEDINT || TS_IDIGNORE(x) ) #define NOENDTOKEN(x) ( NONWORDTOKEN(x) || (x)==SCIENTIFIC || (x)==VERSIONNUMBER || (x)==DECIMAL || (x)==SIGNEDINT || (x)==UNSIGNEDINT || TS_IDIGNORE(x) )
typedef struct typedef struct
...@@ -1695,13 +1696,15 @@ mark_fragment(HeadlineParsedText *prs, int highlight, int startpos, int endpos) ...@@ -1695,13 +1696,15 @@ mark_fragment(HeadlineParsedText *prs, int highlight, int startpos, int endpos)
prs->words[i].selected = 1; prs->words[i].selected = 1;
if (highlight == 0) if (highlight == 0)
{ {
if (HLIDIGNORE(prs->words[i].type)) if (HLIDREPLACE(prs->words[i].type))
prs->words[i].replace = 1; prs->words[i].replace = 1;
else if ( HLIDSKIP(prs->words[i].type) )
prs->words[i].skip = 1;
} }
else else
{ {
if (XMLHLIDIGNORE(prs->words[i].type)) if (XMLHLIDSKIP(prs->words[i].type))
prs->words[i].replace = 1; prs->words[i].skip = 1;
} }
prs->words[i].in = (prs->words[i].repeated) ? 0 : 1; prs->words[i].in = (prs->words[i].repeated) ? 0 : 1;
...@@ -2050,13 +2053,15 @@ mark_hl_words(HeadlineParsedText *prs, TSQuery query, int highlight, ...@@ -2050,13 +2053,15 @@ mark_hl_words(HeadlineParsedText *prs, TSQuery query, int highlight,
prs->words[i].selected = 1; prs->words[i].selected = 1;
if (highlight == 0) if (highlight == 0)
{ {
if (HLIDIGNORE(prs->words[i].type)) if (HLIDREPLACE(prs->words[i].type))
prs->words[i].replace = 1; prs->words[i].replace = 1;
else if ( HLIDSKIP(prs->words[i].type) )
prs->words[i].skip = 1;
} }
else else
{ {
if (XMLHLIDIGNORE(prs->words[i].type)) if (XMLHLIDSKIP(prs->words[i].type))
prs->words[i].replace = 1; prs->words[i].skip = 1;
} }
prs->words[i].in = (prs->words[i].repeated) ? 0 : 1; prs->words[i].in = (prs->words[i].repeated) ? 0 : 1;
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* *
* Copyright (c) 1998-2009, PostgreSQL Global Development Group * Copyright (c) 1998-2009, PostgreSQL Global Development Group
* *
* $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.12 2009/01/01 17:24:01 momjian Exp $ * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.13 2009/01/15 16:33:59 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -38,7 +38,8 @@ typedef struct ...@@ -38,7 +38,8 @@ typedef struct
in:1, in:1,
replace:1, replace:1,
repeated:1, repeated:1,
unused:4, skip:1,
unused:3,
type:8, type:8,
len:16; len:16;
char *word; char *word;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment