Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
Postgres FD Implementation
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Abuhujair Javed
Postgres FD Implementation
Commits
2860041b
Commit
2860041b
authored
Aug 15, 2002
by
Bruce Momjian
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
August 13, 2002
Use parser of OpenFTS v0.33. -- Teodor Sigaev
parent
12763562
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
129 additions
and
100 deletions
+129
-100
contrib/tsearch/README.tsearch
contrib/tsearch/README.tsearch
+5
-0
contrib/tsearch/deflex.h
contrib/tsearch/deflex.h
+18
-13
contrib/tsearch/expected/tsearch.out
contrib/tsearch/expected/tsearch.out
+4
-4
contrib/tsearch/morph.c
contrib/tsearch/morph.c
+14
-10
contrib/tsearch/parser.l
contrib/tsearch/parser.l
+88
-73
No files found.
contrib/tsearch/README.tsearch
View file @
2860041b
...
...
@@ -4,6 +4,11 @@ a searchable data type (textual) with indexed access.
All work was done by Teodor Sigaev (teodor@stack.net) and Oleg Bartunov
(oleg@sai.msu.su).
CHANGES:
August 13, 2002
Use parser of OpenFTS v0.33.
IMPORTANT NOTICE:
This is a first step of our work on integration of OpenFTS
...
...
contrib/tsearch/deflex.h
View file @
2860041b
...
...
@@ -2,28 +2,33 @@
#define __DEFLEX_H__
/* remember !!!! */
#define LASTNUM
19
#define LASTNUM
23
#define LATWORD 1
#define
NONLATINWORD
2
#define
CYRWORD
2
#define UWORD 3
#define EMAIL 4
#define FURL 5
#define HOST 6
#define
FLOAT
7
#define
FINT
8
#define PART
WORD 9
#define
NONLATINPARTWORD 10
#define LATPART
WORD 11
#define SPACE 12
#define
SYMTAG
13
#define HTTP 14
#define
DEFIS
WORD 15
#define
DEFISLAT
WORD 16
#define
DEFISNONLATI
NWORD 17
#define
SCIENTIFIC
7
#define
VERSIONNUMBER
8
#define PART
HYPHENWORD 9
#define
CYRPARTHYPHENWORD 10
#define LATPART
HYPHENWORD 11
#define SPACE
12
#define
TAG
13
#define HTTP
14
#define
HYPHEN
WORD 15
#define
LATHYPHEN
WORD 16
#define
CYRHYPHE
NWORD 17
#define URI 18
#define FILEPATH 19
#define DECIMAL 20
#define SIGNEDINT 21
#define UNSIGNEDINT 22
#define HTMLENTITY 23
extern
const
char
*
descr
[];
#endif
contrib/tsearch/expected/tsearch.out
View file @
2860041b
...
...
@@ -689,9 +689,9 @@ SELECT count(*) FROM test_txtidx WHERE a ## '(eq|yt)&(wR|qh)';
select txt2txtidx('345 qwe@efd.r \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
<i <b> wow < jqw <> qwerty');
txt2txtidx
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
'ad' 'dw' 'jf' '234' '345' '4.2' '455' 'jqw' 'qwe' 'wer' 'wow' 'asdf' 'ewr1' 'qwer' 'sdjk' '5.005' 'ewri2' 'qwqwe' 'wefjn' 'gist.c' 'gist.h' 'qwerti' '234.435' ':8100/?' 'qwe-wer' 'readlin' 'www.com' '+4.0e-10' 'gist.h.c' 'rewt/ewr' 'qwe@efd.r' '/?ad=qwe&dw' '/wqe-324/ewr' 'aew.werc.ewr' '1aew.werc.ewr' '2aew.werc.ewr' '3aew.werc.ewr' '4aew.werc.ewr' '5aew.werc.ewr' '6aew.werc.ewr' '7aew.werc.ewr' '/usr/local/fff' '/awdf/dwqe/4325' ':8100/?ad=qwe&dw' 'teodor@stack.net' '5aew.werc.ewr:8100/?' ':8100/?ad=qwe&dw=%20%32' 'aew.werc.ewr/?ad=qwe&dw' '1aew.werc.ewr/?ad=qwe&dw' '3aew.werc.ewr/?ad=qwe&dw' '6aew.werc.ewr:8100/?ad=qwe&dw' '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32'
txt2txtidx

-------------
'ad' 'dw' 'jf' '234' '345' '4.2' '455' 'jqw' 'qwe' 'wer' 'wow' 'asdf' 'ewr1' 'qwer' 'sdjk' '5.005' 'ewri2' 'qwqwe' 'wefjn' 'gist.c' 'gist.h' 'qwerti' '234.435' ':8100/?' 'qwe-wer' 'readlin' 'www.com' '+4.0e-10' 'gist.h.c' 'rewt/ewr' 'qwe@efd.r' '
readline-4' '
/?ad=qwe&dw' '/wqe-324/ewr' 'aew.werc.ewr' '1aew.werc.ewr' '2aew.werc.ewr' '3aew.werc.ewr' '4aew.werc.ewr' '5aew.werc.ewr' '6aew.werc.ewr' '7aew.werc.ewr' '/usr/local/fff' '/awdf/dwqe/4325' ':8100/?ad=qwe&dw' 'teodor@stack.net' '5aew.werc.ewr:8100/?' ':8100/?ad=qwe&dw=%20%32' 'aew.werc.ewr/?ad=qwe&dw' '1aew.werc.ewr/?ad=qwe&dw' '3aew.werc.ewr/?ad=qwe&dw' '6aew.werc.ewr:8100/?ad=qwe&dw' '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32'
(1 row)
select txtidxsize(txt2txtidx('345 qw'));
...
...
@@ -705,7 +705,7 @@ select txtidxsize(txt2txtidx('345 qwe@efd.r \' http://www.com/ http://aew.werc.e
<i <b> wow < jqw <> qwerty'));
txtidxsize
------------
5
2
5
3
(1 row)
insert into test_txtidx (a) values ('345 qwerty');
...
...
contrib/tsearch/morph.c
View file @
2860041b
...
...
@@ -75,19 +75,23 @@ static MAPDICT mapdict[] = {
{
NODICT
,
NODICT
},
/* EMAIL */
{
NODICT
,
NODICT
},
/* FURL */
{
NODICT
,
NODICT
},
/* HOST */
{
NODICT
,
NODICT
},
/*
FLOAT
*/
{
NODICT
,
NODICT
},
/*
FINT
*/
{
BYLOCALE
,
DEFAULTDICT
},
/* PARTWORD */
{
BYLOCALE
,
NODICT
},
/*
NONLATINPART
WORD */
{
DEFAULTDICT
,
NODICT
},
/* LATPARTWORD */
{
NODICT
,
NODICT
},
/*
SCIENTIFIC
*/
{
NODICT
,
NODICT
},
/*
VERSIONNUMBER
*/
{
BYLOCALE
,
DEFAULTDICT
},
/* PART
HYPHEN
WORD */
{
BYLOCALE
,
NODICT
},
/*
CYRPARTHYPHEN
WORD */
{
DEFAULTDICT
,
NODICT
},
/* LATPART
HYPHEN
WORD */
{
STOPLEXEM
,
NODICT
},
/* SPACE */
{
STOPLEXEM
,
NODICT
},
/*
SYM
TAG */
{
STOPLEXEM
,
NODICT
},
/* TAG */
{
STOPLEXEM
,
NODICT
},
/* HTTP */
{
BYLOCALE
,
DEFAULTDICT
},
/*
DEFIS
WORD */
{
DEFAULTDICT
,
NODICT
},
/*
DEFISLAT
WORD */
{
BYLOCALE
,
NODICT
},
/*
DEFISNONLATI
NWORD */
{
BYLOCALE
,
DEFAULTDICT
},
/*
HYPHEN
WORD */
{
DEFAULTDICT
,
NODICT
},
/*
LATHYPHEN
WORD */
{
BYLOCALE
,
NODICT
},
/*
CYRHYPHE
NWORD */
{
NODICT
,
NODICT
},
/* URI */
{
NODICT
,
NODICT
}
/* FILEPATH */
{
NODICT
,
NODICT
},
/* FILEPATH */
{
NODICT
,
NODICT
},
/* DECIMAL */
{
NODICT
,
NODICT
},
/* SIGNEDINT */
{
NODICT
,
NODICT
},
/* UNSIGNEDINT */
{
STOPLEXEM
,
NODICT
}
/* HTMLENTITY */
};
static
bool
inited
=
false
;
...
...
contrib/tsearch/parser.l
View file @
2860041b
...
...
@@ -5,18 +5,17 @@
/* postgres allocation function */
#include "postgres.h"
#define free
pfree
#define malloc
palloc
#define free
pfree
#define malloc
palloc
#define realloc repalloc
#ifdef strdup
#undef strdup
#endif
#define strdup pstrdup
#define strdup pstrdup
char *token = NULL; /* pointer to token */
char *s = NULL; /*
for returning full defis
-word */
char *s = NULL; /*
to return WHOLE hyphenated
-word */
YY_BUFFER_STATE buf = NULL; /* buffer to parse; needed for parsing from a string */
...
...
@@ -57,21 +56,21 @@ int bytestoread = 0; /* for limiting read from filehandle */
%option nounput
%option noyywrap
/* parser's state for parsing defis-word */
/* parser's state for parsing hyphenated-word */
%x DELIM
/* parser's state for parsing URL*/
%x URL
%x SERVER
/* parser's state for parsing filepath */
/* parser's state for parsing TAGS */
%x INTAG
%x QINTAG
%x INCOMMENT
%x INSCRIPT
/*
NONLATIN
char */
NONLATIN
ALNUM [0-9\200-\377]
NONLATIN
ALPHA [\200-\377]
/*
cyrillic koi8
char */
CYR
ALNUM [0-9\200-\377]
CYR
ALPHA [\200-\377]
ALPHA [a-zA-Z\200-\377]
ALNUM [0-9a-zA-Z\200-\377]
...
...
@@ -81,66 +80,59 @@ URI [-_[:alnum:]/%,\.;=&?#]+
%%
"<"[[:alpha:]] { BEGIN INTAG;
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
return SYMTAG;
}
"</"[[:alpha:]] { BEGIN INTAG;
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
return SYMTAG;
}
"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
"<>" {
<INSCRIPT>"</"[Ss][Cc][Rr][Ii][Pp][Tt]">" {
BEGIN INITIAL;
*tsearch_yytext=' '; *(tsearch_yytext+1) = '\0';
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
return S
YMTAG
;
return S
PACE
;
}
"<"[^>[:alpha:]] {
"<!--" { BEGIN INCOMMENT; }
<INCOMMENT>"-->" {
BEGIN INITIAL;
*tsearch_yytext=' '; *(tsearch_yytext+1) = '\0';
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
return SPACE;
}
<INTAG>"\"" { BEGIN QINTAG;
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
return SYMTAG;
}
<QINTAG>"\\\"" {
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
return SYMTAG;
}
"<"[\![:alpha:]] { BEGIN INTAG; }
<QINTAG>"\"" { BEGIN INTAG;
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
return SYMTAG;
}
"</"[[:alpha:]] { BEGIN INTAG; }
<QINTAG>.|\n {
<INTAG>"\"" { BEGIN QINTAG; }
<QINTAG>"\\\"" ;
<QINTAG>"\"" { BEGIN INTAG; }
<INTAG>">" {
BEGIN INITIAL;
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
return SYMTAG;
*tsearch_yytext=' ';
token = tsearch_yytext;
tokenlen = 1;
return TAG;
}
<INTAG>">" { BEGIN INITIAL;
<QINTAG,INTAG,INCOMMENT,INSCRIPT>.|\n ;
\&(quot|amp|nbsp|lt|gt)\; {
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
return
SYMTAG
;
}
return
HTMLENTITY
;
}
<INTAG>.|\n
{
\&\#[0-9][0-9]?[0-9]?\;
{
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
return
SYMTAG
;
return
HTMLENTITY
;
}
[-_\.[:alnum:]]+@{HOSTNAME} /* Emails */ {
token = tsearch_yytext;
...
...
@@ -148,22 +140,34 @@ URI [-_[:alnum:]/%,\.;=&?#]+
return EMAIL;
}
<DELIM,INITIAL>[0-9] /* digit's and point (might be a version) */
{
[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+ /* float */
{
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
return FINT;
return SCIENTIFIC;
}
[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
return VERSIONNUMBER;
}
[+-]?[0-9]+\.[0-9]+ {
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
return DECIMAL;
}
<DELIM,INITIAL>[0-9]+[0-9\.]*[0-9] /* digit's and point (might be a version) */
{
[+-][0-9]+
{
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
return
F
INT;
return
SIGNED
INT;
}
[+-]?[0-9\.]+[eE][+-]?[0-9]+ /* float */
{
<DELIM,INITIAL>[0-9]+
{
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
return
FLOA
T;
return
UNSIGNEDIN
T;
}
http"://" {
...
...
@@ -208,52 +212,58 @@ ftp"://" {
return FILEPATH;
}
({
NONLATINALNUM}+-)+{NONLATIN
ALPHA}+ /* composite-word */ {
({
CYRALPHA}+-)+{CYR
ALPHA}+ /* composite-word */ {
BEGIN DELIM;
if (s) { free(s); s=NULL; }
s = strdup( tsearch_yytext );
tokenlen = tsearch_yyleng;
yyless( 0 );
token = s;
return
DEFISNONLATI
NWORD;
return
CYRHYPHE
NWORD;
}
([[:al
num
:]]+-)+[[:alpha:]]+ /* composite-word */ {
([[:al
pha
:]]+-)+[[:alpha:]]+ /* composite-word */ {
BEGIN DELIM;
if (s) { free(s); s=NULL; }
tokenlen = tsearch_yyleng;
s = strdup( tsearch_yytext );
tokenlen = tsearch_yyleng;
yyless( 0 );
token = s;
return
DEFISLAT
WORD;
return
LATHYPHEN
WORD;
}
({ALNUM}+-)+{AL
PHA
}+ /* composite-word */ {
({ALNUM}+-)+{AL
NUM
}+ /* composite-word */ {
BEGIN DELIM;
if (s) { free(s); s=NULL; }
s = strdup( tsearch_yytext );
tokenlen = tsearch_yyleng;
yyless( 0 );
token = s;
return DEFISWORD;
return HYPHENWORD;
}
<DELIM>\+?[0-9]+\.[0-9]+ {
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
return DECIMAL;
}
<DELIM>{
NONLATINALNUM
}+ /* one word in composite-word */ {
<DELIM>{
CYRALPHA
}+ /* one word in composite-word */ {
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
return
NONLATINPART
WORD;
return
CYRPARTHYPHEN
WORD;
}
<DELIM>[[:al
num
:]]+ /* one word in composite-word */ {
<DELIM>[[:al
pha
:]]+ /* one word in composite-word */ {
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
return LATPARTWORD;
return LATPART
HYPHEN
WORD;
}
<DELIM>{ALNUM}+ /* one word in composite-word */ {
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
return PARTWORD;
return PART
HYPHEN
WORD;
}
<DELIM>- {
...
...
@@ -264,17 +274,16 @@ ftp"://" {
<DELIM,SERVER,URL>.|\n /* return in basic state */ {
BEGIN INITIAL;
tokenlen = tsearch_yyleng;
yyless( 0 );
}
{
NONLATINALNUM
}+ /* normal word */ {
{
CYRALPHA
}+ /* normal word */ {
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
return
NONLATIN
WORD;
return
CYR
WORD;
}
[[:al
num
:]]+ /* normal word */ {
[[:al
pha
:]]+ /* normal word */ {
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
return LATWORD;
...
...
@@ -286,7 +295,13 @@ ftp"://" {
return UWORD;
}
.|\n {
[ \r\n\t]+ {
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
return SPACE;
}
. {
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
return SPACE;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment