Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
Postgres FD Implementation
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Abuhujair Javed
Postgres FD Implementation
Commits
2860041b
Commit
2860041b
authored
Aug 15, 2002
by
Bruce Momjian
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
August 13, 2002
Use parser of OpenFTS v0.33. -- Teodor Sigaev
parent
12763562
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
129 additions
and
100 deletions
+129
-100
contrib/tsearch/README.tsearch
contrib/tsearch/README.tsearch
+5
-0
contrib/tsearch/deflex.h
contrib/tsearch/deflex.h
+18
-13
contrib/tsearch/expected/tsearch.out
contrib/tsearch/expected/tsearch.out
+4
-4
contrib/tsearch/morph.c
contrib/tsearch/morph.c
+14
-10
contrib/tsearch/parser.l
contrib/tsearch/parser.l
+88
-73
No files found.
contrib/tsearch/README.tsearch
View file @
2860041b
...
@@ -4,6 +4,11 @@ a searchable data type (textual) with indexed access.
...
@@ -4,6 +4,11 @@ a searchable data type (textual) with indexed access.
All work was done by Teodor Sigaev (teodor@stack.net) and Oleg Bartunov
All work was done by Teodor Sigaev (teodor@stack.net) and Oleg Bartunov
(oleg@sai.msu.su).
(oleg@sai.msu.su).
CHANGES:
August 13, 2002
Use parser of OpenFTS v0.33.
IMPORTANT NOTICE:
IMPORTANT NOTICE:
This is a first step of our work on integration of OpenFTS
This is a first step of our work on integration of OpenFTS
...
...
contrib/tsearch/deflex.h
View file @
2860041b
...
@@ -2,28 +2,33 @@
...
@@ -2,28 +2,33 @@
#define __DEFLEX_H__
#define __DEFLEX_H__
/* rememder !!!! */
/* rememder !!!! */
#define LASTNUM
19
#define LASTNUM
23
#define LATWORD 1
#define LATWORD 1
#define
NONLATINWORD
2
#define
CYRWORD
2
#define UWORD 3
#define UWORD 3
#define EMAIL 4
#define EMAIL 4
#define FURL 5
#define FURL 5
#define HOST 6
#define HOST 6
#define
FLOAT
7
#define
SCIENTIFIC
7
#define
FINT
8
#define
VERSIONNUMBER
8
#define PART
WORD 9
#define PART
HYPHENWORD 9
#define
NONLATINPARTWORD 10
#define
CYRPARTHYPHENWORD 10
#define LATPART
WORD 11
#define LATPART
HYPHENWORD 11
#define SPACE 12
#define SPACE 12
#define
SYMTAG
13
#define
TAG
13
#define HTTP 14
#define HTTP 14
#define
DEFIS
WORD 15
#define
HYPHEN
WORD 15
#define
DEFISLAT
WORD 16
#define
LATHYPHEN
WORD 16
#define
DEFISNONLATI
NWORD 17
#define
CYRHYPHE
NWORD 17
#define URI 18
#define URI 18
#define FILEPATH 19
#define FILEPATH 19
#define DECIMAL 20
#define SIGNEDINT 21
#define UNSIGNEDINT 22
#define HTMLENTITY 23
extern
const
char
*
descr
[];
extern
const
char
*
descr
[];
#endif
#endif
contrib/tsearch/expected/tsearch.out
View file @
2860041b
...
@@ -690,8 +690,8 @@ select txt2txtidx('345 qwe@efd.r \' http://www.com/ http://aew.werc.ewr/?ad=qwe&
...
@@ -690,8 +690,8 @@ select txt2txtidx('345 qwe@efd.r \' http://www.com/ http://aew.werc.ewr/?ad=qwe&
/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
<i <b> wow < jqw <> qwerty');
<i <b> wow < jqw <> qwerty');
txt2txtidx
txt2txtidx
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-------------
'ad' 'dw' 'jf' '234' '345' '4.2' '455' 'jqw' 'qwe' 'wer' 'wow' 'asdf' 'ewr1' 'qwer' 'sdjk' '5.005' 'ewri2' 'qwqwe' 'wefjn' 'gist.c' 'gist.h' 'qwerti' '234.435' ':8100/?' 'qwe-wer' 'readlin' 'www.com' '+4.0e-10' 'gist.h.c' 'rewt/ewr' 'qwe@efd.r' '/?ad=qwe&dw' '/wqe-324/ewr' 'aew.werc.ewr' '1aew.werc.ewr' '2aew.werc.ewr' '3aew.werc.ewr' '4aew.werc.ewr' '5aew.werc.ewr' '6aew.werc.ewr' '7aew.werc.ewr' '/usr/local/fff' '/awdf/dwqe/4325' ':8100/?ad=qwe&dw' 'teodor@stack.net' '5aew.werc.ewr:8100/?' ':8100/?ad=qwe&dw=%20%32' 'aew.werc.ewr/?ad=qwe&dw' '1aew.werc.ewr/?ad=qwe&dw' '3aew.werc.ewr/?ad=qwe&dw' '6aew.werc.ewr:8100/?ad=qwe&dw' '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32'
'ad' 'dw' 'jf' '234' '345' '4.2' '455' 'jqw' 'qwe' 'wer' 'wow' 'asdf' 'ewr1' 'qwer' 'sdjk' '5.005' 'ewri2' 'qwqwe' 'wefjn' 'gist.c' 'gist.h' 'qwerti' '234.435' ':8100/?' 'qwe-wer' 'readlin' 'www.com' '+4.0e-10' 'gist.h.c' 'rewt/ewr' 'qwe@efd.r' '
readline-4' '
/?ad=qwe&dw' '/wqe-324/ewr' 'aew.werc.ewr' '1aew.werc.ewr' '2aew.werc.ewr' '3aew.werc.ewr' '4aew.werc.ewr' '5aew.werc.ewr' '6aew.werc.ewr' '7aew.werc.ewr' '/usr/local/fff' '/awdf/dwqe/4325' ':8100/?ad=qwe&dw' 'teodor@stack.net' '5aew.werc.ewr:8100/?' ':8100/?ad=qwe&dw=%20%32' 'aew.werc.ewr/?ad=qwe&dw' '1aew.werc.ewr/?ad=qwe&dw' '3aew.werc.ewr/?ad=qwe&dw' '6aew.werc.ewr:8100/?ad=qwe&dw' '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32'
(1 row)
(1 row)
select txtidxsize(txt2txtidx('345 qw'));
select txtidxsize(txt2txtidx('345 qw'));
...
@@ -705,7 +705,7 @@ select txtidxsize(txt2txtidx('345 qwe@efd.r \' http://www.com/ http://aew.werc.e
...
@@ -705,7 +705,7 @@ select txtidxsize(txt2txtidx('345 qwe@efd.r \' http://www.com/ http://aew.werc.e
<i <b> wow < jqw <> qwerty'));
<i <b> wow < jqw <> qwerty'));
txtidxsize
txtidxsize
------------
------------
5
2
5
3
(1 row)
(1 row)
insert into test_txtidx (a) values ('345 qwerty');
insert into test_txtidx (a) values ('345 qwerty');
...
...
contrib/tsearch/morph.c
View file @
2860041b
...
@@ -75,19 +75,23 @@ static MAPDICT mapdict[] = {
...
@@ -75,19 +75,23 @@ static MAPDICT mapdict[] = {
{
NODICT
,
NODICT
},
/* EMAIL */
{
NODICT
,
NODICT
},
/* EMAIL */
{
NODICT
,
NODICT
},
/* FURL */
{
NODICT
,
NODICT
},
/* FURL */
{
NODICT
,
NODICT
},
/* HOST */
{
NODICT
,
NODICT
},
/* HOST */
{
NODICT
,
NODICT
},
/*
FLOAT
*/
{
NODICT
,
NODICT
},
/*
SCIENTIFIC
*/
{
NODICT
,
NODICT
},
/*
FINT
*/
{
NODICT
,
NODICT
},
/*
VERSIONNUMBER
*/
{
BYLOCALE
,
DEFAULTDICT
},
/* PARTWORD */
{
BYLOCALE
,
DEFAULTDICT
},
/* PART
HYPHEN
WORD */
{
BYLOCALE
,
NODICT
},
/*
NONLATINPART
WORD */
{
BYLOCALE
,
NODICT
},
/*
CYRPARTHYPHEN
WORD */
{
DEFAULTDICT
,
NODICT
},
/* LATPARTWORD */
{
DEFAULTDICT
,
NODICT
},
/* LATPART
HYPHEN
WORD */
{
STOPLEXEM
,
NODICT
},
/* SPACE */
{
STOPLEXEM
,
NODICT
},
/* SPACE */
{
STOPLEXEM
,
NODICT
},
/*
SYM
TAG */
{
STOPLEXEM
,
NODICT
},
/* TAG */
{
STOPLEXEM
,
NODICT
},
/* HTTP */
{
STOPLEXEM
,
NODICT
},
/* HTTP */
{
BYLOCALE
,
DEFAULTDICT
},
/*
DEFIS
WORD */
{
BYLOCALE
,
DEFAULTDICT
},
/*
HYPHEN
WORD */
{
DEFAULTDICT
,
NODICT
},
/*
DEFISLAT
WORD */
{
DEFAULTDICT
,
NODICT
},
/*
LATHYPHEN
WORD */
{
BYLOCALE
,
NODICT
},
/*
DEFISNONLATI
NWORD */
{
BYLOCALE
,
NODICT
},
/*
CYRHYPHE
NWORD */
{
NODICT
,
NODICT
},
/* URI */
{
NODICT
,
NODICT
},
/* URI */
{
NODICT
,
NODICT
}
/* FILEPATH */
{
NODICT
,
NODICT
},
/* FILEPATH */
{
NODICT
,
NODICT
},
/* DECIMAL */
{
NODICT
,
NODICT
},
/* SIGNEDINT */
{
NODICT
,
NODICT
},
/* UNSIGNEDINT */
{
STOPLEXEM
,
NODICT
}
/* HTMLENTITY */
};
};
static
bool
inited
=
false
;
static
bool
inited
=
false
;
...
...
contrib/tsearch/parser.l
View file @
2860041b
...
@@ -14,9 +14,8 @@
...
@@ -14,9 +14,8 @@
#endif
#endif
#define strdup pstrdup
#define strdup pstrdup
char *token = NULL; /* pointer to token */
char *token = NULL; /* pointer to token */
char *s = NULL; /*
for returning full defis
-word */
char *s = NULL; /*
to return WHOLE hyphenated
-word */
YY_BUFFER_STATE buf = NULL; /* buffer to parse; it need for parse from string */
YY_BUFFER_STATE buf = NULL; /* buffer to parse; it need for parse from string */
...
@@ -57,21 +56,21 @@ int bytestoread = 0; /* for limiting read from filehandle */
...
@@ -57,21 +56,21 @@ int bytestoread = 0; /* for limiting read from filehandle */
%option nounput
%option nounput
%option noyywrap
%option noyywrap
/* parser's state for parsing hyphenated-word */
/* parser's state for parsing defis-word */
%x DELIM
%x DELIM
/* parser's state for parsing URL*/
/* parser's state for parsing URL*/
%x URL
%x URL
%x SERVER
%x SERVER
/* parser's state for parsing filepath */
/* parser's state for parsing TAGS */
%x INTAG
%x INTAG
%x QINTAG
%x QINTAG
%x INCOMMENT
%x INSCRIPT
/*
NONLATIN
char */
/*
cyrillic koi8
char */
NONLATIN
ALNUM [0-9\200-\377]
CYR
ALNUM [0-9\200-\377]
NONLATIN
ALPHA [\200-\377]
CYR
ALPHA [\200-\377]
ALPHA [a-zA-Z\200-\377]
ALPHA [a-zA-Z\200-\377]
ALNUM [0-9a-zA-Z\200-\377]
ALNUM [0-9a-zA-Z\200-\377]
...
@@ -81,89 +80,94 @@ URI [-_[:alnum:]/%,\.;=&?#]+
...
@@ -81,89 +80,94 @@ URI [-_[:alnum:]/%,\.;=&?#]+
%%
%%
"<"[[:alpha:]] { BEGIN INTAG;
"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
return SYMTAG;
}
"</"[[:alpha:]] { BEGIN INTAG;
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
return SYMTAG;
}
"<>" {
<INSCRIPT>"</"[Ss][Cc][Rr][Ii][Pp][Tt]">" {
BEGIN INITIAL;
*tsearch_yytext=' '; *(tsearch_yytext+1) = '\0';
token = tsearch_yytext;
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
tokenlen = tsearch_yyleng;
return S
YMTAG
;
return S
PACE
;
}
}
"<"[^>[:alpha:]] {
"<!--" { BEGIN INCOMMENT; }
<INCOMMENT>"-->" {
BEGIN INITIAL;
*tsearch_yytext=' '; *(tsearch_yytext+1) = '\0';
token = tsearch_yytext;
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
tokenlen = tsearch_yyleng;
return SPACE;
return SPACE;
}
}
<INTAG>"\"" { BEGIN QINTAG;
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
return SYMTAG;
}
<QINTAG>"\\\"" {
"<"[\![:alpha:]] { BEGIN INTAG; }
"</"[[:alpha:]] { BEGIN INTAG; }
<INTAG>"\"" { BEGIN QINTAG; }
<QINTAG>"\\\"" ;
<QINTAG>"\"" { BEGIN INTAG; }
<INTAG>">" {
BEGIN INITIAL;
token = tsearch_yytext;
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
*tsearch_yytext=' ';
return SYMTAG;
token = tsearch_yytext;
tokenlen = 1;
return TAG;
}
}
<QINTAG>"\"" { BEGIN INTAG;
<QINTAG,INTAG,INCOMMENT,INSCRIPT>.|\n ;
\&(quot|amp|nbsp|lt|gt)\; {
token = tsearch_yytext;
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
tokenlen = tsearch_yyleng;
return
SYMTAG
;
return
HTMLENTITY
;
}
}
<QINTAG>.|\n
{
\&\#[0-9][0-9]?[0-9]?\;
{
token = tsearch_yytext;
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
tokenlen = tsearch_yyleng;
return
SYMTAG
;
return
HTMLENTITY
;
}
}
<INTAG>">" { BEGIN INITIAL;
[-_\.[:alnum:]]+@{HOSTNAME} /* Emails */ {
token = tsearch_yytext;
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
tokenlen = tsearch_yyleng;
return
SYMTAG;
return
EMAIL;
}
}
<INTAG>.|\n {
[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+ /* float */ {
token = tsearch_yytext;
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
tokenlen = tsearch_yyleng;
return S
YMTAG;
return S
CIENTIFIC;
}
}
[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
[-_\.[:alnum:]]+@{HOSTNAME} /* Emails */ {
token = tsearch_yytext;
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
tokenlen = tsearch_yyleng;
return
EMAIL;
return
VERSIONNUMBER;
}
}
<DELIM,INITIAL>[0-9] /* digit's and point (might be a version) */ {
[+-]?[0-9]+\.[0-9]+ {
token = tsearch_yytext;
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
tokenlen = tsearch_yyleng;
return
FINT;
return
DECIMAL;
}
}
<DELIM,INITIAL>[0-9]+[0-9\.]*[0-9] /* digit's and point (might be a version) */
{
[+-][0-9]+
{
token = tsearch_yytext;
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
tokenlen = tsearch_yyleng;
return
F
INT;
return
SIGNED
INT;
}
}
[+-]?[0-9\.]+[eE][+-]?[0-9]+ /* float */
{
<DELIM,INITIAL>[0-9]+
{
token = tsearch_yytext;
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
tokenlen = tsearch_yyleng;
return
FLOA
T;
return
UNSIGNEDIN
T;
}
}
http"://" {
http"://" {
...
@@ -208,52 +212,58 @@ ftp"://" {
...
@@ -208,52 +212,58 @@ ftp"://" {
return FILEPATH;
return FILEPATH;
}
}
({
NONLATINALNUM}+-)+{NONLATIN
ALPHA}+ /* composite-word */ {
({
CYRALPHA}+-)+{CYR
ALPHA}+ /* composite-word */ {
BEGIN DELIM;
BEGIN DELIM;
if (s) { free(s); s=NULL; }
if (s) { free(s); s=NULL; }
s = strdup( tsearch_yytext );
s = strdup( tsearch_yytext );
tokenlen = tsearch_yyleng;
tokenlen = tsearch_yyleng;
yyless( 0 );
yyless( 0 );
token = s;
token = s;
return
DEFISNONLATI
NWORD;
return
CYRHYPHE
NWORD;
}
}
([[:al
num
:]]+-)+[[:alpha:]]+ /* composite-word */ {
([[:al
pha
:]]+-)+[[:alpha:]]+ /* composite-word */ {
BEGIN DELIM;
BEGIN DELIM;
if (s) { free(s); s=NULL; }
if (s) { free(s); s=NULL; }
tokenlen = tsearch_yyleng;
s = strdup( tsearch_yytext );
s = strdup( tsearch_yytext );
tokenlen = tsearch_yyleng;
yyless( 0 );
yyless( 0 );
token = s;
token = s;
return
DEFISLAT
WORD;
return
LATHYPHEN
WORD;
}
}
({ALNUM}+-)+{AL
PHA
}+ /* composite-word */ {
({ALNUM}+-)+{AL
NUM
}+ /* composite-word */ {
BEGIN DELIM;
BEGIN DELIM;
if (s) { free(s); s=NULL; }
if (s) { free(s); s=NULL; }
s = strdup( tsearch_yytext );
s = strdup( tsearch_yytext );
tokenlen = tsearch_yyleng;
tokenlen = tsearch_yyleng;
yyless( 0 );
yyless( 0 );
token = s;
token = s;
return DEFISWORD;
return HYPHENWORD;
}
<DELIM>\+?[0-9]+\.[0-9]+ {
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
return DECIMAL;
}
}
<DELIM>{
NONLATINALNUM
}+ /* one word in composite-word */ {
<DELIM>{
CYRALPHA
}+ /* one word in composite-word */ {
token = tsearch_yytext;
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
tokenlen = tsearch_yyleng;
return
NONLATINPART
WORD;
return
CYRPARTHYPHEN
WORD;
}
}
<DELIM>[[:al
num
:]]+ /* one word in composite-word */ {
<DELIM>[[:al
pha
:]]+ /* one word in composite-word */ {
token = tsearch_yytext;
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
tokenlen = tsearch_yyleng;
return LATPARTWORD;
return LATPART
HYPHEN
WORD;
}
}
<DELIM>{ALNUM}+ /* one word in composite-word */ {
<DELIM>{ALNUM}+ /* one word in composite-word */ {
token = tsearch_yytext;
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
tokenlen = tsearch_yyleng;
return PARTWORD;
return PART
HYPHEN
WORD;
}
}
<DELIM>- {
<DELIM>- {
...
@@ -264,17 +274,16 @@ ftp"://" {
...
@@ -264,17 +274,16 @@ ftp"://" {
<DELIM,SERVER,URL>.|\n /* return in basic state */ {
<DELIM,SERVER,URL>.|\n /* return in basic state */ {
BEGIN INITIAL;
BEGIN INITIAL;
tokenlen = tsearch_yyleng;
yyless( 0 );
yyless( 0 );
}
}
{
NONLATINALNUM
}+ /* normal word */ {
{
CYRALPHA
}+ /* normal word */ {
token = tsearch_yytext;
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
tokenlen = tsearch_yyleng;
return
NONLATIN
WORD;
return
CYR
WORD;
}
}
[[:al
num
:]]+ /* normal word */ {
[[:al
pha
:]]+ /* normal word */ {
token = tsearch_yytext;
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
tokenlen = tsearch_yyleng;
return LATWORD;
return LATWORD;
...
@@ -286,7 +295,13 @@ ftp"://" {
...
@@ -286,7 +295,13 @@ ftp"://" {
return UWORD;
return UWORD;
}
}
.|\n {
[ \r\n\t]+ {
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
return SPACE;
}
. {
token = tsearch_yytext;
token = tsearch_yytext;
tokenlen = tsearch_yyleng;
tokenlen = tsearch_yyleng;
return SPACE;
return SPACE;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment