Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
Postgres FD Implementation
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Abuhujair Javed
Postgres FD Implementation
Commits
7ac8a4be
Commit
7ac8a4be
authored
Dec 21, 2005
by
Teodor Sigaev
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Multibyte encodings support for ISpell dictionary
parent
e3b98527
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
232 additions
and
147 deletions
+232
-147
contrib/tsearch2/ispell/regis.c
contrib/tsearch2/ispell/regis.c
+80
-56
contrib/tsearch2/ispell/regis.h
contrib/tsearch2/ispell/regis.h
+4
-4
contrib/tsearch2/ispell/spell.c
contrib/tsearch2/ispell/spell.c
+145
-84
contrib/tsearch2/stopword.c
contrib/tsearch2/stopword.c
+2
-2
contrib/tsearch2/ts_locale.h
contrib/tsearch2/ts_locale.h
+1
-1
No files found.
contrib/tsearch2/ispell/regis.c
View file @
7ac8a4be
#include <stdio.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdlib.h>
#include <string.h>
#include <string.h>
#include <ctype.h>
#include "regis.h"
#include "regis.h"
#include "ts_locale.h"
#include "common.h"
#include "common.h"
int
bool
RS_isRegis
(
const
char
*
str
)
RS_isRegis
(
const
char
*
str
)
{
{
unsigned
char
*
ptr
=
(
unsigned
char
*
)
str
;
unsigned
char
*
ptr
=
(
unsigned
char
*
)
str
;
while
(
ptr
&&
*
ptr
)
while
(
ptr
&&
*
ptr
)
if
(
isalpha
(
*
ptr
)
||
*
ptr
==
'['
||
*
ptr
==
']'
||
*
ptr
==
'^'
)
if
(
t_isalpha
(
ptr
)
||
t_iseq
(
ptr
,
'['
)
||
t_iseq
(
ptr
,
']'
)
||
t_iseq
(
ptr
,
'^'
)
)
ptr
+
+
;
ptr
+
=
pg_mblen
(
ptr
)
;
else
else
return
0
;
return
false
;
return
1
;
return
true
;
}
}
#define RS_IN_ONEOF 1
#define RS_IN_ONEOF 1
...
@@ -38,34 +39,32 @@ newRegisNode(RegisNode * prev, int len)
...
@@ -38,34 +39,32 @@ newRegisNode(RegisNode * prev, int len)
return
ptr
;
return
ptr
;
}
}
int
void
RS_compile
(
Regis
*
r
,
int
issuffix
,
const
char
*
str
)
RS_compile
(
Regis
*
r
,
bool
issuffix
,
char
*
str
)
{
{
int
i
,
int
len
=
strlen
(
str
);
len
=
strlen
(
str
);
int
state
=
RS_IN_WAIT
;
int
state
=
RS_IN_WAIT
;
char
*
c
=
(
char
*
)
str
;
RegisNode
*
ptr
=
NULL
;
RegisNode
*
ptr
=
NULL
;
memset
(
r
,
0
,
sizeof
(
Regis
));
memset
(
r
,
0
,
sizeof
(
Regis
));
r
->
issuffix
=
(
issuffix
)
?
1
:
0
;
r
->
issuffix
=
(
issuffix
)
?
1
:
0
;
for
(
i
=
0
;
i
<
len
;
i
++
)
while
(
*
c
)
{
{
unsigned
char
c
=
*
(((
unsigned
char
*
)
str
)
+
i
);
if
(
state
==
RS_IN_WAIT
)
if
(
state
==
RS_IN_WAIT
)
{
{
if
(
isalpha
(
c
))
if
(
t_
isalpha
(
c
))
{
{
if
(
ptr
)
if
(
ptr
)
ptr
=
newRegisNode
(
ptr
,
len
);
ptr
=
newRegisNode
(
ptr
,
len
);
else
else
ptr
=
r
->
node
=
newRegisNode
(
NULL
,
len
);
ptr
=
r
->
node
=
newRegisNode
(
NULL
,
len
);
ptr
->
data
[
0
]
=
c
;
COPYCHAR
(
ptr
->
data
,
c
)
;
ptr
->
type
=
RSF_ONEOF
;
ptr
->
type
=
RSF_ONEOF
;
ptr
->
len
=
1
;
ptr
->
len
=
pg_mblen
(
c
)
;
}
}
else
if
(
c
==
'['
)
else
if
(
t_iseq
(
c
,
'['
)
)
{
{
if
(
ptr
)
if
(
ptr
)
ptr
=
newRegisNode
(
ptr
,
len
);
ptr
=
newRegisNode
(
ptr
,
len
);
...
@@ -75,38 +74,39 @@ RS_compile(Regis * r, int issuffix, const char *str)
...
@@ -75,38 +74,39 @@ RS_compile(Regis * r, int issuffix, const char *str)
state
=
RS_IN_ONEOF
;
state
=
RS_IN_ONEOF
;
}
}
else
else
ts_error
(
ERROR
,
"Error in regis: %s
at pos %d
\n
"
,
str
,
i
+
1
);
ts_error
(
ERROR
,
"Error in regis: %s
"
,
str
);
}
}
else
if
(
state
==
RS_IN_ONEOF
)
else
if
(
state
==
RS_IN_ONEOF
)
{
{
if
(
c
==
'^'
)
if
(
t_iseq
(
c
,
'^'
)
)
{
{
ptr
->
type
=
RSF_NONEOF
;
ptr
->
type
=
RSF_NONEOF
;
state
=
RS_IN_NONEOF
;
state
=
RS_IN_NONEOF
;
}
}
else
if
(
isalpha
(
c
))
else
if
(
t_
isalpha
(
c
))
{
{
ptr
->
data
[
0
]
=
c
;
COPYCHAR
(
ptr
->
data
,
c
)
;
ptr
->
len
=
1
;
ptr
->
len
=
pg_mblen
(
c
)
;
state
=
RS_IN_ONEOF_IN
;
state
=
RS_IN_ONEOF_IN
;
}
}
else
else
ts_error
(
ERROR
,
"Error in regis: %s
at pos %d
\n
"
,
str
,
i
+
1
);
ts_error
(
ERROR
,
"Error in regis: %s
"
,
str
);
}
}
else
if
(
state
==
RS_IN_ONEOF_IN
||
state
==
RS_IN_NONEOF
)
else
if
(
state
==
RS_IN_ONEOF_IN
||
state
==
RS_IN_NONEOF
)
{
{
if
(
isalpha
(
c
))
if
(
t_
isalpha
(
c
))
{
{
ptr
->
data
[
ptr
->
len
]
=
c
;
COPYCHAR
(
ptr
->
data
+
ptr
->
len
,
c
)
;
ptr
->
len
+
+
;
ptr
->
len
+
=
pg_mblen
(
c
)
;
}
}
else
if
(
c
==
']'
)
else
if
(
t_iseq
(
c
,
']'
)
)
state
=
RS_IN_WAIT
;
state
=
RS_IN_WAIT
;
else
else
ts_error
(
ERROR
,
"Error in regis: %s
at pos %d
\n
"
,
str
,
i
+
1
);
ts_error
(
ERROR
,
"Error in regis: %s
"
,
str
);
}
}
else
else
ts_error
(
ERROR
,
"Internal error in RS_compile: %d
\n
"
,
state
);
ts_error
(
ERROR
,
"Internal error in RS_compile: %d"
,
state
);
c
+=
pg_mblen
(
c
);
}
}
ptr
=
r
->
node
;
ptr
=
r
->
node
;
...
@@ -115,8 +115,6 @@ RS_compile(Regis * r, int issuffix, const char *str)
...
@@ -115,8 +115,6 @@ RS_compile(Regis * r, int issuffix, const char *str)
r
->
nchar
++
;
r
->
nchar
++
;
ptr
=
ptr
->
next
;
ptr
=
ptr
->
next
;
}
}
return
0
;
}
}
void
void
...
@@ -135,51 +133,77 @@ RS_free(Regis * r)
...
@@ -135,51 +133,77 @@ RS_free(Regis * r)
r
->
node
=
NULL
;
r
->
node
=
NULL
;
}
}
int
#ifdef TS_USE_WIDE
RS_execute
(
Regis
*
r
,
const
char
*
str
,
int
len
)
static
bool
mb_strchr
(
char
*
str
,
char
*
c
)
{
int
clen
=
pg_mblen
(
c
),
plen
,
i
;
char
*
ptr
=
str
;
bool
res
=
false
;
clen
=
pg_mblen
(
c
);
while
(
*
ptr
&&
!
res
)
{
plen
=
pg_mblen
(
ptr
);
if
(
plen
==
clen
)
{
i
=
plen
;
res
=
true
;
while
(
i
--
)
if
(
*
(
ptr
+
i
)
!=
*
(
c
+
i
)
)
{
res
=
false
;
break
;
}
}
ptr
+=
plen
;
}
return
res
;
}
#else
#define mb_strchr(s,c) ( (strchr((s),*(c)) == NULL) ? false : true )
#endif
bool
RS_execute
(
Regis
*
r
,
char
*
str
)
{
{
RegisNode
*
ptr
=
r
->
node
;
RegisNode
*
ptr
=
r
->
node
;
unsigned
char
*
c
;
char
*
c
=
str
;
int
len
=
0
;
if
(
len
<
0
)
while
(
*
c
)
{
len
=
strlen
(
str
);
len
++
;
c
+=
pg_mblen
(
c
);
}
if
(
len
<
r
->
nchar
)
if
(
len
<
r
->
nchar
)
return
0
;
return
0
;
if
(
r
->
issuffix
)
c
=
str
;
c
=
((
unsigned
char
*
)
str
)
+
len
-
r
->
nchar
;
if
(
r
->
issuffix
)
{
else
len
-=
r
->
nchar
;
c
=
(
unsigned
char
*
)
str
;
while
(
len
--
>
0
)
c
+=
pg_mblen
(
c
);
}
while
(
ptr
)
while
(
ptr
)
{
{
switch
(
ptr
->
type
)
switch
(
ptr
->
type
)
{
{
case
RSF_ONEOF
:
case
RSF_ONEOF
:
if
(
ptr
->
len
==
0
)
if
(
mb_strchr
((
char
*
)
ptr
->
data
,
c
)
!=
true
)
{
return
false
;
if
(
*
c
!=
*
(
ptr
->
data
))
return
0
;
}
else
if
(
strchr
((
char
*
)
ptr
->
data
,
*
c
)
==
NULL
)
return
0
;
break
;
break
;
case
RSF_NONEOF
:
case
RSF_NONEOF
:
if
(
ptr
->
len
==
0
)
if
(
mb_strchr
((
char
*
)
ptr
->
data
,
c
)
==
true
)
{
return
false
;
if
(
*
c
==
*
(
ptr
->
data
))
return
0
;
}
else
if
(
strchr
((
char
*
)
ptr
->
data
,
*
c
)
!=
NULL
)
return
0
;
break
;
break
;
default:
default:
ts_error
(
ERROR
,
"RS_execute: Unknown type node: %d
\n
"
,
ptr
->
type
);
ts_error
(
ERROR
,
"RS_execute: Unknown type node: %d
\n
"
,
ptr
->
type
);
}
}
ptr
=
ptr
->
next
;
ptr
=
ptr
->
next
;
c
+
+
;
c
+
=
pg_mblen
(
c
)
;
}
}
return
1
;
return
true
;
}
}
contrib/tsearch2/ispell/regis.h
View file @
7ac8a4be
...
@@ -27,12 +27,12 @@ typedef struct Regis
...
@@ -27,12 +27,12 @@ typedef struct Regis
unused:
15
;
unused:
15
;
}
Regis
;
}
Regis
;
int
RS_isRegis
(
const
char
*
str
);
bool
RS_isRegis
(
const
char
*
str
);
int
RS_compile
(
Regis
*
r
,
int
issuffix
,
const
char
*
str
);
void
RS_compile
(
Regis
*
r
,
bool
issuffix
,
char
*
str
);
void
RS_free
(
Regis
*
r
);
void
RS_free
(
Regis
*
r
);
/*
1
*/
/*
returns true if matches
*/
int
RS_execute
(
Regis
*
r
,
const
char
*
str
,
int
len
);
bool
RS_execute
(
Regis
*
r
,
char
*
str
);
#endif
#endif
contrib/tsearch2/ispell/spell.c
View file @
7ac8a4be
...
@@ -6,6 +6,7 @@
...
@@ -6,6 +6,7 @@
#include "postgres.h"
#include "postgres.h"
#include "spell.h"
#include "spell.h"
#include "common.h"
#include "ts_locale.h"
#include "ts_locale.h"
#define MAX_NORM 1024
#define MAX_NORM 1024
...
@@ -13,7 +14,7 @@
...
@@ -13,7 +14,7 @@
#define ERRSTRSIZE 1024
#define ERRSTRSIZE 1024
#define STRNC
ASECMP(x,y) pg_strncasecmp(x, y, strlen(y)
)
#define STRNC
MP(s,p) strncmp( (s), (p), strlen(p)
)
#define GETWCHAR(W,L,N,T) ( ((uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] )
#define GETWCHAR(W,L,N,T) ( ((uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] )
#define GETCHAR(A,N,T) GETWCHAR( (A)->repl, (A)->replen, N, T )
#define GETCHAR(A,N,T) GETWCHAR( (A)->repl, (A)->replen, N, T )
...
@@ -41,6 +42,18 @@ strnduplicate(char *s, int len)
...
@@ -41,6 +42,18 @@ strnduplicate(char *s, int len)
return
d
;
return
d
;
}
}
static
char
*
findchar
(
char
*
str
,
int
c
)
{
while
(
*
str
)
{
if
(
t_iseq
(
str
,
c
)
)
return
str
;
str
+=
pg_mblen
(
str
);
}
return
NULL
;
}
/* backward string compare for suffix tree operations */
/* backward string compare for suffix tree operations */
static
int
static
int
strbcmp
(
const
unsigned
char
*
s1
,
const
unsigned
char
*
s2
)
strbcmp
(
const
unsigned
char
*
s1
,
const
unsigned
char
*
s2
)
...
@@ -145,15 +158,17 @@ NIImportDictionary(IspellDict * Conf, const char *filename)
...
@@ -145,15 +158,17 @@ NIImportDictionary(IspellDict * Conf, const char *filename)
char
*
s
;
char
*
s
;
const
char
*
flag
;
const
char
*
flag
;
pg_verifymbstr
(
str
,
strlen
(
str
),
false
);
flag
=
NULL
;
flag
=
NULL
;
if
((
s
=
strch
r
(
str
,
'/'
)))
if
((
s
=
findcha
r
(
str
,
'/'
)))
{
{
*
s
++
=
'\0'
;
*
s
++
=
'\0'
;
flag
=
s
;
flag
=
s
;
while
(
*
s
)
while
(
*
s
)
{
{
if
(
isprint
((
unsigned
char
)
*
s
)
&&
/* we allow only single encoded flags for faster works */
!
isspace
((
unsigned
char
)
*
s
))
if
(
pg_mblen
(
s
)
==
1
&&
t_isprint
(
s
)
&&
!
t_isspace
(
s
))
s
++
;
s
++
;
else
else
{
{
...
@@ -164,16 +179,19 @@ NIImportDictionary(IspellDict * Conf, const char *filename)
...
@@ -164,16 +179,19 @@ NIImportDictionary(IspellDict * Conf, const char *filename)
}
}
else
else
flag
=
""
;
flag
=
""
;
lowerstr
(
str
);
/* Dont load words if first letter is not required */
/* It allows to optimize loading at search time */
s
=
str
;
s
=
str
;
while
(
*
s
)
while
(
*
s
)
{
{
if
(
*
s
==
'\r'
||
*
s
==
'\n'
)
if
(
t_isspace
(
s
))
{
*
s
=
'\0'
;
*
s
=
'\0'
;
s
++
;
break
;
}
s
+=
pg_mblen
(
s
);
}
}
lowerstr
(
str
);
NIAddSpell
(
Conf
,
str
,
flag
);
NIAddSpell
(
Conf
,
str
,
flag
);
}
}
fclose
(
dict
);
fclose
(
dict
);
...
@@ -253,9 +271,10 @@ NIAddAffix(IspellDict * Conf, int flag, char flagflags, const char *mask, const
...
@@ -253,9 +271,10 @@ NIAddAffix(IspellDict * Conf, int flag, char flagflags, const char *mask, const
}
}
else
else
{
{
int
masklen
=
strlen
(
mask
);
Conf
->
Affix
[
Conf
->
naffixes
].
issimple
=
0
;
Conf
->
Affix
[
Conf
->
naffixes
].
issimple
=
0
;
Conf
->
Affix
[
Conf
->
naffixes
].
isregis
=
0
;
Conf
->
Affix
[
Conf
->
naffixes
].
isregis
=
0
;
Conf
->
Affix
[
Conf
->
naffixes
].
mask
=
(
char
*
)
malloc
(
strlen
(
mask
)
+
2
);
Conf
->
Affix
[
Conf
->
naffixes
].
mask
=
(
char
*
)
malloc
(
masklen
+
2
);
if
(
type
==
FF_SUFFIX
)
if
(
type
==
FF_SUFFIX
)
sprintf
(
Conf
->
Affix
[
Conf
->
naffixes
].
mask
,
"%s$"
,
mask
);
sprintf
(
Conf
->
Affix
[
Conf
->
naffixes
].
mask
,
"%s$"
,
mask
);
else
else
...
@@ -277,37 +296,93 @@ NIAddAffix(IspellDict * Conf, int flag, char flagflags, const char *mask, const
...
@@ -277,37 +296,93 @@ NIAddAffix(IspellDict * Conf, int flag, char flagflags, const char *mask, const
return
(
0
);
return
(
0
);
}
}
static
char
*
#define PAE_WAIT_MASK 0
remove_spaces
(
char
*
dist
,
char
*
src
)
#define PAE_INMASK 1
{
#define PAE_WAIT_FIND 2
char
*
d
,
#define PAE_INFIND 3
*
s
;
#define PAE_WAIT_REPL 4
#define PAE_INREPL 5
static
bool
parse_affentry
(
char
*
str
,
char
*
mask
,
char
*
find
,
char
*
repl
)
{
int
state
=
PAE_WAIT_MASK
;
char
*
pmask
=
mask
,
*
pfind
=
find
,
*
prepl
=
repl
;
*
mask
=
*
find
=
*
repl
=
'\0'
;
while
(
*
str
)
{
if
(
state
==
PAE_WAIT_MASK
)
{
if
(
t_iseq
(
str
,
'#'
)
)
return
false
;
else
if
(
!
t_isspace
(
str
))
{
COPYCHAR
(
pmask
,
str
);
pmask
+=
pg_mblen
(
str
);
state
=
PAE_INMASK
;
}
}
else
if
(
state
==
PAE_INMASK
)
{
if
(
t_iseq
(
str
,
'>'
)
)
{
*
pmask
=
'\0'
;
state
=
PAE_WAIT_FIND
;
}
else
if
(
!
t_isspace
(
str
))
{
COPYCHAR
(
pmask
,
str
);
pmask
+=
pg_mblen
(
str
);
}
}
else
if
(
state
==
PAE_WAIT_FIND
)
{
if
(
t_iseq
(
str
,
'-'
)
)
{
state
=
PAE_INFIND
;
}
else
if
(
t_isalpha
(
str
))
{
COPYCHAR
(
prepl
,
str
);
prepl
+=
pg_mblen
(
str
);
state
=
PAE_INREPL
;
}
else
if
(
!
t_isspace
(
str
))
ts_error
(
ERROR
,
"Affix parse error"
);
}
else
if
(
state
==
PAE_INFIND
)
{
if
(
t_iseq
(
str
,
','
)
)
{
*
pfind
=
'\0'
;
state
=
PAE_WAIT_REPL
;
}
else
if
(
t_isalpha
(
str
))
{
COPYCHAR
(
pfind
,
str
);
pfind
+=
pg_mblen
(
str
);
}
else
if
(
!
t_isspace
(
str
))
ts_error
(
ERROR
,
"Affix parse error"
);
}
else
if
(
state
==
PAE_WAIT_REPL
)
{
if
(
t_iseq
(
str
,
'-'
)
)
{
break
;
/* void repl */
}
else
if
(
t_isalpha
(
str
)
)
{
COPYCHAR
(
prepl
,
str
);
prepl
+=
pg_mblen
(
str
);
state
=
PAE_INREPL
;
}
else
if
(
!
t_isspace
(
str
))
ts_error
(
ERROR
,
"Affix parse error"
);
}
else
if
(
state
==
PAE_INREPL
)
{
if
(
t_iseq
(
str
,
'#'
)
)
{
*
prepl
=
'\0'
;
break
;
}
else
if
(
t_isalpha
(
str
)
)
{
COPYCHAR
(
prepl
,
str
);
prepl
+=
pg_mblen
(
str
);
}
else
if
(
!
t_isspace
(
str
))
ts_error
(
ERROR
,
"Affix parse error"
);
}
else
ts_error
(
ERROR
,
"Unknown state in parse_affentry: %d"
,
state
);
d
=
dist
;
str
+=
pg_mblen
(
str
);
s
=
src
;
while
(
*
s
)
{
if
(
*
s
!=
' '
&&
*
s
!=
'-'
&&
*
s
!=
'\t'
)
{
*
d
=
*
s
;
d
++
;
}
s
++
;
}
}
*
d
=
0
;
return
(
dist
);
}
*
pmask
=
*
pfind
=
*
prepl
=
'\0'
;
return
(
*
mask
&&
(
*
find
||
*
repl
)
)
?
true
:
false
;
}
int
int
NIImportAffixes
(
IspellDict
*
Conf
,
const
char
*
filename
)
NIImportAffixes
(
IspellDict
*
Conf
,
const
char
*
filename
)
{
{
char
str
[
BUFSIZ
];
char
str
[
BUFSIZ
];
char
tmpstr
[
BUFSIZ
];
char
mask
[
BUFSIZ
];
char
mask
[
BUFSIZ
];
char
find
[
BUFSIZ
];
char
find
[
BUFSIZ
];
char
repl
[
BUFSIZ
];
char
repl
[
BUFSIZ
];
char
*
s
;
char
*
s
;
int
i
;
int
suffixes
=
0
;
int
suffixes
=
0
;
int
prefixes
=
0
;
int
prefixes
=
0
;
int
flag
=
0
;
int
flag
=
0
;
...
@@ -320,37 +395,45 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
...
@@ -320,37 +395,45 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
while
(
fgets
(
str
,
sizeof
(
str
),
affix
))
while
(
fgets
(
str
,
sizeof
(
str
),
affix
))
{
{
if
(
STRNCASECMP
(
str
,
"compoundwords"
)
==
0
)
pg_verifymbstr
(
str
,
strlen
(
str
),
false
);
memcpy
(
tmpstr
,
str
,
32
);
/* compoundwords... */
tmpstr
[
32
]
=
'\0'
;
lowerstr
(
tmpstr
);
if
(
STRNCMP
(
tmpstr
,
"compoundwords"
)
==
0
)
{
{
s
=
strch
r
(
str
,
'l'
);
s
=
findcha
r
(
str
,
'l'
);
if
(
s
)
if
(
s
)
{
{
while
(
*
s
!=
' '
)
while
(
*
s
&&
!
t_isspace
(
s
))
s
++
;
s
++
;
while
(
*
s
&&
t_isspace
(
s
))
s
++
;
while
(
*
s
==
' '
)
if
(
*
s
&&
pg_mblen
(
s
)
==
1
)
s
++
;
Conf
->
compoundcontrol
=
*
s
;
Conf
->
compoundcontrol
=
*
s
;
continue
;
continue
;
}
}
}
}
if
(
STRNC
ASECMP
(
str
,
"suffixes"
)
==
0
)
if
(
STRNC
MP
(
tmp
str
,
"suffixes"
)
==
0
)
{
{
suffixes
=
1
;
suffixes
=
1
;
prefixes
=
0
;
prefixes
=
0
;
continue
;
continue
;
}
}
if
(
STRNC
ASECMP
(
str
,
"prefixes"
)
==
0
)
if
(
STRNC
MP
(
tmp
str
,
"prefixes"
)
==
0
)
{
{
suffixes
=
0
;
suffixes
=
0
;
prefixes
=
1
;
prefixes
=
1
;
continue
;
continue
;
}
}
if
(
STRNC
ASECMP
(
str
,
"flag
"
)
==
0
)
if
(
STRNC
MP
(
tmpstr
,
"flag
"
)
==
0
)
{
{
s
=
str
+
5
;
s
=
str
+
4
;
flagflags
=
0
;
flagflags
=
0
;
while
(
*
s
==
' '
)
s
++
;
while
(
*
s
&&
t_isspace
(
s
))
s
++
;
/* allow only single-encoded flags */
if
(
pg_mblen
(
s
)
!=
1
)
continue
;
if
(
*
s
==
'*'
)
if
(
*
s
==
'*'
)
{
{
flagflags
|=
FF_CROSSPRODUCT
;
flagflags
|=
FF_CROSSPRODUCT
;
...
@@ -365,43 +448,23 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
...
@@ -365,43 +448,23 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
if
(
*
s
==
'\\'
)
if
(
*
s
==
'\\'
)
s
++
;
s
++
;
/* allow only single-encoded flags */
if
(
pg_mblen
(
s
)
!=
1
)
{
flagflags
=
0
;
continue
;
}
flag
=
(
unsigned
char
)
*
s
;
flag
=
(
unsigned
char
)
*
s
;
continue
;
continue
;
}
}
if
((
!
suffixes
)
&&
(
!
prefixes
))
if
((
!
suffixes
)
&&
(
!
prefixes
))
continue
;
continue
;
if
((
s
=
strchr
(
str
,
'#'
)))
*
s
=
0
;
if
(
!*
str
)
continue
;
lowerstr
(
str
);
lowerstr
(
str
);
strcpy
(
mask
,
""
);
if
(
!
parse_affentry
(
str
,
mask
,
find
,
repl
)
)
strcpy
(
find
,
""
);
strcpy
(
repl
,
""
);
i
=
sscanf
(
str
,
"%[^>
\n
]>%[^,
\n
],%[^
\n
]"
,
mask
,
find
,
repl
);
remove_spaces
(
str
,
repl
);
strcpy
(
repl
,
str
);
remove_spaces
(
str
,
find
);
strcpy
(
find
,
str
);
remove_spaces
(
str
,
mask
);
strcpy
(
mask
,
str
);
switch
(
i
)
{
case
3
:
break
;
case
2
:
if
(
*
find
!=
'\0'
)
{
strcpy
(
repl
,
find
);
strcpy
(
find
,
""
);
}
break
;
default:
continue
;
continue
;
}
NIAddAffix
(
Conf
,
flag
,
flagflags
,
mask
,
find
,
repl
,
suffixes
?
FF_SUFFIX
:
FF_PREFIX
);
NIAddAffix
(
Conf
,
flag
,
flagflags
,
mask
,
find
,
repl
,
suffixes
?
FF_SUFFIX
:
FF_PREFIX
);
}
}
fclose
(
affix
);
fclose
(
affix
);
...
@@ -768,30 +831,28 @@ CheckAffix(const char *word, size_t len, AFFIX * Affix, char flagflags, char *ne
...
@@ -768,30 +831,28 @@ CheckAffix(const char *word, size_t len, AFFIX * Affix, char flagflags, char *ne
{
{
if
(
Affix
->
compile
)
if
(
Affix
->
compile
)
{
{
RS_compile
(
&
(
Affix
->
reg
.
regis
),
(
Affix
->
type
==
FF_SUFFIX
)
?
1
:
0
,
Affix
->
mask
);
RS_compile
(
&
(
Affix
->
reg
.
regis
),
(
Affix
->
type
==
FF_SUFFIX
)
?
true
:
false
,
Affix
->
mask
);
Affix
->
compile
=
0
;
Affix
->
compile
=
0
;
}
}
if
(
RS_execute
(
&
(
Affix
->
reg
.
regis
),
newword
,
-
1
))
if
(
RS_execute
(
&
(
Affix
->
reg
.
regis
),
newword
))
return
newword
;
return
newword
;
}
}
else
else
{
{
regmatch_t
subs
[
2
];
/* workaround for apache&linux */
int
err
;
int
err
;
pg_wchar
*
data
;
pg_wchar
*
data
;
size_t
data_len
;
size_t
data_len
;
int
dat
_len
;
int
newword
_len
;
if
(
Affix
->
compile
)
if
(
Affix
->
compile
)
{
{
int
wmasklen
,
int
wmasklen
,
masklen
=
strlen
(
Affix
->
mask
);
masklen
=
strlen
(
Affix
->
mask
);
pg_wchar
*
mask
;
pg_wchar
*
mask
;
mask
=
(
pg_wchar
*
)
palloc
((
masklen
+
1
)
*
sizeof
(
pg_wchar
));
mask
=
(
pg_wchar
*
)
palloc
((
masklen
+
1
)
*
sizeof
(
pg_wchar
));
wmasklen
=
pg_mb2wchar_with_len
(
Affix
->
mask
,
mask
,
masklen
);
wmasklen
=
pg_mb2wchar_with_len
(
Affix
->
mask
,
mask
,
masklen
);
err
=
pg_regcomp
(
&
(
Affix
->
reg
.
regex
),
mask
,
wmasklen
,
REG_
EXTENDED
|
REG_ICASE
|
REG_NOSUB
);
err
=
pg_regcomp
(
&
(
Affix
->
reg
.
regex
),
mask
,
wmasklen
,
REG_
ADVANCED
|
REG_NOSUB
);
pfree
(
mask
);
pfree
(
mask
);
if
(
err
)
if
(
err
)
{
{
...
@@ -804,11 +865,11 @@ CheckAffix(const char *word, size_t len, AFFIX * Affix, char flagflags, char *ne
...
@@ -804,11 +865,11 @@ CheckAffix(const char *word, size_t len, AFFIX * Affix, char flagflags, char *ne
}
}
/* Convert data string to wide characters */
/* Convert data string to wide characters */
dat
_len
=
strlen
(
newword
);
newword
_len
=
strlen
(
newword
);
data
=
(
pg_wchar
*
)
palloc
((
dat
_len
+
1
)
*
sizeof
(
pg_wchar
));
data
=
(
pg_wchar
*
)
palloc
((
newword
_len
+
1
)
*
sizeof
(
pg_wchar
));
data_len
=
pg_mb2wchar_with_len
(
newword
,
data
,
dat
_len
);
data_len
=
pg_mb2wchar_with_len
(
newword
,
data
,
newword
_len
);
if
(
!
(
err
=
pg_regexec
(
&
(
Affix
->
reg
.
regex
),
data
,
dat
_len
,
0
,
NULL
,
1
,
subs
,
0
)))
if
(
!
(
err
=
pg_regexec
(
&
(
Affix
->
reg
.
regex
),
data
,
dat
a_len
,
0
,
NULL
,
0
,
NULL
,
0
)))
{
{
pfree
(
data
);
pfree
(
data
);
return
newword
;
return
newword
;
...
...
contrib/tsearch2/stopword.c
View file @
7ac8a4be
...
@@ -4,8 +4,6 @@
...
@@ -4,8 +4,6 @@
*/
*/
#include "postgres.h"
#include "postgres.h"
#include <ctype.h>
#include "miscadmin.h"
#include "miscadmin.h"
#include "common.h"
#include "common.h"
...
@@ -71,6 +69,8 @@ readstoplist(text *in, StopList * s)
...
@@ -71,6 +69,8 @@ readstoplist(text *in, StopList * s)
while
(
fgets
(
buf
,
STOPBUFLEN
,
hin
))
while
(
fgets
(
buf
,
STOPBUFLEN
,
hin
))
{
{
buf
[
strlen
(
buf
)
-
1
]
=
'\0'
;
buf
[
strlen
(
buf
)
-
1
]
=
'\0'
;
pg_verifymbstr
(
buf
,
strlen
(
buf
),
false
);
lowerstr
(
buf
);
if
(
*
buf
==
'\0'
)
if
(
*
buf
==
'\0'
)
continue
;
continue
;
...
...
contrib/tsearch2/ts_locale.h
View file @
7ac8a4be
...
@@ -57,7 +57,7 @@ int _t_isprint( char *ptr );
...
@@ -57,7 +57,7 @@ int _t_isprint( char *ptr );
int lll = pg_mblen( s ); \
int lll = pg_mblen( s ); \
\
\
while( lll-- ) \
while( lll-- ) \
TOUCHAR(
d+lll) = TOUCHAR(s
+lll); \
TOUCHAR(
(d)+lll) = TOUCHAR((s)
+lll); \
} while(0)
} while(0)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment