Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
Postgres FD Implementation
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Abuhujair Javed
Postgres FD Implementation
Commits
c63c1946
Commit
c63c1946
authored
Nov 17, 2003
by
Teodor Sigaev
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Optimize. Improve ispell support for compound words. This work was sponsored by ABC Startsiden AS.
parent
6a04c571
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
706 additions
and
294 deletions
+706
-294
contrib/tsearch2/dict_ispell.c
contrib/tsearch2/dict_ispell.c
+6
-6
contrib/tsearch2/ispell/spell.c
contrib/tsearch2/ispell/spell.c
+629
-276
contrib/tsearch2/ispell/spell.h
contrib/tsearch2/ispell/spell.h
+71
-12
No files found.
contrib/tsearch2/dict_ispell.c
View file @
c63c1946
...
@@ -27,7 +27,7 @@ Datum spell_lexize(PG_FUNCTION_ARGS);
...
@@ -27,7 +27,7 @@ Datum spell_lexize(PG_FUNCTION_ARGS);
static
void
static
void
freeDictISpell
(
DictISpell
*
d
)
freeDictISpell
(
DictISpell
*
d
)
{
{
FreeIspell
(
&
(
d
->
obj
));
NIFree
(
&
(
d
->
obj
));
freestoplist
(
&
(
d
->
stoplist
));
freestoplist
(
&
(
d
->
stoplist
));
free
(
d
);
free
(
d
);
}
}
...
@@ -71,7 +71,7 @@ spell_init(PG_FUNCTION_ARGS)
...
@@ -71,7 +71,7 @@ spell_init(PG_FUNCTION_ARGS)
(
errcode
(
ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE
),
(
errcode
(
ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE
),
errmsg
(
"dictionary already loaded"
)));
errmsg
(
"dictionary already loaded"
)));
}
}
if
(
ImportDictionary
(
&
(
d
->
obj
),
pcfg
->
value
))
if
(
NI
ImportDictionary
(
&
(
d
->
obj
),
pcfg
->
value
))
{
{
freeDictISpell
(
d
);
freeDictISpell
(
d
);
ereport
(
ERROR
,
ereport
(
ERROR
,
...
@@ -90,7 +90,7 @@ spell_init(PG_FUNCTION_ARGS)
...
@@ -90,7 +90,7 @@ spell_init(PG_FUNCTION_ARGS)
(
errcode
(
ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE
),
(
errcode
(
ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE
),
errmsg
(
"affixes already loaded"
)));
errmsg
(
"affixes already loaded"
)));
}
}
if
(
ImportAffixes
(
&
(
d
->
obj
),
pcfg
->
value
))
if
(
NI
ImportAffixes
(
&
(
d
->
obj
),
pcfg
->
value
))
{
{
freeDictISpell
(
d
);
freeDictISpell
(
d
);
ereport
(
ERROR
,
ereport
(
ERROR
,
...
@@ -132,8 +132,8 @@ spell_init(PG_FUNCTION_ARGS)
...
@@ -132,8 +132,8 @@ spell_init(PG_FUNCTION_ARGS)
if
(
affloaded
&&
dictloaded
)
if
(
affloaded
&&
dictloaded
)
{
{
SortDictionary
(
&
(
d
->
obj
));
NI
SortDictionary
(
&
(
d
->
obj
));
SortAffixes
(
&
(
d
->
obj
));
NI
SortAffixes
(
&
(
d
->
obj
));
}
}
else
if
(
!
affloaded
)
else
if
(
!
affloaded
)
{
{
...
@@ -168,7 +168,7 @@ spell_lexize(PG_FUNCTION_ARGS)
...
@@ -168,7 +168,7 @@ spell_lexize(PG_FUNCTION_ARGS)
res
=
palloc
(
sizeof
(
char
*
)
*
2
);
res
=
palloc
(
sizeof
(
char
*
)
*
2
);
txt
=
pnstrdup
(
in
,
PG_GETARG_INT32
(
2
));
txt
=
pnstrdup
(
in
,
PG_GETARG_INT32
(
2
));
res
=
NormalizeWord
(
&
(
d
->
obj
),
txt
);
res
=
N
IN
ormalizeWord
(
&
(
d
->
obj
),
txt
);
pfree
(
txt
);
pfree
(
txt
);
if
(
res
==
NULL
)
if
(
res
==
NULL
)
...
...
contrib/tsearch2/ispell/spell.c
View file @
c63c1946
...
@@ -7,15 +7,26 @@
...
@@ -7,15 +7,26 @@
#include "spell.h"
#include "spell.h"
#define MAXNORMLEN 56
#define MAX_NORM 1024
#define MAXNORMLEN 256
#define STRNCASECMP(x,y) (strncasecmp(x,y,strlen(y)))
#define STRNCASECMP(x,y) (strncasecmp(x,y,strlen(y)))
#define GETWCHAR(W,L,N,T) ( ((u_int8_t*)(W))[ ((T)=='p') ? (N) : ( (L) - 1 - (N) ) ] )
#define GETCHAR(A,N,T) GETWCHAR( (A)->repl, (A)->replen, N, T )
#define MEMOUT(X) if ( !(X) ) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory")))
static
int
static
int
cmpspell
(
const
void
*
s1
,
const
void
*
s2
)
cmpspell
(
const
void
*
s1
,
const
void
*
s2
)
{
{
return
(
strcmp
(((
const
SPELL
*
)
s1
)
->
word
,
((
const
SPELL
*
)
s2
)
->
word
));
return
(
strcmp
(((
const
SPELL
*
)
s1
)
->
word
,
((
const
SPELL
*
)
s2
)
->
word
));
}
}
static
int
cmpspellaffix
(
const
void
*
s1
,
const
void
*
s2
)
{
return
(
strcmp
(((
const
SPELL
*
)
s1
)
->
p
.
flag
,
((
const
SPELL
*
)
s2
)
->
p
.
flag
));
}
static
void
static
void
strlower
(
char
*
str
)
strlower
(
char
*
str
)
...
@@ -29,6 +40,13 @@ strlower(char *str)
...
@@ -29,6 +40,13 @@ strlower(char *str)
}
}
}
}
static
char
*
strndup
(
char
*
s
,
int
len
)
{
char
*
d
=
(
char
*
)
palloc
(
len
+
1
);
memcpy
(
d
,
s
,
len
);
d
[
len
]
=
'\0'
;
return
d
;
}
/* backward string compaire for suffix tree operations */
/* backward string compaire for suffix tree operations */
static
int
static
int
strbcmp
(
const
char
*
s1
,
const
char
*
s2
)
strbcmp
(
const
char
*
s1
,
const
char
*
s2
)
...
@@ -92,7 +110,7 @@ cmpaffix(const void *s1, const void *s2)
...
@@ -92,7 +110,7 @@ cmpaffix(const void *s1, const void *s2)
}
}
int
int
AddSpell
(
IspellDict
*
Conf
,
const
char
*
word
,
const
char
*
flag
)
NI
AddSpell
(
IspellDict
*
Conf
,
const
char
*
word
,
const
char
*
flag
)
{
{
if
(
Conf
->
nspell
>=
Conf
->
mspell
)
if
(
Conf
->
nspell
>=
Conf
->
mspell
)
{
{
...
@@ -106,24 +124,18 @@ AddSpell(IspellDict * Conf, const char *word, const char *flag)
...
@@ -106,24 +124,18 @@ AddSpell(IspellDict * Conf, const char *word, const char *flag)
Conf
->
mspell
=
1024
*
20
;
Conf
->
mspell
=
1024
*
20
;
Conf
->
Spell
=
(
SPELL
*
)
malloc
(
Conf
->
mspell
*
sizeof
(
SPELL
));
Conf
->
Spell
=
(
SPELL
*
)
malloc
(
Conf
->
mspell
*
sizeof
(
SPELL
));
}
}
if
(
Conf
->
Spell
==
NULL
)
MEMOUT
(
Conf
->
Spell
);
ereport
(
ERROR
,
(
errcode
(
ERRCODE_OUT_OF_MEMORY
),
errmsg
(
"out of memory"
)));
}
}
Conf
->
Spell
[
Conf
->
nspell
].
word
=
strdup
(
word
);
Conf
->
Spell
[
Conf
->
nspell
].
word
=
strdup
(
word
);
if
(
!
Conf
->
Spell
[
Conf
->
nspell
].
word
)
MEMOUT
(
Conf
->
Spell
[
Conf
->
nspell
].
word
);
ereport
(
ERROR
,
strncpy
(
Conf
->
Spell
[
Conf
->
nspell
].
p
.
flag
,
flag
,
16
);
(
errcode
(
ERRCODE_OUT_OF_MEMORY
),
errmsg
(
"out of memory"
)));
strncpy
(
Conf
->
Spell
[
Conf
->
nspell
].
flag
,
flag
,
10
);
Conf
->
nspell
++
;
Conf
->
nspell
++
;
return
(
0
);
return
(
0
);
}
}
int
int
ImportDictionary
(
IspellDict
*
Conf
,
const
char
*
filename
)
NI
ImportDictionary
(
IspellDict
*
Conf
,
const
char
*
filename
)
{
{
unsigned
char
str
[
BUFSIZ
];
unsigned
char
str
[
BUFSIZ
];
FILE
*
dict
;
FILE
*
dict
;
...
@@ -143,7 +155,7 @@ ImportDictionary(IspellDict * Conf, const char *filename)
...
@@ -143,7 +155,7 @@ ImportDictionary(IspellDict * Conf, const char *filename)
flag
=
s
;
flag
=
s
;
while
(
*
s
)
while
(
*
s
)
{
{
if
(
((
*
s
>=
'A'
)
&&
(
*
s
<=
'Z'
))
||
((
*
s
>=
'a'
)
&&
(
*
s
<=
'z'
)
))
if
(
isprint
(
*
s
)
&&
!
isspace
(
*
s
))
s
++
;
s
++
;
else
else
{
{
...
@@ -166,65 +178,49 @@ ImportDictionary(IspellDict * Conf, const char *filename)
...
@@ -166,65 +178,49 @@ ImportDictionary(IspellDict * Conf, const char *filename)
*
s
=
0
;
*
s
=
0
;
s
++
;
s
++
;
}
}
AddSpell
(
Conf
,
str
,
flag
);
NI
AddSpell
(
Conf
,
str
,
flag
);
}
}
fclose
(
dict
);
fclose
(
dict
);
return
(
0
);
return
(
0
);
}
}
static
SPELL
*
static
int
FindWord
(
IspellDict
*
Conf
,
const
char
*
word
,
int
affixflag
)
FindWord
(
IspellDict
*
Conf
,
const
char
*
word
,
int
affixflag
,
char
compoundonly
)
{
{
int
l
,
SPNode
*
node
=
Conf
->
Dictionary
;
c
,
SPNodeData
*
StopLow
,
*
StopHigh
,
*
StopMiddle
;
r
,
int
level
=
0
,
wrdlen
=
strlen
(
word
);
resc
,
resl
,
while
(
node
&&
level
<
wrdlen
)
{
resr
,
StopLow
=
node
->
data
;
i
;
StopHigh
=
node
->
data
+
node
->
length
;
while
(
StopLow
<
StopHigh
)
{
i
=
(
int
)
(
*
word
)
&
255
;
StopMiddle
=
StopLow
+
(
StopHigh
-
StopLow
)
/
2
;
l
=
Conf
->
SpellTree
.
Left
[
i
];
if
(
StopMiddle
->
val
==
((
u_int8_t
*
)(
word
))[
level
]
)
{
r
=
Conf
->
SpellTree
.
Right
[
i
];
if
(
wrdlen
==
level
+
1
&&
StopMiddle
->
isword
)
{
if
(
l
==
-
1
)
if
(
compoundonly
&&
!
StopMiddle
->
compoundallow
)
return
(
NULL
);
return
0
;
while
(
l
<=
r
)
if
(
(
affixflag
==
0
)
||
(
strchr
(
Conf
->
AffixData
[
StopMiddle
->
affix
],
affixflag
)
!=
NULL
))
{
return
1
;
c
=
(
l
+
r
)
>>
1
;
}
resc
=
strcmp
(
Conf
->
Spell
[
c
].
word
,
word
);
node
=
StopMiddle
->
node
;
if
((
resc
==
0
)
&&
level
++
;
((
affixflag
==
0
)
||
(
strchr
(
Conf
->
Spell
[
c
].
flag
,
affixflag
)
!=
NULL
)))
break
;
return
(
&
Conf
->
Spell
[
c
]);
}
else
if
(
StopMiddle
->
val
<
((
u_int8_t
*
)(
word
))[
level
]
)
{
resl
=
strcmp
(
Conf
->
Spell
[
l
].
word
,
word
);
StopLow
=
StopMiddle
+
1
;
if
((
resl
==
0
)
&&
}
else
{
((
affixflag
==
0
)
||
(
strchr
(
Conf
->
Spell
[
l
].
flag
,
affixflag
)
!=
NULL
)))
StopHigh
=
StopMiddle
;
return
(
&
Conf
->
Spell
[
l
]);
}
resr
=
strcmp
(
Conf
->
Spell
[
r
].
word
,
word
);
if
((
resr
==
0
)
&&
((
affixflag
==
0
)
||
(
strchr
(
Conf
->
Spell
[
r
].
flag
,
affixflag
)
!=
NULL
)))
return
(
&
Conf
->
Spell
[
r
]);
if
(
resc
<
0
)
{
l
=
c
+
1
;
r
--
;
}
else
if
(
resc
>
0
)
{
r
=
c
-
1
;
l
++
;
}
else
{
l
++
;
r
--
;
}
}
if
(
StopLow
>=
StopHigh
)
break
;
}
}
return
(
NULL
)
;
return
0
;
}
}
int
int
AddAffix
(
IspellDict
*
Conf
,
int
flag
,
const
char
*
mask
,
const
char
*
find
,
const
char
*
repl
,
int
type
)
NIAddAffix
(
IspellDict
*
Conf
,
int
flag
,
char
flagflags
,
const
char
*
mask
,
const
char
*
find
,
const
char
*
repl
,
int
type
)
{
{
if
(
Conf
->
naffixes
>=
Conf
->
maffixes
)
if
(
Conf
->
naffixes
>=
Conf
->
maffixes
)
{
{
...
@@ -238,16 +234,14 @@ AddAffix(IspellDict * Conf, int flag, const char *mask, const char *find, const
...
@@ -238,16 +234,14 @@ AddAffix(IspellDict * Conf, int flag, const char *mask, const char *find, const
Conf
->
maffixes
=
16
;
Conf
->
maffixes
=
16
;
Conf
->
Affix
=
(
AFFIX
*
)
malloc
(
Conf
->
maffixes
*
sizeof
(
AFFIX
));
Conf
->
Affix
=
(
AFFIX
*
)
malloc
(
Conf
->
maffixes
*
sizeof
(
AFFIX
));
}
}
if
(
Conf
->
Affix
==
NULL
)
MEMOUT
(
Conf
->
Affix
);
ereport
(
ERROR
,
(
errcode
(
ERRCODE_OUT_OF_MEMORY
),
errmsg
(
"out of memory"
)));
}
}
if
(
type
==
's'
)
if
(
type
==
's'
)
sprintf
(
Conf
->
Affix
[
Conf
->
naffixes
].
mask
,
"%s$"
,
mask
);
sprintf
(
Conf
->
Affix
[
Conf
->
naffixes
].
mask
,
"%s$"
,
mask
);
else
else
sprintf
(
Conf
->
Affix
[
Conf
->
naffixes
].
mask
,
"^%s"
,
mask
);
sprintf
(
Conf
->
Affix
[
Conf
->
naffixes
].
mask
,
"^%s"
,
mask
);
Conf
->
Affix
[
Conf
->
naffixes
].
compile
=
1
;
Conf
->
Affix
[
Conf
->
naffixes
].
compile
=
1
;
Conf
->
Affix
[
Conf
->
naffixes
].
flagflags
=
flagflags
;
Conf
->
Affix
[
Conf
->
naffixes
].
flag
=
flag
;
Conf
->
Affix
[
Conf
->
naffixes
].
flag
=
flag
;
Conf
->
Affix
[
Conf
->
naffixes
].
type
=
type
;
Conf
->
Affix
[
Conf
->
naffixes
].
type
=
type
;
...
@@ -281,7 +275,7 @@ remove_spaces(char *dist, char *src)
...
@@ -281,7 +275,7 @@ remove_spaces(char *dist, char *src)
int
int
ImportAffixes
(
IspellDict
*
Conf
,
const
char
*
filename
)
NI
ImportAffixes
(
IspellDict
*
Conf
,
const
char
*
filename
)
{
{
unsigned
char
str
[
BUFSIZ
];
unsigned
char
str
[
BUFSIZ
];
unsigned
char
flag
=
0
;
unsigned
char
flag
=
0
;
...
@@ -292,13 +286,24 @@ ImportAffixes(IspellDict * Conf, const char *filename)
...
@@ -292,13 +286,24 @@ ImportAffixes(IspellDict * Conf, const char *filename)
int
i
;
int
i
;
int
suffixes
=
0
;
int
suffixes
=
0
;
int
prefixes
=
0
;
int
prefixes
=
0
;
unsigned
char
flagflags
=
0
;
FILE
*
affix
;
FILE
*
affix
;
if
(
!
(
affix
=
fopen
(
filename
,
"r"
)))
if
(
!
(
affix
=
fopen
(
filename
,
"r"
)))
return
(
1
);
return
(
1
);
Conf
->
compoundcontrol
=
'\t'
;
while
(
fgets
(
str
,
sizeof
(
str
),
affix
))
while
(
fgets
(
str
,
sizeof
(
str
),
affix
))
{
{
if
(
STRNCASECMP
(
str
,
"compoundwords"
)
==
0
)
{
s
=
strchr
(
str
,
'l'
);
if
(
s
)
{
while
(
*
s
!=
' '
)
s
++
;
while
(
*
s
==
' '
)
s
++
;
Conf
->
compoundcontrol
=
*
s
;
continue
;
}
}
if
(
!
STRNCASECMP
(
str
,
"suffixes"
))
if
(
!
STRNCASECMP
(
str
,
"suffixes"
))
{
{
suffixes
=
1
;
suffixes
=
1
;
...
@@ -314,8 +319,18 @@ ImportAffixes(IspellDict * Conf, const char *filename)
...
@@ -314,8 +319,18 @@ ImportAffixes(IspellDict * Conf, const char *filename)
if
(
!
STRNCASECMP
(
str
,
"flag "
))
if
(
!
STRNCASECMP
(
str
,
"flag "
))
{
{
s
=
str
+
5
;
s
=
str
+
5
;
while
(
strchr
(
"* "
,
*
s
))
flagflags
=
0
;
while
(
*
s
==
' '
)
s
++
;
if
(
*
s
==
'*'
)
{
flagflags
|=
FF_CROSSPRODUCT
;
s
++
;
}
else
if
(
*
s
==
'~'
)
{
flagflags
|=
FF_COMPOUNDONLYAFX
;
s
++
;
s
++
;
}
if
(
*
s
==
'\\'
)
s
++
;
flag
=
*
s
;
flag
=
*
s
;
continue
;
continue
;
}
}
...
@@ -351,7 +366,7 @@ ImportAffixes(IspellDict * Conf, const char *filename)
...
@@ -351,7 +366,7 @@ ImportAffixes(IspellDict * Conf, const char *filename)
continue
;
continue
;
}
}
AddAffix
(
Conf
,
(
int
)
flag
,
mask
,
find
,
repl
,
suffixes
?
's'
:
'p'
);
NIAddAffix
(
Conf
,
(
int
)
flag
,
(
char
)
flagflags
,
mask
,
find
,
repl
,
suffixes
?
's'
:
'p'
);
}
}
fclose
(
affix
);
fclose
(
affix
);
...
@@ -359,87 +374,266 @@ ImportAffixes(IspellDict * Conf, const char *filename)
...
@@ -359,87 +374,266 @@ ImportAffixes(IspellDict * Conf, const char *filename)
return
(
0
);
return
(
0
);
}
}
static
int
MergeAffix
(
IspellDict
*
Conf
,
int
a1
,
int
a2
)
{
int
naffix
=
0
;
char
**
ptr
=
Conf
->
AffixData
;
while
(
*
ptr
)
{
naffix
++
;
ptr
++
;
}
Conf
->
AffixData
=
(
char
**
)
realloc
(
Conf
->
AffixData
,
(
naffix
+
2
)
*
sizeof
(
char
*
)
);
MEMOUT
(
Conf
->
AffixData
);
ptr
=
Conf
->
AffixData
+
naffix
;
*
ptr
=
malloc
(
strlen
(
Conf
->
AffixData
[
a1
])
+
strlen
(
Conf
->
AffixData
[
a2
])
+
1
/* space */
+
1
/* \0 */
);
MEMOUT
(
ptr
);
sprintf
(
*
ptr
,
"%s %s"
,
Conf
->
AffixData
[
a1
],
Conf
->
AffixData
[
a2
]);
ptr
++
;
*
ptr
=
'\0'
;
return
naffix
;
}
static
SPNode
*
mkSPNode
(
IspellDict
*
Conf
,
int
low
,
int
high
,
int
level
)
{
int
i
;
int
nchar
=
0
;
char
lastchar
=
'\0'
;
SPNode
*
rs
;
SPNodeData
*
data
;
int
lownew
=
low
;
for
(
i
=
low
;
i
<
high
;
i
++
)
if
(
Conf
->
Spell
[
i
].
p
.
d
.
len
>
level
&&
lastchar
!=
Conf
->
Spell
[
i
].
word
[
level
]
)
{
nchar
++
;
lastchar
=
Conf
->
Spell
[
i
].
word
[
level
];
}
if
(
!
nchar
)
return
NULL
;
rs
=
(
SPNode
*
)
malloc
(
SPNHRDSZ
+
nchar
*
sizeof
(
SPNodeData
));
MEMOUT
(
rs
);
memset
(
rs
,
0
,
SPNHRDSZ
+
nchar
*
sizeof
(
SPNodeData
));
rs
->
length
=
nchar
;
data
=
rs
->
data
;
lastchar
=
'\0'
;
for
(
i
=
low
;
i
<
high
;
i
++
)
if
(
Conf
->
Spell
[
i
].
p
.
d
.
len
>
level
)
{
if
(
lastchar
!=
Conf
->
Spell
[
i
].
word
[
level
]
)
{
if
(
lastchar
)
{
data
->
node
=
mkSPNode
(
Conf
,
lownew
,
i
,
level
+
1
);
lownew
=
i
;
data
++
;
}
lastchar
=
Conf
->
Spell
[
i
].
word
[
level
];
}
data
->
val
=
((
u_int8_t
*
)(
Conf
->
Spell
[
i
].
word
))[
level
];
if
(
Conf
->
Spell
[
i
].
p
.
d
.
len
==
level
+
1
)
{
if
(
data
->
isword
&&
data
->
affix
!=
Conf
->
Spell
[
i
].
p
.
d
.
affix
)
{
/*
fprintf(stderr,"Word already exists: %s (affixes: '%s' and '%s')\n",
Conf->Spell[i].word,
Conf->AffixData[data->affix],
Conf->AffixData[Conf->Spell[i].p.d.affix]
);
*/
/* MergeAffix called a few times */
data
->
affix
=
MergeAffix
(
Conf
,
data
->
affix
,
Conf
->
Spell
[
i
].
p
.
d
.
affix
);
}
else
data
->
affix
=
Conf
->
Spell
[
i
].
p
.
d
.
affix
;
data
->
isword
=
1
;
if
(
strchr
(
Conf
->
AffixData
[
data
->
affix
],
Conf
->
compoundcontrol
)
)
data
->
compoundallow
=
1
;
}
}
data
->
node
=
mkSPNode
(
Conf
,
lownew
,
high
,
level
+
1
);
return
rs
;
}
void
void
SortDictionary
(
IspellDict
*
Conf
)
NI
SortDictionary
(
IspellDict
*
Conf
)
{
{
int
CurLet
=
-
1
,
Let
;
size_t
i
;
size_t
i
;
int
naffix
=
3
;
/* compress affixes */
qsort
((
void
*
)
Conf
->
Spell
,
Conf
->
nspell
,
sizeof
(
SPELL
),
cmpspellaffix
);
for
(
i
=
1
;
i
<
Conf
->
nspell
;
i
++
)
if
(
strcmp
(
Conf
->
Spell
[
i
].
p
.
flag
,
Conf
->
Spell
[
i
-
1
].
p
.
flag
)
)
naffix
++
;
Conf
->
AffixData
=
(
char
**
)
malloc
(
naffix
*
sizeof
(
char
*
)
);
MEMOUT
(
Conf
->
AffixData
);
memset
(
Conf
->
AffixData
,
0
,
naffix
*
sizeof
(
char
*
));
naffix
=
1
;
Conf
->
AffixData
[
0
]
=
strdup
(
""
);
MEMOUT
(
Conf
->
AffixData
[
0
]);
Conf
->
AffixData
[
1
]
=
strdup
(
Conf
->
Spell
[
0
].
p
.
flag
);
MEMOUT
(
Conf
->
AffixData
[
1
]);
Conf
->
Spell
[
0
].
p
.
d
.
affix
=
1
;
Conf
->
Spell
[
0
].
p
.
d
.
len
=
strlen
(
Conf
->
Spell
[
0
].
word
);
for
(
i
=
1
;
i
<
Conf
->
nspell
;
i
++
)
{
if
(
strcmp
(
Conf
->
Spell
[
i
].
p
.
flag
,
Conf
->
AffixData
[
naffix
])
)
{
naffix
++
;
Conf
->
AffixData
[
naffix
]
=
strdup
(
Conf
->
Spell
[
i
].
p
.
flag
);
MEMOUT
(
Conf
->
AffixData
[
naffix
]);
}
Conf
->
Spell
[
i
].
p
.
d
.
affix
=
naffix
;
Conf
->
Spell
[
i
].
p
.
d
.
len
=
strlen
(
Conf
->
Spell
[
i
].
word
);
}
qsort
((
void
*
)
Conf
->
Spell
,
Conf
->
nspell
,
sizeof
(
SPELL
),
cmpspell
);
qsort
((
void
*
)
Conf
->
Spell
,
Conf
->
nspell
,
sizeof
(
SPELL
),
cmpspell
);
Conf
->
Dictionary
=
mkSPNode
(
Conf
,
0
,
Conf
->
nspell
,
0
);
for
(
i
=
0
;
i
<
Conf
->
nspell
;
i
++
)
free
(
Conf
->
Spell
[
i
].
word
);
free
(
Conf
->
Spell
);
Conf
->
Spell
=
NULL
;
}
for
(
i
=
0
;
i
<
256
;
i
++
)
static
AffixNode
*
Conf
->
SpellTree
.
Left
[
i
]
=
-
1
;
mkANode
(
IspellDict
*
Conf
,
int
low
,
int
high
,
int
level
,
int
type
)
{
int
i
;
int
nchar
=
0
;
u_int8_t
lastchar
=
'\0'
;
AffixNode
*
rs
;
AffixNodeData
*
data
;
int
lownew
=
low
;
for
(
i
=
low
;
i
<
high
;
i
++
)
if
(
Conf
->
Affix
[
i
].
replen
>
level
&&
lastchar
!=
GETCHAR
(
Conf
->
Affix
+
i
,
level
,
type
)
)
{
nchar
++
;
lastchar
=
GETCHAR
(
Conf
->
Affix
+
i
,
level
,
type
);
}
for
(
i
=
0
;
i
<
Conf
->
nspell
;
i
++
)
if
(
!
nchar
)
{
return
NULL
;
Let
=
(
int
)
(
*
(
Conf
->
Spell
[
i
].
word
))
&
255
;
if
(
CurLet
!=
Let
)
rs
=
(
AffixNode
*
)
malloc
(
ANHRDSZ
+
nchar
*
sizeof
(
AffixNodeData
));
{
MEMOUT
(
rs
);
Conf
->
SpellTree
.
Left
[
Let
]
=
i
;
memset
(
rs
,
0
,
ANHRDSZ
+
nchar
*
sizeof
(
AffixNodeData
));
CurLet
=
Let
;
rs
->
length
=
nchar
;
data
=
rs
->
data
;
lastchar
=
'\0'
;
for
(
i
=
low
;
i
<
high
;
i
++
)
if
(
Conf
->
Affix
[
i
].
replen
>
level
)
{
if
(
lastchar
!=
GETCHAR
(
Conf
->
Affix
+
i
,
level
,
type
)
)
{
if
(
lastchar
)
{
data
->
node
=
mkANode
(
Conf
,
lownew
,
i
,
level
+
1
,
type
);
lownew
=
i
;
data
++
;
}
lastchar
=
GETCHAR
(
Conf
->
Affix
+
i
,
level
,
type
);
}
data
->
val
=
GETCHAR
(
Conf
->
Affix
+
i
,
level
,
type
);
if
(
Conf
->
Affix
[
i
].
replen
==
level
+
1
)
{
/* affix stopped */
if
(
!
data
->
naff
)
data
->
aff
=
(
AFFIX
**
)
malloc
(
sizeof
(
AFFIX
*
)
*
(
high
-
i
+
1
));
MEMOUT
(
data
);
data
->
aff
[
data
->
naff
]
=
Conf
->
Affix
+
i
;
data
->
naff
++
;
}
}
}
Conf
->
SpellTree
.
Right
[
Let
]
=
i
;
}
data
->
node
=
mkANode
(
Conf
,
lownew
,
high
,
level
+
1
,
type
);
return
rs
;
}
}
void
void
SortAffixes
(
IspellDict
*
Conf
)
NI
SortAffixes
(
IspellDict
*
Conf
)
{
{
int
CurLetP
=
-
1
,
CurLetS
=
-
1
,
Let
;
AFFIX
*
Affix
;
AFFIX
*
Affix
;
size_t
i
;
size_t
i
;
CMPDAffix
*
ptr
;
int
firstsuffix
=-
1
;
if
(
Conf
->
naffixes
>
1
)
if
(
Conf
->
naffixes
>
1
)
qsort
((
void
*
)
Conf
->
Affix
,
Conf
->
naffixes
,
sizeof
(
AFFIX
),
cmpaffix
);
qsort
((
void
*
)
Conf
->
Affix
,
Conf
->
naffixes
,
sizeof
(
AFFIX
),
cmpaffix
);
for
(
i
=
0
;
i
<
256
;
i
++
)
{
Conf
->
PrefixTree
.
Left
[
i
]
=
Conf
->
PrefixTree
.
Right
[
i
]
=
-
1
;
Conf
->
SuffixTree
.
Left
[
i
]
=
Conf
->
SuffixTree
.
Right
[
i
]
=
-
1
;
}
for
(
i
=
0
;
i
<
Conf
->
naffixes
;
i
++
)
Conf
->
CompoundAffix
=
ptr
=
(
CMPDAffix
*
)
malloc
(
sizeof
(
CMPDAffix
)
*
Conf
->
naffixes
);
{
MEMOUT
(
Conf
->
CompoundAffix
);
ptr
->
affix
=
NULL
;
for
(
i
=
0
;
i
<
Conf
->
naffixes
;
i
++
)
{
Affix
=
&
(((
AFFIX
*
)
Conf
->
Affix
)[
i
]);
Affix
=
&
(((
AFFIX
*
)
Conf
->
Affix
)[
i
]);
if
(
Affix
->
type
==
'p'
)
if
(
Affix
->
type
==
's'
)
{
{
if
(
firstsuffix
<
0
)
firstsuffix
=
i
;
Let
=
(
int
)
(
*
(
Affix
->
repl
))
&
255
;
if
(
Affix
->
flagflags
&
FF_COMPOUNDONLYAFX
)
{
if
(
CurLetP
!=
Let
)
if
(
!
ptr
->
affix
||
strbncmp
((
ptr
-
1
)
->
affix
,
Affix
->
repl
,
(
ptr
-
1
)
->
len
)
)
{
{
/* leave only unique and minimals suffixes */
Conf
->
PrefixTree
.
Left
[
Let
]
=
i
;
ptr
->
affix
=
Affix
->
repl
;
CurLetP
=
Let
;
ptr
->
len
=
Affix
->
replen
;
ptr
++
;
}
}
}
Conf
->
PrefixTree
.
Right
[
Let
]
=
i
;
}
}
else
}
{
ptr
->
affix
=
NULL
;
Let
=
(
Affix
->
replen
)
?
(
int
)
(
Affix
->
repl
[
Affix
->
replen
-
1
])
&
255
:
0
;
Conf
->
CompoundAffix
=
(
CMPDAffix
*
)
realloc
(
Conf
->
CompoundAffix
,
sizeof
(
CMPDAffix
)
*
(
ptr
-
Conf
->
CompoundAffix
+
1
)
);
if
(
CurLetS
!=
Let
)
{
Conf
->
Prefix
=
mkANode
(
Conf
,
0
,
firstsuffix
,
0
,
'p'
);
Conf
->
SuffixTree
.
Left
[
Let
]
=
i
;
Conf
->
Suffix
=
mkANode
(
Conf
,
firstsuffix
,
Conf
->
naffixes
,
0
,
's'
);
CurLetS
=
Let
;
}
static
AffixNodeData
*
FinfAffixes
(
AffixNode
*
node
,
const
char
*
word
,
int
wrdlen
,
int
*
level
,
int
type
)
{
AffixNodeData
*
StopLow
,
*
StopHigh
,
*
StopMiddle
;
u_int8_t
symbol
;
while
(
node
&&
*
level
<
wrdlen
)
{
StopLow
=
node
->
data
;
StopHigh
=
node
->
data
+
node
->
length
;
while
(
StopLow
<
StopHigh
)
{
StopMiddle
=
StopLow
+
(
StopHigh
-
StopLow
)
/
2
;
symbol
=
GETWCHAR
(
word
,
wrdlen
,
*
level
,
type
);
if
(
StopMiddle
->
val
==
symbol
)
{
if
(
StopMiddle
->
naff
)
return
StopMiddle
;
node
=
StopMiddle
->
node
;
(
*
level
)
++
;
break
;
}
else
if
(
StopMiddle
->
val
<
symbol
)
{
StopLow
=
StopMiddle
+
1
;
}
else
{
StopHigh
=
StopMiddle
;
}
}
Conf
->
SuffixTree
.
Right
[
Let
]
=
i
;
}
}
if
(
StopLow
>=
StopHigh
)
break
;
}
}
return
NULL
;
}
}
static
char
*
static
char
*
CheckSuffix
(
const
char
*
word
,
size_t
len
,
AFFIX
*
Affix
,
int
*
res
,
IspellDict
*
Conf
)
CheckAffix
(
const
char
*
word
,
size_t
len
,
AFFIX
*
Affix
,
char
flagflags
,
char
*
newword
)
{
{
regmatch_t
subs
[
2
];
/* workaround for apache&linux */
regmatch_t
subs
[
2
];
/* workaround for apache&linux */
char
newword
[
2
*
MAXNORMLEN
]
=
""
;
int
err
;
int
err
;
*
res
=
strbncmp
(
word
,
Affix
->
repl
,
Affix
->
replen
);
if
(
flagflags
&
FF_COMPOUNDONLYAFX
)
{
if
(
*
res
<
0
)
if
(
(
Affix
->
flagflags
&
FF_COMPOUNDONLYAFX
)
==
0
)
return
NULL
;
return
NULL
;
if
(
*
res
>
0
)
}
else
{
return
NULL
;
if
(
Affix
->
flagflags
&
FF_COMPOUNDONLYAFX
)
strcpy
(
newword
,
word
);
return
NULL
;
strcpy
(
newword
+
len
-
Affix
->
replen
,
Affix
->
find
);
}
if
(
Affix
->
type
==
's'
)
{
strcpy
(
newword
,
word
);
strcpy
(
newword
+
len
-
Affix
->
replen
,
Affix
->
find
);
}
else
{
strcpy
(
newword
,
Affix
->
find
);
strcat
(
newword
,
word
+
Affix
->
replen
);
}
if
(
Affix
->
compile
)
if
(
Affix
->
compile
)
{
{
...
@@ -452,205 +646,364 @@ CheckSuffix(const char *word, size_t len, AFFIX * Affix, int *res, IspellDict *
...
@@ -452,205 +646,364 @@ CheckSuffix(const char *word, size_t len, AFFIX * Affix, int *res, IspellDict *
}
}
Affix
->
compile
=
0
;
Affix
->
compile
=
0
;
}
}
if
(
!
(
err
=
regexec
(
&
(
Affix
->
reg
),
newword
,
1
,
subs
,
0
)))
if
(
!
(
err
=
regexec
(
&
(
Affix
->
reg
),
newword
,
1
,
subs
,
0
)))
{
return
newword
;
if
(
FindWord
(
Conf
,
newword
,
Affix
->
flag
))
return
pstrdup
(
newword
);
}
return
NULL
;
return
NULL
;
}
}
#define NS 1
#define MAX_NORM 512
static
char
**
static
int
NormalizeSubWord
(
IspellDict
*
Conf
,
char
*
word
,
char
flag
)
{
CheckPrefix
(
const
char
*
word
,
size_t
len
,
AFFIX
*
Affix
,
IspellDict
*
Conf
,
int
pi
,
AffixNodeData
*
suffix
=
NULL
,
*
prefix
=
NULL
;
char
**
forms
,
char
***
cur
)
int
slevel
=
0
,
plevel
=
0
;
{
int
wrdlen
=
strlen
(
word
),
swrdlen
;
regmatch_t
subs
[
NS
*
2
];
char
**
forms
;
char
**
cur
;
char
newword
[
2
*
MAXNORMLEN
]
=
""
;
char
newword
[
2
*
MAXNORMLEN
]
=
""
;
int
err
,
char
pnewword
[
2
*
MAXNORMLEN
]
=
""
;
ls
,
AffixNode
*
snode
=
Conf
->
Suffix
,
*
pnode
;
res
,
int
i
,
j
;
lres
;
size_t
newlen
;
AFFIX
*
CAffix
=
Conf
->
Affix
;
res
=
strncmp
(
word
,
Affix
->
repl
,
Affix
->
replen
);
if
(
res
!=
0
)
return
res
;
strcpy
(
newword
,
Affix
->
find
);
strcat
(
newword
,
word
+
Affix
->
replen
);
if
(
Affix
->
compile
)
if
(
wrdlen
>
MAXNORMLEN
)
return
NULL
;
{
strlower
(
word
);
err
=
regcomp
(
&
(
Affix
->
reg
),
Affix
->
mask
,
REG_EXTENDED
|
REG_ICASE
|
REG_NOSUB
);
cur
=
forms
=
(
char
**
)
palloc
(
MAX_NORM
*
sizeof
(
char
*
));
if
(
err
)
*
cur
=
NULL
;
{
/* regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE); */
regfree
(
&
(
Affix
->
reg
));
/* Check that the word itself is normal form */
return
(
0
);
if
(
FindWord
(
Conf
,
word
,
0
,
flag
&
FF_COMPOUNDWORD
))
{
}
*
cur
=
pstrdup
(
word
);
Affix
->
compile
=
0
;
cur
++
;
*
cur
=
NULL
;
}
}
if
(
!
(
err
=
regexec
(
&
(
Affix
->
reg
),
newword
,
1
,
subs
,
0
)))
{
SPELL
*
curspell
;
if
((
curspell
=
FindWord
(
Conf
,
newword
,
Affix
->
flag
)))
/* Find all other NORMAL forms of the 'word' (check only prefix)*/
{
pnode
=
Conf
->
Prefix
;
if
((
*
cur
-
forms
)
<
(
MAX_NORM
-
1
))
plevel
=
0
;
{
while
(
pnode
)
{
**
cur
=
pstrdup
(
newword
);
prefix
=
FinfAffixes
(
pnode
,
word
,
wrdlen
,
&
plevel
,
'p'
);
(
*
cur
)
++
;
if
(
!
prefix
)
break
;
**
cur
=
NULL
;
for
(
j
=
0
;
j
<
prefix
->
naff
;
j
++
)
{
if
(
CheckAffix
(
word
,
wrdlen
,
prefix
->
aff
[
j
],
flag
,
newword
)
)
{
/* prefix success */
if
(
FindWord
(
Conf
,
newword
,
prefix
->
aff
[
j
]
->
flag
,
flag
&
FF_COMPOUNDWORD
)
&&
(
cur
-
forms
)
<
(
MAX_NORM
-
1
)
)
{
/* word search success */
*
cur
=
pstrdup
(
newword
);
cur
++
;
*
cur
=
NULL
;
}
}
}
}
}
newlen
=
strlen
(
newword
);
pnode
=
prefix
->
node
;
ls
=
Conf
->
SuffixTree
.
Left
[
pi
];
plevel
++
;
if
(
ls
>=
0
&&
((
*
cur
-
forms
)
<
(
MAX_NORM
-
1
)))
}
{
**
cur
=
CheckSuffix
(
newword
,
newlen
,
&
CAffix
[
ls
],
&
lres
,
Conf
);
/* Find all other NORMAL forms of the 'word' (check suffix and then prefix)*/
if
(
**
cur
)
while
(
snode
)
{
{
/* find possible suffix */
(
*
cur
)
++
;
suffix
=
FinfAffixes
(
snode
,
word
,
wrdlen
,
&
slevel
,
's'
);
**
cur
=
NULL
;
if
(
!
suffix
)
break
;
/* foreach suffix check affix */
for
(
i
=
0
;
i
<
suffix
->
naff
;
i
++
)
{
if
(
CheckAffix
(
word
,
wrdlen
,
suffix
->
aff
[
i
],
flag
,
newword
)
)
{
/* suffix success */
if
(
FindWord
(
Conf
,
newword
,
suffix
->
aff
[
i
]
->
flag
,
flag
&
FF_COMPOUNDWORD
)
&&
(
cur
-
forms
)
<
(
MAX_NORM
-
1
)
)
{
/* word search success */
*
cur
=
pstrdup
(
newword
);
cur
++
;
*
cur
=
NULL
;
}
/* now we will look changed word with prefixes */
pnode
=
Conf
->
Prefix
;
plevel
=
0
;
swrdlen
=
strlen
(
newword
);
while
(
pnode
)
{
prefix
=
FinfAffixes
(
pnode
,
newword
,
swrdlen
,
&
plevel
,
'p'
);
if
(
!
prefix
)
break
;
for
(
j
=
0
;
j
<
prefix
->
naff
;
j
++
)
{
if
(
CheckAffix
(
newword
,
swrdlen
,
prefix
->
aff
[
j
],
flag
,
pnewword
)
)
{
/* prefix success */
int
ff
=
(
prefix
->
aff
[
j
]
->
flagflags
&
suffix
->
aff
[
i
]
->
flagflags
&
FF_CROSSPRODUCT
)
?
0
:
prefix
->
aff
[
j
]
->
flag
;
if
(
FindWord
(
Conf
,
pnewword
,
ff
,
flag
&
FF_COMPOUNDWORD
)
&&
(
cur
-
forms
)
<
(
MAX_NORM
-
1
)
)
{
/* word search success */
*
cur
=
pstrdup
(
pnewword
);
cur
++
;
*
cur
=
NULL
;
}
}
}
pnode
=
prefix
->
node
;
plevel
++
;
}
}
}
}
}
}
return
0
;
}
snode
=
suffix
->
node
;
slevel
++
;
}
char
**
if
(
cur
==
forms
)
{
NormalizeWord
(
IspellDict
*
Conf
,
char
*
word
)
free
(
forms
);
{
/*regmatch_t subs[NS];*/
size_t
len
;
char
**
forms
;
char
**
cur
;
AFFIX
*
Affix
;
int
ri
,
pi
,
ipi
,
lp
,
rp
,
cp
,
ls
,
rs
;
int
lres
,
rres
,
cres
=
0
;
SPELL
*
spell
;
len
=
strlen
(
word
);
if
(
len
>
MAXNORMLEN
)
return
(
NULL
);
return
(
NULL
);
}
return
(
forms
);
}
strlower
(
word
);
typedef
struct
SplitVar
{
int
nstem
;
forms
=
(
char
**
)
palloc
(
MAX_NORM
*
sizeof
(
char
**
));
char
**
stem
;
cur
=
forms
;
struct
SplitVar
*
next
;
*
cur
=
NULL
;
}
SplitVar
;
ri
=
(
int
)
(
*
word
)
&
255
;
static
int
pi
=
(
int
)
(
word
[
strlen
(
word
)
-
1
])
&
255
;
CheckCompoundAffixes
(
CMPDAffix
**
ptr
,
char
*
word
,
int
len
)
{
Affix
=
(
AFFIX
*
)
Conf
->
Affix
;
while
(
(
*
ptr
)
->
affix
)
{
if
(
len
>
(
*
ptr
)
->
len
&&
strncmp
((
*
ptr
)
->
affix
,
word
,
(
*
ptr
)
->
len
)
==
0
)
{
/* Check that the word itself is normal form */
len
=
(
*
ptr
)
->
len
;
if
((
spell
=
FindWord
(
Conf
,
word
,
0
)))
(
*
ptr
)
++
;
{
return
len
;
*
cur
=
pstrdup
(
word
);
}
cur
++
;
(
*
ptr
)
++
;
*
cur
=
NULL
;
}
}
return
0
;
}
/* Find all other NORMAL forms of the 'word' */
static
SplitVar
*
CopyVar
(
SplitVar
*
s
,
int
makedup
)
{
SplitVar
*
v
=
(
SplitVar
*
)
palloc
(
sizeof
(
SplitVar
));
v
->
stem
=
(
char
**
)
palloc
(
sizeof
(
char
*
)
*
(
MAX_NORM
)
);
v
->
next
=
NULL
;
if
(
s
)
{
int
i
;
v
->
nstem
=
s
->
nstem
;
for
(
i
=
0
;
i
<
s
->
nstem
;
i
++
)
v
->
stem
[
i
]
=
(
makedup
)
?
pstrdup
(
s
->
stem
[
i
]
)
:
s
->
stem
[
i
];
}
else
{
v
->
nstem
=
0
;
}
return
v
;
}
for
(
ipi
=
0
;
ipi
<=
pi
;
ipi
+=
pi
)
{
/* check prefix */
static
SplitVar
*
lp
=
Conf
->
PrefixTree
.
Left
[
ri
];
SplitToVariants
(
IspellDict
*
Conf
,
SPNode
*
snode
,
SplitVar
*
orig
,
char
*
word
,
int
wordlen
,
int
startpos
,
int
minpos
)
{
rp
=
Conf
->
PrefixTree
.
Right
[
ri
];
SplitVar
*
var
=
NULL
;
while
(
lp
>=
0
&&
lp
<=
rp
)
SPNodeData
*
StopLow
,
*
StopHigh
,
*
StopMiddle
;
{
SPNode
*
node
=
(
snode
)
?
snode
:
Conf
->
Dictionary
;
cp
=
(
lp
+
rp
)
>>
1
;
int
level
=
(
snode
)
?
minpos
:
startpos
;
/* recursive minpos==level*/
cres
=
0
;
int
lenaff
;
if
((
cur
-
forms
)
<
(
MAX_NORM
-
1
))
CMPDAffix
*
caff
;
cres
=
CheckPrefix
(
word
,
len
,
&
Affix
[
cp
],
Conf
,
ipi
,
forms
,
&
cur
);
char
notprobed
[
wordlen
];
if
((
lp
<
cp
)
&&
((
cur
-
forms
)
<
(
MAX_NORM
-
1
)))
lres
=
CheckPrefix
(
word
,
len
,
&
Affix
[
lp
],
Conf
,
ipi
,
forms
,
&
cur
);
memset
(
notprobed
,
1
,
wordlen
);
if
((
rp
>
cp
)
&&
((
cur
-
forms
)
<
(
MAX_NORM
-
1
)))
var
=
CopyVar
(
orig
,
1
);
rres
=
CheckPrefix
(
word
,
len
,
&
Affix
[
rp
],
Conf
,
ipi
,
forms
,
&
cur
);
if
(
cres
<
0
)
while
(
node
&&
level
<
wordlen
)
{
{
StopLow
=
node
->
data
;
rp
=
cp
-
1
;
StopHigh
=
node
->
data
+
node
->
length
;
lp
++
;
while
(
StopLow
<
StopHigh
)
{
}
StopMiddle
=
StopLow
+
(
StopHigh
-
StopLow
)
/
2
;
else
if
(
cres
>
0
)
if
(
StopMiddle
->
val
==
((
u_int8_t
*
)(
word
))[
level
]
)
{
{
break
;
lp
=
cp
+
1
;
}
else
if
(
StopMiddle
->
val
<
((
u_int8_t
*
)(
word
))[
level
]
)
{
rp
--
;
StopLow
=
StopMiddle
+
1
;
}
else
{
StopHigh
=
StopMiddle
;
}
}
else
}
{
if
(
StopLow
>=
StopHigh
)
lp
++
;
break
;
rp
--
;
/* find word with epenthetic */
caff
=
Conf
->
CompoundAffix
;
while
(
level
>
startpos
&&
(
lenaff
=
CheckCompoundAffixes
(
&
caff
,
word
+
level
,
wordlen
-
level
))
>
0
)
{
/* there is one of compound suffixes, so check word for existings */
char
buf
[
MAXNORMLEN
];
char
**
subres
;
lenaff
=
level
-
startpos
+
lenaff
;
if
(
!
notprobed
[
startpos
+
lenaff
-
1
]
)
continue
;
if
(
level
+
lenaff
-
1
<=
minpos
)
continue
;
memcpy
(
buf
,
word
+
startpos
,
lenaff
);
buf
[
lenaff
]
=
'\0'
;
subres
=
NormalizeSubWord
(
Conf
,
buf
,
FF_COMPOUNDWORD
|
FF_COMPOUNDONLYAFX
);
if
(
subres
)
{
/* Yes, it was a word from dictionary */
SplitVar
*
new
=
CopyVar
(
var
,
0
);
SplitVar
*
ptr
=
var
;
char
**
sptr
=
subres
;
notprobed
[
startpos
+
lenaff
-
1
]
=
0
;
while
(
*
sptr
)
{
new
->
stem
[
new
->
nstem
]
=
*
sptr
;
new
->
nstem
++
;
sptr
++
;
}
free
(
subres
);
while
(
ptr
->
next
)
ptr
=
ptr
->
next
;
ptr
->
next
=
SplitToVariants
(
Conf
,
NULL
,
new
,
word
,
wordlen
,
startpos
+
lenaff
,
startpos
+
lenaff
);
free
(
new
->
stem
);
free
(
new
);
}
}
}
}
/* check suffix */
/* find infinitive */
ls
=
Conf
->
SuffixTree
.
Left
[
ipi
];
if
(
StopMiddle
->
isword
&&
StopMiddle
->
compoundallow
&&
notprobed
[
level
]
)
{
rs
=
Conf
->
SuffixTree
.
Right
[
ipi
];
/* ok, we found full compoundallowed word*/
while
(
ls
>=
0
&&
ls
<=
rs
)
if
(
level
>
minpos
)
{
{
/* and its length more than minimal */
if
(((
cur
-
forms
)
<
(
MAX_NORM
-
1
)))
if
(
wordlen
==
level
+
1
)
{
{
/* well, it was last word */
*
cur
=
CheckSuffix
(
word
,
len
,
&
Affix
[
ls
],
&
lres
,
Conf
);
var
->
stem
[
var
->
nstem
]
=
strndup
(
word
+
startpos
,
wordlen
-
startpos
);
if
(
*
cur
)
var
->
nstem
++
;
{
return
var
;
cur
++
;
}
else
{
*
cur
=
NULL
;
/* then we will search more big word at the same point */
SplitVar
*
ptr
=
var
;
while
(
ptr
->
next
)
ptr
=
ptr
->
next
;
ptr
->
next
=
SplitToVariants
(
Conf
,
node
,
var
,
word
,
wordlen
,
startpos
,
level
);
/* we can find next word */
level
++
;
var
->
stem
[
var
->
nstem
]
=
strndup
(
word
+
startpos
,
level
-
startpos
);
var
->
nstem
++
;
node
=
Conf
->
Dictionary
;
startpos
=
level
;
continue
;
}
}
}
}
if
((
rs
>
ls
)
&&
((
cur
-
forms
)
<
(
MAX_NORM
-
1
)))
}
{
level
++
;
*
cur
=
CheckSuffix
(
word
,
len
,
&
Affix
[
rs
],
&
rres
,
Conf
);
node
=
StopMiddle
->
node
;
if
(
*
cur
)
}
{
cur
++
;
var
->
stem
[
var
->
nstem
]
=
strndup
(
word
+
startpos
,
wordlen
-
startpos
);
*
cur
=
NULL
;
var
->
nstem
++
;
return
var
;
}
char
**
NINormalizeWord
(
IspellDict
*
Conf
,
char
*
word
)
{
char
**
res
=
NormalizeSubWord
(
Conf
,
word
,
0
);
if
(
Conf
->
compoundcontrol
!=
'\t'
)
{
int
wordlen
=
strlen
(
word
);
SplitVar
*
ptr
,
*
var
=
SplitToVariants
(
Conf
,
NULL
,
NULL
,
word
,
wordlen
,
0
,
-
1
);
char
**
cur
=
res
;
int
i
;
while
(
var
)
{
if
(
var
->
nstem
>
1
)
{
char
**
subres
=
NormalizeSubWord
(
Conf
,
var
->
stem
[
var
->
nstem
-
1
],
FF_COMPOUNDWORD
);
if
(
subres
)
{
char
**
ptr
=
subres
;
if
(
cur
)
{
while
(
*
cur
)
cur
++
;
}
else
{
res
=
cur
=
(
char
**
)
palloc
(
MAX_NORM
*
sizeof
(
char
*
));
}
for
(
i
=
0
;
i
<
var
->
nstem
-
1
;
i
++
)
{
*
cur
=
var
->
stem
[
i
];
cur
++
;
}
while
(
*
ptr
)
{
*
cur
=*
ptr
;
cur
++
;
ptr
++
;
}
*
cur
=
NULL
;
free
(
subres
);
var
->
stem
[
0
]
=
NULL
;
}
}
}
}
ls
++
;
rs
--
;
for
(
i
=
0
;
i
<
var
->
nstem
&&
var
->
stem
[
i
];
i
++
)
}
/* end while */
free
(
var
->
stem
[
i
]
);
ptr
=
var
->
next
;
free
(
var
->
stem
);
free
(
var
);
var
=
ptr
;
}
}
return
res
;
}
}
/* for ipi */
if
(
cur
==
forms
)
static
void
freeSPNode
(
SPNode
*
node
)
{
{
SPNodeData
*
data
;
pfree
(
forms
);
return
(
NULL
);
if
(
!
node
)
return
;
data
=
node
->
data
;
while
(
node
->
length
)
{
freeSPNode
(
data
->
node
);
data
++
;
node
->
length
--
;
}
}
return
(
forms
);
free
(
node
);
}
}
static
void
freeANode
(
AffixNode
*
node
)
{
AffixNodeData
*
data
;
if
(
!
node
)
return
;
data
=
node
->
data
;
while
(
node
->
length
)
{
freeANode
(
data
->
node
);
if
(
data
->
naff
)
free
(
data
->
aff
);
data
++
;
node
->
length
--
;
}
free
(
node
);
}
void
void
FreeIspell
(
IspellDict
*
Conf
)
NIFree
(
IspellDict
*
Conf
)
{
{
int
i
;
int
i
;
AFFIX
*
Affix
=
(
AFFIX
*
)
Conf
->
Affix
;
AFFIX
*
Affix
=
(
AFFIX
*
)
Conf
->
Affix
;
char
**
aff
=
Conf
->
AffixData
;
if
(
aff
)
{
while
(
*
aff
)
{
free
(
*
aff
);
aff
++
;
}
free
(
Conf
->
AffixData
);
}
for
(
i
=
0
;
i
<
Conf
->
naffixes
;
i
++
)
for
(
i
=
0
;
i
<
Conf
->
naffixes
;
i
++
)
{
{
if
(
Affix
[
i
].
compile
==
0
)
if
(
Affix
[
i
].
compile
==
0
)
regfree
(
&
(
Affix
[
i
].
reg
));
regfree
(
&
(
Affix
[
i
].
reg
));
}
}
for
(
i
=
0
;
i
<
Conf
->
naffixes
;
i
++
)
if
(
Conf
->
Spell
)
{
free
(
Conf
->
Spell
[
i
].
word
);
for
(
i
=
0
;
i
<
Conf
->
nspell
;
i
++
)
free
(
Conf
->
Affix
);
free
(
Conf
->
Spell
[
i
].
word
);
free
(
Conf
->
Spell
);
free
(
Conf
->
Spell
);
}
if
(
Conf
->
Affix
)
free
(
Conf
->
Affix
);
if
(
Conf
->
CompoundAffix
)
free
(
Conf
->
CompoundAffix
);
freeSPNode
(
Conf
->
Dictionary
);
freeANode
(
Conf
->
Suffix
);
freeANode
(
Conf
->
Prefix
);
memset
((
void
*
)
Conf
,
0
,
sizeof
(
IspellDict
));
memset
((
void
*
)
Conf
,
0
,
sizeof
(
IspellDict
));
return
;
return
;
}
}
contrib/tsearch2/ispell/spell.h
View file @
c63c1946
...
@@ -4,15 +4,43 @@
...
@@ -4,15 +4,43 @@
#include <sys/types.h>
#include <sys/types.h>
#include <regex.h>
#include <regex.h>
struct SPNode;

/*
 * One entry of an SPNode.
 *
 * The four bit-fields add up to 32 bits (8 + 1 + 1 + 22) and are declared
 * on a single u_int32_t; `node` points to another SPNode (may be NULL --
 * freeSPNode() accepts a NULL node).
 *
 * NOTE(review): presumably `val` is the character this entry stands for,
 * `isword` / `compoundallow` are per-word flags and `affix` is an index
 * into the dictionary's affix data -- confirm against spell.c.
 */
typedef struct
{
	u_int32_t	val:8,
				isword:1,
				compoundallow:1,
				affix:22;
	struct SPNode *node;
} SPNodeData;

/*
 * Variable-length node: a `length` field followed by the entries.
 *
 * `data` is declared with one element, but freeSPNode() walks it for as
 * long as `length` is non-zero, so a node is expected to be allocated with
 * room for `length` entries: SPNHRDSZ + length * sizeof(SPNodeData).
 */
typedef struct SPNode
{
	u_int32_t	length;
	SPNodeData	data[1];
} SPNode;

/*
 * Size of the SPNode header, i.e. the byte offset of data[0].
 *
 * This must include any padding the compiler inserts between `length` and
 * `data`.  SPNodeData contains a pointer, so where pointers are 8-byte
 * aligned data[0] starts at offset 8, not at sizeof(u_int32_t); using the
 * latter makes SPNHRDSZ-based allocations too small.  The expression below
 * equals offsetof(SPNode, data) without needing <stddef.h>.
 */
#define SPNHRDSZ	(sizeof(SPNode) - sizeof(SPNodeData))
typedef
struct
spell_struct
typedef
struct
spell_struct
{
{
char
*
word
;
char
*
word
;
char
flag
[
10
];
union
{
char
flag
[
16
];
struct
{
int
affix
;
int
len
;
}
d
;
}
p
;
}
SPELL
;
}
SPELL
;
typedef
struct
aff_struct
typedef
struct
aff_struct
{
{
char
flag
;
char
flag
;
char
flagflags
;
char
type
;
char
type
;
char
mask
[
33
];
char
mask
[
33
];
char
find
[
16
];
char
find
[
16
];
...
@@ -22,35 +50,66 @@ typedef struct aff_struct
...
@@ -22,35 +50,66 @@ typedef struct aff_struct
char
compile
;
char
compile
;
}
AFFIX
;
}
AFFIX
;
/*
 * Flag bits.  Each value is a distinct single bit (0x01, 0x02, 0x04), so
 * they can be combined with bitwise OR and tested with bitwise AND.
 *
 * NOTE(review): presumably these are the bits kept in AFFIX.flagflags and
 * passed as the `flagflags` argument of NIAddAffix() -- confirm in spell.c.
 */
#define FF_CROSSPRODUCT 0x01
#define FF_COMPOUNDWORD 0x02
#define FF_COMPOUNDONLYAFX 0x04
struct AffixNode;

/*
 * One entry of an AffixNode.
 *
 * `val` (8 bits) and `naff` (24 bits) are bit-fields of one u_int32_t.
 * `aff` is an array of AFFIX pointers; freeANode() frees it only when
 * `naff` is non-zero.  `node` points to a child AffixNode and may be NULL.
 *
 * NOTE(review): presumably `naff` is the number of elements in `aff` and
 * `val` the character this entry stands for -- confirm against spell.c.
 */
typedef struct
{
	u_int32_t	val:8;
	u_int32_t	naff:24;
	AFFIX	  **aff;
	struct AffixNode *node;
} AffixNodeData;
/*
 * Variable-length node: a `length` field followed by the entries.
 *
 * `data` is declared with one element, but freeANode() walks it for as
 * long as `length` is non-zero, so a node is expected to be allocated with
 * room for `length` entries: ANHRDSZ + length * sizeof(AffixNodeData).
 */
typedef struct AffixNode
{
	u_int32_t	length;
	AffixNodeData data[1];
} AffixNode;

/*
 * Size of the AffixNode header, i.e. the byte offset of data[0].
 *
 * This must include any padding the compiler inserts between `length` and
 * `data`.  AffixNodeData contains pointers, so where pointers are 8-byte
 * aligned data[0] starts at offset 8, not at sizeof(u_int32_t); using the
 * latter makes ANHRDSZ-based allocations too small.  The expression below
 * equals offsetof(AffixNode, data) without needing <stddef.h>.
 */
#define ANHRDSZ		(sizeof(AffixNode) - sizeof(AffixNodeData))
typedef
struct
Tree_struct
typedef
struct
Tree_struct
{
{
int
Left
[
256
],
int
Left
[
256
],
Right
[
256
];
Right
[
256
];
}
Tree_struct
;
}
Tree_struct
;
/*
 * A string pointer paired with an integer length; IspellDict keeps a
 * pointer to these in its CompoundAffix member.
 *
 * NOTE(review): presumably `len` is the length of `affix` in bytes and the
 * array is terminated by a sentinel entry -- confirm against spell.c.
 */
typedef struct
{
	char	   *affix;
	int			len;
} CMPDAffix;
typedef
struct
typedef
struct
{
{
int
maffixes
;
int
maffixes
;
int
naffixes
;
int
naffixes
;
AFFIX
*
Affix
;
AFFIX
*
Affix
;
char
compoundcontrol
;
int
nspell
;
int
nspell
;
int
mspell
;
int
mspell
;
SPELL
*
Spell
;
SPELL
*
Spell
;
Tree_struct
SpellTree
;
Tree_struct
PrefixTree
;
AffixNode
*
Suffix
;
Tree_struct
SuffixTree
;
AffixNode
*
Prefix
;
SPNode
*
Dictionary
;
char
**
AffixData
;
CMPDAffix
*
CompoundAffix
;
}
IspellDict
;
}
IspellDict
;
char
**
NormalizeWord
(
IspellDict
*
Conf
,
char
*
word
);
char
**
N
IN
ormalizeWord
(
IspellDict
*
Conf
,
char
*
word
);
int
ImportAffixes
(
IspellDict
*
Conf
,
const
char
*
filename
);
int
NI
ImportAffixes
(
IspellDict
*
Conf
,
const
char
*
filename
);
int
ImportDictionary
(
IspellDict
*
Conf
,
const
char
*
filename
);
int
NI
ImportDictionary
(
IspellDict
*
Conf
,
const
char
*
filename
);
int
AddSpell
(
IspellDict
*
Conf
,
const
char
*
word
,
const
char
*
flag
);
int
NI
AddSpell
(
IspellDict
*
Conf
,
const
char
*
word
,
const
char
*
flag
);
int
AddAffix
(
IspellDict
*
Conf
,
int
flag
,
const
char
*
mask
,
const
char
*
find
,
const
char
*
repl
,
int
type
);
int
NIAddAffix
(
IspellDict
*
Conf
,
int
flag
,
char
flagflags
,
const
char
*
mask
,
const
char
*
find
,
const
char
*
repl
,
int
type
);
void
SortDictionary
(
IspellDict
*
Conf
);
void
NI
SortDictionary
(
IspellDict
*
Conf
);
void
SortAffixes
(
IspellDict
*
Conf
);
void
NI
SortAffixes
(
IspellDict
*
Conf
);
void
FreeIspell
(
IspellDict
*
Conf
);
void
NIFree
(
IspellDict
*
Conf
);
#endif
#endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment