Commit b8860533 authored by Teodor Sigaev's avatar Teodor Sigaev

tsearch2 module

parent a6053826
# Where this module sits relative to the top of the build tree.
subdir = contrib/tsearch2
top_builddir = ../..
include $(top_builddir)/src/Makefile.global

# Search the module's own directories before any inherited include paths.
override CPPFLAGS := -I. -I./snowball -I./ispell -I./wordparser $(CPPFLAGS)

MODULE_big = tsearch2

# Object files, one line per group of related sources.
OBJS  = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o
OBJS += snowball/english_stem.o snowball/api.o snowball/russian_stem.o snowball/utilities.o
OBJS += dict_snowball.o ispell/spell.o dict_ispell.o dict_syn.o
OBJS += wparser.o wordparser/parser.o wordparser/deflex.o wparser_def.o
OBJS += ts_cfg.o tsvector.o rewrite.o crc32.o query.o gistidx.o
OBJS += tsvector_op.o rank.o ts_stat.o

DATA_built = tsearch2.sql untsearch2.sql
DOCS = README.tsearch2
REGRESS = tsearch2
# Generate the word parser's scanner source from its flex grammar.
# If FLEX has no value, run the $(missing) helper instead of flex.
wordparser/parser.c: wordparser/parser.l
ifdef FLEX
	$(FLEX) $(FLEXFLAGS) -8 -Ptsearch2_yy -o'$@' $<
else
	@$(missing) flex $< $@
endif
# Generated files produced by the rules in this Makefile (presumably consumed
# by the clean rule of the included contrib makefile -- confirm).
EXTRA_CLEAN = wordparser/parser.c tsearch2.sql.in
# Link the shared library against the math library.
SHLIB_LINK := -lm
# Common contrib build rules; top_srcdir is not set in this file, so it is
# assumed to come from Makefile.global -- confirm.
include $(top_srcdir)/contrib/contrib-global.mk
# DO NOT DELETE
# Install the stop-word lists whenever the module itself is installed.
#
# installstop names an action rather than a file, so it is declared phony;
# otherwise a stray file called "installstop" would silently disable it.
# The destination is prefixed with DESTDIR (empty by default) so that
# staged/packaged installs do not write outside the staging root.
.PHONY: installstop
install: installstop

installstop:
	cp stopword/*.stop '$(DESTDIR)$(datadir)'
# Produce tsearch2.sql.in from the template by replacing every DATA_PATH
# placeholder with the configured data directory.  The prerequisite is the
# template tsearch.sql.in (same naming as untsearch.sql.in below).
tsearch2.sql.in: tsearch.sql.in
	sed 's,DATA_PATH,$(datadir),g' < $< > $@

# The uninstall script needs no substitution; it is a plain copy of its
# template.
untsearch2.sql: untsearch.sql.in
	cp $< $@
Tsearch2 - full text search extension for PostgreSQL
[10][Online version] of this document is available
This module is sponsored by Delta-Soft Ltd., Moscow, Russia.
Notice: This version is fully incompatible with old tsearch (V1),
which is considered as deprecated in upcoming 7.4 release and
obsoleted in 7.5.
The Tsearch2 contrib module contains an implementation of a new data
type tsvector - a searchable data type with indexed access. In a
nutshell, tsvector is a set of unique words along with their
positional information in the document, organized in a special
structure optimized for fast access and lookup. Actually, each word
entry, besides its position in the document, can have a weight
attribute describing the importance of this word (at a specific position)
in the document. A set of bit-signatures of a fixed length, representing
tsvectors, are stored in a search tree (developed using PostgreSQL
GiST), which provides online update of full text index and fast query
lookup. The module provides indexed access methods, queries,
operations and supporting routines for the tsvector data type and easy
conversion of text data to tsvector. Table driven configuration allows
creation of custom configuration optimized for specific searches using
standard SQL commands.
Configuration allows you to:
* specify the type of lexemes to be indexed and the way they are
processed.
* specify dictionaries to be used along with stop words recognition.
* specify the parser used to process a document.
See [11]Documentation Roadmap for links to documentation.
Authors
* Oleg Bartunov <oleg@sai.msu.su>, Moscow, Moscow University, Russia
* Teodor Sigaev <teodor@sigaev.ru>, Moscow, Delta-Soft Ltd.,Russia
Contributors
* Robert John Shepherd and Andrew J. Kopciuch submitted
"Introduction to tsearch" (Robert - tsearch v1, Andrew - tsearch
v2)
* Brandon Craig Rhodes wrote "Tsearch2 Guide" and "Tsearch2
Reference" and proposed new naming convention for tsearch V2
New features
* Relevance ranking of search results
* Table driven configuration
* Morphology support (ispell dictionaries, snowball stemmers)
* Headline support (text fragments with highlighted search terms)
* Ability to plug-in custom dictionaries and parsers
* Synonym dictionary
* Generator of templates for dictionaries (built-in snowball stemmer
support)
* Statistics of indexed words is available
Limitations
* Lexeme should be not longer than 2048 bytes
* The number of lexemes is limited to 2^32. Note that the actual
capacity of a tsvector depends on whether positional information
is stored or not.
* tsvector - the size is limited by approximately 2^20 bytes.
* tsquery - the number of entries (lexemes and operations) < 32768
* Positional information
+ maximal position of lexeme < 2^14 (16384)
+ lexeme could have maximum 256 positions
References
* GiST development site -
[12]http://www.sai.msu.su/~megera/postgres/gist
* OpenFTS home page - [13]http://openfts.sourceforge.net/
* Mailing list -
[14]http://sourceforge.net/mailarchive/forum.php?forum=openfts-general
[15]Documentation Roadmap
Documentation Roadmap
* Several docs are available from docs/ subdirectory
+ "Tsearch V2 Introduction" by Andrew Kopciuch
+ "Tsearch2 Guide" by Brandon Rhodes
+ "Tsearch2 Reference" by Brandon Rhodes
* Readme.gendict in gendict/ subdirectory
+ [16][Gendict tutorial]
Online version of documentation is always available from Tsearch V2
home page -
[17]http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/
Support
The authors strongly recommend using the [18][openfts-general] or
[19][pgsql-general] mailing lists for questions and discussions.
Caution
Although full text searching with our tsearch module appears easy
(the authors hope it is), any serious search engine requires profound
study of various aspects, such as stop words, dictionaries, special
parsers. The tsearch module was designed to facilitate both cases.
Development History
Pre-tsearch era
Development of OpenFTS began in 2000 after realizing that we
needed a search engine optimized for online updates and able to
access metadata from the database. This is essential for online
news agencies, web portals, digital libraries, etc. Most search
engines available utilize an inverted index which is very fast
for searching but very slow for online updates. Incremental
updates of an inverted index are a complex engineering task
while we needed something light, free and with the ability to
access metadata from the database. The last requirement is very
important because in a real life application a search engine
should always consult metadata ( topic, permissions, date
range, version, etc.). We extensively use PostgreSQL as a
database backend and have no intention to move from it, so the
problem was to find a data structure and a fast way to access
it. PostgreSQL has a rather unique data type for storing sets
(think about words) - arrays, but lacks index access to them. A
document is parsed into lexemes, which are identified in
various ways (e.g. stemming, morphology, dictionary), and as a
result is reduced to an array of integer numbers. During our
research we found a paper of Joseph Hellerstein which
introduced an interesting data structure suitable for sets -
RD-tree (Russian Doll tree). It looked very attractive, but
implementing it in PostgreSQL seemed difficult because of our
ignorance of database internals. Further research led us to
the idea to use GiST for implementing RD-tree, but at that time
the GiST code had for a long while remained untouched and
contained several bugs. After work on improving GiST for
version 7.0.3 of PostgreSQL was done, we were able to implement
RD-Tree and use it for index access to arrays of integers. This
implementation was ideally suited for small arrays and
eliminated complex joins, but was practically useless for
indexing large arrays. The next improvement came from an idea
to represent a document by a single bit-signature, a so-called
superimposed signature (see "Index Structures for Databases
Containing Data Items with Set-valued Attributes", 1997, Sven
Helmer for details). We developed the contrib/intarray module
and used it for full text indexing.
tsearch v1
It was inconvenient to use integer id's instead of words, so we
introduced a new data type called 'txtidx' - a searchable data
type (textual) with indexed access. This was a first step of
our work on an implementation of a built-in PostgreSQL full
text search engine. Even though tsearch v1 had many features of
a search engine it lacked configuration support and relevance
ranking. People were encouraged to use OpenFTS, which provided
relevance ranking based on coordinate information and flexible
configuration. OpenFTS v.0.34 is the last version based on
tsearch v1.
tsearch V2
People recognized tsearch as a powerful tool for full text
searching and insisted on adding ranking support, better
configurability, etc. We already thought about moving most of
the features of OpenFTS to tsearch, and in early 2003 we
decided to work on a new version of tsearch - tsearch v2. We've
abandoned auxiliary index tables which were used by OpenFTS to
store coordinate information and modified the txtidx type to
store them internally. Also, we've added table-driven
configuration, support of ispell dictionaries, snowball
stemmers and the ability to specify which types of lexemes to
index. Also, it's now possible to generate headlines of
documents with highlighted search terms. These changes make
tsearch more user friendly and turn it into a really powerful
full text search engine. After announcing the alpha version, we
received a proposal from Brandon Rhodes to rename tsearch
functions to be more consistent. So, we have renamed txtidx
type to tsvector and other things as well.
To allow users of tsearch v1 a smooth upgrade, we named the module
tsearch2.
A future release of OpenFTS (v.0.35) will be based on tsearch2. Brave
people can download it from OpenFTS CVS (see the link from the [20][OpenFTS
page]).
References
10. http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/docs/Tsearch_V2_Readme.html
11. http://www.sai.msu.su/~megera/oddmuse/index.cgi/Tsearch_V2_Readme#Documentation_Roadmap
12. http://www.sai.msu.su/~megera/postgres/gist
13. http://openfts.sourceforge.net/
14. http://sourceforge.net/mailarchive/forum.php?forum=openfts-general
15. http://www.sai.msu.su/~megera/oddmuse/index.cgi?action=anchor&id=Documentation_Roadmap#Documentation_Roadmap
16. http://www.sai.msu.su/~megera/oddmuse/index.cgi?Gendict
17. http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/
18. http://sourceforge.net/mailarchive/forum.php?forum=openfts-general
19. http://archives.postgresql.org/pgsql-general/
20. http://openfts.sourceforge.net/
#include "postgres.h"
#include "common.h"
#include "wparser.h"
#include "ts_cfg.h"
#include "dict.h"
/*
 * char2text
 *		Convert a NUL-terminated C string into a text value.
 *
 * The length is measured with strlen() and the construction itself is
 * delegated to charl2text().
 */
text *
char2text(char *in)
{
	int			nbytes = strlen(in);

	return charl2text(in, nbytes);
}
/*
 * charl2text
 *		Build a text value holding the first "len" bytes of "in".
 *
 * The result is allocated with palloc(); its size word is set to the
 * payload length plus VARHDRSZ.  "in" need not be NUL-terminated.
 */
text *
charl2text(char *in, int len)
{
	text	   *result;

	result = (text *) palloc(len + VARHDRSZ);
	VARATT_SIZEP(result) = len + VARHDRSZ;
	memcpy(VARDATA(result), in, len);

	return result;
}
/*
 * text2char
 *		Copy the payload of a text value into a NUL-terminated C string.
 *
 * The buffer is palloc'd with VARSIZE(in) bytes, which leaves room for the
 * terminator because VARSIZE also counts the VARHDRSZ header bytes.
 */
char *
text2char(text *in)
{
	size_t		datalen = VARSIZE(in) - VARHDRSZ;
	char	   *cstr = palloc(VARSIZE(in));

	memcpy(cstr, VARDATA(in), datalen);
	cstr[datalen] = '\0';

	return cstr;
}
/*
 * pnstrdup
 *		Return a palloc'd, NUL-terminated copy of the first "len" bytes
 *		of "in".
 */
char *
pnstrdup(char *in, int len)
{
	char	   *copy;

	copy = palloc(len + 1);
	copy[len] = '\0';			/* terminator sits right after the payload */
	memcpy(copy, in, len);

	return copy;
}
/*
 * ptextdup
 *		Duplicate a text value (header and payload, VARSIZE(in) bytes in
 *		total) into memory obtained from palloc().
 */
text *
ptextdup(text *in)
{
	size_t		nbytes = VARSIZE(in);
	text	   *copy = (text *) palloc(nbytes);

	memcpy(copy, in, nbytes);
	return copy;
}
/*
 * mtextdup
 *		Duplicate a text value (header and payload) into memory obtained
 *		from malloc() rather than palloc().
 *
 * If malloc() fails, "No memory" is reported through ts_error() at ERROR
 * level.
 */
text *
mtextdup(text *in)
{
	size_t		nbytes = VARSIZE(in);
	text	   *copy = (text *) malloc(nbytes);

	if (copy == NULL)
		ts_error(ERROR, "No memory");

	memcpy(copy, in, nbytes);
	return copy;
}
/*
 * ts_error
 *		Format a printf-style message and report it through elog().
 *
 * state	elog level handed to elog() unchanged.
 * format	printf-style format string; the remaining arguments are the
 *			values it refers to.
 *
 * reset_cfg(), reset_dict() and reset_prs() are called before anything
 * else (presumably to discard cached module state before an error is
 * raised -- confirm against their definitions).
 *
 * The message is built in a palloc'd buffer that starts at 128 bytes and
 * is enlarged until the whole text, including its terminating NUL, fits.
 */
void
ts_error(int state, const char *format, ...)
{
	va_list		args;
	int			tlen = 128;
	int			len;
	char	   *buf;

	reset_cfg();
	reset_dict();
	reset_prs();

	buf = palloc(tlen);
	for (;;)
	{
		/*
		 * A va_list may be walked only once, so it is restarted for every
		 * formatting attempt instead of being reused after a failed one.
		 */
		va_start(args, format);
		len = vsnprintf(buf, tlen, format, args);
		va_end(args);

		/* done when the text and its NUL terminator fit in the buffer */
		if (len >= 0 && len < tlen)
			break;

		/*
		 * A C99 vsnprintf returns the length it needs; older
		 * implementations return a negative value on truncation, in which
		 * case the buffer is simply doubled.
		 */
		tlen = (len >= tlen) ? len + 1 : tlen * 2;
		buf = repalloc(buf, tlen);
	}

	/*
	 * Pass the finished text as an argument, never as the format string:
	 * any '%' it contains must not be interpreted a second time.
	 */
	elog(state, "%s", buf);
	pfree(buf);
}
/*
 * text_cmp
 *		Order two text values: first by total size, and only for values of
 *		equal size by strncmp() over the payload bytes.
 *
 * Returns a negative, zero or positive value in the usual comparator
 * fashion.
 */
int
text_cmp(text *a, text *b)
{
	/* different sizes: the shorter value sorts first */
	if (VARSIZE(a) != VARSIZE(b))
		return (int) VARSIZE(a) - (int) VARSIZE(b);

	return strncmp(VARDATA(a), VARDATA(b), VARSIZE(a) - VARHDRSZ);
}
/*
 * Shared helper declarations for the tsearch2 module: conversions between
 * C strings and text values, duplication helpers, text comparison and
 * error reporting.
 */
#ifndef __TS_COMMON_H__
#define __TS_COMMON_H__
#include "postgres.h"
#include "fmgr.h"
/* Fallback definition, used only when the headers above do not provide PG_NARGS. */
#ifndef PG_NARGS
#define PG_NARGS() (fcinfo->nargs)
#endif
/* Build a palloc'd text from a NUL-terminated C string. */
text* char2text(char* in);
/* Build a palloc'd text from the first len bytes of in. */
text* charl2text(char* in, int len);
/* Copy the payload of a text into a palloc'd, NUL-terminated C string. */
char *text2char(text* in);
/* palloc'd, NUL-terminated copy of the first len bytes of in. */
char *pnstrdup(char* in, int len);
/* palloc'd copy of a whole text value (header and payload). */
text *ptextdup(text* in);
/* malloc'd copy of a whole text value; a failed malloc is reported via ts_error. */
text *mtextdup(text* in);
/* Compare two text values: by size first, then by payload bytes when sizes match. */
int text_cmp(text *a, text *b);
/* Address INTALIGN(VARSIZE(x)) bytes past x, cast to text*. */
#define NEXTVAL(x) ( (text*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
/* ArrayGetNItems() applied to the dimension count and dimensions of array x. */
#define ARRNELEMS(x) ArrayGetNItems( ARR_NDIM(x), ARR_DIMS(x))
/* Format a printf-style message and report it through elog() at level "state". */
void ts_error(int state, const char *format, ...);
#endif
/* Both POSIX and CRC32 checksums */
#include <sys/types.h>
#include <stdio.h>
#include <sys/types.h>
#include "crc32.h"
/*
 * This code implements the AUTODIN II polynomial.
 * The variable corresponding to the macro argument "crc" should
 * be an unsigned 32-bit integer (crc32_sz uses unsigned int).
 * Original code by Spencer Garrett <srg@quick.com>
 */
/*
 * Fold one input byte "ch" into the running value "crc", which is updated
 * in place: the low 8 bits of (crc ^ ch) select an entry of crc32tab, and
 * that entry is XORed with crc shifted right by 8 bits.
 */
#define _CRC32_(crc, ch) (crc = (crc >> 8) ^ crc32tab[(crc ^ (ch)) & 0xff])
/*
 * Lookup table with one entry per possible byte value (256 entries),
 * indexed by the _CRC32_ macro.
 *
 * Generated using the AUTODIN II polynomial
 *	x^32 + x^26 + x^23 + x^22 + x^16 +
 *	x^12 + x^11 + x^10 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + 1
 */
static const unsigned int crc32tab[256] = {
0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d,
};
/*
 * crc32_sz
 *		Compute the checksum of the "size" bytes starting at "buf".
 *
 * The running value starts as ~0, every byte is folded in with the
 * _CRC32_ macro, and the bitwise complement of the result is returned.
 */
unsigned int
crc32_sz(char *buf, int size)
{
	unsigned int crc = ~0;
	char	   *cursor = buf;
	int			remaining = size;

	while (remaining--)
	{
		_CRC32_(crc, *cursor);
		++cursor;
	}

	return ~crc;
}
#ifndef _CRC32_H
#define _CRC32_H

/*
 * Returns crc32 of data block: `size` bytes starting at `buf`.
 */
extern unsigned int crc32_sz(char *buf, int size);

/*
 * Returns crc32 of null-terminated string.
 *
 * `buf` is expanded twice (once as the data pointer, once for strlen), so
 * the argument must not have side effects.  strlen must be declared where
 * the macro is used, i.e. <string.h> has to be in scope.  The length is
 * narrowed to int explicitly because that is what crc32_sz accepts.
 */
#define crc32(buf) crc32_sz((buf), (int) strlen((buf)))

#endif
\n
\n
\n
\n
\n
\n
\n
\n
\n i8 hy qo xa jl wr le l5 ja jx zf ro vw wd wa cc mm wh fn yd td l8 ec rv th oc ix ir sm y4 gh pr qg ue cx ww zv c9 zv tx eo f5 gd km b9 wb rm ym yl xj u7 xz uk iq tm ux di if uc hc ge
\n gr ty ph jh po wa iw ag wq r3 yd ow rb ip et ej yl a9 dk pu y6 su ov hf xe qe sd qr zt kp ml ea tp pg dq e3 s3 hh gn hz j7 hb qs qd v0 v4 w0 nu ee wk ez un rd sz wx e7 pn yf gh uh ki kx rb qv f1 bh sr yj ry r2
\n q1 q8 wp w9 vs ww rq de qt wo qp sa rv mc sn u8 yl
\n hv ra sa fr qs ps 4w z5 ls wt ad wy q6 zg bd vt wa e4 ft w7 ld es yg et ic pm sw ja qv ov jm ma b3 wu wi qy ug hs wh ex rt tj en ur e2 ut gv as ui dy qy du qo gv cy lx kw xm fl x2 hd ny nu hh dt wg wh rs wb wz yy yu tj ha ak rw sw io h1 ux ku v6 wc qa rv xb s8 qd f2 zo k2 ew w4 yh yu yi
\n rs tt gp qh wt q6 lg zh vr b8 uy uu lh px jm ww qe xu fp fd rs qu ki dr fn gq gw jv oq zt 2r lc ke wg l9 x3 x5 7g vs ar e7 u2 s8 t0 av dj kl nm u2 zp gf yw ee oc tw a1
\n qs uz wr gq q9 rl e0 pe dj a9 hp qw aw er kq pp uu pl zo wp fr r6 ej pv u5 hh av lw ko qc pn qj ez n8 wn eu tq
\n po h9 rd qs hr la u0 um me wp 0p rl mv rc ab r0 fe fj fk qn jh iy cn lb bl ln b5 ll yg yh qt qp uz od dq as gn cr qa wa cu fy zy vo xk eq vg mr ns yy t7 yi op th yo ov pv em tc hg az io s5 ct os wu lq dr mp hk si gx
\n hm k5 pw a5 qh nb q3 ql wr wt z7 oz wu wh kv q8 c3 mt mg hb a3 rz pz uo y1 rb av us ek dz q0 d3 qw j2 ls wy qq jf ng eo gl ed ix em he qt du hp jc f2 m9 qp hb l4 gy zf l6 qr dn cp x1 oh qk kk s3 hy wg zs ot wj sl oz ie e9 ay it u5 ai hm gh py hz qk ki h8 ja zu qb ei vc qj hg ev h6 yh u0 tb id
\n qg d1 bt c5 r3 iv g6 d7 rc ml gk uh yn y0 zo uh qd wh ib uo u4 om qg ql yz
\n hb a3 q5 pl yj lo qy ki sy fo rj kk zq dl wn 7a zi wn wm yr w3 tv r1
\n ft k6 iz qn qj q2 q3 bl zd av ro wo lk tg ea ew ed y1 ia yl ic g6 po aw sc zm qn gl wq qw zr jp wt j5 gs vt qt yc rr op yw tl ye hr i8 tb uu j0 xd lz vu nl qd fu wg pf wj bt ee wh t2 tp sz um oo tg ha u4 f5 sw pq pr ju qk mh ki zb vj ob cx df hj ef cj q6 u9 tv rv o4 sy ru fq ir
\n ps ko uk tz vv um t9 uk k2 ja o6 ob
\n qs nb gh ld q7 jc sp el w0 py qx i2 qe la rl qw tu ti dq ue iv oi wa qr ed t3 fg oa of rr fv qz xn wu wq te hx
\n yb ty pq az fi qg qn la bu ji lg wg q8 mi cv rl up lg om oq ym pv in aq gg js ha on ww qr bj vn pv he b5 mh qe cc mk qt rb eu qy rw tr qo ec op sn oh e2 ao iv e4 hy dt s6 qt p1 hb ih qs wg x1 bd l1 t1 ro r9 uv wb aw gu os t0 ah e0 s0 hj pe or qj zz ql fd ks qv bq qm bg ec ry oj u8 u0 yj ru r1 yx o7
\n z4 wr qz cg nq ir bb gb w7 e5 zc pj e9 px uo fp ts aq db q9 iy qe zv xu a9 l1 mb qw tc qu fi hw ur de e4 hk lj wo wf fi ep rl wh vh ek vp oi sv rh ay hj px aa er tv do ir
\n tr o9 gb tt pp qa qs a5 ps rf q1 kj by ub ru ox co o8 ny wp wa ws rd kk b1 zc rl rz uo ts ig fh db qm q0 bg rr fu ld lr wb en nd cw vr hy rn qr en em au p8 so oh ut hz gq wp ow be ky wj dw t1 pl er wc ot na r9 wl ou un um wx iq sc e8 sn re rr f7 hz h4 ce wz qx wx kp px tl tx ai wq hf ec 6u rz og yt ok yy yp
\n sa pp a7 qm qh of je qj lo ph wt h0 ji cg z8 2v xs zl mo ik hm on tu d8 av ot pn iv ez ja qn pq wy 7r mq qu p1 tu p6 ti ur pj uy ui qo i9 qa nj xm s1 ya fb 7j ro wn t6 wz yu iq yi go en pb aj f5 hf ug uh hk av pr wl wz im ja v9 u2 ks it br wv wn se ia o5 ox ei r2 ig aj sp
\n sa tn z8 ew uo eh g8 zt wy 27 ff uh te en pd eh hv 2e wh ty oi sw xx 2p qs mx wb q3 rl eq aa eu
\n d4 ef ta zq j2 em c0 vv wf kj dw uk ql y9 rn
\n sq nm kl w8 ur kz c1 pc y1 g4 oi jv wr zy ew by se ec yn ti gq gt rd l5 ej yp tk da qz qx ir wm on q2 to ew
\n rd gu z2 kj qk bl 6d wy nw xq iu 8t ri uc kq nx ql oa vi kd o6
\n ra gr he wy q0 ow ti ia pb ha qr lv ms qu pu qw qr ml qt ep sv i5 of fm oe nl xh x1 xz u4 ha ao fc ug pw nh n9 qv kh vx uq w1 u0 ei if
\n q1 d2 qz zd jd qb wj nt ah mj ea ed y1 et fj qe en b8 ty iv ht fv tn tm sg jb ky ai en us tl ud iu zj ql u1 ci ru iw tw
\n fr ub h9 pd ub jk vh z6 wu wh wp 5z yt w9 w0 uy om tl rc r6 ax d7 et y2 tw dz se vf ii m3 lf b4 jf vr qw qy uf es qp en tl to ye ue ph e3 uy i0 jl pz oe qo zp wp ft ka zf qd wd kr qf l9 mm wf qx ef t3 x8 ex rg ev s8 ys it da rw al hn tc f6 fv nd nc ad fj nr x0 bx yq ti rx ok tb hx o8 dp
\n o0 jq un xu q8 wo qq gg ta oj ec az dl bl wb
\n o9 ij pq gu gp nv qk gg la q4 nw bo z8 9a iw wu q8 eh wi nt jk ut ys c1 r5 up y1 yl py oy ht gd td db qn cz qw lp re c7 dh j5 ia bz dj qr qt wd wf qi rt sv ul uz tl ta yr e4 tm sg pc jv hc hv lc xg xm br vf r8 na wl ou td wc up rj s8 e8 ir ys ii qk p0 lt ho wb x8 bv lw w1 rz ew aa rv ry gx o8
\n tt hn gn un db fu uq qf d4 q3 pp ji lf wu bx q8 hx kb ny t5 bn hb ex yf ef yj g1 g2 to yk g3 ej sk hy dv qc gj qv sy bg wr na wy bx z0 rc rm ml ug te qp i5 ue oj s4 im oq qt gx sa gt l4 sv at v3 bq mv wd x3 80 x8 aq xk rg yp en gs us dq ak tz al tx o2 dg f9 kv or h4 jy k1 jo h8 kp lt os kh as tn eu ul tm su an tw sp
\n za yi pe sh pv y4 y5 hy th jg qy qt ke ti ue qk yy ie cq wl p0 lw mf er w5
\n k9 bt xu kc me is o5 z9 kb gv ur rc oe sk qn ve wi mm rn eu to ue uy qa xf by t1 td t7 aw up yf pr dk cg zr sc 3d at rw ec rl st zo rn do
\n o9 z5 wy vi ya ea ee fo gf va ov ww rr wr lb ro qq vr gj nw ru ym iv s4 hu tm wo wp zs br fs wg ej du y1 yt yu e7 eb em dd pq v7 cr um ae oz 0z kc tq rw zl rt wb y9 xv tm tq di eo te gc
\n tt un qs qn a7 qh je qj k0 o1 wr q6 wy ab q9 qm wr ea er eh pi hi sc hs m6 w1 bv lo zr tn yk ep op es ve xx sb ux hg sa gq qp wd n2 zh wf xf wj y3 wl e7 os u4 on ip kn ko qp s7 ly zn ba wu u4 kh f4 zo y9 q6 oh iw tq
\n qa a4 gu a7 cp z1 he ma q7 lu dp w7 ea rc ee d8 y4 tw ez im ae bv ii qe vb zt lc lv wm ro lk qr hp re tw yv es fp as zu oe qu qi bp wg cp p7 v4 ek rd wc ar rj tj e8 od e0 pm h2 h4 in qf wu wi 19 bj rl rc ee yj et tw ep
\n gv qd kj cd t3 c3 ih ws rg mc rx lh fd g8 gh cc vw b7 qe at j7 qo ws wg oy t6 t9 go eb e8 us u5 rq oe zj jy oz cj wb be ei pm og se w4 yu xw su yx if
\n o9 ub rd hw gs z3 ql nq ru wg jc 1t kv mr zm ah dd jk w8 ej aq ig y8 pp fj li wq jj cc qr no wy wu en bx yr qy oo es fy pd tk ix ph yr sf vx pn p2 jq fs ed oy yk os ie s9 u5 ak ud gd uf kb xc u1 xm eu xw 19 wn vh w1 to ee er aa rb rn ru an r1 ei
\n se kl 7h b6 xs ym tp an ta qb gn uo pt xi cl qp qy op vr ym ri ti tl i5 e1 e4 i9 ff i5 qp jx ht ql uo en pe ku h7 iw wn w4 ey ia si
\n ql xt wi k6 ew sf eg up eh oy sq ja g9 i3 qe cv l1 qq bv w2 la eu wg ec ef oh fs tb pc xd qs nl qu fn dy oi iu yf re fc hj hk xv zn zz w1 ew
\n po al hm qk jt cd ju nm li rs w9 ev ut ea 2f r4 d6 ey im pa nu wr m4 is bc xz w3 eu tb ha ft p4 ti to hr dy af i6 iz r4 jb x7 wj xg na rf gi at pn gd re wq qz ze bo wc vz sm zo my ye u7 oh dk w5 is yx tw fe dp
\n jl za gk cm wu vq jc zc iu mb oe fo fp ic sc 2l hy qr eb p5 pf dq pa fy lc td sz oo aw u1 rj fl tz nx aq xx oz xb 55 y0
\n uq wr lh jv ri i7 ss qo gy bt s3 u1 dy ox hg it
\n ps hr lf jx bn qq up eh ab yl pn jg ng bz gd qr yw i9 j8 zi 3v oz at hd cx oj u9 rt uz ro ov
\n sq ga ny se cj id rg r3 pk kv ee sh ek dk sz pp q0 mn az kp ei qi ry em ph p9 gw hc m0 cp ea mn yf t1 5y wx ol e6 ec u2 e7 uh uj uk av ql lw qx zr qv mw qg cq ww wb pw tu w2 mf ut gk af yo ie ob
\n hn um a6 q7 af du r4 up tp ej sk lo le m8 rp eu ei qi ky op of tp ur oj hu tb dy qu gt tf oz wc s7 e7 ua pw ax nb wx wy fj wn 18 wv es yq ok w4 uz yx yc
\n pa qg qh q4 fv qz kx q6 cp gb c6 pr eh id in qw we bk wn qq b6 qy qu es ic s1 og gn wp op qf ic ro os yp rj fj ag oc ay da fv wl qp f1 yx n7 ea w2 ly yj iq iw rm o5
\n o9 ps d3 lp wr qc md e5 rk w0 pm gx lf ku qt qp to tc pk fb tb qi lh nt yd vt ot ra tg gd zx wx vj rq cr hm ma jp vg u8 rt ei it
\n dx dv h9 rf qf uw a8 qh uv k3 ri is yr r3 eq uu tz yn y6 qc ps jf wq xe wx lc qr j4 ku xx nb 4z sr tr uq p6 uz of i6 s1 fs pj tc hu qu hz f1 hp lj s4 qx tg yp gs ob tz ds sw pm ug hm ip ql le vl wq tb xv eq w2 yg w4 st o6
\n qd q4 pa z6 qz ia 70 r3 mb iu es r5 gh t9 cj vz qw mb ko vt qr qt gh qo ty eb kq n1 xb ef rp ek gu rg s7 rj sn ai hg o1 uj pr jt fg v0 tq tx ww bj bm ct w1 zi rn ox iw ri
\n al rd w8 vp yd yk r0 pi po se sr qa l0 qk ir e9 hm kc rz aa w6
\n un pq qd a8 z2 qk z5 ws bi xy qx wg wp t4 mj gv qm rg c6 w7 w9 es y1 g2 ej yz gg qc qn wq qw m9 wx qe kr 27 fp fq m7 xp 3p qr rr tr ij il eh au s1 uc fx ut qu sj j8 j9 ya nr rz wg wh eg x8 sl t7 yu vf ay ds ap re dh qg qh qj hz qk zz qx k3 cy iq ox qv eu nx n6 6r lq n0 y0 uq tb sy iw fm an
\n yv dc qs gm q2 cv ok wt b2 cj wu mr zj kn e5 iu pz r8 pe fp ot tq a9 y5 sz ez cl wq qq wv a7 ln ky jd qe qr yx rm qi ea ln te y9 ev en eh iv tx e3 as tn j8 wf xh co fl nc wk xz es rx ee wh ub aq u1 ar e7 up it iu o2 wl ko jo cu pc wo al hm uq rn ul yz ro
\n pw na wu jd yf oe qr xr sk wa hw ql wg x6 s9 u7 am
\n uv tr ub k7 qg he u6 jt gs z3 by tn bi av z7 jc ck q7 2n ny cx km mk rf pj xi lh sf up yj to ia ab tq fq pm fd qc qv ps su qw fu xu cm zb bc qr qt tn ei rw gl p1 xi qo tt ed ef ri iz yw oh tc uy tv as qu l4 qr t4 wx e5 ae op oa em tz gd dq rw ug dr ux qj be ko cg nl je aj xw q1 vv ax rl w2 yt aa u0 eu ah
\n dc ph sq jt ql un q5 cg lk w9 ur uy pz uo sx qv qq cc ln fu ym ho su pn qa bq pd wj wj yk ou wl rk o2 pt uc km ja wm ry rm ob
\n gb pw qf we q3 ls q4 sy bl lg q8 t3 wl rg ed io ef if oi hp lo kw wy qw ei yz rt es p6 fp hi qo bn qw wg cy np uv yy oa uo ir of em ug x9 qh nj n8 ea u8 er w6
\n ij dg cd lw gk wu zl dd eb eq sg ia am in wq xt nk wr xj qq p5 pd pk as sd fn lj jw fk l9 nt wl oo fj sb u4 gs fx hg o1 dr fb hj h8 xc yq ch er e2 aa af ah ob
\n a2 o0 hn pd iz hw jg q1 jl qz ip le me wi bb r3 z7 g1 eh td sw g9 qq c9 vy ud qo es ec tj uw dq ur hj dy oe zp lk l5 fl wj ys t2 ej t4 ek rs sl yu oa u3 gd pm rw h1 pr h2 py wl 2p s7 wq 6r mi 10 ox o6
\n i3 qw ee ur cy nx r2 wj t2 ub ir aj cl qm u0 oz
\n qd qn un qz xy nq an kg hc c6 w8 93 eq ts g9 wy mg w3 rb 3f wf rw kt op es ef at em s6 pc wg bw x1 xl wg hl yk yo eb ud hm hl py wb u4 zp bj bm se sr sy ox am
\n rc ix qs ls qy at ut pk yo ys ec hs lq xv ks
\n yb al zf ws cn ac ih th ww vb kt b3 xo qe qi te ea p8 tn qd ci ix xk pk bg rc tl f4 wb rb ru
\n iy qd a5 jq jw qh sw fv oz cj hc qq ya ee yn pr av or us iv fa qb q9 bh ns d0 qe i1 b0 fh qy qu qi ry os ul hq ri ix e1 ao p0 qt sf qi uh ll ko lx nz sg jz hq sh p8 x3 wg rd sx yo yp u3 pv rq ds tc rr wx lr xb wn ep hh bk yw q6 og yr yg si tq do if
\n hv qa qf jg he q1 kj qz bh lr kn rj th kz ef eh av pp i1 ar gl ur lr bz xp yr ze qt tn es fl hw s5 qa ed t4 wz sx rg sv e9 fz hf al h1 av bg ym ee yg
\n k8 nn jy q4 wd lf xu q9 a1 4v yd mb r6 yh pb ta g6 dn d3 pl j1 jk wc cn wy 26 rr te ti fa e4 uy fb gr hb kd lc qf p5 wh au fa iv xo hf ot eg ra wv tp ec yo ah iu pw hj ac h3 py k2 u1 wb rl rz yt er w6 ru af yo ep
\n qd uq qh qm q3 vg qc c5 rd vp ut eq on yn ii xp up r8 d0 sz qx ue pl lx qe wr qr lm nh qt ha qo ki ri e2 tx iv ao s3 ow kp xf rh ya r2 rk cw nt by wd j8 t1 hk y1 ns t6 wc ev sq rq yf ux aw ch qs u2 zn sm rt wb bk yq dh 8w w3 rc yg o3 yi ox ov ir
\n u0 q7 qb ml or nu b5 1l xb tr tp in qt hz so v6 dq o2 qh wl nb rv fw
\n ss jr zf zh xt oy hy aw y8 js ob wq ny or vy fi en tb qi j9 gt ib ot oy rd e5 y6 tg th pt gq wz rt rl ew fm ie ri ir ro ah
\n o0 qj h9 wy ee g9 gk jd fg qt 3d fu ru iz tl fd tv ad hl wp oo wf nb ez sv tl f4 dr oy rp
\n ak il k6 qh q2 vd k3 zd bo lj k7 km 5c ut rz yd up ua is r0 qn zq wq j1 qe cv pw fu md bw yw qq ra rw qu ex ik at y0 ru ti yw fz ic ao ow gm jc i7 nf p4 fj xg kr br xk bs mb pk hl wl ta ez sv e9 us om rw ap gq wl k2 qz h8 gu kf et ru tq ag uz rp
\n yb az dd fu rf hw qg we u9 o3 q5 q6 ag c2 o7 wa kh w8 vo mc yg tu ua uh ta tw ih hu fj su bg ww bh kw ry ru wy ky wu wi fw 20 b9 qo ik oa ev hw s1 e1 e3 fc uu s5 tn qy hz jc do ou jq gb kf pf xl x3 yv lz iq eb e8 os sn fx dw qg ql wc ka n8 gf ly se tv yk di si o7 r2 rp
\n il mj vi sd ia y6 wq rm p5 ux ho nr ef ej wq iq fn
\n ft cs uo io er ic tw ig mm c9 xk ab ze uw i5 s1 e4 pl ui f2 lj p4 sf x4 kz ej ez eb ov of rw dy av qh f0 h5 ki qx cx eb og gk oz uc
\n ul io zd kn w9 y3 wt qq wp jl i9 jk ca h5 wx wb tm do
\n iy hv cs a2 ee yz y6 gk kq em qy uq ts w0 rq rr vt pb nc q5
\n qn q3 vt vu yk ej fp tw zm qq qy y9 hh wo wg rh ep x5 wk mr el l9 av hz w5
\n hq qz wy cx rh ur w9 e8 r4 fq im fj gj dm qn gl jn iz l1 yh mz rw e2 qo wh nt wk zw t7 e5 iq fh eb sn ud az uv fh sv dq q1 ku zs eb ue xq rn o6 do
\n ub lo sq wr d1 mt o7 ts t5 rd xe iu yg ot gg se pp qc js lu xt j3 j4 wt pc vz 5o yr qw zw qr eu db sy eb em fo i0 ad gw m9 ig ih lc od n4 pg rx bi ni kq wl aw e7 az jo mk bo wb ei mi ep wb eq di do
\n q1 ub xt db wt ws ik pl ee or to ej ic is fr jk ls c9 qq yg qt eo rw tp p8 dy pz gm hz or xs bt x8 t4 t8 s7 oj lt wv vx u7 w4 et ox yo
\n po o9 ih dx qa rf qf pd d2 kl ad lh kb bd qm bb b1 z8 ew d6 yg d7 ym ti eh ic iv oi y6 sz dx qn ut qm gz pj zw jj 4d bk wb lm xb ke yx oo qp yb yn en fo yw fp e4 aa fd jz qu gw qa zs nl v9 wf qt qi vg ni wx hk 9f sz tg t0 ga de re io av h2 jt x0 h4 wx wc fg rb rn nc yz iy zp ds ep zw pr xv rz yh yk zp do hc ep
\n hb ty z2 qz qz zh gw mg kb ve zz ti tp py el jp tg qc ar qv gx la qr cn lr nd ng ve qt 6g ml op pd uq uw eh i8 uy dt ho j8 wp wd qe xm w0 x4 qk el e9 pb sm pn tc gt ce oj jr mi ds wb ym ew u8
\n ij yb hn u7 cd gj co dp lp b2 r5 ed ti pn qx g0 jb jn jj we bl ri ot pi rb yc sv ty oh ph hh e4 hy sd wp ll ft l7 wh ca ys wf wb t7 sv uo sb sn ha pb sw de un qc bz wo en as tb eu af eo
\n d2 k0 wr q4 q5 c2 sj iv pm g8 m1 l1 5s ij aa lb xm vf ej ta ar th od sm cw gy bu qd q1 u8 ry rn
\n qa ux q3 mj ex yu zx rk gi rl ya is py am tw ja js db ps dn qb qn gn lc pe qq vr qr eo qi ec oa ev uz yq of in ho qo jj jk wk wd zp wf lz t8 tk ha pv fz pn ug o2 pe uk kv gq v7 oi qv wv dj tv fn fw
\n dx a3 k5 um uq jd og nn q5 qx cu wp rd ws d6 px ac oe rb up tp ej ek ih ff qc gj qm xk b4 dz jg sq jh eu yx eo re es ul yw tp i6 pj ho qi qf sn og xo yv pk wj wb go ar uo eb ir iy pq uh qg h6 vt wv sn n0 rx af uz hx eo
\n yv ub ty gn gu fu dm ca q2 d4 cn ad iw k6 bf zl zz 2o w7 uo ee yk ix g3 am fw oi jo se ha vs qn iy qq 24 bl j6 g4 cw jv 1l ei qy ke j4 qi ep of ao hh tb gm sh lh vc uf vu wd p6 xm qt kh rk l9 s4 wh mr t4 oi rf iq op ox u4 e9 fk u5 it re uk f0 kb nd qk ce jp lr cy js qd qb sb tq n7 n8 ed ue tn ox o6 id r2 it
\n qa pa jd qn qg jt gh q5 lg ag qv ah qn vr da rh w7 b2 rz rx d6 d7 eg eh yl a9 ek dl tw sc hp ha su gz lo qe le ns kt qy qi 1h kp mz qu es yb yn p6 eh fs ok as im dy px gq qp qs l6 iv rl zw dr 4r hi wj rp t6 go s8 e8 at e9 f3 ak dg f9 qh pt dz ww rv wb oc pv be wq cs q1 xr xx eq yr u9 sr tb yl tq if hc ig
\n a5 co dh bt lw ck lh w7 3e mp r3 rz yf yh uh eh td y8 fg pa ar va dm su q9 d5 qw re vh he jc 1g ib xz qq qw yg vt rn rb cb ry ym em i7 hr ff f2 qp rd lx wg lb kh va jv qi xd wh wc el un sz tf gu oz ae e9 e0 iu dr io dt fb dh jo um wx s5 oa kx ly rn oc zy f3 hb tt wb u9 oz hx if ig
\n ak o0 qd q7 eq g1 y2 pt dk g8 qb vs qe dh 5i pt yh qo ul tp oj sp oq di uh zg xn rx tp tf ie f6 cg rv zm xw zq 5f md sr yk ru ro
\n a2 tt ub rs ij ml ow pe el gd va ue zm sa pq lc yw qi qw lv ep qo uj ym tl ye hj s6 uf qp 82 fk y1 wl oi t8 fk pb tx o1 sk lm oo xv n2 ad fk n6 dp on q6 rv
\n qf jf kk nm oz q7 b2 xo fw kj rh ua oe yl gh vd qe gn wb pt wi z0 se gj 48 of i5 oh so hz wp ae wg nc kg xf ev pv ov au iy az f7 qb q5 eq yr tv yy ol ry o4 oc di ep
\n po o9 dc a5 jd z1 sq ws b7 ti r9 sl ez aw tg zm si ng qe ky b5 pp eb od jl ff oe ce qp gy yv qk r4 xf kw iw sn tx gg uh cq ql qa 2s mt eq rb dp
\n qs qz cd dl se q0 lv eu yi rw qo uh uj ul en tx wo qd e6 pv gg je zx kp qc q3 ye en
\n un qs qh se ws lf so eq yf ef y3 g4 zb hs q0 no qw j2 y0 uu fb di f1 kq oa ul t3 ot fh ak yf fv dt f8 jo sx wx at wn cs lq zc
\n ub qa qs ik pw uq a6 pd dm d1 qm d2 qk cv zd bi wd ne ah qb kg kh ij 1p rk w9 wt r5 d5 px uf eh yk oy pm i2 hp st qn si qm zw we ls px lr ri qr sr db hp qu xk fy os eg en uc ur i7 sa hp vn qs kw dn od rh xj w9 wk ph ap yh el oi oo e7 gp ay s0 f4 gf az jy qk ql qx 1k v9 qc jq zy n5 kg hd ww wv bj hj ur er rn ry eo o7
\n df a6 dn je ql no q6 ox wo zl bn rh ya mv e0 yn pr gd pi y8 i4 c7 g1 j6 wo rv eu xg eo c0 yx ea sv os wp qw wl ou un t6 u2 os of f6 dt f0 jt wc ja ae qv rm ds pq y7 qk ck aa ux
\n db iz jk zd wy wh c3 zk 2o rj hw vp on ed ac to g2 r0 id ta th qb dm pj m8 np oe pu bb tc gh ml rq uf tu eh ye tx gv pk jv j8 lk xs kd fi mx be wd t3 mr wk wl td eb ie tz dw rw pm re fb dj h6 ql wz wx qx qv u3 vz xe ex 2w ty ew xm oz an
\n ty a6 we wi ro lj bn rh r5 g4 aw jd q0 gz xy m6 wu qq et oo ex qp tr y0 fi au e3 oj gm px lg wk tu ek tg u2 ov em dg uk nd qj cy hp wq mi bj q4 ia fm r1 ei ie ux
\n tr qg h7 qk jl kc jr am mo w8 e8 td gl kw sd jo qr vl gs qe b9 mm fh eo uh ft ik e2 i0 uu ff qu f2 jq v2 wg kg ek aq wm yi yo s8 e8 sq ab cw wt ck pb pn xl bj yq wm ew xq su r2
\n qk wa q6 jj ws ut gd gf ly ec pj sa pd wl e5 wc da kx zk zz zb wv rm te
\n jq uz nv ql as jx z8 q7 o7 yt rl ea e0 ym y2 pr ia sz sq sr qq qr vk oe pe lr bl ll rm yx y0 eg ti e1 ue uu ui jx zd oq kd rg lv lb r1 fx ro me ts ay f6 fc io qg py qj qk qs ky qh y9 ok o4 am
\n qh dl jt wy a2 yk y2 i4 zq kq we bb dg m6 qq zw rt ta tc ff xs xd qf g0 1d yg du wz iy sw tc dd hj hk mh ov zi wn hk ee yj af
\n ra ak uw q3 cb ji wy fw gw t4 mu ts qw ww rj vo rl yd ug d9 gj i3 zw qw wy md qq bv rn qy pd tl ic p9 hr dr hh ui sf f1 i6 ws cy es ef t5 kr ek oo t7 ec e8 u4 od dq ji ch jr zm jy q1 zp yq 2e og yo tm
\n tr tt qa qd jf pg qh jr sw ao q3 qz za wt js bl vw q9 ws uu w0 ya pl yf rx ee tu r7 gg dv it lo ww up js qq qe lz eu qy rr yb ri ay ye ta tv im sh ss uk qd qf bw ro sl wl t5 e5 um th ha fx re ii fv je hk ot cq km h8 ks bk vl qn xe te tt rl u7 iq ry ag dp o8
\n sa z1 qj q2 nn wr z5 mq xu q7 gv t6 w7 r4 c1 mb sd ed ym ot ta ht ts tw gd tf g8 se ar gh fh qv qn zm hs qw qe oq wc xj vz xl wo rn i2 sr rq at yq uw s1 tx hy fm pc wo hv gy vu wd lc ul p8 wk wk el oz oa rh gp pn gf fx fc f7 rr dy x9 uk f0 py wl v7 cr ch qs wv nz lv lu 5o xe ym ly er yi ia gl ox r1 dp
\n uv qd hm qf gp k9 kj we lf bs ej 2i el wl t7 rj w0 rz yf ys r8 tp py tq tw dl im qc db qq sd ry c9 oe if qw aw qu uh tt p5 p7 p8 oj zi oe qu qi lk j9 sk zs ka lc wh wk zq mq vh t2 ej r9 mr ez t6 e5 op rk ga pb dq ap f9 py qk qz wc kd pv bd sm dr u7 mf o3 yk di r2
\n po a3 uw q5 q7 ck kb zj td zz yf jd wq xh ld qr w4 p1 ij fu tp qq sv y2 yt t6 e5 op dw iu pw jp ka qv 4u qf rm vb w6
\n fy a6 qg cs z3 ql dc jz wy me cj o6 ba kv wp w8 ea r5 uo fw ib ig g7 gg sy bg qr cb cq ro xl xv ex tt ru pd hg im oq gq ao rl pl aq sz t7 e6 os uf ug gg pr ql qz vt mj px wb ci qf ov be bg ww mp mi rz u7 w3 ei yc
\n gh cm ca rg uy pm y7 g8 lx yc qi re uh yn uq eh tz ph wo cr sv fp kh sl oi sx ov ga iu h1 je fd rv qv wi jy yy ry o4 tq si
\n qd iz qh q3 cg lf wy xa ez eq om ug eg yj fo fp yz qx qe wb or jg xb c9 p4 tj y0 iz tc oj tb i6 p1 ka zf qe yp wj mv ra ez rd uh pt zl lm sz wc lr bq oc zq sr af gl ei ux it
\n a2 qa h9 qh q3 fn kx ve wz us sj yz fd g7 vh c9 xq xj ln tz wp wf kg kk by vf j9 5y un yi e6 rh sn tx hj kn rb
\n un qs fi qm jk js bd o8 bx vt eq ya xp yg pz ym dj fp tp ta oy dl qc cx qq m1 rt wn d0 wm yr qw aq qt tb ha p1 uk yn ef tj gv im sd hk pz jx zi wa wf ba l0 wd mq ej wv t5 ek iq pv ov f3 em ak rq hn hh f7 uk qh ot ju ng ji h7 wz cr v9 bj bk rc er ia is iw ei id ov pu hc aj
\n pw gm qd jf u6 z3 jl q4 wt bi lr id wa wz w8 ya ev ew r6 yn ee io r8 ip ej td im si j2 jo d7 m4 pv iv yg qi il ti i6 ta ib ap fb hz wd vu wf wh kt kh og kk nm rp ti ek ns t7 y5 wc ae ir pv hf ub wc ho wb wn mi rn w6 yk tm te rp
\n o9 un h0 a6 pf iq xi tg w8 z7 r5 om oq eg or y4 sr fh zv vw zq tc ws rq db rw eo ym tl fv i9 pz jx j7 hx oi qf x2 l9 x3 qj by to el sx ys yd ao az uj hl dl tj nz wn kg kh on wv w1 w5 gl ei
\n gv az ql nm rc r7 yl ja zn q9 xw no iz qt pk x4 l4 tg u3 of zk wc go qb mo eq u7 tv rm ig fe
\n o0 ik qd um qg k9 mb bu wy bx ny ws hm ea mb iu pe eg ey sh uh g2 ic iv aq td qx ja qb ha 2j lp xr wc vl wn wi sl tx qt rq ec vw of uw sm ic qy j7 ns hb 2q kw wf vp 1f x1 5c zs y1 rg tg oz e7 fh eb ie up e0 ap ve wq zz cr wz h8 wv go ly fk az pr rz h4 ew w2 ok w5 ia si ro am
\n dv d2 cd qc zt 25 xp wd te es sh eo wn f4 wo tv oc
\n uv qd qn hr bj b1 mw lg io sh lo qq xh m6 28 rn xx p7 im qt jn jw bm qf r1 mn ny ed em ii dd wn cz ds vc wb hh q4 yw ur rt ie o6 ux r2
\n rs dh z1 jr cb vq r3 eq om y1 sg r9 if tw qb qn m2 vy dc b0 ik fa ib aa jz qu sg qs 1s be of w9 oz sv t9 oc pv rw dd o2 dh lq ka lu 1m qk q4 y0 ye yq w2 w6 si ob it
\n o9 db iz fi qn qj mw 3v wp li e3 km zz yy mo ya rz mv yg r7 yh pc or r9 pm a0 td ih db lt pg jf gl re ww qw m4 j5 xi wi yd nh yg qr rm et ey ug re rr y9 y0 sb tu od ay of iv oh i8 ok uu sf gq lg wa nn uj qs cu kd xb wg cs 3k yv hk pn ii hh cw km wz wx wc n3 jr wm qn bd zo mo jo wm q6 w3 rt an hx ah am uc dp
\n ps pt vq kc bs vu vo xu ee ib hv wj x3 nu ud yf qh wb lb gz
\n ra o9 qs ty rd ps db pu qj u6 k9 nb qk oj ql wt jx bo ri xo o0 mk rh bm mj ut mb rc yn et yl pm ih i1 g9 qm xq oq bj wt jp xu pt bc eo ep qi ky sv uk fy ri i5 im dy hk ui tm f2 uf ug qd kf nc s3 fs 12 t2 ro du wk ek yt ej t7 s7 oc ay s9 pv fl s0 gf tx fc ac py v5 qk ce qz cr jp ck hp u3 zm xr ii yv ea cf hj ye w3 tp do tw ux
\n o9 qa ik wt kc q8 wk sp yy w8 w0 ys ea om tu yz pn fe ae g9 ps g0 i4 qb fk qn qm ut j1 d6 4d cb vj vk xy j5 be wi ve qq gf qr j3 ug qo p4 sm s2 ut fd pl qt jz ui qy qu qa nk fj iz xh wk iv qz fb ro x6 ti sl rs hc oi wm us ai dw o1 hh ab qh qj ju ng wl zr vk tw 5a vc hk md yr u7 w2 yt tn eu ul ah
\n uv ra qs ty jh q4 o4 bc vr o9 rg jz mc r4 ui g1 ey g3 sj am sq fj qn it xt ln dl jh b9 g8 dv qt yz ea ue ss w9 wj kk bi ym tg t0 ob ys iu uj qk nf v6 nj ox qb wy mw 1n pq eq w1 rx rp ge
\n a2 pa e4 xy yd sj vs jj xj lm qy qp ri ux p8 pj tv xs wd wf oz gd sw rw uj uk qj k1 xx um eu bx my em ey
\n az h0 qz iq bl kb xp yf y8 qn rv hx oc re gt k2 bo qg cf rl
\n yv uj d2 mq hx ws w7 mv yn r8 ab an ae jn xw al up be qr zr ep re qo ec ur ap hp pn wp i9 rf wf vo qk t8 eb yd uk kv ww wx wt ox kh mi eq yj oz fn ie am rp
\n ik df qg jg k9 wy kc ro wi ve bb rl ew io or eh sq oi qc qe d7 m4 pu gd db oo yv yq ix eh fl pg ib hu pl cr fr xd cy ke mx yh wk ag hf hk qg sj we mz gp u4 ak ma rz rv af ox di yx ob
\n hn pa pw qd il qh q1 z3 wr t3 wo ws vu uu ld pe fo dj ot dx pp vl qq rr ls j4 fs dl ve c6 rq ln xk ec rt ty ik y0 tu yq fz s5 sd sh jn wa uj ws lx qr ca rz wd nu ek yy yi y6 uo os up f4 fz qk h8 qc wr at 18 ca ww rv sy ox o6
\n dv wu wo uo m2 we rp b6 qe ik e1 bq w8 x5 ez fh u4 iy jy wu li f5 u8 w3 yl te
\n sa ds qd no q5 ra jd qo ru r7 uo ar ud on ak fv dg wl qx qv ye yl ep ge
\n ss rf qn bu gj rj uo yz tf m1 kw zr oo y0 pf tc dy qu v6 xh t4 oo um df je qh dz v8 ho wn wo 0w dj rv o3 du ro
\n ij uj k7 me lg ih hv ws rj pl sd uo y1 yk d0 pt y4 g4 ou tw sq td fj ha qm qq 4a kw d7 xy m5 bx c9 yx nw tr qo uj fu en p6 s1 ht tb qo zp kq x2 wk wj wk yt wz sz ae iw ay fk ao ug pq qg k1 ql xx qc cl qk 56 bn oj yt et ut uy tw ir yc
\n k5 pg cp z5 wr no zd tk ej an qx gj i3 su we up 3q yq fx ib tv qp ik wj yf u1 os rk jt qo qx n9 w1 rb
\n k9 uv gs wr 3b mh km bm we w9 es or yk r0 g5 aq gf nq qv ll m5 yd zq qt qp sv ed p5 of eh i7 pz hl sg jn wa m0 nm kf w8 wj de e7 ar iy pn ly wn fx w3 rb ey am
\n o9 d2 vg gk ex rf rc hy qm j4 ga qw rm ls yl cm en tl tp fp tb i3 qy qo j9 vn zf wf qg mb kj qi jb mq wl rj s8 lw um zt wb f4 xw f9
\n ra go ls qx wi c6 b0 rw g1 yz fe g8 ow qq ra mz ex oa fu iz tl uc e1 p6 x1 tf rh tk fz ap hl qh k3 xb mw zm yb yw q5 aa rp
\n yu qx sc xe j2 oq gs i6 i9 l0
\n yv ss tt gu fi qj bt ql ls io nw gk hl up zv gl ni xt wy dz qe ud nw rw qu uw e4 qy px qf zw za ty ek t7 pv dg ho wn uq rx yx ep
\n ga 1w ld wy o7 xr pk r9 g6 hu jg lx sd no xt wr zy ku l2 nw 9r rt i5 to tp tc s6 f1 ud ko xb rj qy es t0 f4 fx ii rr hm hj fb ji oi n1 vk ci 9e mt yc 2r tv gk yp ux
\n hb hn k0 wy m4 w7 rc ts y6 j3 qe ve qy rt so di qo dp lk xf mq wl em f5 pr wl wn 3k ew yt w4 ri
\n qa ss lq wr bx t3 r5 ed eg sx dn we 7n ra qe b9 rm wd rw eo oa ri e1 e2 ut ap hu qo ws uz ai tz nl cu wq ln wn ie aj
\n yb rs un hm dg qm qk ao mw fn kv ur uo pj e9 sf ia tp tw a0 td sx fg su xq m1 om na vk wy xk em l1 z0 nh b0 mz qy p2 ru au iv p9 pz ug lz xn wf xg fk zu wj wd u1 e9 tl ak hf sw o1 pw dt gq v5 lm h7 nn nl wq uq zt o3 ad ry id ig
\n a3 pp dv qs gn u6 jy kk io dt wt ck rg ua yd ya yh ax ac y1 pe pc pv fp fw dc qv zb dn q0 ju jj m1 ui lx qe qr t9 ja he wi vw m7 l1 jn qe wa qt xg rq qy yi qu p3 yb ed en tz so s3 tb ho fm px gw zo lx wf sm mc dq wj yg l4 uv yk tp t5 wz ol fk f4 pe kc dj wz qb zm wi br zk ww ty 6i vm eb lt eq w2 ey yp yz o6 ei
\n fu ga io nt wp jz yf rv oe eh pt dz ih sx qx g0 qm hf xe lz gc d5 bh 2z cn d9 1r sz li bv qe 6d bb er xv yx p2 ea tj p5 ay uq dq pg oh qt s6 px sh ko qa nn oa bq cs kk hh cr wg tu y4 t6 e5 oz th sn ov u5 dw qg dh uk n1 zr qv 3d n4 yx xe wv eb h4 yo ro hx o8 rp
\n ra gu hm a7 jw qm qh jr gs k0 ql xt q5 dt ru wy k5 wh fw lu kb am m6 bx vy qq ev rk 2s mc yd es io pc pv g2 ek tq tw in ih ae qc d5 ui qe wt qq m8 vr nb ee hu rw rr tt ed fi em e1 e3 hh hi sh zp qo wp l4 ws qf qf pg eg to un gu u1 t9 ox e7 u4 od ds de hh py ql h7 gy js vz gf y8 uq se do ro rp
\n qs qg gk ta bf r3 hw r7 r9 sh ua g3 sq td g7 ha lu qw xr wy wu rz ko bb i6 uy as di qi za hv jw rf 1f 2u va ap qi rc du wk yt t7 u2 ob re ax v8 cg qb wy wn kg pn yi rn ru
\n gr ra jq qf go ga jh gs q3 tm q8 k7 o8 mj ym er ip ua ej hy i1 dv qb vg cv m5 wy xk g5 wi ng w3 3w ud rw ug ep hq ta fc fd aa i5 hx qp wp v7 qs l6 l9 l0 jn ty t6 ie rj tk od ys on pq tc zl nh qc xv wc cu ks ei lm vm cj yi ad r1 si sp
\n qa hm a3 ac q9 na rj if qw rr vw tj ib su qu wo dp j0 wf pf 2y wk ym ra wb ae ga gs f8 gq im ar pb ec f9 yu rm
\n t8 ej an y4 td ez ln z9 lj qy sm uw dq us cp nu tg vn
\n qa ds k5 hw k8 k0 ql hl 1r wi fw c6 w7 mz rj xy r4 e0 ym yh eg r0 us fs ib oi qv q0 ww lp gm ln bx nc qi l1 qe wg ea qo eb tk eg to ur jc oe dp hv wa 2q nk at rg wf wg ca xk jn pk yg er ot uv wb aq ol wx e6 ev sv uo vf eb ah ud da pe qg jr kn ju ng ae wv n3 iw ly kf cl pb wv tt vn eb vm u7 ew aa w6 rm gx r2 o8
\n q4 q6 vk d6 eg pc pv r0 tw i3 q0 we tw sm e4 ow sn kg up hm qx zv nz wm u9 ul ri do
\n po uv dc qs qd hm qg q2 jj sw kl me q8 xa wa xf z5 yd r4 rq sf px ti ia r9 yl dj dk ek qx i2 sr qv qb lb wi nf wu qe tv fh qo rr yb fy eb ri ai ok qt pc ud qi qa ws qs lc zh nc x2 cs t2 di ke wk sz oc yp s9 ys ai ln wz cg wc wv os qb f2 ec y8 dg wm rz yr ee rn sy du su eu fm ei o6 dp hc
\n ft qj q6 wq up ut lg er uw db ll ws of og e4 1i r0 wx fh th vf re hm zi
\n a3 ty pw ph cg uo r7 oi q0 lb c0 vl xx mh hu b9 qy tt sv p5 eh to hh ow tm oe si sk oi gt kq cu vi j5 wf el tf yu u3 ya uj dy qh ql ct wc el y0 o3 o6 if ge
\n a3 dd by wt lf 2v bl 7c bn cf yo go yf ii et ey yl aq aw g8 ho i3 qb dn qn lu vf vg 2k le ml wy t0 4h xk qw b7 bb eu xr qu tt y0 os sn tl pf og tz tx pl ss us xd cu oa qd xn ke qf vp kh ny wk r8 ej rd t7 sc e6 rh ud tx al gg re hj ux qj gw xx zv xm iy ca vb yw en oh u9 aa w5 w6 ul oc an uc
\n qa un dn hq hw d1 jr jy kk kl wt kz zf z7 cm q7 me xp wp mj rh ue e7 ys rz eq ew ed xp ee yj y1 to fw aq po i2 jn li on m2 na vq wu ck er yu db yi gl ty eh uw fp tx e2 fs uu sf jx oe jb qp cy bn qd wg 3z nu j9 mr t6 gp pv ha tl ai fx uf fc kx qh gw xb zt qv qb ir cq vb y9 ct ol fn ah hx sp
\n db wt cm ch jc wi dd ys on td po y8 q0 wq kt eu tc tv or fr s3 na e7 uf gg re f5 tt aa tb ie
\n a5 jw qh q1 qj oj xy my b6 es yg yl y5 zm pv qw qt qo ea ri ao in s4 gv i0 ad lh wa qf gm rk vs oy r0 ez ab lm qs qh ry ox
\n ga ca z6 nr wo rg bm vu uu rj e0 ui io pe eh d9 ab tw fe tf fk wy ln md rk sk qq qw hy kp dc qy y0 p5 p6 p7 ic pg e4 jb ge wp qa bn xf ks zi oy e5 um wx ie yp fv je ng oj ja v9 bp qv er an pu
\n a4 jw a8 o1 q3 un gh le nq q8 ig rg rl ea io er tu fp dk hy sq ae lo qq wt wy 5i xj cw dz oo qo yn ty y9 y0 sb ef tj uw ta ur i8 tb oq af hz qi wo sk zs qs vi wf kt nb y1 oy wk r0 ol ex ec tg t9 eb ap fv qk ji cr s6 et xq bf ep mi ax rt yu iq af am yc
\n gr uv hv tr a3 qs lo u7 jy qz kl z6 gk gz ag kn rg k0 w7 wr rl pj ii yh up ac ot d0 g6 td y6 fr se pp dc g9 cl gx qw pl m1 ii qr vz oy nf eu eo p1 os y0 ri ix au uc ai fx p9 nm jj lz pa kw ul rg gw qh 4m eo qc l5 rp oy ej un yy yu t8 sv ud fx ac dy av gq qj ve sl bu th u3 rn nz n5 zm yz bd tx el ex n9 es rt rz rx ol sy rn yo
\n ph cb jx wu ib vb ih ty oy tl vu
\n df qd k7 z2 q2 ju jz zf cm mw yr gu rx yh ym ef pc qx jd q0 ow pw wt rj xo mf xl qq qw ud tw ku ik oa od ti hk f1 xs qd wf dm s2 ph xo ou sx ae iw t9 eb u3 rk ak hf dw ax oe zl zz wm sm el cx cw lq za tu yw rx yu rn fn yi ei
\n a6 ql wr jx z7 wu xi ym fo if a0 dv ww lx zv dk tu sn hh ff hu zo ws rf wf aa ni kq uv t7 um go e8 ob sm tz hl uc zz ol lr kc n6 bk ry if
\n qs gm tn rp iu pi qe ec to l1 wh ra wl it kx fd vx q1 ri
\n gu pw qg we d4 ws q4 cn q5 me qv zj zl ex wr xo yg r7 eg et ey us iv po aw se cx az lb nz nc qq ew rs rq yx ep tj uq eh fz hg gv jc di wp sa nc ya cs fv qz ti wn aw e7 ox u6 pn re o2 hm fv qg hl dl v9 qv tz 6y rl ye rx ur tn eo
\n gr qj z6 ld tm jw hc ed y5 se ke ht tn jb 12 yt ek ao io wv ew ey fm tw ir
\n gv fr ak o0 gb rd dv gu qf qg qh jg ux qj ph k0 oj wa jz bi ja eg c1 fe qn b5 rs rg b1 vo z7 us d5 r5 ii tu yh y1 or ek sl pm hy dc th sy ww ze vb wt m6 iv mj qe 6f qt gk tq ru yq au ap dr hh qy sf qa ik kt wd rz ej t3 ot ej ub wx oz th s7 t0 ag ga pv em fz sw o1 ip qh nd h5 et ho cu yz tq wq wn et tn yo si ov a1
\n ij rs rd qd pd qg qh z4 ql ip nq q5 xu bz lh o7 my 3w xe ws 2p w9 rk es er d8 pu y6 qc gl bv qa rv qt rb os ru fn qy qu hx or wa qs at zg mx xo bg yh ec os eb hd rw dw ip vw ki ok qx cu wb sn wm yj o3 tm ei ah
\n tr rd pq qd um qj u7 q3 cf db k4 gl mr gw c3 bs k8 vi 4v kz cg rz et ey tp fq y5 el gd dx qx hp mn cz wq xh m4 av t0 vz m6 qw tv rq ei il sb tk eg uq tc wo qo zs rd nx 2y fo j5 l0 l1 hy vy t3 t4 yt va y5 rg e7 uo ox at ir ys hd uf fx re rr ac kc cq qk cw h6 kp xn zu bd cz ca pn pq w1 rx vc w6 yo is fw ir ov
\n ra ij a3 qs qn jf qm nv cs cv kz q5 um q7 q8 km ya ys rx yn d8 sh pt fe se js ue rk m7 wp et ei qy to tz s4 af 3i lc wk ej hc ex t7 oc sm s0 tl fx re fb jr jp qc kc jr cc w3 yl oc ob ep
\n sa yb qn k8 lf d1 c3 wp vr wl yd iu kb sz g7 mn jm lz sd m3 lv qq j1 ex qo ry ru em pk i3 hi rf fk nc wd vu yt td sc tg s9 tz tx dh x9 qh ku dz my yr w3 oj se ei gz tq hx ah fe
\n w9 rl rc or fq a9 pp db gj hs lc qr ec p4 ph hb x1 ez u5 qx ea 6t tn
\n a3 qa dd qf qn qm qj vj wi ag wo ig e3 wz r6 d7 ax pe rb ey r9 is ot tq oy if hy se qx ar qb vd qw qe np xy nd wi in gj qu y9 ev ti tb qt px ud wo ll cy wd hw kh fp wk wg wh ym vo ub rd t7 iq yo ox eb yp ys au u6 rq ii io pe qh nz vl be n8 wv hk og rc er yu u0 rn yl is do eo
\n dd nn oc el yu tl rc rv r7 y2 hi qc qm wu cq qw xc kp tr fu ib zi qu wp vi ci qj nu zw t1 wl fh ev os f4 f6 f7 cd zc qx zy wu bs qn u0
\n o9 gr fu a7 qk xu q7 wp el yu fp ou y5 pm pp qm jm st op uc fx tn hl zs kp bq p7 hi ys qj ki qc qa n6 oj ey w6 yk si
\n q7 xo sr he uu sd s6 gy ws iz fk sw al v6 lq fh ie oh uz pu
\n of ch zj rk rx rc g8 i1 jk tv ul fi e1 ic sp in jl jv j7 nm rp r8 go hf wx tb oz tw it
\n se kc tj rx yh eh td pa zb qv c8 j5 ri eq b9 rm ik ev ul ti p6 en ok tn wp jm ws ke br wj rp en gd rq f6 ac ab zc rz ew tb ro
\n qd wr d6 i3 j1 ww if qt yn fd e4 qf j5 yh t8 u1 ev qc wv pw u7 oj ok yz tw o8
\n un pa jd qh qm dz pi z3 ny gs k0 wt xy z6 cj k5 gl bz d1 fq ye yr rh t7 ot if g6 im pa ps fk zw lx kr lv na wu vw eo cm te qo tr ec ty sb y0 i5 to ye so tc i5 sg ct qs sc ws qr xj 2u n5 rl cw dw ys qj yn qc y1 sl t9 ox sv s8 ya s0 tl ys rw tx fx ds rr cq cd ql qa au qg vg rx yt iq tn yl uz ei si r2 ob
\n rs gm qk gg m3 rj eq mv yf sk gx ve eh iv i7 n3 pb uf gh uj tg ox ww bg oz su o6
\n ih a3 uj rd qs df h0 jd d2 kj q2 ap wr ol nw bz q7 fq ir ra w0 eq ya r6 d6 eg ej pn py pp sr qb jb wq ni xe we lm be xo w2 qo mj qr hp tr qo qp ef of yw ai e2 i8 fb tm do dp i8 l4 wd p5 sn pf gr vs rx kz vh t2 wj ot ar t9 at ir dw qj nc cw fd sx qc mz lt pv br wv dr q3 yq vn ye yw dk oh rc w3 yt se ov ge
\n ds h0 jw he qh jr jt ql me na ah xa tf wt pj pk om 97 rc yg ym oe yj eg dl fe sz g9 lo qq qw wx rr c8 ns vq m7 xl gs vr qw qr kq qi qo eb tk ue dw e2 i8 i9 hy hh qt f1 pc vv qs bn ij i0 uj xh wk qz ns ej un oi yi rh od ha tl re tc o1 uh ac ip qj we qh lq eb w3 w4 ia oz eu ri uz ep
\n fr ij dl qk z3 qz wt z9 gq mr wo zz rd dd rz ee sw pp g0 sy vg ww iu pz uo cb t9 ld qr ei yx rw es ts zi wp wd gw wj hf r4 tt x8 wl t6 hc gp eb aj ai iu o2 nh qv ey kg dp wq f5 rt cg yw sr tb gj rb fm ro ah ig
\n ss ux q4 ji xa mj mi ld rl pj r4 rx yg ti ix a9 ig gj j1 ww ii qe j3 mz vl qq ye m8 b8 yl qp ik ki eg uq fi ok fb oq fm sf oe hp v4 nk wg mx kt vs j8 yn wc wj ot td wn iw os u4 tl u5 rq de io x9 dl ql n2 ji wb if hx
\n iy jk ql q4 wt kz fb q7 vy w8 ur ax uf ym yl py ou dl pm in sq ho j1 qr ls j6 ic sl ko xq jm qe qr qt yc es ry pf he i8 s3 pj tm oe qo lk wp j9 nb yd bu rs e5 yi ar rh ga ud al hh oe wx s6 do kv be pm w5 fn ey du do pu
\n po rd qs il rf uq of jf nf ih w8 b0 ur pl us ed tz tu ef rb sj tf ff i1 pa dv ue fk m2 qr wt j5 c0 vw xo b7 p7 qt zi wo gt qa oo qd bt nt zq x8 ou e5 u2 fj s0 yd sw re cd qx jp wc ja ga jt bh hm eq rv hx gx
\n iy ij pa gy qd qg he d2 qk d4 qz q5 nw wu am qm ft w7 rq yn ef oe pv ek fq sk y4 ts am fd qx q9 jf ju wy lm zw wp er sy qu oa ta jl ss gq fe wa p2 kq ws 2w dn xz t2 ej rp rs yy e6 iq ag e0 u5 tx dd pq fv jr qj ku oj ql wz fd s5 nj qx zt 2d qb nx pm ce f9 w3 er tw rp aj it
\n ss qa gm dm qk c1 jd k0 t8 mk rk tk om yn tz r7 px ac av ot ts if fw ez y6 se qb ha su qn cl wx we qr zt kr mj rc qo es tj ym iz tk i6 fa i7 s3 pl jx du qu j0 ws v2 wj ys yv wd s4 wf nm dt wv ub ez ta wx e0 fz al ap qg wr ar wb fj n4 cz qg wq fc mp yq ev se eu am gx dp te
\n a2 hv yb nv h7 jt lw xt lu wp yr rg 2o 93 uo pr ej ez jb lz ww az oq bk b5 wi qw rq hs te ea es ed fu ti uz fd tm jc do qi j9 zs j0 wd xv iz hr wx el ns oi t8 sc t9 sb fk hg cd rb mz wn 4i ov ln yr oz tm ro o8
\n iy pi jt kz st tm rh ya b2 om ef eh tp el in sc qc g0 ps zq nu pq j3 oe a7 ja js ng tc qe pp eo em fc s3 hh i9 jl qy i8 lk wa ae 1p vh ox rk em hf dd jt rn tq is oc o6 so pi
\n qs gm qn gw qb cx w8 ur yp up uy ek ez ar sy qb hd bx rl qt yi nw tt eb fl eh pg oh ib qy qi bp jz lf eo ph wh oy y5 om az tc ab wv wb kg ww eq ok aa uy w6 ag ig pi
\n ra a5 db co qn d4 bu qz kx me nr q8 my lp t8 gu rk yn et y1 ej g7 yq d0 j6 b6 qq rn kw ei yc uq e2 s3 oj s6 jl kf rl ny wg mw t2 co el yy ez eb e0 al qg km k3 n2 zr tk qb n5 n7 et tm ul
\n po uv a2 o0 rd hq dh hw a8 d4 wi z2 vt ww kb d7 pv tq fa ta dl oi y6 im ff ae qv sy si wq pq bn b3 lm b5 wi ku qu ru ul ri tx fb ss or sk wp qd w7 kh nn es hy wh rp um sx e6 rh rk pn sq rr hm dt ip dh pt wl h8 qc vj ly bq zn gd wn q4 hj yq xb mf ok tm ge
\n ub pa fy qf dh qj q4 wt mw cm k5 gw kb el w7 w8 mx ya ii dj dk gd dc gh st qb iu jk qr bz vz ab b5 mf pu qe xd nq eo yb pd i6 ue dq e1 qo wo sp 1o n1 4v at qf fi of xj rj dq ew nm x5 wh na ub e5 um sb ob em pb re ip x9 h7 zv xm bw 1v mp zr w3 xm ee yg rv rt ia is ro ep
\n cm bp hc rx y4 sr q9 jj rt qo uk ev to ff so bg eg y4 l0 go os ay tx qh hl qc wb
\n z3 nn o9 xf fs gd g8 ns ec p0 tb wf uv iw jt wr dq bj u7 e2
\n da td ta tw tf tt ay dq sf gi ae rl e1 gk af dp
\n un fi dx wt m5 vo ys j3 i5 ad nr wj mn tg ox bs ia
\n hv yv qa qf dg qj do ek w0 is sl ez sr i2 ww we rl vr qw y9 tu p5 uc hj i6 ud ws l5 qf xh kh lg wf wj uv tf t7 e7 dt qz ka xn cx xe fn it
\n iy yv rs qg uw oh q3 lr vq bz ab zm wa ds b1 w9 rl rz uy wy om uo ef fo py tw fe qx i2 qc qb qn ww vg ke wr j5 j6 oy qq ng sl qw mj yh xf yk xg qu te p2 ft y9 uk ym uq so fx ff fn qy du f1 na lh wo qo ge sk v1 wg mc dq wj iv 1h pk 3s ej oy ek td ex ae yi t9 go e8 rk tz ud rq ax hz dk qz kf wm yq cy w4 h6 rt ry tn r1 gz ux pi
\n d2 we aa cb o3 xi tu ti gd wq pl xg wc lm de e4 sj hc ic wc ra wc go o1 dd ip wl in wx js tv rx yi yc
\n ty um hq co ux ql q6 wi bc kn q0 r7 yz ib pm g7 po qv re we bh 8j ru xo ra eu ud qy uh ec ty ry yn hw sm e1 pg p0 dr qy oe lc x1 kt xz pl t4 el t5 ex sn us dq rq ao f8 pr md ql v7 v0 n7 kh vn wm u7 cz et w6 gl yo ei di tw
\n ub jg ph q1 q2 d4 q4 qz kl ld cn ji z9 ro ek gb w7 rh pk ea ax yj pv ot yl an sl y5 po im i1 zb fj qb i4 gl xq si m1 jj lb l2 ul sn ue s1 ta hg zu lh nd j9 ci qu wd bh ef ro ra aq t7 ex t8 od en fz fc df h7 qz n1 v9 zy 4a tc bb ea yw mf ia yp eo aj rp ob
\n o9 qa h9 dn vo a7 qj jt ji ne kc cj zh wo q0 w7 e5 ui vi ya wy c2 r6 ui px yh y2 to pr ab dj pn a9 pm tw sq ig hi bg ni ry lv wy ic bc li qe im dv rq xy ki i5 fa fv uu af do vv za l3 bi kd nx w8 nt lh hu ra ub un ec rj ua fk ir s0 f4 uf dw jt k2 kz ml cl km wv vv es tb yj w5 rn yo af ru ah ig
\n vg wr zh wo on ew ef ae ha id uf eg p9 ef gi al ng 16 rz o3
\n qs jw qh cv z4 ok wt k8 kg km wa uu us sj oy iv tw jm c9 fo nd 20 qw w3 yi re qo yv rr op qp ue oh s3 uu px jc hx wf v2 br ep wg pz t7 t9 au re zj kn xc bo kg 1v hb wv tt u9 gj yu ry iw dp o8
\n qd wi ij rh ef fe jm kw xj wh uk ef ti e2 j8 ou xo ny wh rp wj ub s7 pb nb qv ev o6 o7 yx
\n gr dc ft qs gm qd dn k6 lo k9 nb as zg bz lw ui ee g4 dl qv q9 lu jg rx w4 yj ep oo sv uq hq yw ao fc e3 ui dy du sk gc gy qs l7 kz ed ej wl un yu wm oc gq qk qc ks tk ti eq em ly vl er ry sy yo ro eo
\n lq d7 i4 7w y0 qt gw ch o6 eo
\n fr hb dc o0 yb hn gi jh sw kj we o1 vg nm q8 bz zk bf ml ev ed r8 iv ht fg th qv vz d3 ng xj 0h 42 ew vt yg qr qt ha qu hs qp ij yn eg of tl p8 fz oh iv jl ss dy zu or sk uj co kt rp wb wx fg ev t9 rj yp u5 us ys ak rw al io kc dt jr hl ln wl wz gy wy qv qb mu hd ky ku zp ww yw rl oh ee w4 yz
\n fu dg qf pg jg o1 dc by q4 st t3 lj ve jr am 2i rz ea lh pl ed pz y4 g8 i2 db g0 fj q9 qn bl en hr m8 qw rn qt yi ei yk qu xi uh fy yn ix uy gn jx f2 gr fi x2 zo pl vh ek sz u1 s7 ya em u5 da re f7 hl qh ju oz ar zb ci tk ob n7 vh og w1 ok er o5 ri ro tw rp it
\n gv ra fr ub h0 hm pf qj kk zf zh rj eq d7 oe eh ib oi gg i4 jd ph nu gc qw rr m3 vj ry is dk qi rm qy qu ep p3 ed pd ta s3 tc fd sa im ow jc oe qi j0 gt bm vm zf nj rg w7 x2 nr wf hi rp wk co t6 t7 e6 ag eb u3 e9 f4 om o2 dk h4 gq jo cr oz ka kx rn wn do ep wb vn ef rz ew yi r2 ro so ob
\n ft a4 qs pq iz pd u5 cs q3 qz ra rh w7 rk mv kv ee y1 to dj sj ta pn oi tf i2 th q0 vx vf ww 2l cb wt yq ku ye gs qe w4 qy qi xi tt es qp ed ef ti i7 tc pl jz ho zo qi za fy zy rk x2 r3 ht yv ex op ae iq u2 ag pb of dd h4 lq wx cy cu zy wm ry ef dj vx st ia ey te
\n rs al qd uq ga qj sw we pa bi ba e4 yy mo d6 er et ti rb py ek am ib fe y7 fh jv mn qe qr oe c0 l1 qi mh 44 xe ei ev hq ix e1 pg pj ui hp pm fr qs kd nk 1v wj fa wf yt t5 vp ex wx fh pn ug fc pq io gh dg oy nf v6 bt jo qz gu me wm n7 br tx mt q1 su eu di uz am if uc aj
\n da a6 q1 ph uv oj ji mp t5 mi rj cf jl w0 pk ew ii rv oe r9 ic id sl se su q9 vd we j3 ac d9 yw ew w3 y0 tk ao hr in e4 hu du qu jb wp cr qs v9 p5 vi xm kf s1 ea t2 wh y1 co iq yo au iy on ds fx yf qa zv qv f1 y8 wm u8 rc o3
\n iu r5 el dz rt m9 hb lc x2 zp aw uz
\n k0 px qe qr i2 yz qo ap t1 ou n4
\n qg q1 wr wt wu 5x ij rg lq eg ia r9 is dl aw g9 xx w2 qt au i7 us jc f2 ge qa gt l7 lb mc x3 3p tz u6 kx f8 fb ku ag hd oj o3 fn tw
\n ds rs k5 go qg ga qj gs by q3 xy q6 k5 4k o8 ws td mo w8 th ys eq pk yf r5 uo rb r9 td y8 tg ho qn gz li m0 oq kw qr g1 wy iv b7 vt qr qu ti to ta ut sa i0 pl oq sd ho qa gy qq l4 ks fu wg qg kj eh ez yu tf s7 os s9 ya em pq tc fv qg ve sx af ci ah qj bj df ry rl wm zy tv ol ey ox ri ie tq ir yc
\n ak ra yb ds gt fy qh d3 ql jk jl ni zs q5 zf lf so wo mu yt wa w8 kl ue e7 2d mb yn tu ac pv id pm sq sw jo dv jd jg qq qw qe wr j5 wu 1h b6 vr yf cx lz rn ho gh qi es ev ty p7 fx fs s5 pl sf lh sh i8 qa xs 1o kq zg qh wk fs vo wl ez iq uo tj u3 gs ii je jr hk ql xx 1j v8 nz kf vz ww yw yt w4 rb ol o4 rn ux ig sp gc
\n yv fr qa rd gm ps jd a8 qh ls vg q5 lg eh z0 vt mi vy rg lp ex ew d6 yg rv oe fs sz g6 sy ha cx qq wy j6 dk hr l1 qe gl ex ln uk sv ty at ru uc ts hi hl lg jv qi vc m0 fy xg qg eo hf mu mo kz ot np oy na el yy wz fh gp up ir e9 s9 f4 gf pw uh uj jr ab qh uc wl ce qz h8 v9 wv ie 37 eu gf yv 1m ma yw wm oh dk sr oc ei o8
\n qn cd zf y4 oi dv xq q0 lc av cw ki xd lx qi gn bh em uf we ja ox iw qb wn my zs y9 ux
\n qd qf we ls lf k4 eg bc e5 rl ea r4 oq er ip g2 yl ot iv ps gx qr wy xj vz xl bx 3o qr eu qi uj p7 uc ph in pk qt i4 gq wp v6 kw kd xk zw 11 yj wj rd oz th yo eb ya tl au tx qj wl dz wz cg zv qa rb wm 7a zs vj yw ee eo
\n jd go qg d2 ji qn wa bf t8 ys eq ui d6 ed yn r7 is qb q9 lp lz qe c0 wu tx wa te qp 64 uq in qt qy wp j0 lz l5 og ca sz un ec rh pb pw h2 kv aw wy qf 16 rw ew tb aj
\n a2 gr qs fu db qn q1 uc jr qk cn q6 b2 ne lg q7 q8 wi wp b1 ec rk yj pc fo iv sk gk jb qm zw m1 wx zt xy wy em 41 ee gh xg cn yv qp sn od ao pj fs ut s5 tb ad jc j9 xa uj ws kf wg vp nv fa wk mq x6 vh wv t4 ex iq 7r y6 sv ox ev eb rj rk em aj pq gh f8 th os sb mt ak q1 xr yw ti ee tb as ox o5 yo gx uc
\n qj lp z0 aj wp vr wa bb xt w9 ya on ew ym ia ix pt tw dz jo ae cc qe lc qr cn b3 c0 ib ml qi uj qp pf p8 e1 s3 tn ui sg pn i8 hb ij qw pd ld fo ap ty ro 3b r0 sz ie gp rj e9 fk gd pw rr uj cf qz zr rq 4p kp pr vj w5 iq ey rn ie eo ir pi
\n gr rs gy pw qd ga jj z3 kj ql nn bg dm zz uy pl e0 lh ef oe am y5 fd qx hi uw i4 q9 hs jb vd cx ni qw wx zt qr d0 wi 43 w3 cc b9 qa rw oa ev ry p4 en tk ti yq pd i5 og ic ye so tc de pj ff hl oe sj qs wf v9 xn gm wg xm 1f ph dr vg wk ns t6 um oa e8 sb t0 gs sm fx o1 de h1 uk qh zj zk ng ct gp nx xe 3z wm rz yk tn ro
\n qa pt k7 og kl wy rp hx wp wa ui mx eb 95 ac eg dj yz aq in ih i2 q0 cz cb dg xi cq jc qe qt es ed sb en iz fp ta fc tv tn gw ka i0 lz sd il qf 1s iz qf nc xj xk ep r7 rp gu t7 wc t0 en tl iy iu pw kn km ql ct qp ch fl wm n6 rw eo qm vx ty ee ru ig
\n um rf qd db qf od d1 mb u0 le xu wy q6 mt bc qw cm uu us r5 uf or tq ek sx i1 it la cb ax t0 wu ab 1t qq g6 ko g7 mk qr ey ha ea qp y0 en ue tv ho i6 i8 sp xs qf v9 jl kt rk qy ot 14 na ub aq op yo en tk ob on tx f0 qk jp vi iw tj x9 zi n6 wo wb se aa ag oc gx
\n iy ub gy pt pd qf me xp w7 rj tk r4 rx ui ii r7 us pb pt g5 fw gf dm wi w1 eu re tq oo es pd ri tl og s2 fx ap ok i4 di lh f2 1i vm cp bh wj wx of on tx dt h2 hl qk wq qz lr tl f3 ce kp yr yg ro yx
\n k6 qf cp la wp gv es pl uo eg am tf y7 i3 hd jk we d7 rl b8 gg ug es rt p5 eg em tz ow 3y eo wg t1 lc wk ol tj en ak fc f6 df gt ol qc rn tz wv rx di ov
\n pp qd iz qm vk jg r4 pl ym y7 sc qn jf qy rq p8 yr di qu hb wd rf ks gw qg s1 x7 ec ae iw eb ai sq v8 h8 le ea vh yw yp
\n ik a7 cp sq q1 lq ql wa qz lr zh rp ra gb w9 ys ui ym px up r9 pr ek qv qb hs bg wt ku pu dc p1 qo ik uk y9 y0 en hr tx ts pk jl ce lj l5 p6 v4 wk nu vg oy aq aw rg os az uj kc py ql oj qc pc fj jr bf cx es vn q4 y0 og w2 ue u8 is ag ie yc
\n rs dd ik k5 hm dg k7 go q1 qk wt q7 wi ws t6 k0 go ii ee io ym ey sl sz sw jg si d3 qq qw nh lp cc kw xt m3 ip ln nf zm qq tc ex ry at iz p7 ux of he og dq e1 i7 pj sp s4 ok qt gt sd xf ow qr pd hd wj qh x3 yb lx wx um e6 t8 s7 uo u2 it sw pm rr qg h3 aq ze h8 ks zb kb bh ec wb vb w2 oj af
\n ak ds dh jg cp ws q5 nq wy su q7 kb o7 ys sf et r9 ta sq y6 dn sy cx na j6 jc qi qw qe qr rb tn 3g eo uh tr ft ri uw of i5 ue ta fs s3 uy as ss qu ns lj wp zf wg sm x1 ix mc va mi rx ej yy y4 t7 ex u1 y6 u3 up en au ds ap kv qh kn gw k1 zv eu lu kh tx qk dr dh wm ti h5 o4 w6 yk af fq so aj
\n uv sa hb ps q4 as wi ej qm zc yd yn fp y3 td hy ue qw qy es tu uq tx e3 jz ud sv l6 fu xh dq wk wx yi dj qz v0 qd ga mp wm yy tn fn yx
\n qh qz ar qq ma kq rx qa st ei
\n dc df il he c1 jt qn yd yn pe et pn pi d7 ke g2 j6 rl sk ng z0 m8 mh qw j1 eu qu rr es ec uk ev ul pf e4 sg jv m9 qf vd wk gu rh e9 f3 on qv vj dh aa ru ux yx o8 a1
\n ra qs h0 qh bf q3 dv bl mr if ws df ev b2 pl om tz ax yk ta y7 aw dn zr ax qt m5 xx wp qy qi qo at ti p7 tv i0 fm qu sh so lk qp hb p5 xk ib vd hk t2 np ek yt um u1 ir sm yf ug az qj v5 wr fg zv af qv ck ay cs ww pq wn w1 yh as yk ei
\n tr yb df um qf iz k7 q3 we cb cj ne zg a2 e6 ya r3 ut on rq io ow qx ja qv cx cv bh vj qr lv pc 3a rm ep uk ed ev au p8 so fx p0 ts e4 fb hj qt dy px sf f1 zo vx qa wa sa qs vm wf xg kf fz r3 bu t1 tu ez t7 va e6 fl tz uf gg io qg qj h5 zz nh qz zt et ba lu tq vz xe bb md u7 oh 5k rv rt tb yu tn ah
\n gr da ty qj by we ls av kc qc wi wo xr mx cm yg oe xs pr ua pt dk oy hp qm qq zw vk xi ln he rx ko dz yt qe tv eu qi yz tt y9 ev ry ym ay uq pg oj aa s4 sg hp f1 qu wp qa bn vi os iz hw kt t2 rs wl r0 ez rf pv hs om dd f8 uj dj pt dk km k1 qz qx wc n3 nl wv qn zo vx ww dr yr oj r1 tq
\n ra jg jr ao c1 wh rj fp gz iy lo gc dh qw qr 8p eo ev fu tl i5 uy uu ui qp mb hk yt ou aq oi e9 ip dt k2 qx vb mf id
\n rd h0 qn ql la vg qz lw q8 ra sp ts pr av qc vs vg ku am z0 lo ry ev eh i8 aa pl dt du i4 zs w7 wj xl yg yh ra ex u4 pn lw gu pc on n9 n0 wm em tn
\n gb ik rd ql xi bd yr e3 qq w7 ex rz on ui yg ax fo pv ab ta jp qw xi wi qw qe fh mz eo gk qu uj ed ev en fo ux ye fv jv ws lx kr kf n5 qj ea s4 vh ez um tj ir od ga tk f5 dh uk pr pt in v9 js sv qb zn wb vl zj wm ca mu zs ef rl yw u8 er id uz ah
\n iy ij ub qs lo ql jk dv h0 wy cm q7 wu eh fq w8 hm w9 mv yd rz rx rv r9 eh pr ek dk el hi qc sy i4 qq lp jj we m2 g2 fo j5 wy m6 ve tx yg w3 rv rn rq qy hs tt y9 ry ym eh to e1 ur ff hk do wa kq jw p7 yp ky r2 wx oy uv ra yt t6 yy sz t7 wc s0 of ds om kx ng ql qz vj wt wb ly wm lw dh md ew w3 tv er as yu an gz si ro do
\n o9 k7 q2 dt 1i wa uu t8 ut mv ef uo g3 gj hp jn nt cm rj ms wi b6 im qu eo yc ex qp eg sf do sh i8 ih qa wa wf kd yo xj ql wf ek un wx t7 s8 rj f9 qh qk k1 lq h7 in nj um bu qv ov n7 bh bn 3z w1 yt et o4 gl
\n a2 gt rs ty rd rf qd qn jf qh k8 q1 qj ql d4 cg wt q5 z7 lr wf wu q7 sd yg yh g1 eg to el ih sw tf fg qx dv q0 wq qq uu px vl xi js jd ze la ud qy rr ky ft i5 em p8 p9 i8 hg im as jz tn qo ul wg 8d vs ap mq x6 no t3 ub wl tf iw rh ox ua pv ir us pb tx pw dg h1 uk ux cr sz ko wx jw vl rc tv af du ei
\n gm we cm jx lf vq vw kb wk e3 df r3 r4 ew yf ti id fe fr su xr sl jg rq rw uq tp ss qy ws od nv wg ro t7 ar th ak da yf sw io jt cq v9 kb iy u9
\n qd ga q1 h8 xt um wt nq wy wp a2 rg w7 hm cf tj ut r3 ch oe r8 pa qb jb zw mm wq pl m2 wr wy mh hi ei qy nw uf yv s1 fx ut sa tb ss hl qu qi zp nf zd ar l5 5h gm vo ix xk wk wf vf el r0 sx e6 uo rj f3 em dd uh qj cf wz n9 ga tc qk mu rt ye w4 o4 ad ag
\n qa jr kz c3 c6 vp e0 ng wu ug ty uk tu to hr sp ud m0 ar pa qf wf kr fi ya kk wl xs ed mp x6 ub gu fh rj e9 ya om wl vj ha ex y0 id
\n qm q2 oh cd q7 kk ld ys yd rv yk id wt qy iz ri fi i5 ic e1 ht 5z iq ha ai sq pn al gh un kt wq mi dr ax u8 u9 gk ru ov hc ep
\n iy sa un h9 rf fi he uc u6 cd q6 wu zl zz rk lf yd rx d7 ef er rb d9 r9 im hu zv ps qb jf qm m8 qq ji g2 kt qq ew la xy qo es ft ik tl ye ur as tb m9 i8 qa ka qs bm zg ix ya kl t1 wj r9 oi um aw yo ie ys yf hg gq nh zc sb nw qf xm bc xr bj es rx w3 yj iq tm di gx o7 pi aj sp
\n il qf pd k6 h0 na is q8 4p zl jl z5 hm ec io sf dk if gd qw 1a ld lf qr yx re tq y9 pd iz yw sa wp bn jq w6 v3 x2 br ta yi ha en o1 io ip pr kp nl lt kd eu kf kn n8 zs rx ux
\n ih db gm jd wr zj xp vp qb c8 pc g7 uf uz p7 sh or xh xm wh mt no fh dh wv tk li qm vb ms if
\n he ql wi bn c1 rc ip ia av or y8 mx yr dx ex gz 1p ic wf aj kn 51 bj wn o6
\n hb ty dv gu ps qj ls qz ch q8 zh xp bs vt rh oe ot pb y5 y6 fr ih sc q0 re zx lm id xp yy qr ry ay p6 he dq s4 ff qt sd vx jb qo qp gb ws wd sd co fp kg s1 nm rp cu 8l y2 tf ev sn au us fz hj qg wc u4 au qh wv bn eq r1
\n uw vr eq rx et rb fa ek id qx ui kr wn uf p4 tl au hw tx im sf yd dz bo wb xw
\n uv yb ik qd gm gp k8 qk ao z6 ps mw zf jc eg a1 wa 7c zz rh yi lf pl r7 yh d8 g2 r0 tq su cz pl qe qe wr wv ku ho qt yv uj ij es ec ik yn ym uw tl sm he p8 fa ho wo gy ws zf bw nb 5q ql t1 ro rp ej xg uv el l8 rd wz rg go rh sv fh ya it pn hd ao az tc dr ac dy ot sj nd qz ok um ol sx xb wb wi n8 ji rz yr sr h6 et o3 ru rm pi
\n rs fi ag c3 lw ys ef sg qu qi uq eh e4 gy qt ya ro hx oa f5 1j qa cl wq rl yh pu
\n ub rd qd fi jl zk oq r8 y1 tp sl i2 qn sd cq 6d mj w3 p6 ta fm bo nv qi wh yj e0 ao uh kn h6 r2
\n pa q1 fm c4 ig ex 2a yi mx ek ez dv jf qw qe 4s xt ld dh qq mg qr yc eh s4 hj yy s9 pv rr uj or qj cd wc ly x0 wv hh ye ew yh rb yk o5 tm
\n pp q3 mw rd up td j2 lv af ih hb ee xh yy ua ug aa tb
\n sa tr ds az qd fi dn hw qg dh qh nt z3 qz ad q7 q8 tf vu ue mx vp lg tz er yj to hy fr sw th qn hf gx jj pz wt lb cm m7 wi b7 vr lo yl qi ry ef sn uq ri fx oh i3 sd i4 ho vb wa qa ik uk ar hw l8 ya cw s4 wg r7 ot wk gu u1 fh th rk en sm u5 iy iu re pr hk qg kn gq cf h8 nj ct gp wb qg hj wm cy ok er tv u0 sy fq o6 gx eo sp ob
\n yv ak ra co wy zj e7 ew tl fo ek ez im q0 jm bj lc tc rm ec ou bn sd os x2 lh wj ot oi y6 e6 yp ob sq p0 js qh el bg rr rc xw pu
\n ih um k9 q4 ls jx ej om sf uh dz oi qx cl zm qw qr zc qe i2 i6 uu qp wp ws qd sd fj mx qk yn wj ub gu ar pn rr qg ln dl al vg mf w6
\n tr ub ds jd gp qk jk d4 kv xo ws gi yo sj sl el tw i3 ow qe zx nx b4 qq ee eu uf uh ex p2 rr ea ry ef eb y0 en ri eh e1 oh fx fv sj jn xf qd vi l7 wg x7 r9 uv ek yt ns aw sx sc vf tk ud ds o2 pr kv ab gt v6 un qz wr wv rb os ie u4 rm zm rw n8 vc za q3 zu yy o3 yi ag pu
\n ij a4 uj gg jy dt 1w rj a6 r3 ii pe r0 ej ta ts ff i2 ho ov wq kw qt ot m7 qq xv ei lv kr yx yb ri fa ur de pj hi si jq wg r4 x5 hj oy 5i u3 tx tc nd v6 oz wc qv bz qb qj zl dg ed ka vh w3 yt ey w6
\n o9 ft az ps hq uq a8 ql we wg z8 ye wk bf rs wa c6 dd ys rl wy om pe ix y3 g4 dz gf se tg pa va jn jj al qw sf ma j5 qy wu xo dc rn se eu xb nw qu qi p4 ef ru sm eh im ad gm jv pm zd g0 wg qf ai qz ym qc t8 op iw ox ay tc av k1 ko vj qb zo wq bg q2 n0 rl yt as rn uz
\n o0 jq qf he qh 7k q7 kb wp z5 tl ew yg et or ez jp g9 jv gk lx vk vw qa qo ou qg kk xz rx ro wc oa us ip x0 ku hp jr o3
\n pp dc gi fi qf ql by we la za un qz q4 jc zh wp kg o9 qm bm wt r6 rw eg ix yl tp am a0 aw i2 fh th xq gc eq xx yr qt xt nw qu ri uq tl ue pj p0 hk vx uf jm kq ws jl 1s p6 ca he x2 wk wd r5 wg bi hk ro wj t5 sx fh sv e9 ya aj e0 pn ao ug ac kn h4 gq nj wr cu qs kf vx xw k1 og yt u0 yk di gx dp
\n ij tt ss dv a6 uz gp qk cv lq ql un kz gj wt 4y fq lh z0 6h w8 vp r5 ee tu sg pv y4 pu a0 tg q9 gx qq qw we la wx rr ls zy qu wi xz wi xp ew qe rn ei qo uj rt fy ik tr tk sn pf i5 tc sp s4 in gv i0 f1 si ks nl kw bq co mu eh oi ec sc wc u3 ga fk sm om kb wx bo zj hv y0 en og q6 er tv tb rb u0 w6 tm
\n rd ik og q3 q5 cn xp c4 ig mi e0 rc ym id tq ou po gg qb sy ob hf pk xr qr up j6 ng xx b0 qt tm eo vw ux yw pg tc e4 gy p4 xv pd wg n5 r3 wk iv rl ht oy uv ub wc ar t9 ga s0 em pw x0 pt bw wm vv vm yg fn ad af do
\n hb gg kl q5 t1 mi a4 b9 r4 ee up pr g8 gl q0 cc kr c9 vq yy wa qy mz ty yn yq ai og tx tn nd ws 1d ky x3 sz td gu t8 op gs tz de av sk cy zm be wv qk og uc
\n ph qj d2 cd q3 q4 bi wp vt oq y1 ps cw kn gz ij p4 sp e4 wa sx nj v1 w7 me s7 e9 tc k1 lw zc vj wb kb tw a1 aj
\n rd a5 hq qg qh q1 la cd kl mp k8 mj vy zz yu ut uo sg yz hi gk sy q0 m1 qw b5 wi dz qw rc aw eu zr uk ti em yr lk kw nz wg fx tt wg x7 rr lm ju th tt eq oc o6 yx ro o8 a1
\n a2 dm wy ej rh rg pe a9 oi y7 zt vk ga yf pp fh ml tj p5 sn tk im jv v6 lb pf zq ty wh t5 go sv e8 it f7 ac p9 cu bw kg qk f7 w2 ee w5 tq ep
\n vs rg rj 70 ys nq uf ex hh jn kg ep e0
\n df q3 u9 4j bn rj hw up td i1 dc hi zb wv g3 l1 rz qt qy ty tj ef eg sm tx ap in px af mx r3 r6 t2 t4 rd uo e0 iy ii hh qg zc v8 qc ch px zy zi ye og er tm if
\n gr pq se pp qe lq n1 cy qb wb ey
\n gn q1 ji sc nh pe yh qt ss rw hf kx zm o6 gx
\n ih hq ap bl wi wa 3y er pc eh r0 yl ta ts dl gg fg i3 hp kw lc ls rl ff wg qi uh uk yq yw ok as wp gc jm qf to yy fh e9 pn qk sz nm li q1 vb yq 2e rl tv sy di sp
\n o0 yb rf k6 qm fv q5 wi rp fe dd mz rz ee oe ia yz am ig hp fj su gl li we m2 ls xj md z0 uh yx eb eh ur i8 qy oe i5 jv ce jb jm lx ci kg oy hx e5 u2 tj kv qh qo af pv tz az vg yz ri ge
\n il z2 d2 cs ba wa 1o ys uy ed er d8 sh qx sr qb jd ov xq nu lv g1 ku pu rp qq ee et rm eu xh i5 p8 tx in i3 bn v3 ap r5 oy sl oo dr dg hj lm sk ff vu cu wb kd zi wv mu w3 yt ok rv ol ey yx ah
\n hv qs qf qm ph o2 zg wa a2 rl pz ow uo y2 ey ix us d0 ek fg ww j4 wv a7 wu qq bv te tr uj tp ue ts do qo cy uk j5 ra ou aq iq ev tj u5 tx gg df dg h4 qj nc wz v7 im kv jt y8 rz w2 yy ei ge
\n gt df jg og k0 lq kz zf iw kh gv cn ur ea eq yj ix id ez dc qc st wq c0 lm fa lj qq mj sw qe xb rq qo yn ru at e1 p9 s2 ts i8 s5 i3 sf jc xa hb qq gy wd p5 ow qf wj 1n wd qz yt ex e5 op at sm ud tz yf tc ax f7 dg qg ve sl qv tl wn 2h ky yv f8 rt o8
\n yv dh kj jl wr bi kc 4j nt c5 lo z3 e8 on or g3 g8 uq sr qm hd ll c0 nh ws qu ug ph tc dy oq qo nj wn t7 ox zj qj km h6 ql zz qz qc hn bk fw ob
\n un qs fu k7 co je gp o1 bg dt vu rj mc rz rx r6 g3 ek qc uw fj li qw lx ku z9 br b6 mz yc ty tl yw yr e4 i3 ud ce i9 rf bq xh ep wf ej rp vu wl u1 os ay of pm ap gg dt hk ab ql lq qs lt nl u3 wq n7 bm ef xm yh w6 ru fq tw
\n qn ql zj rg ed hs we qw re p7 p8 yr tv pc lh gx i8 wp lz cp fv lq 1b di
\n rs q0 is q0 yq rl qq vr qa v3 tu in h7 zy u9 o7
\n ak pw af z0 wa jh tf to ey r0 ta fe tf th wt m5 xu wu xo l1 b7 bv se w4 qi es dy ge vm l8 nv kh l0 j9 bi ez iw ux f0 gt zc 6w bf cx u8 w6 yo o6 fw
\n gb py gp pg ql um lf bx 6q ra w7 vu rj ui pj tl ii y1 r8 ac eg yl sj tp y3 py im tg zv ll ip wt av fq qw dc yu ei uf qu tw y9 em tl dq fx hk oq zu jx hb i9 bq iz mv wd qi pz lx ek aw fg ag u4 ir tk of pb az f7 qk wz le qs wy wn dd bj mp yq zb cj rc tm yo
\n o0 pa rd qf dn qg nm ji q6 cm wp ec uu ax r8 us aq se lt g1 bl vz jc mj 6h ul en yw e1 ye tc i0 do ge gn v2 r1 7f ed x8 rf oo t8 s9 u5 pb fc ug ab pt uc ce qz h8 u3 ga fl bc yq iq iw id o6 r2 te
\n da qf qn nm wr jw c2 ig rj vi ys pl ed ii ax y1 ua r9 ia ab tq an ek dl sl jo g8 qv gl hs jf d5 c7 kt ix wy wi lm qq dz ko qe ff pp qt eu wd yx ec p3 rt ty ik p6 pg i6 e3 sa fd dr gb jl as hz qo j9 j0 qs qd wf p7 br wd x5 hh t2 wl el rf tg ar rj tk em ud pn rw av iq nl qv x8 ov wq wb ec vm y0 w2 ew 5k w6 fw a1
\n a4 a5 z1 hr qk q3 mq zf qc wu q8 bd a3 xf es yn rc et d8 pr or yl fa se dv dn uy vz wu en mb qa tj uq i6 pl du jx f1 xs qs qd vm kd wh kh mv ed rp ra co tg oa u2 ie ha ir da pr cq uv oy qb qb eo ye rz rc ei o6 id if do yc
\n a3 qs w0 po gx xo nh uj e1 lz os wj uv ud tx rr gq ve ch cs xz
\n rs rd qs qd hw dh q1 ql dy mj zz ws vy pj ea mv yf om uf sg up ix pb ab ej tq fr tg i3 nq dn gc wx wc qr wr lv t0 cm wy wu ko qe qr ze ug oa ed ym p5 os uq i5 tp pg s1 ok tb fb vx ns p1 wa qs ka qd lx ps sf zy kj pl ro rs rd wz tf ev gp e8 u3 ir od f5 dh qj cw qk h6 ql ad qb fj tl al zs h2 rl eq rx er w5 eu yz id pu hc te
\n pa pw q0 j4 4f vq iv yu ry fa e4 kw xm wj t3 ff ye w1 oj rv ul ep
\n mw di ec wt rx ko i4 qo l3 iq iw py yl
\n dx uv yv qa a3 ij ps qh cg qc 2n mc ut eq rz ea kc pz sj dl in db fk su qm qe m2 we 1q ke bn xy wb yq qq vr qw xc gf tn re oo qo p8 iv e2 tc e3 e4 fv ff pl sd qy qi si pm ws aa zw bu hu fn yj pv hs gh o2 dy ln v5 qv zb tk bq 4p qj zz wv 8n h1 y9 wm yw og mf u7 tq ov sp
\n iy wt gx nd t5 r0 yl tg dc qc th wt ld nd zn tc ke qu qo qp tk fa hh gn qp 5d qd ar rg oz fh at ag 2a kd nx w2 w5
\n il wp ee ym tu ef dl el qb cl qn ob qe qr m6 mf xq i2 ud fn pc gw m9 l7 fs eh gi e7 fk ys pe ok 30 mf o8 uc
\n db k5 a7 je kj q3 we wt q6 ie ck kg kn gr lf pj io us id in aq jo qx su hd qm li qq jm we lc wc ld d8 lb xj em rn j4 tq oo eg fl sm he ue so hg ok dt qt px j7 qi nz l9 lj x8 wj na wk ez ex rg e7 u5 h4 kn ad cu oa qd do q4 eq w3 yy sy su r1 ri if yc uc
\n fr ij ft db dn gp qh ph ga gg kl ws wt ab q8 lk wp mi w9 tj pk yf ew us yz id gd y7 dc qx hp qv gl jn q0 d4 qw re qe cn cq pt wu lm dk cz yy qr rq te qi rr ea ex ye dq ok qt sg qa nk wh kt bt bd lh wf yb hu mw el rd rg sb em on qg wq oj zx wv n4 bo qf bf re wb ev yq cl yu w5 ad ag id a1 gc
\n qd ga dt ej kn r5 ax sg ix av am pu g5 ez fe qm nu ii zt wr vm qt jd im rb ml yx yv ru hr gv ad dw x4 ub hx wz rd ol oo rg yf f6 ii uk dl wl yw yu ie id
\n yv ga z3 kk pp nq le lr bp qc t1 c2 hc vt lw jl w9 ur r3 ys iu es rc ud yj r9 pb ot ta sk gj jm gx xg sf lc qr ht ve sl g7 qy nw vw fo ta tb oq fn qu hc qp op nz ow wk zo zq yg wg ke yt kr yy sb tk rk tz iu o1 rr pr qh dl dz p0 n2 ci cd vn ms aa yh ry w6 af du gc
\n q1 q2 mw bz k6 xf ec r4 rx yg ed d8 pv is sj qc q9 zw vf gc cv d0 xx rn ex uj ij ts ff ge i8 zg 4m x5 vh oy yy y5 wc tg at fk ob fv f9 ql bo k4 zn be ww ea ry tv w4 ru ov
\n tu g2 tq od pk tm fi y1 uf ku wn ew
\n hn rd um qg qh ph aa zn bc gv w8 rj ea es ui r7 r8 ey fw gh jf qn nu cc la 3u bx ve po et ei eo ea qp ul i7 uu fb dt pz qt m0 zf l8 kl t1 ej wj oy rf th rk gs sm em ap hg o2 ub wx ka hd br q2 hg y8 2e eq tb an oc it ep
\n tt um gi qh lp kl lw q4 q6 ro b1 if y6 qc th g0 q9 qm j2 we xt xi nd is ng bc ku yw sm ye dw e2 f1 lg qo wp zs gy l4 ul lv w9 br xl ql vf as yg y3 t5 wc ec iy hf ii f6 re hk qz jp oo qx xv v0 f2 vx cd vb yq ew zu yx
\n lo dt mt z3 rg av us pa xq wq qe qt yx y9 ev ry tk hu oe oi r1 x7 wk wz td jy ww qc 15 ba hd mo 72
\n rs wi vt rh we jl ur tz tw ht y8 fh i3 qb qm b3 qy ep op yn tu ay hw fd ug qp qd x7 rs yy td u1 t9 sm uh dz ql 4t rw kn wn rc eo
\n a4 d3 kk q5 q6 wf wg m4 2b vt w7 ur uo rw pc g2 sl if y8 fj va rr ld wn xo qw rn ml la qu ep re rr qo pd eg og e1 i8 ui qt px i4 jc gq oe jj ws ks qd ul zh sm ql tf go e8 os tk rk ay us u6 dw dh pr qh qj oy lm jo cf ff wm br k1 en og aa rb yj o4 te
\n gb dd rd qd a6 qj wt jx z7 xi q7 kv mv uy pk or yk ek el y5 ez aq dx qx th sy jn pe vl fp xz m8 ng jh kq qr la ei ft p9 oh hj tn ho ud xs wa jq vn il zg p7 fp ic qk wd bu e7 go hd sq ug hh ip sl ch qc px wb hs rq qh bk rl ef yw dj yt u9 st yi uz uc a1
\n al tt pq um uq un z6 wa vu w8 ed sj r0 tq pu dz qx js 2j na ip vz lm qq qe yj ud ei wf qi te p1 tu il tk iv dy i0 xg sf ix gp fk ai qk lm ql ch fg qv vg yw rl yy rv rm pu uc
\n rd ik qd jd ph gs k0 q3 qz ia q6 af q7 6w o8 tg gi sj ou aq po ja qn q0 qq gc nh xt wr fs m8 qw aq wp ho qu rm uh tr qp tj ay fi ao i8 aa i9 tv gv qu wo lj vv wa jm qa qd uk nl g0 qd dn gw ic kj nn wf wg dy ej rp y3 rs yy e6 wc oz fh eb e0 of hd yf uf ab gw qc ww yc cd ji y7 n0 wn cg u0 rn yp ie a1 dp o8
\n ih h9 qd xy yl ez g9 lt qm on vg rz dx qa kt p1 ex yx od uz e3 in sd oi qa cq dr me e8 ua ya dk wx bw 4p vx tn o7 pu
\n ij rf fu jt gs ld wu q7 wo 2u bd k8 rg ws gi oe yl if sq hi dn jn qw bb cb wt lm sq vr qe wd qi qo tk uc hp i0 pa w6 fo v4 1m x3 t7 u5 sq ai hg ap hm ju fl tz wv w4 ry tn yl fm gl ox
\n ql db ch wu rl ih qc 25 pb qq ty yw fp ao qy sp p1 qa rf iu rw qg uk gq km dz wv at qb jo eq ur rb ad
\n ra fy gm iz qk qz lf bp kc is nr ws mb es tl d6 up pe ey sj fq ek iv pn sx ly qb jd jn q0 lp m2 xl fs xx rs b8 hu rn ey qo yn vr yq ti ai e1 so ts jz du do oi sa i0 oq ow 3r 1a wg qg r4 yv ty r8 wk wl s7 u4 of e0 gd pm yf h1 qg n1 lt sv qb eu wi mt ez w2 yk su dp
\n rd qf dz aa bo pd pm qw rn tf ah wu
\n dv jd he jt ao ql zg eq pz oe dj fw sc fj nt io fi lr xu ns mx qe qt ei rq qy yw em e1 i8 sp in jv wo ci fa x6 fn ej wv oi um yi oc au yf mt te az yv yi ad yo yz
\n uv hv ty qd nv jh qj q1 ql we fv vj q7 gw fe wl vu w8 vp 6k yd r7 yj ia ey ot g6 dx tf im hu ae qx fh g9 lt fk hs su ov lo m1 cn t9 wi ki qq qw xx aw nw es yb vw yn ry pd ix em so ut oj du f2 lj qp jm w5 xh ny wg 13 wc qc ek el sx oo th uo yo at pv hn uh hj qj zk ql lw zx qv cc wv en yr w3 yy st uy iq ox an ah
\n rs pa df il iz qm k9 bu jz bi ji du lo ts yy xf cv gu mc ii rc up d8 ey pv av to yz y5 fd qx sc qv jv qr xt bk m6 ot md qq qt rq ex p2 yn sb xc fp pj qt ce wp i8 j0 wa qd gn ps qf be qt wh ky l9 za wg rp wn fh rh em vn dh vw ng wz k2 h8 f1 nz zh yz q5 zy e1 yh ad tw ep te
\n db wr a1 pk ew uu r7 d0 pn fe g8 la qq b0 ef os oh pk fm wa wk yi ev ua sq wu n5 tw xc er
\n fr yb qa hm d2 q2 o1 oj ox dm oc km kj r5 px rb et r9 y7 vs pj q0 4s lm gs qe eu ep ti tl of jq od pf nr nb ea ej yy rh u2 iu qh dk zl qv xe 2k vc vb zb xn yg gk ox tw
\n hb dc rd fu gu zd js wu xa c5 a5 w9 vo r5 d0 g3 oy ib i3 ha jm nu rr wy m8 b9 ws qu ru eg uz hw eh hg sp sf do jb zp dp nz wj wv fg ae ah ob hd dd gh dz qz xc s5 vi je n4 re bh ma wm as st tn yl
\n wi w7 om ug qa x4 yn r7 gi re n3 n7
\n ij pa qd qg uw qm d4 z4 q8 t3 mu yp uf ia pm ez ih qn jm nu kw qr j6 qw bb yu qs rw wf re qi ru jz tm gw lh ce xa wa xs bn nl ys t1 wk r9 gu oc u4 hd ku hp yg rv as yz ro dp
\n qa ss dc gu qk cs cv fv nm z7 lf z8 xs a3 rk r5 ys eg y1 po qv cl jf xq vh we wr qr b3 bv yr wf re qo sv tj ti eg dq ic fx jz du qp vb ws nl wd wj zq vg t2 zf wb tf yu ex yi yp tj en ua ud dr pe qk dl lq qz h7 ol v8 cy vi wv ck el gd q2 yw w1 ye xq w5 gk o6 ob
\n k5 hr jk ju k3 jq q6 zg wi id bb wa gb rf rq g1 et ot pn ht dc ww c7 we c8 cm xo mg w1 1l yx tr p2 oo lm ry ao e2 i9 e4 hi tn di lv cp ca 2u t2 no ub ex rk ys pw qg av py ql qo lb pn eo wb er tb yk ie id r2 tw o8
\n ra qa qd ph jh d2 dx d4 2z jl q5 ld cm wu wi 2t wa dd lg ui to id in uq ww rr g2 wu rl qe 1l qi qo ec yn ed uw p8 ut sj ig p4 zh xm p8 vs vg x7 ot cu l6 sx gu yp t0 gs az pe nf wl qz nj lr cy wr qv tj s7 u2 ly be br ym w2 af ri it ob
\n pp uv nb bu kz wi ah z3 c6 rg la vi oe ia ot pm dv gk 2h xq kq xg bv qr b9 j2 ec od ay p8 qi wp zd ay kg ea mq 6b qc un fh tl u5 av ub ji zx k2 wc zy 1x kc ah rw vc wv yq zv e2 rp
\n a2 iy sa ft pp un qn qz ol lf mg wo fr vu ya rk w0 pj pl el dz i3 jd su ob c8 pb id b9 ep yn ru tk s3 sh sj xa l3 wa nj ke kr ic xl bd ej rg yo f3 al f5 sw re uh h2 av cq bo vk kf bd mu wq wm ew ue tv ol tb o3 ul ov
\n iy a5 gu q2 se ls dt zf o4 dm ez jj uu ik ue w0 ya ea on ui tu rv y1 et r9 tq y5 ht dc fg i2 vs q0 av iv ku in il en ri p7 uc e2 ut sp fv qt gn f2 wo qa op v7 ws l6 wh ys zq t2 wc y3 sx yi t9 t0 ys of rq ug o2 av kn h5 ju ji ko v0 nz wn kf te dw u8 yt fn r1 ie yc it
\n qj lw ji eq oe g7 jf jc yr qo v7 p7 wd ma xg wz qb u7 w3
\n un ol eh g5 px b8 rr og gn mx yf wv sl on jo uz
\n qg qm q5 wy eg ri bm mz d5 rx pt ek fs pi td ez ho gh q0 ll pl kq wr d0 l1 qq ko er qt wf ei p2 ru uq ye tx s4 hc zd vn ps ix zo wk t4 y3 xh ez rf u3 up ys ou xx zv qa wb at rm eu qj wv za zs eq zy rv ry tn tq yc ob
\n ss qa pp rd jf a7 lp h8 um kc q7 wl rg r3 w0 wy tl a0 ih ly qm qq m1 xg qr up ja b8 yh dc lx rw ep ea ev ay ux to p7 tp tb i3 qu gq do gr za l8 rj og oy ub e5 ae tg t0 sq tx hj ad gs it vc bh yb eb w2 yg u9 w5 fn iw si di ah hc
\n ub o0 ik ps qd q1 ga lp cf kl m3 z7 q8 ue ee ud ix g5 ib gd aq fh th pa qc pg ue ur xw ww qr vj m5 jd ng in tx ff xc er qs eo qi p2 ky pd eg e1 yr ut ib oj tb hi hk ho gm qu qi hc ou gn wg sn wh ix wj wj wg ot ra wl un e5 rg s7 t0 oc sm tz hf fx pw x0 wz th qv 1z zt n4 qb cl wn xq xr y8 rt y9 rz tb iw vb
\n qn lq bu eg iw wi 2u 3q t6 k0 yd sd ed rb eh ek yz if pu y6 in fr qc qm ob il ma b4 en wu dk nh b8 qe bb mj ws qt ho yl ug qi ea tw uh p5 eg tl yr i9 pl lh ce i7 wp qa xs dn 7p kr p7 eo vs mb pk ni yh ef rp wj ej y2 iq y6 u5 em e0 ii md jy lw nj fh bq pc xm km q2 wv k1 rx u7 ut et gj tb iw gx fw yx
\n qa pw k6 qn qg qh as u0 dy q7 wp hv 4z 4c w0 d7 et aw wr bl mx md j6 an wi qt lc yx ec tj ri fx ht in gm ua qo qa ik ys eq n7 wh rs wz wx e7 eb ak gg ip sj py ka rl su ag
\n gb uz q2 qz wr q4 z7 ia ad je am my vi zc mx ym r7 yk ua pt g6 hy y7 sx ih qx pa hp jd sy gk nh no qq yg rc 3s qy ep p2 yb oa tr eb p5 en ic yr dw tc in hk qt zp i8 lz ks ci lg x3 wd xa x5 zf yt y5 op u1 oa iw fh oc rk ay pb pw uh qg zj qh h5 nf cd nv qx kp qs qb 6q cl kh xe u7 ew tv sr as rt o4 ey tn is
\n hv qj bo ru z9 t3 lj q9 rg vi rj sd r8 g2 sj yl aq fe po pa qv jf dm qq re we la wt wu qq vt gj ei yz rr xk uk pf so pk im zu ua sg j8 sk zd sd xz zw kl wk ol um yi rh sv u5 pb tz dl oi wz h7 s8 qf wn cx f4 mo wb ed oh ee er ry eu ei oc fw
\n hn a7 cv q6 cj q8 fs jv rl qq qi od dt l5 co qr zq ex u2 ah on pr wx kp wb yh gx
\n gv qg je zg jc q8 fr r6 yn ii g1 pe sj ta el jo sr jv ni jj zr bj ns qr qi ur hz vu wh cs ep s3 hu ez rh u2 t0 dw uj oi wx n5 18 bf wb yq oh ov
\n gb dc um jr mn we bl t6 vi pj c4 d7 rb ia yz tf qn dm ke xb ft au ix tv xd qq xg rx x6 vg r8 wz op h3 qj qx lr xv qc kc wp lq ea rn rm ri eo yx
\n ra gt qs dv ik gn co qg qm qj cb qz z6 wt ji q6 dy qc b4 ws ds vu cf on yg d8 eh py hu tg qc hp qn d3 wq c9 pv pr or qq ml eo ug tw es il os fi uw to em ic oj ho px wo qp m0 qa 1o ks 7o cp wh wk wg x5 ee yn bi ef wj ns r0 ez um u1 iw eb ir fk ov s0 fl hn h1 pr x0 ux cd wz aq jp im k4 qv bp wm n0 vm u7 w4 gj tm uz te a1
\n gv il ps db he nb wr ql kc zh tf mp lw ab us pn a0 tg pa th ps hf wc 1a yw l1 fs eq wp qr rw yv tr eh so i0 qf wf l7 wg na ou ah ay f4 io ip f8 cd h7 nj rn wb qb qn wp oj w3 w6 di id pu eo
\n hm fu pd qk bi wf q6 wu b2 q7 q8 oc lj c3 o7 6s a1 jh rg rj 2s z7 ya id ez fr gh vl cl zq hd jh xh ru c0 bz wu dl qw km kp b9 rn eu yc yc p4 ru tk ux fo ue p9 iv tv s5 do l4 cu rg w6 os fi 4b uz l7 ld l8 fx jb ee wx rp ek tg e8 uf de qh hz h4 qj gq nb wx qc sv go wm zi zo tc 3k ez ec rz ye oh ck w2 sy ia gk rm ei si dp
\n gi go z5 qz wj mg kl yh g5 y6 g9 xt p6 eh ap sa qu dw j8 ql yg aw t7 ir zj v5 v7 ba tw yq cz gc
\n ps qn z3 sw gs q4 ie gx ye wz r3 us ef d9 pb y6 tg y8 qb gc ww az c8 cb lv wy a9 qq qw l2 c8 qu uf yx qo ic de ut e4 uu tb fn oe dp wa uj bq sg mx lv v3 ya xk wd by n7 ra cp gu va yo u2 sv rk ir ya hf kc kp bo qb gp qb yc ku q3 dj o3 ey ad si o7 tw ge uc
\n dx yv ij pw a8 qm ph k9 dz q1 q3 cn wo wp my el bb uo on eh id yz am fe hy sw ha m8 vg wt vl wm qq w3 ls gj yx eo ef en ta e3 i3 zu hl m0 wd co zy l9 nn ea yj e5 rg gi fg gp u4 ir tl tz pm dd kc p9 zx sx qc qv kf ln on qm lw vn vm ew yg se as is di ro gc
\n al tt gu qf qj xo q8 c4 ws e5 ur vp ea rz g3 fw sx th db kq wt sv tb ad hv 1u gt ss xk wj qj pk rp e7 ha fk f6 dr rr hk dk nf qo lr ka ie fk cz yz q3 ym ks gl
\n gb q1 qk we q3 q4 t1 ox di ny wa ws gi ea rx yg r6 io ow y1 d8 ey ab g3 is ek pu ez dx qx th i3 jv fk io xh wt oe kr nd md pb vz wi ro se b9 tr yb p4 i5 p7 ux fp p8 sp in ok hj qy hz wa qq zd qd 3t wj aa dy 5u el yi uo go t0 u5 tl dq gd rw uf gg kb ux dj qj go wb zm lu tx vc es ev ry zt w2 tp w5 tn o6
\n ra ps hm qf qg 4q we ql q4 z6 d1 wp vt xs tg 2s e7 r3 ys oq ef c4 av dj pn aw sr th hf gx uy wr ac zv m6 wn ko c5 qt qy yl yc uh od ri uq fz dq i7 tx fc aa uu qt oe i5 ge ce wa gb vn xg wh og ya xk fs ea yf zw wh ub 8x th iw rj ah ya e9 tl yd tx yf ii fc kx hl zj or qj mh ww kf zm lb ob qn wp ww wn ym u7 rv ie pu
\n uj by wo ml dl qx m3 8i 2y r1 u4 hj h6 qa xv rn rm
\n qa gm qh ql u9 ls e3 yk fa ts wr vj ac en id ud ke ye i7 fn tn f1 ks at me l8 kl hk lx rp ek l6 ek oi e6 wc u4 2h pn 9y zq qk ec cf yq oj vz tb o4 iw ox gx te
\n gt ub hn qd qf hq dh q1 lp qk by ql lq we wr sy wy lh z0 ge k9 w8 th vp pz yg ti fo tp r0 g4 yz ig sx im i1 jv qn q0 wq nu 5t pw wm id qe gg qt xg wh en uc tz pj e4 tv i0 ff qt gm dp jb qp cr gr qa nk ws os wf ne wd mm wf t1 vu wz t8 e7 t0 od hs dq df av km v5 kl we fg cx al ax yq y9 rx yh ul hc
\n we cf q4 cj bf ws ww yd tk ef ek y8 qv fk wt ko qe ep rt ik ut op mr j0 ej t3 s8 ir pt qk km ww cg wc gi lu n6 yr rc oc
\n ak ft gy rd hn a6 uq q2 q4 lr ia eg d1 eb on sj dj pn pp qv i4 hs gx ww xj m8 ko im rt fi tc uy tb pl qy pc uf kf kt mv l0 qj x7 oi um tf ap uk wl ql zb vj wv tk re y7 de q4 rc ad rm ul is fq yx r2
\n dx gb he dl k9 z3 qk lf ad ch js o5 vq zl rj wr th r4 tu uo r8 fp ic g4 fs g6 im fr wq bg no wt dg ru ln rp wi yd qq xz ew i1 kq qt rq wg sb pj hk qu vx oi jm pa vi x5 wf ni ro ot oy di un y6 yo rh sb us tz ac f8 av ve h5 ji mj n2 ci rm yx ep rt 5f yq u9 rb hx aj
\n ih a3 pi mt do w7 zc nh qe wv rs xc qr ts ut pj im hp xa lv x3 ph tt sc od rr qh km oc rq xl vv jp ef st tw
\n ft ik a7 lp jz jx k4 hz wo bv q9 6t ur rl qx qv js dm fw wd re ea fd hu jc qu zo p3 lx pf wk vf fj 2o wx sb gs it ol yp di eo ro
\n tr ih yv h9 k5 qj qk nb wy gk q8 c6 mj a5 yi ur rl uy eq up yj r8 xs sh a0 ez oi y8 ly lp lx fu il ke zy cj sk xq 7u ey p2 uh yv qp rt y9 eg pf so ph tx tc uy e4 tn j0 gy ik vm ul mt nm mm x6 wj rp eh sb u4 ov of tc jt pt qj jy k1 s5 qs 7z do zi cx wq wb ma wm uw sr w5 o3 o5 su r1 yx fe
\n h9 gm cp z2 fb zd gk ve o7 mt bc wp bd rg w8 rk kx mv uu rb or yk eh r0 y5 ht pu tf se ar fj hp su m2 cb c9 c0 b3 ns qq qw rv gg ij y9 oa od of pg i7 hk do pm 2n sj wa vm zw vg yy om qx nj v9 f3 ee w5 w6 iw sp ep
\n da jq iz z1 ls z5 cg nt zk 1i gt w7 yi r3 xi yn pc fa ta ez in i2 qc uw si qw d5 kw il b4 fa ib 6c ud rq yl wg tr qp p4 ry sm ut s5 hi qi do j7 jn j0 qs iz p7 wg aq ex go ax ku nc h8 n3 v0 oc ah wm li zp rl h5 is eu o6
\n gr sa a4 ik dm gp q1 wy m5 fw a2 t6 rj mx e9 et ej q0 ot wu em fa mj qe yg gh j2 te tj p5 eg tk em ao i7 di lh ce m9 wa wf ys eq l1 t3 ej el tf t9 rk u4 ay rw gg dr hj ac qg zk h6 dz ok zy kc mr fz iu hk yj oz ey ag id r2 ov
\n ds fu ps vo qf qn pf a8 ph q4 cb le cn h0 jg gy b9 rk r3 pj yd oe ht pu ig ez hu q0 qm nu ww qw ow xh b4 is a0 qq hu bb cn xh oo qo y0 fz ue e2 yr fa pj in sa hj ui sp nj zg wj ge wk xg ra ex vs oz eb pv tl iu x0 ln cq xx iq s7 wv qs zn tl wn lr yr r2
\n az qf fi qn u5 we jq zh wh c4 sd is y5 po oq ki sz qe rm qy yv p4 ye tb ho 1u gq r3 pl td ov u4 hg ax zj wz wb vl vv se 5k eo
\n tt gi pd jk lq q6 wi gx mj ut ax av g7 qv zm lo 5y dh cw xo ve vy xg yv iv i0 qq xg jv l2 yi sc ga pv pn iu ug gy k3 cl oj tv yl di
\n qk se pd gt rz uu d6 io d7 tq gf em ym tu ib oe v3 si wa nm wf qf wg rk kz yd hl wx cy bp mx et
\n uv yb dd k9 ph q4 me o6 nr xa mu ld r5 rb g2 or fe pa fj hp db lu qn nr j2 bk kt xl rn wf qp tl uc dq i7 tx fv ar sf xm mx x1 zw yh lz mr t6 l9 s8 ah u6 x7 yw ol tq eo gc pi
\n ia ys jh wy sb i0 cp u3 ql kl kh
\n po al qg qm d2 gs q2 ap qz q4 q7 kv ah o8 rs 2o ex zx qw z5 r4 r8 y1 is ts y8 qc dm ll we wr sg lb jx wu jv qe ee qt qy es ed ym hw to tx hr s2 oh dt dy af di do qo oo qa w5 uz wh kh x1 t1 hk no r7 rp na sl ej op ev tj eb sb ya pb u5 tx ds o1 hz v6 lw jp k4 wv do wn aj bc wn yw to w4 as ey yk is ig rp o8
\n rs ty q1 wt wy xp e3 wa yd d7 ht ts sz fk su kw xg bw cw qw oo fu od ix sd zu jn qd ci fi xh mo cp ev th ua e0 em kc lm cu u3 n8 xr yq ti yj fm yx tw
\n dx z2 ga kb yo sf sk fd gf i2 vs qw vj vw qe eu mz tu sp xs qs es t0 eb ak uh hl n1 v9 wv kd o8 rp
\n ds qn qj qz cb kz bo wi o6 z9 fq wq ml cv cb lf eq r6 r8 ic y4 am sz sx po jp g8 ze we wu en ew qw 3q lz kw tt ty ti e1 fx ut tc uu s6 ow gm sh qp vn l4 uj op xn wh qk wz rc wh uv um ar e7 uf az uh py h5 lq vt nz lu li lm dd rl er rt yu o4 eu yz if
\n iz ld me z1 y2 dj ar qb b4 l1 mz ij ry to ad xs sd wf eo hj wl ex ie u5 pr zj gt oi wc kg my ex zt ks yg eu aj
\n gr iy ft pq um qj dz wt gj cn ru kx q8 ws ue rk eb ee fo jb jf la ji ke qq qi qw rq yb qp il eb y0 iv ff zp l3 xm fi x4 r5 xd r0 ol wc t8 ae iw ox fk of pb qj ku xc ct wc ie xn zi wm rz w2 tb u0 su pi
\n a3 ss je un zf vq zg wo mx d7 pm gd vn eu yk tq ik fu ai qt qf j0 to yy at ii qk wz lw n7 ly
\n ub dz rv qt rm wg ea pc j9 qa mr h8
\n a4 wp td ur px qq ki yx go wc tm an
\n po gn rf a5 uw qn q1 nn we is z8 wj ca t5 ij eb tz ef pr ix g3 ek ta a0 y6 sq pa wq cx kq qw we rt c0 mz pv py wi cw mj qt qu 0e oa tj ux pf to hr ao tx yr ts fd fv s5 ui qu j7 gw ug ss cy ks qf xm fk wg vp kt mv qj mn lk vh yt ol rf th os e8 tj ua rk on pq dg kv km wq kp ad bp os bw pb qh wp zs q6 u0 gj yu o5 if a1
\n a6 he pu vd cd q4 jz z6 qc jc bz eh wi b1 ed ym eg fo us ib td y8 gj zm pk lc qr wu mh qw sw fi ue j0 xm kz y2 ev aj df h3 qk qc tx rr rl rc ut ad so ro tw
\n a2 pa we kk eg q7 lh zj wp 4z gi yd yg rc io ix r9 jb xe rr iz jd ij tz p9 qi l4 pa g9 s4 vi tj pb hd f4 qh qk lq qs hn ro
\n gn k9 qz aw wu ki yf e2 pk v8 xk wg 5y t3 sl u4 ya gd hn ql zr oj ig
\n fr qn qh ph k0 q2 nq wi zz rh rx ee ef uo d7 ix el fh qv dm vg px wi m8 qq gh ud qy ec fu yw uw sa tb lg us sk wf 5h qf sh vp wk qk zw qz wg aq of ys ak al f5 re f8 pr ku qx wc u1 lr qs wb f4 cw k2 ka hk mf yr w3 ro ir
\n gr qh ql wh kv e4 r4 oy lu qw pb et uj tb tn xs kr dt td t8 e8 uc xn eq yi af
\n q5 dt ed y1 am qv ut gx m7 yt rr yq yr dy sh mt wd wm th bv ym
\n tr qs ca lp uv q3 wu o5 c1 rx om ee er ta ou i1 jb rt ry os ti fc ss px jn gy jz vp ea tg ay rq u6 al de qc zt wn ez rz eq aa rm ox
\n uv ds h9 fy rf jq he qh h8 d4 wr wf du ck wi km yp ut rv io g1 rb av y4 tw a0 hy sz qx gh ha q9 qw ze 1a bz bx bv qw po ee wa qi xk ri i6 ic he tm hl sj jb qp wp jk qr kf sm l8 be x3 qu ql t1 x6 yy fg rj ua ug qg kv k1 wl v7 xx bj ae wc n3 zy tl tz zk wq re n0 yw oh yr oz eu fm do ux uc
\n hb pa h0 pg q3 mw q6 mg ls lh am sc gz al j2 wt t9 lm qe j2 rm re rr ry fl yw ux i0 2e eo bt vh ra ys sm pb on tx re ff wn wv tu rt ox ul ge
\n al fw zb p6 hi qy ay ou rg sx ag rz uy
\n bx wi kv t5 3w e7 sh ht ff nu la xo qi s3 uy jb 1o vm vi l6 be x3 ny pk aw u1 rk fx km qc be rw yn ey eo ro
\n dn q3 jf w0 e0 lh rv zv js j2 xg ld hr qe mk s3 dr kw kc dh h2 ql cg zv n3 ym yt aa as
\n ft ty qh pi d3 qz ip wu wi q8 wj a1 mg mj ut wt r3 om ua y3 ou fd dc zw lp xe cn dh ng qe kp qt mz xy ef ay od tz p8 i0 hu hz qo kw jw qf kf 3y v3 qj w0 ib ew t4 fg oc e9 ua ov hs u6 f9 h6 vj qv li wq iu yv xv rc w6 rm r2
\n d2 d4 av jc si rs ut 5q pa mm s4 e5 tc km
\n qa uk uw qh vd d3 q4 xo wo c3 wl wa w7 w9 mc r4 y2 fp r0 tw fr g8 ae qn lp sd bk en vr gd hu j1 xv xg rw ep wh ed fu ul eb fl fz i7 ht jx ns v3 ll zs j0 op xf qf l6 l7 sn wk zw wg ej ti wb wz t6 oz rh rj uf je av dj h6 ql wl oi im v8 zr qv wn ku wb bj ef o4 yl r1 ei so if uc
\n k7 qg q5 kx oz mu wl ws rh b2 rk yh qn qe 6o 1y mj ei pf ye e1 dw hj hx do qo gc rh v2 zw x5 t1 t3 yu th e9 em au qh f0 qk km ql kp wc 5p vx bg ea ev wn wm w2 rx o3 yk ru
\n a2 gr qa az dd gm d1 k9 hr we bz lg mg ny wp xd mp yo pj uy xs ua pt g4 tw ez jv q9 qn wr nd md nf qq ng pi bb j2 eu yl ij ty sb os eh hw i6 hg m9 nj wa qs w5 qf nz zh hw wh be zo fs rz me yk y3 ub t7 t8 u2 u3 en ha fl yd hf qh qk wc 1x ze w1 oj ee w3 ey du uz id ah pi
\n o0 hn qz q5 du 2b bn a4 ex rj y1 dj yz y5 ig tf th js qv 5e gc j3 ls d8 yw bc cz tx mb wf ij ty ai as hi lh l3 qd n1 7o wf wh qh wg ot ra l6 el e5 s8 dg vm 3f 7o wn yw gj tb et
\n fu a5 cp ch hx hc rd eh tg g4 li sw sf il eg eb zj 2o cg ew uc
\n qs uc rp ml eb yd if pa c7 oq vk wu ot yq rl uz tv gb zu vm w0 sc tf ud qk wm ko yy er tw
\n ak tt ub pa pq il jq q1 hr k0 uv ql q3 kk zs q5 z8 is lh q8 w7 qw es ii av yz dl ht td g6 vs jm zr px bj no g2 g7 la c7 xt mz yx tt ym os to s1 ur ta s3 gv pl qy pc qi sh jv j8 gr l3 bi oa wd fy v1 s1 zp hg 5e t1 rp wj ms wl t5 el wm at fl e0 sq h3 dl qz ka ox tj qb wb wq re xr rx em ee yu ri do uc a1 rp
\n uv rs un qs um il ul q2 jy kl wo a1 rj tj pk ys r5 yn uo oe y4 ou y5 ar zb g0 qn gx zt lb 2v rm qy nw yz lm eg og i7 ht ss qy qi ge wf bw lv lb wh cs wk hf 7g yb zw hk ns ol rh th yo f4 rq dt uj qo fd wx nk rv ka fl mu rr q3 w2 oj ee rt w6 ru ul
\n qs hq qf qn d1 k0 q2 kk o5 na si bv mx e9 tz d8 tq dl ez qv jf zw ww wu xo b8 w3 i2 te p3 ry iz s1 ut as s5 hk wo wa l6 wh bq xk wj wd ra gu yi u1 t0 ak ai tc ax ip uj nf zb wv bd wo fz qm qj pe md ew rv gj ol yk tn iq yl is si ie r2
\n d3 ni wr ws li mj ds sh sl qx vs rp ft ik e1 sd af ho xn wg zh 6b rp eb f3 u5 uf df py k1 wz vk vx k2 dg wm er rv rb
\n gv iz qz wo o7 k0 oq ti r9 us ib ps g0 jm jb tq ue iv pj sa cr kh t1 ot wc at e9 ys o2 ab qj ww za rn yi
\n ty qn qh nt ql la q3 kl wt q5 mp mg o9 ls lh g2 id ez sw hu qb cx nu pl kw wt vz v2 1t wi mh qr eu rm qs xb ei ij uj yb sn ai iv pj oj de hy gv ka lz pa nx wg wj kj cw zp wk eq wj t5 ns rf e6 rh ev t9 eb ir e0 sm gs gd ds f5 dd de fc f8 x0 lm qz wz xc wc kx cu wv ks lv kv wn pv ei wm ju ww yv zy yt e2 rb w6 oc o6 tq ig
\n q2 o2 gj q6 zh mg li mo vo ch tl ax ip ho wt ln ro wo qr tr tt os fo e1 de hg gb sk w5 fp kg mm l6 yi u3 fl hs u6 fx re rr dl wb jr el rw pm rt to rx w4 gx
\n yv a3 qa uz k9 q2 o2 bp ny zl mj vo tk rx ui g5 pu qx nr xt ls lm rv qs yv ik fz tv wp nk gn qh qi yf ek e5 pb au tc ac kc br qz 4t qc mx ly wm kb ez w2 u8 ei o8
\n rf vo q1 o2 wt q5 oc 5l a2 es oe sc cx wq qw ky em tx rm p2 qo ft ed uq fs i9 ok lz v3 au xj v4 wf l3 eg ej ex wl rv qv ak mi hm cj yr w4 si
\n a2 tr qa fy qd qh oh kk lq dy bz oc jf wa k0 ip pm po qx wq cx we wr bj j5 yq qe gg rq rr oo ru od ix qt af or sj qo jn gr sk wd nc xo xl wk hh yf eh ek aq ex ar en tl ys rq pm ug ql qz wb wn yw og xw an if it
\n ak kk wu ig df w0 rb y4 fr gg i2 qb qw yf j1 ij ue s2 qt ad i5 yp ai l9 wk km ql zn yb ee rb ir te
\n hb q2 ld wt q7 qm km ws w7 vi iu yf rc g1 pr ot a9 pn dk ib qx hi qc jd jg hd q0 lo jj cn wb mz ec qp uj y9 tu yq au tp hg pl jx vx j7 wd 3y ca au rq kv qg dh k2 ok cf qa wv bp dw iu de k2 rt hj wm rc er o3 ey fn si if so
\n hq q1 qj d3 ws as ld mu rj ut d8 ey ou ib ez gf y7 qx qn vz qm vd zw ww d8 xu v1 av b6 mg gs bb g8 rm qy yv rr ry oa tb dt jb qa j0 qs l5 nz qg wj t4 td t6 eb ua s0 pn ii ac x9 qj k4 wc v9 s7 cj zy wo 10 hn yq vz u0 fm uz ux
\n ra pp qd d2 vg wj qn rh we ht st jm bv wu wi qy y9 de gw wa sb uz r1 qu pz ot td rg go e8 sn iy zr wc s0 ww ea pw ac w1 h4 w6 rp
\n fr uv pa jt qk q4 ls wu wi mt xa vu tk ed rb pe fp am sr hp qv m1 gc en yw qq qt ud ey eo p3 tj tu en ix ux ye tp ic s3 ad hz qi uf qa kr wh vd lk yn 5t u1 t0 od rr x9 f0 zj kn nk wp zs se rv ei pi
\n yb dc qs py qk nb jj ql q5 m3 mg zz e5 i1 zr lc zx xu vq hr xp by ei yx gl qi qp ij eg ai ph ap tn dy zp qp bp kf j5 ib vd eg el yy td yo ie ag em fc kx zx h8 wv sv q1 te wv vm yq ol if a1
\n ak rd rf qg lo nq xy z6 q6 mw c1 cu z8 q7 vw wp a2 zz w8 yo uu ee yn r8 av yz y6 pp uq dv i3 db jv q9 2l xg rp ib lj sq tx tc mj mk qr rw te p3 ik eg pj e3 i9 im tb tn fm na j7 lj wa ct rg v4 he x3 kl bi r7 wj y1 l6 wk el 9f t6 gu tj od e9 tz re uh o2 zk ki cf lw jp sc s6 qs qb yn yw ms md w3 rv as yi ox du yz ir yc dp hc
\n yv gy ik k5 db gm ux qj gc w8 ea g6 dx po jb 5t pv wi rz qp jv v1 ea en al ii dt qj py w4 if ux
\n iy pd yg qq p4 in qa y1 yy ta fb zk s6 lu
\n ql ws rv yj jk ke lm ff lb fx s4 av uv wl n1 rv dp
\n qh d3 rs ih rc aq we 7y ud t3 h2 zt cu oc
\n fr hn k6 je q3 k4 tm zh lj aj li a4 t7 w7 kx ut pl rc ih th hp wq pl ls ma oe lf wu l1 ve lp qt qy fi ti he oh hi ow tm cu p7 nr va r3 tt wk wc s8 s0 hs al o2 hk x0 qj lm v5 wl qz 7u iw os lu ah wm hk e1 o4 rm fq ro so
\n gr sa rd um pf ca ga ql qz wt ld z6 vw kv my xt cn wr eq tk rw fe qx qc qr qr rk qa qi ex p3 p5 dq ff pc sh cy oq v4 wg s7 yo ya od rq dt un bw zm da bg q5 ru ah
\n iy qd k6 dm oj qz zd vr w0 r7 d8 et y1 eg yj gz qq p3 il i8 ge wp sx s1 wj t4 lm jo qp pw xc vm sr uz ig
\n qf u8 iq rg rk g4 im ih oq fp a0 ib tc uj tn nc kz ll u1 un qv ck lv vv
\n a4 df um jg z2 lq wr tn xu id wo ez rk rz ew d5 ti ix to r0 sk fa fe fr hp jk wr v1 ms wu jm rc qt tr ex uk vr to tz ut hl sg hb qd xn rj jc 6x mo wz rp ek y4 oi oo ae sc e9 od pn hg wz js qv ln ju rx yg as rn gk o7 so it
\n dd qs qd py k9 lw wt db gk zf nw wh t2 nf zx w8 rj ue w0 tl ew r6 ui ef g1 or ej pn an tq ou ff qv gj jv q0 kq 2l uo oe ku wi w1 tx qe rv 1l ws eu lv p4 ru fo tz ph ib fv uu i3 qu oe pc gw wp sx zd kw w6 bo w9 cs cw my qz zf rg fh e8 ay yd fz rw fc ng qz cg wx qp oz qv ly ha cx al iu rt mp e1 ee w4 tn ul ru o6 ag aj
\n qa kv e5 bm yj j4 m5 rj qe ri ht oi qf qe t6 e5 aw t8 wc dw un yb
\n iy jg jj d4 ju ol wy bs wk wq t8 a9 y4 ta dx jp qc q9 su si ut q0 m8 qq 4a zy qq zq sw po qe qt la sn p6 ht oj hy hh qi gc bn w7 au ya kz na oa ox ov je fb or wl qx ze cg we fk tz tv cg uq w6
\n dx sa qa qs db go lo z3 lf ox jc wj 93 tj tk yf ii ef fo ua sk g5 fs el oi fr sx fg se q9 xq qw j3 m6 4h qq l2 eu rq qu qi hs uh p6 ix fx qa i0 wf ke bq ne wh xl ms un ex sx yi ua pb s0 rq ak ao fc pr qj cw wq vy wv u2 3h wq re yw eb gl eu
\n hv gv il jd go qh d3 we q4 q5 ej lk my gv a2 ds ex e7 rk yf fq pu ff qx ho js gx j1 qe kw gm ja ns wy ln d0 cz xt te xu tt sb em ix ic p0 i8 im ui di gt ws os r1 qy pz l6 e5 e6 op sb pv sn ii rr v6 ql le zy pv mt da yq ol w6 yz ag it
\n ds gy dg jf qg uw qj uv q5 q8 q8 q0 qm e5 rk yd tz pv if qn ju cv xy ki qw mj ls yv ru of tz so yr oq qi m9 sc kw qf zh jx wh kg l0 t1 pz un fj os ha sn f3 e0 om ab cw ct nj zy wn fl ww vn fn tn ie ov
\n co qh jr jj cb bi wt q6 ra qm zx ur r5 an fw tg jm re j5 vl em sl xz qe wa 45 nq ex yb ed ef ph e4 tb s6 qy lz cu gm pd gq mc ca 85 yf wf hu sb eb fk fv dt cq lq qz ww wv xr n0 eq ok er et iw r2 o7 fe it
\n ra dx og wd wt o9 i4 it pk qo ic dt hj jl oq sf lz wd ca hi fg f5 ap x9 gq nd iy q6 ep
\n o9 gb a5 z5 q6 wu w0 tl r9 dj if ts ig it zq ll qw qy ep ed ry p5 ut hh dr i0 hl qp qa zs wd ya ot xk s7 e9 om io dl ki k2 wv q1 5g er rb rm
\n qs w0 om ed tk ta th gf ii av og 6o ee o8
\n po ty rf qf he qc hz bv c5 mi rh ew tu ef sh ix r0 d0 pb tq fw ig g8 fh i2 uw hf qw qr sf cn wi fh qt hp nq yv fy tj od ux ut fb hu tn qt oi qs oa xm dm fa nm qx yy oo ec ev ox sb ya rk ys ud jy dz zv qc zb qb 17 tb lt yt u9 w4 rb st et ry yl o5 di ux
\n gb gt az uk gm qh d3 bt qz wd ld o3 bl cm q7 ck k7 we b2 yd ua ew tl rq yg us tq js fk jb gz jn jg qq in qr rq qy 3h c0 qp p3 yn ef sb ym jl sf sh hv qd p4 fj od ix kz ni wj wz tg t0 tj e0 sm om kx ku cw nb zc aw cy qv bq kg wq pn zz wv cd wn yr se o6 pu eo gc
\n gy rf qf k6 qh qj cd q3 kk cj fw b4 fr mj w7 bm wr ya z7 wt w0 r8 ip ti is pn am y5 qb hs jf qw uu wr np qt wi 1t bx qq qw aw er cv qy rw eo oa iz fp iv qi jm nk kr qf xm eo nr w0 qj bi t3 uv wk ek wn ex e5 rf rh ga it f5 pm hm f8 qj gy jp le wc qc lt s9 zu lb q1 ju w1 uw w3 oj tn tq ir r2 te
\n ak gn pw a5 qh k9 qk nb 2l qz wr kv mt gt w7 zx ii oe ug ix sz qx qc ar sr zb su vg qe np yg qt yj sv uk pd uq pf of eh jb sk gy ws ke kd be og kh x6 me wz e6 yo iw ah rw pm rr qg pt lm ql qz wz qs ly wb wn q3 ry rx gj ia o5 ge
\n gy qm d3 q3 ia c1 ta ex e5 e8 eg sy cl jf qe nj nh m9 qa w6 ek iw kv qg ab n4 w5 iq do
\n uv gv qa un az jd qm eg iw nr q8 zj ny c5 vu rl rx yn et ia ua is ot pt hu im gj qv vv xe xu 1g wo vt qt st qy rw qi wh ft es uk hw to p8 fn f1 hp qu sk p2 l4 zf qf g0 fi l7 be ky iv mn xp nn dt 6b ro kw uv ra tp e6 sv s8 sn tl fz iy qh hz ve v8 h8 th wc wy qb xm s9 hs wq zs yq tu en zt w4 dp yc
\n sa ak gi hw qm gp pp qz fm id lh 6w 9h xr w7 ui ow rb oe ia us fq g5 y5 ig oi y6 im gk ze qe gn 3o ye xz jh qe db qi p1 ep re op te y9 os pj e3 p0 zp qa ih l4 cu zg wg sn rh lf fz ic kh ni wh vh wx th ag u3 f6 uj jy k3 2a wc kv lu wo hb eq w1 rb xw yo ei o7 gx
\n hw wd r0 g4 sz b7 pi sn tc in qt zs rg eu
\n iy hv hb hq qf z3 q2 xy ia tm zf jw wq a3 rk w0 d6 eg et is id po tg gh ob jj wr np no wt ja wy xl l2 yr bt bb tc cx qr rn qt lc rw sy ex y0 ru od to tz og ye hr pj de tv e4 qt ad oo qa jq fj l7 fo wh nt pl ro wl vp yy tf va e6 fg th ar s7 os at s8 re df pe hj f0 qx qc x7 lu nc zo tx bf ww pq cg w1 rx id pu it fe
\n qf fi ld 4o ge da t5 zz mo zx vi ui w9 pj rl rz r4 uy r5 sf av ot fq tw sc zv g9 qv i4 ut iy m1 wy xo b0 ud cb qy rq ug wg sn fo ix uc aa i9 ss sd di sh j8 qp xa ep w0 7h wl t7 iw tj ya hs e0 fz hd u6 hh dr dh uk qj qk wx ol xb qv wb s8 15 wy zm jr nx it eo fx ww yv zy to ee yu yi iq ey iw rm uz yz ob
\n po ra ik qd qf je jr lp wr ji ne wu 2b nt wa e7 rx xo ia yl ta ig ff hp gk jf q9 vd si gc qe rk wu by yg rq sb os fu eh em ux ic ao pj hy im du qy cr 2e l5 qw v1 lf mv wk rx dy rp ra rg gi eb hs au ap bo oa sn zi f6 h1 zt ey yz do
\n lj vu y2 if qr wr ta so kg 3k ol u1 f5
\n yb cv mq lf zz ue ui dx qe tv qu ex tz hh tb dy ds wi rb
\n hb gt qa h9 rf qf qg k9 q1 lo q3 ql z8 gl zg q8 1t wo vr rg ez ws mo w9 yo r3 yd e9 ea sf a0 im tg y8 ar qb ni wr qr wv m4 ix d9 nz yq fa if yr bt qr mz qi ea rr xx at ic pk qi za hv kq w7 co xj wd x4 zs wh y3 rf ec u2 e9 ah s0 uh io un h8 iq zb bs nr be zo fz vb wm pr yw md rc ur er ia yl ox ei ux eo tw o8
\n qs rd rh yf px ow d8 tq ig ih cl qr yw qq in qy wc ek rh ya qg cd x9 qm lq to o3 ul a1
\n a2 cp as gk kc 4u zj z2 t5 zz rb eh sh sx fg fh g9 hp vd gx oq cv pe wv b8 qe cc nw tr il tl e3 qu l4 mt wk wh aq td e9 gf lm qz bu jq my wp vb dg y0 ye yq w6 r1
\n ij az qn ph qv bv bf mz iu is y8 ar fh q9 hd qq ji sf ld up qo p2 at sp in qi nk l5 e0 rw px 5o ew oc te
\n qd jr pa q5 w9 hw hu y8 dn qn zw wr ma ei xl dq i7 i9 vb sx wf wh na wl um e7 s0 h2 nh nk fj yl wn iu u7 as ad ey so uc
\n la up ic g5 ay ic x8 u2 ar eb wb yr aj
\n po da rd un qg uw lq m2 4r wg q8 z9 t3 zc d9 ae st q0 li qw wt kr qu ry en sm qf kf kh ny yt gi rh u5 em tc kv h8 qx lr jq ef
\n a2 dh qh q1 h8 qz z6 kx z8 bv 3q df pk tl d8 tq tf g8 zb qw 5p zm qe cv yb ec uz iv e2 gq wp uh kq ws lc wk x3 t8 rj fc io je dk lr lt wv wt bw be eo q4 ye yy rv ok yo yp ir ig
\n rs ij ty ps ul wr bh kb rs z4 z8 er px uo up y1 rb fo jo gg dv ph q0 jn xw ww d8 rp yd yf tx b0 op yn of jl tm px fm jc zf qd pk wh rp uv tp t9 ir yf ug qg v5 ku qz fd k4 cu mw zn iu bg lq ly rv su
\n ik dm cm or tw pu lp eh qd kk j0 em ng tw
\n ra ds qk cg q7 k6 4p t6 yu lq go yd eq r8 fw am dm xy cm v1 cz eo qi ij yn eg hq tc sj qa i0 oa l6 p6 wj vd wf mr yt ex e6 yo t9 ev s8 en rr bq kg hb lm re bj ms w1 et du
\n o9 gy bl wz t8 hq iu ix av y5 y8 jn j1 np xt t9 vw qq 43 xv 9w yi ft es hy op lg vs hg wd ef wx ou ox sw dr ze xr st fm ah
\n a3 wr fb jc c2 w8 rx fe q9 hd xq qq wi te y9 e1 qt qi qs nl ca bh u2 md tv hx
\n ra jh q3 aa t3 1o eq lh rv fo us pt dj pm pi qn zr bj xj cm ix a0 ra hi eu nw yc p3 ru ri ue e2 jl hi wo g9 xn qh wl wx go yp rr dj nd ch u3 fj bd jy vn w1 ia ox tm uz
\n ma y5 bl qi g7 ri fl ap 7a yo ko rp
\n um dh pg wq r5 sf ia ta an hs ne q9 wt sh rk yi ym of tb oq do hv wj ic oi sc pe sc wq wb wm tq
\n hq qm gg q4 xu k4 k7 uo wt kb et fo ey aw g0 xi am in qy eo qi eb ay ue og nt kl wx s9 df qg f9 v6 rv sv pc tl my bj wb eq h4 o3 ri
\n tt uq qh nb qz wt jx ya on om io ow ha qp e2 fd e4 hp hx p2 vm xg xn ra l8 iu yf jr qh k4 1l oa tk zp yw rz sy ul yx eo ep
\n ij yb qs fi ul qk by ql jl wr bi q5 bl tm q7 xp k7 vy gi tj rl rx yf ym tu er r8 pe ip ej y4 fd jn gx zt vj xt xh rj kt cm ri nh zq pp eu rw to pg e1 ue dw e2 so tb gm qi jb nk gy pa v1 xj fl kh 3k kl ed wx wc ek yy ez wc iq yo u4 it tz ak hn f8 h2 hl uv wz h8 gi nk ch zt wt bw kn yq e1 tv zp ag it
\n qs rf jq go qh a8 jj xt le wu qq yd d6 d0 ff qc su c7 lc wp ty y0 tu ti pf ta aa ug vb rf vi rx no un yu e5 7r up rj ag ha hs fc wz bo qv bp tv ki er
\n yb qa rd lo ok q6 o6 ba r6 ow or yl am aq gd dc ho cx c0 wu g4 ib c5 ep re qo ed yw iv ta hy jc pn xs oq xn bo zg ps kr iz yp wh wj xl xo zq me na ek wl wx wc rg uo ir tk aj da rw hm io uj fb jt qj dk nh zx jp qx wc zr zy zu nc xe ww wv vn h2 q6 en ew ad yl af eo if r2
\n yb o0 dn z1 q1 sw qk po xt wr ls z8 ox wu jd ro q8 lh mh wa rg ea c3 pz tu g2 is a9 pn tw po qc sa jj vh ax xh kt wy jv mg nh kp b9 qi od ht e4 uu ij qf sf nc wk ap wd qz rd yi s7 tk pb it f7 o2 f9 kv qh h4 ln wz gi qd zn xm sn 39 yn rz u7 yr yj is ie ag ir tw
\n po dx uv pq jd dm d2 hr cs gs d3 q4 wr np ab di mh vt w7 w0 on tl ia ta aq dz y7 i1 qc pg q0 wq j2 c7 la cb il wr lv na ru 4g vz nf bc sl qq qp qt ud rq wg ex uw he tz ye p9 ui ou vu at ix w0 vf bi ed yh wh ra y6 wc u1 t8 fj ov iy tz kx h1 hj jy qj h6 ng jo wz cj ie mt rq te n9 mp ma q5 8w rb o7 sp ob
\n po ra dg ca qj q2 is kn rd ws lq tu ym yl y5 tg pp qw we cn a7 1t jd m7 wo yr sz qa hp ei qy ec p4 hw p7 to au iv ht qt qy qo l4 lz xm wd yf wg ez s7 en f3 tx yf rr f8 cw ji qv yz ry ew w2 oj w4 w5 st rn oz ri o6 dp aj
\n gm jg ju q5 np lf q7 xo 1y qn k0 jo pp qx th q9 4f or ro pu bv eu nw uq ao dy jb j9 gr rd nz wj wj r9 co ta rk od dd gg hm df pr km ng oj qc sb qd wb tz cq ex wb vb ty eq tv iw
\n fu uq co qk jl cg ld lg wo vr gc bd rj r3 yd rz iu ew d6 io to sh y7 jp db dn qn qm si xg qr ls jo lr wy rk wn m7 qu bb es op qp ru en ta e3 in hy dy hl vc gc gt jw ke 2t wh rk lj hg oy e6 yo ev em fz rw pq re dg qk ku oi qz k4 qv li rq n8 ec 4d yb wb e1 iw id o6 ir do ux pi ep
\n a2 wu jd ef dc mn 5e qp pl xd ag ay
\n yv o9 al a5 uq qg jw pi z2 jt cd q5 3m zl ez vu rg jl rz yf ix sj fp d0 tq ff ha hs zw om ni m0 xg c8 a7 ki qw cc ei xg j3 tt tu il p6 ix tp tx ib sp hg p0 fc pj su qu jv sj lk qp ws qs gm 1x mx lv wj qu l1 dt wh wv un aq fg rg e6 uo ar ie up it sw tx f6 o2 h3 qc qa ho vj u3 kd zy n6 my ww vc lr em w2 se rt o4 yc a1 te
\n qs dv pa ty iz uw qh jj z3 tn jc eg e4 qq w7 ut r3 uu kb up g1 fo iv if fd gd sc qm qe xg ia wb he ky hu tv rw qu rr es p3 ue s2 as i0 dt qt hz jm j0 gy ci fi hw nv ea kk vf rx 68 ti rp wl oi vp at om io uk pt qh qj dl cf lr cx wq ku ki w2 yh af ul sp yc it
\n ub yb dc ty gm dm go nv we ql by la o1 ju o3 jx fm aj wa rg e4 vi a6 r4 xo tz oe ip pv dk tq a0 tf fg tg i4 pz sd ry ky mg hy g7 eu qy yi rw qp eg yw hw sm uc i7 dw fx s3 sf zo m9 xs vn rf ci nz kr qt 9y pj lk ee pz ef rk e0 fx uf az fc qg jr oy lq cg qp um ad wc zn bw n6 my xr mp tu en o3 iq ir ro
\n da a3 d1 jr dz ca ql nu q3 cf o6 nr mt lk yr rs lp w7 a5 pj ys ym r8 ey to fs dz im ih sw qx qv zn gl j1 xe lc zc vw 6a mh b9 qt rm re oo qp p4 tk ix p7 og tz yr sp aa hk ih lx qd mx 4n kk vf el oo td ae yo fj uf pr hl qj qk wr qc qv kf yz my wq hn zs dr ee u8 rv et ru ie ag tw
\n gt ph z0 zl mu ui av zm om ui vh qr he qr es fl ws w6 nc ra rk kp ol wm yu it
\n dd df jq jd ux ql el 3r ya uu iu ee eh g3 sj us ib pp qc jv hd bh zt uo d8 b3 xu bc rq te uh ex tt eb il qu pc ge sj qp ih xf 3r gr yh qx tu wl wn sz up ay it ab jt qz v7 wn li za 6o w3 fn yk eu ie gz ro
\n yv o9 qf eg eh mh jh rh r3 rv ix y3 a0 sr qc qq qr wr qe bx ki m8 mk qi lm uk eb ai ur e2 xd nc ca eo mb ed uv rs up ya of hn lw wz qz et qh wm zr rc o3 r2
\n ss qg qj ph qk q2 cs z4 bi qc cj q8 qn w7 rj ys ea r4 uy om rc ii fp sj ej yz el qx qv zn gk q9 hs m2 ii d7 nk c8 j4 qq dl gg pp ei qo yc od fo eh fp ta hy ok tv uu us dp qf sb zg ks sg n3 wh x2 nr cs wk kh wk wf ew 7h 7k oy t6 gi rg yp s9 ya e9 pb tc dt dh hk h2 pt qh h7 wz n1 qv kd pm cc xr kp yk tm ge
\n gr qa ft tt gn qs pw pu ca ph ls cg cn zf bz q7 z0 c3 qn gv w7 rg ut e8 ii er ip sg y3 oy ek ht gd qx g0 qv db su jn qq lz uu jo ru an wi kn sq nh qr qy yx eo qi xz y9 ru pd au p7 dq he ut ok fd jz ui hc j9 l3 hb xd jq gy kh wf xs sl rs aq ez y4 ts um yi e0 gh dk py v5 qk ql ko jq wc nk v0 wb qv br iu wm 6p sr gk yp pu
\n o0 pa pf z3 jt jy z7 cm ne w8 yz fd fg zn qq ll vg wr wb ia xx yj ty eh e1 so ts tc s4 i0 tn wo wp wa op va wk x3 vg qx rs sn au f3 tz sq hn rr o2 fv un k2 vj ey dj
\n iy ra ij ty a4 un rf qg dm jr kj uv we cv gk wy z8 oc wp mo jl mz ev ch rv tu ax y2 g3 oy y6 im uq qv hp qb hd iy lp nk w2 bb ho ep tr os en sm p8 p9 hy ss ui gm qi oo vn ae qd w6 ps dn wd wg ro mr yt ol oz rg s7 u5 tl yd rr ax f0 cq ku qx ze n4 wn kt ca jy bg yc zs yw rz w1 eu rm r1
\n hv fu ca q8 mt la r3 pl yh to sy yh tv x4 tg yp ov wn ze sp
\n o9 qa az gm qd pw hq pd ga qj cd q3 jk pd du c2 zk xf t8 eq om es rc ua y3 pu ig qx se qv db st qn ii lx qe wt xk nx ku br qe qr qt rm eu xf xb rn qu qi ep qo rr ex xk p5 ym fi uq to ux ix ai hj gn zi oq qf kd wf xn kr w8 rl kk mq rp rf u1 s7 oa fh e7 yp e0 pr ql sx ck ag kg kt mp eb rl em ee w6 du rm yz if ep
\n qs pi am 6a ut r4 ii sd ua ib y6 pa kt pb wm qq dz qt qp y0 he p8 ue tb qu or qo wh 40 8j t4 sl rf iu gh hh qc iq bf rl wm mf oh ew is dp
\n da ps a7 jr z4 q3 bu xt ip jx q6 np z7 bp lg bz ye wo ig bb ww rf om uu ef r8 ey pt ta pn y7 gg th dn pg qb ri qq 42 qw zw b9 b0 xb qt qy yl wh xk ft at yw i7 sp de pz fn qy si m0 ik wf sg cq ql hk er eg lc ek na wc iw ir rk ua e0 ak iu sw ap uj av ab hl uv zc qa wc jw vj qv vk ay kg xw on rw 1b wn rl eb vm eq h4 yt oz eu uz
\n a2 qa rd cp qh ub vg ws u0 4g bp da yo ev kz eq uu ee ef yk pr sj sk fe oi lt uy j2 gn io vk ns 27 ln wu ve yr l2 qu qi ry il ul tj eg ux i5 yr tx ph oj gq or zp wa qs gy iz v0 qt wj ic ca lh yb np ej sl td l8 yi iw s8 e8 ys yd sq al dt pt tg te yb eu tq r1 ir fe
\n tr h9 go qj b1 wu q7 zh el tg mb ys ed ii er r8 xs pe r0 sw db ov m7 d3 re no nu zr pq ji wr lc or qw ee yy qr rn rm re qi te ea qo yb yn y0 uz uq iz tl yr i0 fm wp qs qd he wk kl ew as hl ez oo ox ie s0 f4 dl wq wl ww lr qc qv vk mt my kn ep em yt sy am so rp sp
\n ak ft qg bg ji q6 bk xi tf mo ur pj yk ua qv wq gc qe ke ef eb tk uq i6 oh iv gb qs rx el yo rr pe wz wx ho xq tc mo yk du r1 tq oc hc te
\n ra ub qj jh jt dx ql q6 da tf r3 ew iu sg tp yl el gc 6n tt ry pd ye ff lz kt yp fl yf dl rn
\n o9 hb h9 qd dh qg q1 qj jy se q5 wt nr qv ge c5 el 6y uo rv ax pe et r0 fe y6 dx qx ha qq lo we zy v1 wy dl vr wa qr rm qi qp yn tz pg ph de p0 do qp wp wf bw xh ky xz wh hl to ek rd sv rj rq re h1 qg qh kl f1 zm 18 ez xe vm en 5j o3 rn fw fe it
\n db c2 bb o0 w8 kl kc y4 qx zm pk cw id ve mh lp rq jb fl x1 qi wd lx f3 cy bq dd ye fn ig
/*
* interface functions to dictionary
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "postgres.h"
#include "fmgr.h"
#include "utils/array.h"
#include "catalog/pg_type.h"
#include "executor/spi.h"
#include "dict.h"
#include "common.h"
#include "snmap.h"
/*********top interface**********/
/* SPI plan fetching one pg_ts_dict row by oid; prepared and saved on first use */
static void *plan_getdict=NULL;

/*
 * init_dict - fill *dict with the description of dictionary `id`.
 *
 * The pg_ts_dict row whose oid equals `id` is fetched through SPI:
 *   column 1 (dict_init)       - oid of the optional init function
 *   column 2 (dict_initoption) - value handed to the init function
 *   column 3 (dict_lexize)     - oid of the mandatory lexize function
 *
 * *dict is zeroed first.  If an init function is present, its result is
 * stored in dict->dictionary.  The lexize function's lookup info is stored
 * in dict->lexize_info (allocated in TopMemoryContext) and dict->dict_id is
 * set to `id`.
 *
 * Errors are reported through ts_error(ERROR, ...) when the plan cannot be
 * prepared, the query fails, no row matches, or dict_lexize is null/invalid.
 */
void
init_dict(Oid id, DictInfo *dict) {
	Oid		arg[1]={ OIDOID };
	bool	isnull;
	Datum	pars[1]={ ObjectIdGetDatum(id) };
	int		stat;

	memset(dict,0,sizeof(DictInfo));
	SPI_connect();
	if ( !plan_getdict ) {
		plan_getdict = SPI_saveplan( SPI_prepare( "select dict_init, dict_initoption, dict_lexize from pg_ts_dict where oid = $1" , 1, arg ) );
		if ( !plan_getdict )
			ts_error(ERROR, "SPI_prepare() failed");
	}

	/* one parameter, flagged not-null by " "; at most one row is requested */
	stat = SPI_execp(plan_getdict, pars, " ", 1);
	if ( stat < 0 )
		ts_error (ERROR, "SPI_execp return %d", stat);
	if ( SPI_processed > 0 ) {
		Datum	opt;
		Oid		oid=InvalidOid;

		/* optional init function: skipped when the column is null or invalid */
		oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
		if ( !(isnull || oid==InvalidOid) ) {
			/*
			 * The option value is passed on without looking at isnull;
			 * dex_init() in this module copes by testing its argument
			 * pointer against NULL.
			 */
			opt=SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull);
			dict->dictionary=(void*)DatumGetPointer(OidFunctionCall1(oid, opt));
		}

		/* mandatory lexize function */
		oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull) );
		if ( isnull || oid==InvalidOid )
			/* Oid is unsigned, hence %u rather than %d */
			ts_error(ERROR, "Null dict_lexize for dictionary %u", id);
		fmgr_info_cxt(oid, &(dict->lexize_info), TopMemoryContext);
		dict->dict_id=id;
	} else
		ts_error(ERROR, "No dictionary with id %u", id);
	SPI_finish();
}
/*
 * Cache of the dictionaries loaded so far.
 * finddict() keeps `list` sorted by dict_id (qsort) so that it can be
 * searched with bsearch().
 */
typedef struct {
/* entry most recently returned by finddict(), or NULL */
DictInfo *last_dict;
/* number of entries of `list` that are in use */
int len;
/* number of entries `list` has room for; finddict() doubles it when full */
int reallen;
/* realloc()ed array of loaded dictionaries */
DictInfo *list;
/* lookup map used by name2id_dict() to remember dictionary name -> oid */
SNMap name2id_map;
} DictList;
/* the single cache instance, initially empty */
static DictList DList = {NULL,0,0,NULL,{0,0,NULL}};
/*
 * reset_dict - drop everything cached about dictionaries.
 *
 * Releases the name -> oid map and the DictInfo array, then returns DList
 * to its initial all-zero state.
 */
void
reset_dict(void) {
	freeSNMap( &(DList.name2id_map) );

	/* XXX need to free DList.list[*].dictionary */
	if ( DList.list != NULL )
		free(DList.list);

	memset(&DList, 0, sizeof(DList));
}
static int
comparedict(const void *a, const void *b) {
return ((DictInfo*)a)->dict_id - ((DictInfo*)b)->dict_id;
}
/*
 * finddict - return the cached DictInfo for dictionary `id`, loading it on
 * first use.
 *
 * Lookup order: the entry returned by the previous call, then a binary
 * search of the sorted cache, and finally init_dict() into a fresh slot
 * (the array is doubled, starting at 16 entries, when it is full).
 *
 * The returned pointer refers into DList.list; a later call that loads a
 * new dictionary may realloc() or re-sort that array.
 *
 * Errors are reported through ts_error()/init_dict().
 */
DictInfo *
finddict(Oid id) {
	DictInfo	key;

	/* last used dict */
	if ( DList.last_dict && DList.last_dict->dict_id==id )
		return DList.last_dict;

	/* already used dict; only dict_id is read by comparedict() */
	key.dict_id=id;
	if ( DList.len != 0 ) {
		DList.last_dict = bsearch(&key, DList.list, DList.len, sizeof(DictInfo), comparedict);
		if ( DList.last_dict != NULL )
			return DList.last_dict;
	}

	/* last chance: load it, growing the array first if there is no free slot */
	if ( DList.len==DList.reallen ) {
		DictInfo	*tmp;
		int			reallen = ( DList.reallen ) ? 2*DList.reallen : 16;

		tmp=(DictInfo*)realloc(DList.list,sizeof(DictInfo)*reallen);
		if ( !tmp )
			ts_error(ERROR,"No memory");
		DList.reallen=reallen;
		DList.list=tmp;
	}

	/*
	 * Do not publish the new slot through last_dict until it is fully
	 * initialised.  NOTE(review): init_dict() reports failures with
	 * ts_error(ERROR, ...), which presumably does not return; a last_dict
	 * left pointing at the zeroed slot would then satisfy a later
	 * finddict(0) with an unusable entry.
	 */
	DList.last_dict=NULL;
	init_dict(id, &(DList.list[DList.len]));
	DList.len++;

	/* qsort changes the order, so locate the new entry again */
	qsort(DList.list, DList.len, sizeof(DictInfo), comparedict);
	DList.last_dict = bsearch(&key, DList.list, DList.len, sizeof(DictInfo), comparedict);
	return DList.last_dict;
}
/* SPI plan resolving a dictionary name to its oid; prepared and saved on first use */
static void *plan_name2id=NULL;

/*
 * name2id_dict - translate a dictionary name into its pg_ts_dict oid.
 *
 * Names already resolved are answered from DList.name2id_map; otherwise
 * pg_ts_dict is queried through SPI and the answer is added to the map.
 * Errors are reported through ts_error(ERROR, ...) when the plan cannot be
 * prepared, the query fails, or no dictionary carries that name.
 */
Oid
name2id_dict(text *name) {
	Oid		argtypes[1]={ TEXTOID };
	bool	isnull;
	Datum	values[1]={ PointerGetDatum(name) };
	int		rc;
	Oid		id;

	/* fast path: name seen before */
	id = findSNMap_t( &(DList.name2id_map), name );
	if ( id != InvalidOid )
		return id;

	SPI_connect();
	if ( plan_name2id == NULL ) {
		plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_dict where dict_name = $1" , 1, argtypes ) );
		if ( plan_name2id == NULL )
			ts_error(ERROR, "SPI_prepare() failed");
	}

	rc = SPI_execp(plan_name2id, values, " ", 1);
	if ( rc < 0 )
		ts_error (ERROR, "SPI_execp return %d", rc);

	if ( SPI_processed == 0 )
		ts_error(ERROR, "No dictionary with name '%s'", text2char(name));
	else
		id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
	SPI_finish();

	/* remember the answer for the next call */
	addSNMap_t( &(DList.name2id_map), name, id );
	return id;
}
/******sql-level interface******/
PG_FUNCTION_INFO_V1(lexize);
Datum lexize(PG_FUNCTION_ARGS);

/*
 * lexize(dictionary oid, text) - run one word through a dictionary.
 *
 * The dictionary's lexize function is called with the dictionary pointer,
 * the word's bytes and their length; it hands back a NULL-terminated array
 * of C strings.  Those strings are converted into a text[] result, after
 * which the strings and the array itself are pfree()d.
 *
 * When the dictionary returns no array at all, the result is SQL NULL for
 * calls with at most two arguments and a plain NULL pointer otherwise (the
 * internal callers in this file pass a third, dummy argument).
 */
Datum
lexize(PG_FUNCTION_ARGS) {
	text		*word=PG_GETARG_TEXT_P(1);
	DictInfo	*dict = finddict( PG_GETARG_OID(0) );
	char		**lexemes;
	Datum		*elems;
	ArrayType	*result;
	int			nlex;
	int			i;

	lexemes = (char**)DatumGetPointer(
		FunctionCall3(&(dict->lexize_info),
			PointerGetDatum(dict->dictionary),
			PointerGetDatum(VARDATA(word)),
			Int32GetDatum(VARSIZE(word)-VARHDRSZ)
		)
	);
	PG_FREE_IF_COPY(word, 1);

	if ( lexemes == NULL ) {
		if (PG_NARGS() <= 2)
			PG_RETURN_NULL();
		PG_RETURN_POINTER(NULL);
	}

	/* count the lexemes up to the terminating NULL */
	nlex = 0;
	while ( lexemes[nlex] != NULL )
		nlex++;

	/* turn every C string into a text datum */
	elems = (Datum*)palloc(sizeof(Datum)*(nlex+1));
	for (i = 0; i < nlex; i++)
		elems[i] = PointerGetDatum( char2text(lexemes[i]) );

	result = construct_array(
		elems,
		nlex,
		TEXTOID,
		-1,
		false,
		'i'
	);

	/* the intermediate datums and the dictionary's output are no longer needed */
	for (i = 0; i < nlex; i++) {
		pfree( DatumGetPointer(elems[i]) );
		pfree( lexemes[i] );
	}
	pfree(lexemes);
	pfree(elems);

	PG_RETURN_POINTER(result);
}
PG_FUNCTION_INFO_V1(lexize_byname);
Datum lexize_byname(PG_FUNCTION_ARGS);

/*
 * lexize_byname(dictionary name, text) - like lexize(), but the dictionary
 * is selected by name.
 *
 * The name is resolved with name2id_dict() and the work is delegated to
 * lexize().  The dummy third argument makes lexize() report "no result" as
 * a NULL pointer instead of an SQL NULL; that case is turned back into an
 * SQL NULL here.
 */
Datum
lexize_byname(PG_FUNCTION_ARGS) {
	text	*dictname=PG_GETARG_TEXT_P(0);
	Datum	res;

	/* the stray strdup("simple") was removed: its result was never used or freed */
	res=DirectFunctionCall3(
		lexize,
		ObjectIdGetDatum(name2id_dict(dictname)),
		PG_GETARG_DATUM(1),
		(Datum)0
	);
	PG_FREE_IF_COPY(dictname, 0);
	if (res)
		PG_RETURN_DATUM(res);
	else
		PG_RETURN_NULL();
}
/* oid of the dictionary used by lexize_bycurrent(); 0 until set_curdict() is called */
static Oid currect_dictionary_id=0;

PG_FUNCTION_INFO_V1(set_curdict);
Datum set_curdict(PG_FUNCTION_ARGS);

/*
 * set_curdict(dictionary oid) - choose the current dictionary.
 *
 * finddict() is called first, so the oid is only remembered after the
 * dictionary has been found or loaded.
 */
Datum
set_curdict(PG_FUNCTION_ARGS) {
	Oid		dict_oid = PG_GETARG_OID(0);

	finddict(dict_oid);
	currect_dictionary_id = dict_oid;
	PG_RETURN_VOID();
}
PG_FUNCTION_INFO_V1(set_curdict_byname);
Datum set_curdict_byname(PG_FUNCTION_ARGS);
Datum
set_curdict_byname(PG_FUNCTION_ARGS) {
text *dictname=PG_GETARG_TEXT_P(0);
DirectFunctionCall1(
set_curdict,
ObjectIdGetDatum( name2id_dict(dictname) )
);
PG_FREE_IF_COPY(dictname, 0);
PG_RETURN_VOID();
}
PG_FUNCTION_INFO_V1(lexize_bycurrent);
Datum lexize_bycurrent(PG_FUNCTION_ARGS);

/*
 * lexize_bycurrent(text) - like lexize(), using the dictionary selected
 * earlier with set_curdict()/set_curdict_byname().
 *
 * Raises an error if no dictionary has been selected.  A NULL pointer
 * coming back from lexize() is converted into an SQL NULL.
 */
Datum
lexize_bycurrent(PG_FUNCTION_ARGS) {
	Datum	lexemes;

	if ( currect_dictionary_id == 0 )
		elog(ERROR, "No currect dictionary. Execute select set_curdict().");

	/* the dummy third argument asks lexize() for a NULL pointer on "no result" */
	lexemes = DirectFunctionCall3(
		lexize,
		ObjectIdGetDatum(currect_dictionary_id),
		PG_GETARG_DATUM(0),
		(Datum)0
	);

	if (!lexemes)
		PG_RETURN_NULL();
	PG_RETURN_DATUM(lexemes);
}
#ifndef __DICT_H__
#define __DICT_H__
#include "postgres.h"
#include "fmgr.h"
/*
 * Stop-word list shared by the dictionaries.
 * NOTE(review): len/stop look like an entry count and an array of words --
 * confirm against stopword.c, which is not visible here.
 */
typedef struct {
int len;
char **stop;
/* word-normalisation callback; the dictionaries in this module set it to lowerstr before loading a list */
char* (*wordop)(char*);
} StopList;
/* Callers invoke sortstoplist() right after readstoplist() */
void sortstoplist(StopList *s);
/* Called on dictionary tear-down; it does not free the StopList struct itself (callers free the container separately) */
void freestoplist(StopList *s);
/* Loads a list described by a text argument; NOTE(review): presumably a file name -- confirm */
void readstoplist(text *in, StopList *s);
/* Callers treat a true result as "key is a stop word, drop it" */
bool searchstoplist(StopList *s, char *key);
/* NOTE(review): presumably lower-cases str in place and returns it -- confirm; callers pfree() the returned pointer */
char* lowerstr(char *str);
/*
 * Per-dictionary runtime state: the dictionary's OID, the fmgr lookup info
 * of its lexize function, and the opaque dictionary object that is passed
 * to that function as its first argument.
 */
typedef struct {
Oid dict_id;
FmgrInfo lexize_info;
void *dictionary;
} DictInfo;
/* NOTE(review): presumably fills *dict for the dictionary with the given OID -- confirm in dict.c */
void init_dict(Oid id, DictInfo *dict);
/* Returns the DictInfo for a dictionary OID */
DictInfo* finddict(Oid id);
/* Maps a dictionary name to its OID */
Oid name2id_dict(text *name);
/* NOTE(review): presumably discards cached dictionary state -- confirm in dict.c */
void reset_dict(void);
/*
 * simple parser of cfg string
 *
 * parse_cfgdict() stores an array of key/value pairs in *m.  Callers walk
 * the array until they reach an entry whose key is NULL, and pfree() each
 * key, each value and finally the array itself.
 */
typedef struct {
char *key;
char *value;
} Map;
void parse_cfgdict(text *in, Map **m);
#endif
/*
* example of dictionary
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include "postgres.h"
#include "dict.h"
#include "common.h"
/* State of the example dictionary: it consists only of a stop-word list */
typedef struct {
StopList stoplist;
} DictExample;
PG_FUNCTION_INFO_V1(dex_init);
Datum dex_init(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(dex_lexize);
Datum dex_lexize(PG_FUNCTION_ARGS);
Datum
dex_init(PG_FUNCTION_ARGS) {
DictExample *d = (DictExample*)malloc( sizeof(DictExample) );
if ( !d )
elog(ERROR, "No memory");
memset(d,0,sizeof(DictExample));
d->stoplist.wordop=lowerstr;
if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
text *in = PG_GETARG_TEXT_P(0);
readstoplist(in, &(d->stoplist));
sortstoplist(&(d->stoplist));
PG_FREE_IF_COPY(in, 0);
}
PG_RETURN_POINTER(d);
}
/*
 * dex_lexize(dict, word, length)
 *
 * Returns a palloc'd, NULL-terminated array of lexemes.  The array holds a
 * copy of the input word, or is empty when the word is empty or is found
 * in the dictionary's stop list.
 */
Datum
dex_lexize(PG_FUNCTION_ARGS) {
	DictExample *dict = (DictExample*)PG_GETARG_POINTER(0);
	char *word = (char*)PG_GETARG_POINTER(1);
	char *lexeme = pnstrdup(word, PG_GETARG_INT32(2));
	char **out = palloc(sizeof(char*)*2);

	if ( *lexeme == '\0' || searchstoplist(&(dict->stoplist), lexeme) ) {
		/* nothing to emit: drop the copy and return an empty list */
		pfree(lexeme);
		lexeme = NULL;
	}

	out[0] = lexeme;
	out[1] = NULL;

	PG_RETURN_POINTER(out);
}
/*
* ISpell interface
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "postgres.h"
#include "dict.h"
#include "common.h"
#include "ispell/spell.h"
/* State of an ISpell dictionary: a stop-word list plus the ispell object loaded from the dictionary and affix files */
typedef struct {
StopList stoplist;
IspellDict obj;
} DictISpell;
PG_FUNCTION_INFO_V1(spell_init);
Datum spell_init(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(spell_lexize);
Datum spell_lexize(PG_FUNCTION_ARGS);
/*
 * Tear down a DictISpell: release the ispell object, then the stop list,
 * and finally the malloc'd structure itself.
 */
static void
freeDictISpell(DictISpell *d) {
	IspellDict *ispell = &d->obj;
	StopList *stops = &d->stoplist;

	FreeIspell(ispell);
	freestoplist(stops);
	free(d);
}
/*
 * spell_init(config text)
 *
 * Builds a DictISpell from a configuration string.  The string is split by
 * parse_cfgdict() into key/value pairs; the recognised keys (compared
 * case-insensitively) are DictFile, AffFile and StopFile, each of which
 * may appear at most once.  Both DictFile and AffFile are required.
 *
 * The dictionary is allocated with malloc(), so every error path releases
 * it explicitly with freeDictISpell() before calling elog(ERROR).
 */
Datum
spell_init(PG_FUNCTION_ARGS) {
DictISpell *d;
Map *cfg, *pcfg;
text *in;
bool affloaded=false, dictloaded=false, stoploaded=false;
if ( PG_ARGISNULL(0) || PG_GETARG_POINTER(0)==NULL )
elog(ERROR,"ISpell confguration error");
d = (DictISpell*)malloc( sizeof(DictISpell) );
if ( !d )
elog(ERROR, "No memory");
memset(d,0,sizeof(DictISpell));
d->stoplist.wordop=lowerstr;
in = PG_GETARG_TEXT_P(0);
parse_cfgdict(in,&cfg);
PG_FREE_IF_COPY(in, 0);
pcfg=cfg;
/* walk the key/value array; an entry with a NULL key ends it */
while(pcfg->key) {
if ( strcasecmp("DictFile", pcfg->key) == 0 ) {
if ( dictloaded ) {
freeDictISpell(d);
elog(ERROR,"Dictionary already loaded");
}
/* a non-zero result from ImportDictionary() is treated as failure */
if ( ImportDictionary(&(d->obj), pcfg->value) ) {
freeDictISpell(d);
elog(ERROR,"Can't load dictionary file (%s)", pcfg->value);
}
dictloaded=true;
} else if ( strcasecmp("AffFile", pcfg->key) == 0 ) {
if ( affloaded ) {
freeDictISpell(d);
elog(ERROR,"Affixes already loaded");
}
/* a non-zero result from ImportAffixes() is treated as failure */
if ( ImportAffixes(&(d->obj), pcfg->value) ) {
freeDictISpell(d);
elog(ERROR,"Can't load affix file (%s)", pcfg->value);
}
affloaded=true;
} else if ( strcasecmp("StopFile", pcfg->key) == 0 ) {
/* readstoplist() takes a text value, so wrap the C string */
text *tmp=char2text(pcfg->value);
if ( stoploaded ) {
freeDictISpell(d);
elog(ERROR,"Stop words already loaded");
}
readstoplist(tmp, &(d->stoplist));
sortstoplist(&(d->stoplist));
pfree(tmp);
stoploaded=true;
} else {
freeDictISpell(d);
elog(ERROR,"Unknown option: %s => %s", pcfg->key, pcfg->value);
}
/* this pair has been consumed; release its strings */
pfree(pcfg->key);
pfree(pcfg->value);
pcfg++;
}
pfree(cfg);
/* both files are mandatory; when neither was given the affix error is the one reported */
if ( affloaded && dictloaded ) {
SortDictionary(&(d->obj));
SortAffixes(&(d->obj));
} else if ( !affloaded ) {
freeDictISpell(d);
elog(ERROR,"No affixes");
} else {
freeDictISpell(d);
elog(ERROR,"No dictionary");
}
PG_RETURN_POINTER(d);
}
/*
 * spell_lexize(dict, word, length)
 *
 * Normalises the word with NormalizeWord() and removes every resulting
 * form that is found in the dictionary's stop list.  Returns the
 * NULL-terminated array produced by NormalizeWord() (compacted in place),
 * or a NULL pointer when the input is empty or NormalizeWord() returns
 * NULL.
 */
Datum
spell_lexize(PG_FUNCTION_ARGS) {
	DictISpell *d = (DictISpell*)PG_GETARG_POINTER(0);
	char *in = (char*)PG_GETARG_POINTER(1);
	char *txt;
	char **res;
	char **ptr, **cptr;

	if ( !PG_GETARG_INT32(2) )
		PG_RETURN_POINTER(NULL);

	/*
	 * NormalizeWord() supplies the result array, so nothing is allocated
	 * for res beforehand (an earlier palloc here was overwritten at once
	 * and never freed).
	 */
	txt = pnstrdup(in, PG_GETARG_INT32(2));
	res = NormalizeWord(&(d->obj), txt);
	pfree(txt);

	if ( res==NULL )
		PG_RETURN_POINTER(NULL);

	/* compact in place: ptr scans every form, cptr is the write position for kept forms */
	ptr=cptr=res;
	while(*ptr) {
		if ( searchstoplist(&(d->stoplist),*ptr) ) {
			pfree(*ptr);
			*ptr=NULL;
			ptr++;
		} else {
			*cptr=*ptr;
			cptr++; ptr++;
		}
	}
	*cptr=NULL;

	PG_RETURN_POINTER(res);
}
/*
* example of Snowball dictionary
* http://snowball.tartarus.org/
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include <stdlib.h>
#include <string.h>
#include "postgres.h"
#include "dict.h"
#include "common.h"
#include "snowball/header.h"
#include "snowball/english_stem.h"
#include "snowball/russian_stem.h"
/*
 * State of a Snowball dictionary: the stemmer environment, a stop-word
 * list, and the language-specific stem function that is run on that
 * environment (english_stem or russian_stem in this file).
 */
typedef struct {
struct SN_env *z;
StopList stoplist;
int (*stem)(struct SN_env * z);
} DictSnowball;
PG_FUNCTION_INFO_V1(snb_en_init);
Datum snb_en_init(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(snb_ru_init);
Datum snb_ru_init(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(snb_lexize);
Datum snb_lexize(PG_FUNCTION_ARGS);
/*
 * snb_en_init(stoplist text)
 *
 * Creates an English Snowball dictionary.  The structure is allocated with
 * malloc(); an optional non-NULL argument is passed to readstoplist() and
 * the list is sorted.  Returns a pointer to the new dictionary.
 */
Datum
snb_en_init(PG_FUNCTION_ARGS) {
	DictSnowball *d = (DictSnowball*)malloc( sizeof(DictSnowball) );

	if ( !d )
		elog(ERROR, "No memory");
	memset(d,0,sizeof(DictSnowball));
	d->stoplist.wordop=lowerstr;

	if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
		text *in = PG_GETARG_TEXT_P(0);
		readstoplist(in, &(d->stoplist));
		sortstoplist(&(d->stoplist));
		PG_FREE_IF_COPY(in, 0);
	}

	d->z = english_create_env();
	if (!d->z) {
		freestoplist(&(d->stoplist));
		/* d came from malloc(), so it must be released by hand before the error is raised */
		free(d);
		elog(ERROR,"No memory");
	}
	d->stem=english_stem;

	PG_RETURN_POINTER(d);
}
/*
 * snb_ru_init(stoplist text)
 *
 * Creates a Russian Snowball dictionary.  The structure is allocated with
 * malloc(); an optional non-NULL argument is passed to readstoplist() and
 * the list is sorted.  Returns a pointer to the new dictionary.
 */
Datum
snb_ru_init(PG_FUNCTION_ARGS) {
	DictSnowball *d = (DictSnowball*)malloc( sizeof(DictSnowball) );

	if ( !d )
		elog(ERROR, "No memory");
	memset(d,0,sizeof(DictSnowball));
	d->stoplist.wordop=lowerstr;

	if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
		text *in = PG_GETARG_TEXT_P(0);
		readstoplist(in, &(d->stoplist));
		sortstoplist(&(d->stoplist));
		PG_FREE_IF_COPY(in, 0);
	}

	d->z = russian_create_env();
	if (!d->z) {
		freestoplist(&(d->stoplist));
		/* d came from malloc(), so it must be released by hand before the error is raised */
		free(d);
		elog(ERROR,"No memory");
	}
	d->stem=russian_stem;

	PG_RETURN_POINTER(d);
}
/*
 * snb_lexize(dict, word, length)
 *
 * Returns a palloc'd, NULL-terminated array of lexemes.  An empty word or
 * a stop word yields an empty array.  Otherwise the word is run through
 * the dictionary's stem function; if the stemmer environment then holds a
 * non-empty result, that result replaces the word, else the word is
 * returned unchanged.
 */
Datum
snb_lexize(PG_FUNCTION_ARGS) {
	DictSnowball *dict = (DictSnowball*)PG_GETARG_POINTER(0);
	char *word = (char*)PG_GETARG_POINTER(1);
	char *lexeme = pnstrdup(word, PG_GETARG_INT32(2));
	char **out = palloc(sizeof(char*)*2);

	out[1] = NULL;

	if ( *lexeme == '\0' || searchstoplist(&(dict->stoplist), lexeme) ) {
		pfree(lexeme);
		out[0] = NULL;
		PG_RETURN_POINTER(out);
	}

	SN_set_current(dict->z, strlen(lexeme), lexeme);
	(dict->stem)(dict->z);

	if ( dict->z->p && dict->z->l ) {
		/* copy the stemmer's buffer (p, length l) back and NUL-terminate it */
		lexeme = repalloc(lexeme, dict->z->l + 1);
		memcpy(lexeme, dict->z->p, dict->z->l);
		lexeme[dict->z->l] = '\0';
	}
	out[0] = lexeme;

	PG_RETURN_POINTER(out);
}
/*
* Synonym dictionary interface
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include "postgres.h"
#include "dict.h"
#include "common.h"
#define SYNBUFLEN 4096
/* One synonym pair: in is the word to look up (the sort and search key), out is its replacement */
typedef struct {
char *in;
char *out;
} Syn;
/* Synonym dictionary: syn holds len pairs, sorted by Syn.in so that bsearch() can be used */
typedef struct {
int len;
Syn *syn;
} DictSyn;
PG_FUNCTION_INFO_V1(syn_init);
Datum syn_init(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(syn_lexize);
Datum syn_lexize(PG_FUNCTION_ARGS);
/*
 * Find the next whitespace-delimited word in the NUL-terminated string in.
 *
 * Returns a pointer to the first character of the word and stores in *end
 * a pointer to the character just past it (a whitespace character or the
 * terminating NUL).  Returns NULL, with *end set to NULL, when only
 * whitespace (or nothing) remains.
 */
static char *
findwrd(char *in, char **end) {
	char *start;

	*end=NULL;
	/* cast to unsigned char: isspace() is undefined for negative char values */
	while( *in && isspace((unsigned char)*in) )
		in++;

	/* test the character, not the pointer: the pointer itself is never NULL here */
	if ( !*in )
		return NULL;

	start=in;
	while( *in && !isspace((unsigned char)*in) )
		in++;

	*end=in;
	return start;
}
static int
compareSyn(const void *a, const void *b) {
return strcmp( ((Syn*)a)->in, ((Syn*)b)->in );
}
Datum
syn_init(PG_FUNCTION_ARGS) {
text *in;
DictSyn *d;
int cur=0;
FILE *fin;
char *filename;
char buf[SYNBUFLEN];
char *starti,*starto,*end=NULL;
int slen;
if ( PG_ARGISNULL(0) || PG_GETARG_POINTER(0)==NULL )
elog(ERROR,"NULL config");
in = PG_GETARG_TEXT_P(0);
if ( VARSIZE(in) - VARHDRSZ == 0 )
elog(ERROR,"VOID config");
filename=text2char(in);
PG_FREE_IF_COPY(in, 0);
if ( (fin=fopen(filename,"r")) == NULL )
elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno));
d = (DictSyn*)malloc( sizeof(DictSyn) );
if ( !d ) {
fclose(fin);
elog(ERROR, "No memory");
}
memset(d,0,sizeof(DictSyn));
while( fgets(buf,SYNBUFLEN,fin) ) {
slen = strlen(buf)-1;
buf[slen] = '\0';
if ( *buf=='\0' ) continue;
if (cur==d->len) {
d->len = (d->len) ? 2*d->len : 16;
d->syn=(Syn*)realloc( d->syn, sizeof(Syn)*d->len );
if ( !d->syn ) {
fclose(fin);
elog(ERROR, "No memory");
}
}
starti=findwrd(buf,&end);
if ( !starti )
continue;
*end='\0';
if ( end >= buf+slen )
continue;
starto= findwrd(end+1, &end);
if ( !starto )
continue;
*end='\0';
d->syn[cur].in=strdup(lowerstr(starti));
d->syn[cur].out=strdup(lowerstr(starto));
if ( !(d->syn[cur].in && d->syn[cur].out) ) {
fclose(fin);
elog(ERROR, "No memory");
}
cur++;
}
fclose(fin);
d->len=cur;
if ( cur>1 )
qsort(d->syn, d->len, sizeof(Syn), compareSyn);
pfree(filename);
PG_RETURN_POINTER(d);
}
/*
 * syn_lexize(dict, word, length)
 *
 * Looks the word (passed through lowerstr()) up in the sorted synonym
 * table.  Returns a palloc'd, NULL-terminated one-element array holding a
 * copy of the synonym, or a NULL pointer when the input is empty or no
 * entry matches.
 */
Datum
syn_lexize(PG_FUNCTION_ARGS) {
	DictSyn *dict = (DictSyn*)PG_GETARG_POINTER(0);
	char *word = (char*)PG_GETARG_POINTER(1);
	Syn probe;
	Syn *hit;
	char **out;

	if ( PG_GETARG_INT32(2) == 0 )
		PG_RETURN_POINTER(NULL);

	/* build a search key; only its "in" member is compared */
	probe.out = NULL;
	probe.in = lowerstr(pnstrdup(word, PG_GETARG_INT32(2)));
	hit = (Syn*)bsearch(&probe, dict->syn, dict->len, sizeof(Syn), compareSyn);
	pfree(probe.in);

	if ( hit == NULL )
		PG_RETURN_POINTER(NULL);

	out = palloc(sizeof(char*)*2);
	out[0] = pstrdup(hit->out);
	out[1] = NULL;

	PG_RETURN_POINTER(out);
}
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<title>tsearch-v2-intro</title>
<link type="text/css" rel="stylesheet" href="/~megera/postgres/gist/tsearch/tsearch.css">
</head>
<body>
<div class="content">
<h2>Tsearch2 - Introduction</h2>
<p><a href=
"http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/docs/tsearch-V2-intro.html">
[Online version]</a> of this document is available.</p>
<p>The tsearch2 module is available to add as an extension to
the PostgreSQL database to allow for Full Text Indexing. This
document is an introduction to installing, configuring, using
and maintaining the database with the tsearch2 module
activated.</p>
<p>Please, note, tsearch2 module is fully incompatible with old
tsearch, which is deprecated in 7.4 and will be obsoleted in
7.5.</p>
<h3>USING TSEARCH2 AND POSTGRESQL FOR A WEB BASED SEARCH
ENGINE</h3>
<p>This documentation is provided as a short guide on how to
quickly get up and running with tsearch2 and PostgreSQL, for
those who want to implement a full text indexed based search
engine. It is not meant to be a complete in-depth guide into
the full ins and outs of the contrib/tsearch2 module, and is
primarily aimed at beginners who want to speed up searching of
large text fields, or those migrating from other database
systems such as MS-SQL.</p>
<p>The README.tsearch2 file included in the contrib/tsearch2
directory contains a brief overview and history behind tsearch.
This can also be found online <a href=
"http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/">[right
here]</a>.</p>
<p>Further in depth documentation such as a full function
reference, and user guide can be found online at the <a href=
"http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/docs/">[tsearch
documentation home]</a>.</p>
<h3>ACKNOWLEDGEMENTS</h3>
<p>Robert John Shepherd originally wrote this documentation for
the previous version of tsearch module (v1) included with the
postgres release. I took his documentation and updated it to
comply with the tsearch2 modifications.</p>
<p>Robert's original acknowledgements:</p>
<p>"Thanks to Oleg Bartunov for taking the time to answer many
of my questions regarding this module, and also to Teodor
Sigaev for clearing up the process of making your own
dictionaries. Plus of course a big thanks to the pair of them
for writing this module in the first place!"</p>
<p>I would also like to extend my thanks to the developers, and
Oleg Bartunov for all of his direction and help with the new
features of tsearch2.</p>
<h3>OVERVIEW</h3>
<p>MS-SQL provides a full text indexing (FTI) system which
enables the fast searching of text based fields, very useful
for websites (and other applications) that require a results
set based on key words. PostgreSQL ships with a contributed
module called tsearch2, which implements a special type of
index that can also be used for full text indexing.
Furthermore, unlike MS' offering which requires regular incremental
rebuilds of the text indexes themselves, tsearch2 indexes are
always up-to-date and keeping them so induces very little
overhead.</p>
<p>Before we get into the details, it is recommended that you
have installed and tested PostgreSQL, are reasonably familiar
with databases, the SQL query language and also understand the
basics of connecting to PostgreSQL from the local shell. This
document isn't intended for the complete PostgreSQL newbie, but
anyone with a reasonable grasp of the basics should be able to
follow it.</p>
<h3>INSTALLATION</h3>
<p>Starting with PostgreSQL version 7.4 tsearch2 is now
included in the contrib directory with the PostgreSQL sources.
contrib/tsearch2 is where you will find everything needed to
install and use tsearch2. Please note that tsearch2 will also
work with PostgreSQL version 7.3.x, but it is not the module
included with the source distribution. You will have to
download the module separately and install it in the same
fashion.</p>
<p>I installed the tsearch2 module to a PostgreSQL 7.3 database
from the contrib directory without squashing the original (old)
tsearch module. What I did was move the module's tsearch src
directory into the contrib tree under the name tsearchV2.</p>
<p>Step one is to download the tsearch V2 module :</p>
<p><a href=
"http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/">[http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/]</a>
(check Development History for latest stable version !)</p>
<pre>
tar -zxvf tsearch-v2.tar.gz
mv tsearch2 PGSQL_SRC/contrib/
cd PGSQL_SRC/contrib/tsearch2
</pre>
<p>If you are installing from PostgreSQL version 7.4 or higher,
you can skip those steps and just change to the
contrib/tsearch2 directory in the source tree and continue from
there.</p>
<p>Then continue with the regular building and installation
process</p>
<pre>
gmake
gmake install
gmake installcheck
</pre>
<p>That is pretty much all you have to do, unless of course you
get errors. However if you get those, you better go check with
the mailing lists over at <a href=
"http://www.postgresql.org">http://www.postgresql.org</a> or
<a href=
"http://openfts.sourceforge.net/">http://openfts.sourceforge.net/</a>
since it's never failed for me.</p>
<p>The directory in contrib/ and the directory from the
archive are both called tsearch2. Tsearch2 is completely incompatible
with the previous version of tsearch. This means that both
versions can be installed into a single database, and migration
to the new version may be much easier.</p>
<p>NOTE: the previous version of tsearch found in the
contrib/tsearch directory is deprecated. Although it is still
available and included within PostgreSQL version 7.4, it will
be removed in version 7.5.</p>
<h3>ADDING TSEARCH2 FUNCTIONALITY TO A DATABASE</h3>
<p>We should create a database to use as an example for the
remainder of this file. We can call the database "ftstest". You
can create it from the command line like this:</p>
<pre>
#createdb ftstest
</pre>
<p>If you thought installation was easy, this next bit is even
easier. Change to the PGSQL_SRC/contrib/tsearch2 directory and
type:</p>
<pre>
psql ftstest &lt; tsearch2.sql
</pre>
<p>The file "tsearch2.sql" holds all the wonderful little
goodies you need to do full text indexing. It defines numerous
functions and operators, and creates the needed tables in the
database. There will be 4 new tables created after running the
tsearch2.sql file : pg_ts_dict, pg_ts_parser, pg_ts_cfg,
pg_ts_cfgmap are added.</p>
<p>You can check out the tables if you like:</p>
<pre>
#psql ftstest
ftstest=# \d
List of relations
Schema | Name | Type | Owner
--------+--------------+-------+----------
public | pg_ts_cfg | table | kopciuch
public | pg_ts_cfgmap | table | kopciuch
public | pg_ts_dict | table | kopciuch
public | pg_ts_parser | table | kopciuch
(4 rows)
</pre>
<h3>TYPES AND FUNCTIONS PROVIDED BY TSEARCH2</h3>
<p>The first thing we can do is try out some of the types that
are provided for us. Lets look at the tsvector type provided
for us:</p>
<pre>
SELECT 'Our first string used today'::tsvector;
tsvector
---------------------------------------
'Our' 'used' 'first' 'today' 'string'
(1 row)
</pre>
<p>The results are the words used within our string. Notice
they are not in any particular order. The tsvector type returns
a string of space separated words.</p>
<pre>
SELECT 'Our first string used today first string'::tsvector;
tsvector
-----------------------------------------------
'Our' 'used' 'again' 'first' 'today' 'string'
(1 row)
</pre>
<p>Notice the results string has each unique word ('first' and
'string' only appear once in the tsvector value). Which of
course makes sense if you are searching the full text ... you
only need to know each unique word in the text.</p>
<p>Those examples were just casting a text field to that of
type tsvector. Lets check out one of the new functions created
by the tsearch2 module.</p>
<p>The function to_tsvector has 3 possible signatures:</p>
<pre>
to_tsvector(oid, text);
to_tsvector(text, text);
to_tsvector(text);
</pre>
<p>We will use the second method using two text fields. The
overloaded methods provide us with a way to specify the way
the searchable text is broken up into words (Stemming process).
Right now we will specify the 'default' configuration. See the
section on TSEARCH2 CONFIGURATION to learn more about this.</p>
<pre>
SELECT to_tsvector('default',
'Our first string used today first string');
to_tsvector
--------------------------------------------
'use':4 'first':2,6 'today':5 'string':3,7
(1 row)
</pre>
<p>The result returned from this function is of type tsvector.
The results came about by this reasoning: All of the words in
the text passed in are stemmed, or not used because they are
stop words defined in our configuration. Each lower case
morphed word is returned with all of the positions in the
text.</p>
<p>In this case the word "Our" is a stop word in the default
configuration. That means it will not be included in the
result. The word "first" is found at positions 2 and 6
(although "Our" is a stop word, its position is maintained).
The word(s) positioning is maintained exactly as in the
original string. The word "used" is morphed to the word "use"
based on the default configuration for word stemming, and is
found at position 4. The rest of the results follow the same
logic. Just a reminder again ... the order of the 'word'
position in the output is not in any kind of order. (ie 'use':4
appears first)</p>
<p>If you want to view the output of the tsvector fields
without their positions, you can do so with the function
"strip(tsvector)".</p>
<pre>
SELECT strip(to_tsvector('default',
'Our first string used today first string'));
strip
--------------------------------
'use' 'first' 'today' 'string'
</pre>
<p>If you wish to know the number of unique words returned in
the tsvector you can do so by using the function
"length(tsvector)"</p>
<pre>
SELECT length(to_tsvector('default',
'Our first string used today first string'));
length
--------
4
(1 row)
</pre>
<p>Lets take a look at the function to_tsquery. It also has 3
signatures which follow the same rationale as the to_tsvector
function:</p>
<pre>
to_tsquery(oid, text);
to_tsquery(text, text);
to_tsquery(text);
</pre>
<p>Lets try using the function with a single word :</p>
<pre>
SELECT to_tsquery('default', 'word');
to_tsquery
-----------
'word'
(1 row)
</pre>
<p>I call the function the same way I would a to_tsvector
function, specifying the 'default' configuration for morphing,
and the result is the stemmed output 'word'.</p>
<p>Lets attempt to use the function with a string of multiple
words:</p>
<pre>
SELECT to_tsquery('default', 'this is many words');
ERROR: Syntax error
</pre>
<p>The function can not accept a space separated string. The
intention of the to_tsquery function is to return a type of
"tsquery" used for searching a tsvector field. What we need to
do is search for one to many words with some kind of logic (for
now simple boolean).</p>
<pre>
SELECT to_tsquery('default', 'searching|sentence');
to_tsquery
----------------------
'search' | 'sentenc'
(1 row)
</pre>
<p>Notice that the words are separated by the boolean logic
"OR", the text could contain boolean operators &amp;,|,!,()
with their usual meaning.</p>
<p>You can not use words defined as being a stop word in your
configuration. The function will not fail ... you will just get
no result, and a NOTICE like this:</p>
<pre>
SELECT to_tsquery('default', 'a|is&amp;not|!the');
NOTICE: Query contains only stopword(s)
or doesn't contain lexem(s), ignored
to_tsquery
-----------
(1 row)
</pre>
<p>That is a beginning to using the types, and functions
defined in the tsearch2 module. There are numerous more
functions that I have not touched on. You can read through the
tsearch2.sql file built when compiling to get more familiar
with what is included.</p>
<h3>INDEXING FIELDS IN A TABLE</h3>
<p>The next stage is to add a full text index to an existing
table. In this example we already have a table defined as
follows:</p>
<pre>
CREATE TABLE tblMessages
(
intIndex int4,
strTopic varchar(100),
strMessage text
);
</pre>
<p>We are assuming there are several rows with some kind of
data in them. Any data will do, just do several inserts with
test strings for a topic, and a message. Here is some test data
I inserted. (yes I know it's completely useless stuff ;-) but
it will serve our purpose right now).</p>
<pre>
INSERT INTO tblMessages
VALUES ('1', 'Testing Topic', 'Testing message data input');
INSERT INTO tblMessages
VALUES ('2', 'Movie', 'Breakfast at Tiffany\'s');
INSERT INTO tblMessages
VALUES ('3', 'Famous Author', 'Stephen King');
INSERT INTO tblMessages
VALUES ('4', 'Political Topic',
'Nelson Mandella is released from prison');
INSERT INTO tblMessages
VALUES ('5', 'Nursery rhyme phrase',
'Little jack horner sat in a corner');
INSERT INTO tblMessages
VALUES ('6', 'Gettysburg address quotation',
'Four score and seven years ago'
' our fathers brought forth on this'
' continent a new nation, conceived in'
' liberty and dedicated to the proposition'
' that all men are created equal');
INSERT INTO tblMessages
VALUES ('7', 'Classic Rock Bands',
'Led Zeppelin Grateful Dead and The Sex Pistols');
INSERT INTO tblMessages
VALUES ('8', 'My birth address',
'18 Sommervile road, Regina, Saskatchewan');
INSERT INTO tblMessages
VALUES ('9', 'Joke', 'knock knock : who\'s there?'
' I will not finish this joke');
INSERT INTO tblMessages
VALUES ('10', 'Computer information',
'My computer is a pentium III 400 mHz'
' with 192 megabytes of RAM');
</pre>
<p>The next stage is to create a special text index which we
will use for FTI, so we can search our table of messages for
words or a phrase. We do this using the SQL command:</p>
<pre>
ALTER TABLE tblMessages ADD idxFTI tsvector;
</pre>
<p>Note that unlike traditional indexes, this is actually a new
field in the same table, which is then used (through the magic
of the tsearch2 operators and functions) by a special index we
will create in a moment.</p>
<p>The general rule for the initial insertion of data will
follow four steps:</p>
<pre>
1. update table
2. vacuum full analyze
3. create index
4. vacuum full analyze
</pre>
<p>The data can be updated into the table, the vacuum full
analyze will reclaim unused space. The index can be created on
the table after the data has been inserted. Having the index
created prior to the update will slow down the process. It can
be done in that manner, this way is just more efficient. After
the index has been created on the table, vacuum full analyze is
run again to update postgres's statistics (ie having the index
take effect).</p>
<pre>
UPDATE tblMessages SET idxFTI=to_tsvector('default', strMessage);
VACUUM FULL ANALYZE;
</pre>
<p>Note that this only inserts the field strMessage as a
tsvector, so if you want to also add strTopic to the
information stored, you should instead do the following, which
effectively concatenates the two fields into one before being
inserted into the table:</p>
<pre>
UPDATE tblMessages
SET idxFTI=to_tsvector('default',coalesce(strTopic,'') ||' '|| coalesce(strMessage,''));
VACUUM FULL ANALYZE;
</pre>
<p><strong>Using the coalesce function makes sure this
concatenation also works with NULL fields.</strong></p>
<p>We need to create the index on the column idxFTI. Keep in
mind that the database will update the index when some action
is taken. In this case we _need_ the index (The whole point of
Full Text INDEXING ;-)), so don't worry about any indexing
overhead. We will create an index based on the gist function.
GiST is an index structure for Generalized Search Tree.</p>
<pre>
CREATE INDEX idxFTI_idx ON tblMessages USING gist(idxFTI);
VACUUM FULL ANALYZE;
</pre>
<p>After you have converted all of your data and indexed the
column, you can select some rows to see what actually happened.
I will not display output here but you can play around
yourselves and see what happened.</p>
<p>The last thing to do is set up a trigger so every time a row
in this table is changed, the text index is automatically
updated. This is easily done using:</p>
<pre>
CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON tblMessages
FOR EACH ROW EXECUTE PROCEDURE tsearch2(idxFTI, strMessage);
</pre>
<p>Or if you are indexing both strMessage and strTopic you
should instead do:</p>
<pre>
CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON tblMessages
FOR EACH ROW EXECUTE PROCEDURE
tsearch2(idxFTI, strTopic, strMessage);
</pre>
<p>Before you ask, the tsearch2 function accepts multiple
fields as arguments so there is no need to concatenate the two
into one like we did before.</p>
<p>If you want to do something specific with columns, you may
write your very own trigger function using plpgsql or other
procedural languages (but not SQL, unfortunately) and use it
instead of <em>tsearch2</em> trigger.</p>
<p>You could however call other stored procedures from within
the tsearch2 function. Lets say we want to create a function to
remove certain characters (like the @ symbol from all
text).</p>
<pre>
CREATE FUNCTION dropatsymbol(text)
RETURNS text AS 'select replace($1, \'@\', \' \');' LANGUAGE SQL;
</pre>
<p>Now we can use this function within the tsearch2 function on
the trigger.</p>
<pre>
DROP TRIGGER tsvectorupdate ON tblmessages;
CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON tblMessages
FOR EACH ROW EXECUTE PROCEDURE tsearch2(idxFTI, dropatsymbol, strMessage);
INSERT INTO tblmessages VALUES (69, 'Attempt for dropatsymbol', 'Test@test.com');
</pre>
<p>If at this point you receive an error stating: ERROR: Can't
find tsearch config by locale</p>
<p>Do not worry. You have done nothing wrong. And tsearch2 is
not broken. All that has happened here is that the
configuration is setup to use a configuration based on the
locale of the server. All you have to do is change your default
configuration, or add a new one for your specific locale. See
the section on TSEARCH2 CONFIGURATION.</p>
<pre class="real">
SELECT * FROM tblmessages WHERE intindex = 69;
intindex | strtopic | strmessage | idxfti
----------+--------------------------+---------------+-----------------------
69 | Attempt for dropatsymbol | Test@test.com | 'test':1 'test.com':2
(1 row)
</pre>
<p>Notice that the string content was passed through the stored
procedure dropatsymbol. The '@' character was replaced with a
single space ... and the output from the procedure was then stored
in the tsvector column.</p>
<p>This could be useful for removing other characters from
indexed text, or any kind of preprocessing needed to be done on
the text prior to insertion into the index.</p>
<h3>QUERYING A TABLE</h3>
<p>There are some examples in the README.tsearch2 file for
querying a table. One major difference between tsearch and
tsearch2 is the operator ## is no longer available. Only the
operator @@ is defined, using the types tsvector on one side
and tsquery on the other side.</p>
<p>Lets search the indexed data for the word "Test". I indexed
based on the concatenation of the strTopic, and the
strMessage:</p>
<pre>
SELECT intindex, strtopic FROM tblmessages
WHERE idxfti @@ 'test'::tsquery;
intindex | strtopic
----------+---------------
1 | Testing Topic
(1 row)
</pre>
<p>The only result that matched was the row with a topic
"Testing Topic". Notice that the word I search for was all
lowercase. Let's see what happens when I query for uppercase
"Test".</p>
<pre>
SELECT intindex, strtopic FROM tblmessages
WHERE idxfti @@ 'Test'::tsquery;
intindex | strtopic
----------+----------
(0 rows)
</pre>
<p>We get zero rows returned. The reason is because when the
text was inserted, it was morphed to my default configuration
(because of the call to to_tsvector in the UPDATE statement).
If there was no morphing done, and the tsvector field(s)
contained the word 'Test', a match would have been found.</p>
<p>Most likely the best way to query the field is to use the
to_tsquery function on the right hand side of the @@ operator
like this:</p>
<pre>
SELECT intindex, strtopic FROM tblmessages
WHERE idxfti @@ to_tsquery('default', 'Test | Zeppelin');
intindex | strtopic
----------+--------------------
1 | Testing Topic
7 | Classic Rock Bands
(2 rows)
</pre>
<p>That query searched for all instances of "Test" OR
"Zeppelin". It returned two rows: the "Testing Topic" row, and
the "Classic Rock Bands" row. The to_tsquery function performed
the correct morphology upon the parameters, and searched the
tsvector field appropriately.</p>
<p>The last example here relates to searching for a phrase, for
example "minority report". This poses a problem with regard to
tsearch2, as it doesn't index phrases, only words. But there is
a way around which doesn't appear to have a significant impact
on query time, and that is to use a query such as the
following:</p>
<pre>
SELECT intindex, strTopic FROM tblmessages
WHERE idxfti @@ to_tsquery('default', 'gettysburg &amp; address')
AND strMessage ~* '.*men are created equal.*';
intindex | strtopic
----------+------------------------------
6 | Gettysburg address quotation
(1 row)
SELECT intindex, strTopic FROM tblmessages
WHERE idxfti @@ to_tsquery('default', 'gettysburg &amp; address')
AND strMessage ~* '.*something that does not exist.*';
intindex | strtopic
----------+----------
(0 rows)
</pre>
<p>Of course if you're indexing both strTopic and strMessage, and
want to search for this phrase on both, then you will have to
get out the brackets and extend this query a little more.</p>
<h3>TSEARCH2 CONFIGURATION</h3>
<p>Some words such as "and", "the", and "who" are automatically
not indexed, since they belong to a pre-existing dictionary of
"Stop Words" which tsearch2 does not perform indexing on. If
someone needs to search for "The Who" in your database, they
are going to have a tough time coming up with any results,
since both are ignored in the indexes. But there is a
solution.</p>
<p>Lets say we want to add a word into the stop word list for
english stemming. We could edit the file
:'/usr/local/pgsql/share/english.stop' and add a word to the
list. I edited mine to exclude my name from indexing:</p>
<pre>
- Edit /usr/local/pgsql/share/english.stop
- Add 'andy' to the list
- Save the file.
</pre>
<p>When you connect to the database, the dict_init procedure is
run during initialization. And in my configuration it will read
the stop words from the file I just edited. If you were
connected to the DB while editing the stop words, you will need
to end the current session and re-connect. When you re-connect
to the database, 'andy' is no longer indexed:</p>
<pre>
SELECT to_tsvector('default', 'Andy');
to_tsvector
------------
(1 row)
</pre>
<p>Originally I would get the result :</p>
<pre>
SELECT to_tsvector('default', 'Andy');
to_tsvector
------------
'andi':1
(1 row)
</pre>
<p>But since I added it as a stop word, it would be ignored on
the indexing. The stop word added was used in the dictionary
"en_stem". If I were to use a different configuration such as
'simple', the results would be different. There are no stop
words for the simple dictionary. It will just convert to lower
case, and index every unique word.</p>
<pre>
SELECT to_tsvector('simple', 'Andy andy The the in out');
to_tsvector
-------------------------------------
'in':5 'out':6 'the':3,4 'andy':1,2
(1 row)
</pre>
<p>All this talk about which configuration to use is leading us
into the actual configuration of tsearch2. In the examples in
this document the configuration has always been specified when
using the tsearch2 functions:</p>
<pre>
SELECT to_tsvector('default', 'Testing the default config');
SELECT to_tsvector('simple', 'Example of simple Config');
</pre>
<p>The pg_ts_cfg table holds each configuration you can use
with the tsearch2 functions. As you can see, the ts_name column
contains both the 'default' configuration, which is based on the
'C' locale, and the 'simple' configuration, which is not based on
any locale.</p>
<pre>
SELECT * from pg_ts_cfg;
ts_name | prs_name | locale
-----------------+----------+--------------
default | default | C
default_russian | default | ru_RU.KOI8-R
simple | default |
(3 rows)
</pre>
<p>Each row in the pg_ts_cfg table contains the name of the
tsearch2 configuration, the name of the parser to use, and the
locale mapped to the configuration. There is only one parser to
choose from the table pg_ts_parser called 'default'. More
parsers could be written, but for our needs we will use the
default.</p>
<p>There are 3 configurations installed by tsearch2 initially.
If your locale is set to 'en_US' for example (like my laptop),
then as you can see there is currently no dictionary configured
to use with that locale. You can either set up a new
configuration or just use one that already exists. If I do not
specify which configuration to use in the to_tsvector function,
I receive the following error.</p>
<pre>
SELECT to_tsvector('learning tsearch is like going to school');
ERROR: Can't find tsearch config by locale
</pre>
<p>We will create a new configuration for use with the server
locale 'en_US'. The first step is to add a new configuration
into the pg_ts_cfg table. We will call the configuration
'default_english', with the default parser and use the locale
'en_US'.</p>
<pre>
INSERT INTO pg_ts_cfg (ts_name, prs_name, locale)
VALUES ('default_english', 'default', 'en_US');
</pre>
<p>We have only declared that there is a configuration called
'default_english'. We need to set the configuration of how
'default_english' will work. The next step is creating a new
dictionary to use. The configuration of the dictionary is
completely different in tsearch2. In the prior versions to
make changes, you would have to re-compile your changes into
the tsearch.so. All of the configuration has now been moved
into the system tables created by executing the SQL code from
tsearch2.sql.</p>
<p>Let's take a first look at the pg_ts_dict table:</p>
<pre>
ftstest=# \d pg_ts_dict
Table "public.pg_ts_dict"
Column | Type | Modifiers
-----------------+---------+-----------
dict_name | text | not null
dict_init | oid |
dict_initoption | text |
dict_lemmatize | oid | not null
dict_comment | text |
Indexes: pg_ts_dict_idx unique btree (dict_name)
</pre>
<p>The dict_name column is the name of the dictionary, for
example 'simple', 'en_stem' or 'ru_stem'. The dict_init column
is an OID of a stored procedure to run for initialization of
that dictionary, for example 'snb_en_init' or 'snb_ru_init'.
The dict_initoption column holds the options passed to that
init function. In the cases of 'en_stem' or
'ru_stem' it is a path to a stopword file for that dictionary,
for example '/usr/local/pgsql/share/english.stop'. This is
however dictated by the dictionary. ISpell dictionaries may
require different options. The dict_lemmatize column is another
OID of a stored procedure, the function used to lemmatize,
for example 'snb_lemmatize'. The dict_comment column is just a
comment.</p>
<p>Next we will configure the use of a new dictionary based on
ISpell. We will assume you have ISpell installed on your
machine. (in /usr/local/lib)</p>
<p>First let's register the dictionary(ies) to use from ISpell.
We will use the english dictionary from ISpell. We insert the
paths to the relevant ISpell dictionary (*.hash) and affixes
(*.aff) files. There seems to be some question as to which
ISpell files are to be used. I installed ISpell from the latest
sources on my computer. The installation installed the
dictionary files with an extension of *.hash. Some
installations install with an extension of *.dict. As far as I
know the two extensions are equivalent. So *.hash ==
*.dict.</p>
<p>We will also continue to use the english word stop file that
was installed for the en_stem dictionary. You could use a
different one if you like. The ISpell configuration is based on
the "ispell_template" dictionary installed by default with
tsearch2. We will use the OIDs to the stored procedures from
the row where the dict_name = 'ispell_template'.</p>
<pre>
INSERT INTO pg_ts_dict
(SELECT 'en_ispell',
dict_init,
'DictFile="/usr/local/lib/english.hash",'
'AffFile="/usr/local/lib/english.aff",'
'StopFile="/usr/local/pgsql/share/english.stop"',
dict_lexize
FROM pg_ts_dict
WHERE dict_name = 'ispell_template');
</pre>
<p>Next we need to set up the configuration for mapping the
dictionary use to the lexeme types. This will be done by
altering the pg_ts_cfgmap table. We will insert several rows,
specifying the use of the new dictionary we installed and
configured for use within tsearch2. There are several types of
lexemes for which we would want to force the use of the ISpell
dictionary.</p>
<pre>
INSERT INTO pg_ts_cfgmap (ts_name, tok_alias, dict_name)
VALUES ('default_english', 'lhword', '{en_ispell,en_stem}');
INSERT INTO pg_ts_cfgmap (ts_name, tok_alias, dict_name)
VALUES ('default_english', 'lpart_hword', '{en_ispell,en_stem}');
INSERT INTO pg_ts_cfgmap (ts_name, tok_alias, dict_name)
VALUES ('default_english', 'lword', '{en_ispell,en_stem}');
</pre>
<p>We have just inserted 3 records to the configuration
mapping, specifying that the lexeme types for "lhword,
lpart_hword and lword" are to be stemmed using the 'en_ispell'
dictionary we added into pg_ts_dict, when using the
configuration 'default_english' which we added to
pg_ts_cfg.</p>
<p>There are several other lexeme types used that we do not need
to specify as using the ISpell dictionary. We can simply insert
values using the 'simple' stemming process dictionary.</p>
<pre>
INSERT INTO pg_ts_cfgmap
VALUES ('default_english', 'url', '{simple}');
INSERT INTO pg_ts_cfgmap
VALUES ('default_english', 'host', '{simple}');
INSERT INTO pg_ts_cfgmap
VALUES ('default_english', 'sfloat', '{simple}');
INSERT INTO pg_ts_cfgmap
VALUES ('default_english', 'uri', '{simple}');
INSERT INTO pg_ts_cfgmap
VALUES ('default_english', 'int', '{simple}');
INSERT INTO pg_ts_cfgmap
VALUES ('default_english', 'float', '{simple}');
INSERT INTO pg_ts_cfgmap
VALUES ('default_english', 'email', '{simple}');
INSERT INTO pg_ts_cfgmap
VALUES ('default_english', 'word', '{simple}');
INSERT INTO pg_ts_cfgmap
VALUES ('default_english', 'hword', '{simple}');
INSERT INTO pg_ts_cfgmap
VALUES ('default_english', 'nlword', '{simple}');
INSERT INTO pg_ts_cfgmap
VALUES ('default_english', 'nlpart_hword', '{simple}');
INSERT INTO pg_ts_cfgmap
VALUES ('default_english', 'part_hword', '{simple}');
INSERT INTO pg_ts_cfgmap
VALUES ('default_english', 'nlhword', '{simple}');
INSERT INTO pg_ts_cfgmap
VALUES ('default_english', 'file', '{simple}');
INSERT INTO pg_ts_cfgmap
VALUES ('default_english', 'uint', '{simple}');
INSERT INTO pg_ts_cfgmap
VALUES ('default_english', 'version', '{simple}');
</pre>
<p>Our addition of a configuration for 'default_english' is now
complete. We have successfully created a new tsearch2
configuration. At the same time we have also set the new
configuration to be our default for en_US locale.</p>
<pre>
SELECT to_tsvector('default_english',
'learning tsearch is like going to school');
to_tsvector
--------------------------------------------------
'go':5 'like':4 'learn':1 'school':7 'tsearch':2
SELECT to_tsvector('learning tsearch is like going to school');
to_tsvector
--------------------------------------------------
'go':5 'like':4 'learn':1 'school':7 'tsearch':2
(1 row)
</pre>
<p>In the case that you already have a configuration set for
the locale, and you are changing it to your new dictionary
configuration, you will have to set the old locale to NULL. If
we are using the 'C' locale then we would do this:</p>
<pre>
UPDATE pg_ts_cfg SET locale=NULL WHERE locale = 'C';
</pre>
<p>That about wraps up the configuration of tsearch2. There is
much more you can do with the tables provided. This was just an
introduction to get things working rather quickly.</p>
<h3>ADDING NEW DICTIONARIES TO TSEARCH2</h3>
<p>To aid in the addition of new dictionaries to the tsearch2
module you can use another additional module in combination
with tsearch2. The gendict module is included into tsearch2
distribution and is available from gendict/ subdirectory.</p>
<p>I will not go into detail about installation and
instructions on how to use gendict to its fullest extent right
now. You can read the README.gendict ... it has all of the
instructions and information you will need.</p>
<h3>BACKING UP AND RESTORING DATABASES THAT FEATURE
TSEARCH2</h3>
<p>Believe it or not, this isn't as straightforward as it
should be, and you will have problems trying to backup and
restore any database which uses tsearch2 unless you take the
steps shown below. And before you ask, using pg_dumpall will
result in failure every time. These took a lot of trial and
error to get working, but the process as laid down below has
been used a dozen times now in live production environments so
it should work fine.</p>
<p>HOWEVER, never rely on anyone else's instructions to back up
and restore a database system, always develop and understand
your own methodology, and test it numerous times before you
need to do it for real.</p>
<p>To Backup a PostgreSQL database that uses the tsearch2
module:</p>
<p>1) Backup any global database objects such as users and
groups (this step is usually only necessary when you will be
restoring to a virgin system)</p>
<pre>
pg_dumpall -g &gt; GLOBALobjects.sql
</pre>
<p>2) Backup the full database schema using pg_dump</p>
<pre>
pg_dump -s DATABASE &gt; DATABASEschema.sql
</pre>
<p>3) Backup the full database using pg_dump</p>
<pre>
pg_dump -Fc DATABASE &gt; DATABASEdata.tar
</pre>
<p>To Restore a PostgreSQL database that uses the tsearch2
module:</p>
<p>1) Create the blank database</p>
<pre>
createdb DATABASE
</pre>
<p>2) Restore any global database objects such as users and
groups (this step is usually only necessary when you will be
restoring to a virgin system)</p>
<pre>
psql DATABASE &lt; GLOBALobjects.sql
</pre>
<p>3) Create the tsearch2 objects, functions and operators</p>
<pre>
psql DATABASE &lt; tsearch2.sql
</pre>
<p>4) Edit the backed up database schema and delete all SQL
commands which create tsearch2 related functions, operators and
data types, BUT NOT fields in table definitions that specify
tsvector types. If you're not sure what these are, they are the
ones listed in tsearch2.sql. Then restore the edited schema to
the database</p>
<pre>
psql DATABASE &lt; DATABASEschema.sql
</pre>
<p>5) Restore the data for the database</p>
<pre>
pg_restore -N -a -d DATABASE DATABASEdata.tar
</pre>
<p>If you get any errors in step 4, it will most likely be
because you forgot to remove an object that was created in
tsearch2.sql. Any errors in step 5 will mean the database
schema was probably restored wrongly.</p>
</div>
</body>
</html>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<link type="text/css" rel="stylesheet" href="/~megera/postgres/gist/tsearch/tsearch.css">
<title>tsearch2 guide</title>
</head>
<body>
<h1 align=center>The tsearch2 Guide</h1>
<p align=center>
Brandon Craig Rhodes<br>30 June 2003
<p>
This Guide introduces the reader to the PostgreSQL tsearch2 module,
version&nbsp;2.
More formal descriptions of the module's types and functions
are provided in the <a href="tsearch2-ref.html">tsearch2 Reference</a>,
which is a companion to this document.
You can retrieve a beta copy of the tsearch2 module from the
<a href="http://www.sai.msu.su/~megera/postgres/gist/">GiST for PostgreSQL</a>
page &mdash; look under the section entitled <i>Development History</i>
for the current version.
<p>
First we will examine the <tt>tsvector</tt> and <tt>tsquery</tt> types
and how they are used to search documents;
next, we will use them to build a simple search engine in&nbsp;SQL;
and finally, we will study the internals of document conversion
and how you might tune the internals to accommodate various searching needs.
<p>
Once you have tsearch2 working with PostgreSQL,
you should be able to run the examples here exactly as they are typed.
<p>
<hr>
<h2>Table of Contents</h2>
<blockquote>
<a href="#vectors_queries">Vectors and Queries</a><br>
<a href="#simple_search">A Simple Search Engine</a><br>
<a href="#weights">Ranking and Position Weights</a><br>
<a href="#casting">Casting Vectors and Queries</a><br>
<a href="#parsing_lexing">Parsing and Lexing</a><br>
</blockquote>
<hr>
<h2><a name=vectors_queries>Vectors and Queries</a></h2>
<blockquote>
<i>This section introduces
the two data types upon which tsearch2 search engines are based,
and illustrates their interaction using the simplest possible case.
The complex examples we present later on
are merely variations and elaborations of this basic mechanism.</i>
</blockquote>
<p>
The tsearch2 module allows you to index documents by the words they contain,
and then perform very efficient searches
for documents that contain a given combination of words.
Preparing your document index involves two steps:
<ul>
<li><b>Making a list of the words each document contains.</b>
You must reduce each document to a <tt>tsvector</tt>
which lists each word that appears in the document.
This process offers many options,
because there is no requirement
that you must copy words into the vector
exactly as they appear in the document.
For example,
many developers omit frequent and content-free <b>stop words</b>
like <i>the</i> to reduce the size of their index;
others reduce different forms of the same word
(<i>forked</i>, <i>forking</i>, <i>forks</i>)
to a common form (<i>fork</i>)
to make search results independent of tense and case.
Because words are very often stored in a modified form,
we use the special term <b>lexemes</b>
for the word forms we actually store in the vector.
<li><b>Creating an index of the documents by lexeme.</b>
This is managed automatically by tsearch2
when you create a <tt>gist()</tt> index
on the <tt>tsvector</tt> column of a table,
which implements a form of the Berkeley
<a href="http://gist.cs.berkeley.edu/"><i>Generalized Search Tree</i></a>.
</ul>
Once your documents are indexed,
performing a search involves:
<ul>
<li><b>Reducing the search terms to lexemes.</b>
You must express each search you want to perform
as a <tt>tsquery</tt> specifying a boolean combination of lexemes.
Note that tsearch2 only finds <i>exact</i> matches
between the lexemes in your query and the ones in each vector &mdash;
even capitalization counts as a difference
(which is why all lexemes are usually kept lowercase).
So you must process search words the same way you processed document words;
if <i>forking</i> became <i>fork</i> in the document's <tt>tsvector</tt>,
then the search term <i>forking</i> must also become <i>fork</i>
or the search will not find the document.
<li><b>Retrieving the documents that match the query.</b>
Running a <tt>SELECT</tt> ... <tt>WHERE</tt>
<tt><i>query</i></tt> <tt>@@</tt> <tt><i>vector</i></tt>
on the table with the <tt><i>vector</i></tt> column
will return the documents that match your query.
<li><b>Presenting your results.</b>
This final stage offers as many options
as turning documents into vectors.
You can order documents by how well they matched the search terms;
create a headline for each document
showing some of the phrases in which it uses the search terms;
and restrict the number of results retrieved.
You will of course want some way to identify each document,
so the user can ask for the full text of the ones he wants to read.
</ul>
And beyond deciding upon rules for turning documents into vectors
and for presenting search results to users,
you have to decide <i>where</i> to perform these operations &mdash;
whether one database server
will parse documents, perform searches, and prepare search results,
or whether to spread the load of these operations across several machines.
These are complicated design issues
which we will explore later;
in this section and the next,
we will illustrate what can be accomplished
using a single database server.
<p>
The <tt>default</tt> tsearch2 configuration,
which we will learn more about later,
provides a good example of a process for reducing documents to vectors:
<pre>
=# <b>SELECT set_curcfg('default')</b>
=# <b>SELECT to_tsvector('The air smells of sea water.')</b>
to_tsvector
-------------------------------------
'air':2 'sea':5 'smell':3 'water':6
(1 row)
</pre>
Note the complex relationship between this document and its vector.
The vector lists only words from the document &mdash;
spaces and punctuation have disappeared.
Common words like <i>the</i> and <i>of</i> have been eliminated.
The&nbsp;<i>-s</i> that makes <i>smells</i> a plural has been removed,
leaving a lexeme that represents the word in its simplest form.
And finally,
though the vector remembers the positions in which each word appeared,
it does not store the lexemes in that order.
<p>
Keeping word positions in your vectors is optional, by the way.
The positions are necessary for the tsearch2 ranking functions,
which you can use to prioritize documents
based on how often each document uses the search terms
and whether they appear in close proximity.
But if you do not perform ranking,
or use your own process that ignores the word positions stored in the vector,
then you can save space by stripping them from your vectors:
<pre>
=# <b>SELECT strip(to_tsvector('The air smells of sea water.'))</b>
strip
-----------------------------
'air' 'sea' 'smell' 'water'
(1 row)
</pre>
Now that we have a procedure for creating vectors,
we can build an indexed table of vectors very simply:
<pre>
=# <b>CREATE TABLE vectors ( vector tsvector )</b>
=# <b>CREATE INDEX vector_index ON vectors USING gist(vector)</b>
=# <b>INSERT INTO vectors VALUES (to_tsvector('The path forks here'))</b>
=# <b>INSERT INTO vectors VALUES (to_tsvector('A crawl leads west'))</b>
=# <b>INSERT INTO vectors VALUES (to_tsvector('The left fork leads northeast'))</b>
=# <b>SELECT * FROM vectors</b>
vector
------------------------------------------
'fork':3 'path':2
'lead':3 'west':4 'crawl':2
'fork':3 'lead':4 'left':2 'northeast':5
(3 rows)
</pre>
Now we can search this collection of document vectors
using the <tt>@@</tt> operator and a <tt>tsquery</tt>
that specifies the combination of lexemes we are looking for.
Note that while vectors simply list lexemes,
queries always combine them with the operators
&lsquo;<tt>&amp;</tt>&rsquo;&nbsp;and,
&lsquo;<tt>|</tt>&rsquo;&nbsp;or,
and &nbsp;&lsquo;<tt>!</tt>&rsquo;&nbsp;not,
plus parentheses for grouping.
Some examples of the query syntax:
<table align=center>
<tr>
<td>&lsquo;find documents with the word <i>forks</i> in them&rsquo;<br>
<td><tt>'forks'</tt>
<tr>
<td>&lsquo;... with both <i>forks</i> and <i>leads</i>&rsquo;<br>
<td><tt>'forks &amp; leads'</tt>
<tr>
<td>&lsquo;... with either <i>forks</i> or <i>leads</i>&rsquo;<br>
<td><tt>'forks | leads'</tt>
<tr>
<td>&lsquo;... with either <i>forks</i> or <i>leads</i>,
but without <i>crawl</i>&rsquo;<br>
<td><tt>'(forks|leads) &amp; !crawl'</tt>
</table>
The tsearch2 module
provides a <tt>to_tsquery()</tt> function for creating queries
that uses the same process as <tt>to_tsvector()</tt> uses
to reduce words to lexemes.
For instance,
it will remove the&nbsp;<i>-s</i> from the plurals in the last example above:
<pre>
=# <b>SELECT to_tsquery('(leads|forks) & !crawl')</b>
to_tsquery
--------------------------------
( 'lead' | 'fork' ) & !'crawl'
(1 row)
</pre>
Again,
this is critically important because the search operator <tt>@@</tt>
only finds <i>exact</i> matches
between the words in a query and the words in a vector;
if the document vector lists the lexeme <i>fork</i>
but the query looks for the plural form <i>forks</i>,
the query would not match that document.
Thanks to the symmetry between our process
for producing vectors and queries, however,
the above searches return correct results:
<pre>
=# <b>SELECT * FROM vectors WHERE vector @@ to_tsquery('(leads|forks) & !crawl')</b>
vector
------------------------------------------
'fork':3 'path':2
'fork':3 'lead':4 'left':2 'northeast':5
(2 rows)
</pre>
You may want to try the other queries shown above,
and perhaps invent some of your own.
<p>
You should not include stop words in a query,
since you cannot search for words you have discarded.
If you throw out the word <i>the</i> when building vectors, for example,
your index will obviously not know which documents included it.
The <tt>to_tsquery()</tt> function will automatically detect this
and warn you with a notice to prevent this mistake:
<pre>
=# <b>SELECT to_tsquery('the')</b>
NOTICE: Query contains only stopword(s) or doesn't contain lexem(s), ignored
to_tsquery
------------
(1 row)
</pre>
But if you ever build vectors and queries using your own routines,
a possibility we will discuss later,
then you will need to enforce this rule yourself.
<blockquote><i>
Now that you understand how vectors and queries work together,
you are prepared to tackle many additional topics:
how to distribute searching across many servers;
how to customize the process
by which tsearch2 turns documents and queries into lexemes,
or use a process of your own;
and how to sort and display search results to your users.
But before discussing these detailed questions,
we will build a simple search engine
to see how easily its basic features work together.
</i></blockquote>
<h2><a name=simple_search>A Simple Search Engine</a></h2>
<blockquote><i>
In this section we build a simple search engine out of SQL functions
that use the vector and query types described in the previous section.
While this example is simpler
than a search engine that has to interface with the outside world,
it will illustrate the basic principles of building a search engine,
and better prepare you for developing your own.
</i></blockquote>
Building a search engine involves only a few improvements
upon the rudimentary vector searches described in the last section.
<ul>
<li>Because the user wants to read documents, not vectors,
you must provide some way
for the full text of each document to be accessed &mdash;
either by storing the entire text of each document in the database,
or storing an identifier
like a URL, file name, or document routing number
that lets you fetch the document from other storage.
<li>You can make it easier for user interface code to refer to each document
by providing a unique identifier for each document,
perhaps with a <tt>SERIAL</tt> column.
<li>Search results should be ordered by relevance.
If you leave word positions in your vectors,
you can either have PostgreSQL <tt>ORDER</tt> your results
<tt>BY</tt> a ranking function,
or you can fetch the vectors yourself and perform your own sort.
If you choose to ignore word positions or strip them from your vectors,
you will have to determine relevance yourself,
using either the full text of the document
or other information about each document you may possess.
<li>For each document returned by a search,
you will usually want to display a summary called a <i>headline</i>
that shows short excerpts
illustrating how the document uses the query words.
Headlines are usually generated from the full text of the document,
not from position information in the <tt>tsvector</tt>,
since excerpts lacking stop words, punctuation, and suffixes
would not be comprehensible.
If you store the full text of each document in the database,
headlines can be generated very simply by a tsearch2 function.
If you store your documents elsewhere,
then you will either have to transmit each document to the database
every time you want to run the headline function on it,
or use your own headline code outside of the database.
</ul>
<p>
We can easily construct a simple search engine
that accomplishes these goals.
First we build a table that, for each document,
stores a unique identifier, the full text of the document,
and its <tt>tsvector</tt>:
<pre>
=# <b>CREATE TABLE docs ( id SERIAL, doc TEXT, vector tsvector )</b>
=# <b>CREATE INDEX docs_index ON docs USING gist(vector);</b>
</pre>
Note that although searches will still work
on tables where you have neglected
to create a <tt>gist()</tt> index over your vectors,
they will run much more slowly
since they will have to compare the query
against every document vector in the table.
<p>
Because the table we have created
stores each document in two different ways &mdash;
both as text and as a vector &mdash;
our <tt>INSERT</tt> statements must provide the document in both forms.
While more advanced PostgreSQL programmers
might accomplish this with a database trigger or rule,
for this simple example we will use a small SQL function:
<pre>
=# <b>CREATE FUNCTION insdoc(text) RETURNS void LANGUAGE sql AS
'INSERT INTO docs (doc, vector) VALUES ($1, to_tsvector($1));'</b>
</pre>
Now, by calling <tt>insdoc()</tt> several times,
we can populate our table with documents:
<pre>
=# <b>SELECT insdoc('A low crawl over cobbles leads inward to the west.')</b>
=# <b>SELECT insdoc('The canyon runs into a mass of boulders -- dead end.')</b>
=# <b>SELECT insdoc('You are crawling over cobbles in a low passage.')</b>
=# <b>SELECT insdoc('Cavernous passages lead east, north, and south.')</b>
=# <b>SELECT insdoc('To the east a low wide crawl slants up.')</b>
=# <b>SELECT insdoc('You are in the south side chamber.')</b>
=# <b>SELECT insdoc('The passage here is blocked by a recent cave-in.')</b>
=# <b>SELECT insdoc('You are in a splendid chamber thirty feet high.')</b>
</pre>
Now we can build a search function.
Its <tt>SELECT</tt> statement is based upon
the same <tt>@@</tt> operation illustrated in the previous section.
But instead of returning matching vectors,
we return for each document
its <tt>SERIAL</tt> identifier, so the user can retrieve it later;
a headline that illustrates its use of the search terms;
and a ranking with which we also order the results.
Our search operation can be coded as a single <tt>SELECT</tt> statement
returning its own kind of table row,
which we call a&nbsp;<tt>finddoc_t</tt>:
<pre>
=# <b>CREATE TYPE finddoc_t AS (id INTEGER, headline TEXT, rank REAL)</b>
=# <b>CREATE FUNCTION finddoc(text) RETURNS SETOF finddoc_t LANGUAGE sql AS '
SELECT id, headline(doc, q), rank(vector, q)
FROM docs, to_tsquery($1) AS q
WHERE vector @@ q ORDER BY rank(vector, q) DESC'</b>
</pre>
This function is a rather satisfactory search engine.
Here is one example search,
after which the user fetches the top-ranking document itself;
with similar commands you can try queries of your own:
<pre>
=# <b>SELECT * FROM finddoc('passage|crawl')</b>
id | headline | rank
----+-------------------------------------------------------+------
3 | &lt;b&gt;crawling&lt;/b&gt; over cobbles in a low &lt;b&gt;passage&lt;/b&gt;. | 0.19
1 | &lt;b&gt;crawl&lt;/b&gt; over cobbles leads inward to the west. | 0.1
4 | &lt;b&gt;passages&lt;/b&gt; lead east, north, and south. | 0.1
5 | &lt;b&gt;crawl&lt;/b&gt; slants up. | 0.1
7 | &lt;b&gt;passage&lt;/b&gt; here is blocked by a recent cave-in. | 0.1
(5 rows)
=# <b>SELECT doc FROM docs WHERE id = 3</b>
doc
-------------------------------------------------
You are crawling over cobbles in a low passage.
(1 row)
</pre>
While by default the <tt>headline()</tt> function
surrounds matching words with <tt>&lt;b&gt;</tt> and <tt>&lt;/b&gt;</tt>
in order to distinguish them from the surrounding text,
you can provide options that change its behavior;
consult the tsearch2 Reference for more details about
<a href="tsearch2-ref.html#headlines">Headline Functions</a>.
<p>
Though a search may match hundreds or thousands of documents,
you will usually present only ten or twenty results to the user at a time.
This can be most easily accomplished
by limiting your query with a <tt>LIMIT</tt>
and an <tt>OFFSET</tt> clause &mdash;
to display results ten at a time, for example,
you would generate your first page of results
with <tt>LIMIT</tt> <tt>10</tt> <tt>OFFSET</tt> <tt>0</tt>,
your second page
with <tt>LIMIT</tt> <tt>10</tt> <tt>OFFSET</tt> <tt>10</tt>,
your third page
with <tt>LIMIT</tt> <tt>10</tt> <tt>OFFSET</tt> <tt>20</tt>,
and so forth.
There are two problems with this approach, however.
<p>
The first problem is the strain of running the query over again
for every page of results the user views.
For small document collections or lightly loaded servers,
this may not be a problem;
but the impact can be high
when a search must repeatedly rank and sort
the same ten thousand results
on an already busy server.
So instead of selecting only one page of results,
you will probably use <tt>LIMIT</tt> and <tt>OFFSET</tt>
to return a few dozen or few hundred results,
which you can cache and display to the user one page at a time.
Whether a result cache rewards your effort
will depend principally on the behavior of your users &mdash;
how often they even view the second page of results, for instance.
<p>
The second issue solved by caching involves consistency.
If the database is changing while the user browses their results,
then documents might appear and disappear as they page through them.
In some cases the user might even miss a particular result &mdash;
perhaps the one they were looking for &mdash;
if, say, its rank improves from 31st to 30th
after they load results 21&ndash;30 but before they view results 31&ndash;40.
While many databases are static or infrequently updated,
and will not present this problem,
users searching very dynamic document collections
might benefit from the stable results that caches yield.
<blockquote><i>
Having seen the features of a search engine
implemented entirely within the database,
we will learn about some specific tsearch2 features.
First we will look in more detail at document ranking.
</i></blockquote>
<h2><a name=weights>Ranking and Position Weights</a></h2>
<blockquote><i>
When we built our simple search engine,
we used the </i><tt>rank()</tt><i> function to order our results.
Here we describe tsearch2 ranking in more detail.
</i></blockquote>
There are two functions with which tsearch2 can rank search results.
They both use the lexeme positions listed in the <tt>tsvector</tt>,
so you cannot rank vectors
from which these have been removed with <tt>strip()</tt>.
The <tt>rank()</tt> function existed in older versions of OpenFTS,
and has the feature that you can assign different weights
to words from different sections of your document.
The <tt>rank_cd()</tt> function uses a recent technique for weighting results
but does not allow different weights to be given
to different sections of your document.
<p>
Both ranking functions allow you to specify,
as an optional last argument,
whether you want their results <i>normalized</i> &mdash;
whether the rank returned should be adjusted for document length.
Specifying a last argument of <tt>0</tt> (zero) makes no adjustment;
<tt>1</tt> (one) divides the document rank
by the logarithm of the document length;
and <tt>2</tt> divides it by the plain length.
In all of these examples we omit this optional argument,
which is the same as specifying zero &mdash;
we are making no adjustment for document length.
<p>
The <tt>rank_cd()</tt> function uses an experimental measurement
called <i>cover density ranking</i> that rewards documents
when they make frequent use of the search terms
that are close together in the document.
You can read about the algorithm in more detail
in Clarke&nbsp;et&nbsp;al.,
&ldquo;<a href="http://citeseer.nj.nec.com/clarke00relevance.html"
>Relevance Ranking for One to Three Term Queries</a>.&rdquo;
An optional first argument allows you to tune their formula;
for details
see the <a href="tsearch2-ref.html#ranking">section on ranking</a>
in the Reference.
<p>
The <tt>rank()</tt> function offers more flexibility
because it pays attention to the <i>weights</i>
with which you have labelled lexeme positions.
Currently tsearch2 supports four different weight labels:
<tt>'D'</tt>, the default weight;
and <tt>'A'</tt>, <tt>'B'</tt>, and <tt>'C'</tt>.
All vectors created with <tt>to_tsvector()</tt>
assign the weight <tt>'D'</tt> to each position,
which as the default is not displayed when you print a vector out.
<p>
If you want positions with weights other than <tt>'D'</tt>,
you have two options:
either you can author a vector directly through the <tt>::tsvector</tt>
casting operation,
as described in the following section,
which lets you give each position whichever weight you want;
or you can pass a vector through the <tt>setweight()</tt> function
which sets all of its position weights to a single value.
An example of the latter:
<pre>
=# <b>SELECT vector FROM docs WHERE id = 3</b>
vector
----------------------------------------
'low':8 'cobbl':5 'crawl':3 'passag':9
(1 row)
=# <b>SELECT setweight(vector, 'A') FROM docs WHERE id = 3</b>
setweight
--------------------------------------------
'low':8A 'cobbl':5A 'crawl':3A 'passag':9A
(1 row)
</pre>
Merely changing all of the weights in a vector is not very useful,
of course,
since this still results in all words having the same weight.
But if we parse different parts of a document separately,
giving each section its own weight,
and then concatenate the vectors of each part into a single vector,
the result can be very useful.
We can construct a simple example
in which document titles are given greater weight
than text in the body of the document:
<pre>
=# <b>CREATE TABLE tdocs ( id SERIAL, title TEXT, doc TEXT, vector tsvector )</b>
=# <b>CREATE INDEX tdocs_index ON tdocs USING gist(vector);</b>
=# <b>CREATE FUNCTION instdoc(text, text) RETURNS void LANGUAGE sql AS
'INSERT INTO tdocs (title, doc, vector)
VALUES ($1, $2, setweight(to_tsvector($1), ''A'') || to_tsvector($2));'</b>
</pre>
Now words from a document title will be weighted differently
than those in the main text
if we provide the title and body as separate arguments:
<pre>
=# <b>SELECT instdoc('Spendid Chamber',
'The walls are frozen rivers of orange stone.')</b>
instdoc
---------
(1 row)
=# <b>SELECT vector FROM tdocs</b>
vector
------------------------------------------------------------------------------
'wall':4 'orang':9 'river':7 'stone':10 'frozen':6 'chamber':2A 'spendid':1A
(1 row)
</pre>
Note that although the necessity is unusual,
you can constrain search terms
to only match words from certain sections
by following them with a colon
and a list of the sections in which the word can occur;
by default this list is <tt>'ABCD'</tt>
so that search terms match words from all sections.
For example,
here we search for a word both generally,
and then looking only for specific weights:
<pre>
=# <b>SELECT title, doc FROM tdocs WHERE vector @@ to_tsquery('spendid')</b>
title | doc
-----------------+----------------------------------------------
Spendid Chamber | The walls are frozen rivers of orange stone.
(1 row)
=# <b>SELECT title, doc FROM tdocs WHERE vector @@ to_tsquery('spendid:A')</b>
title | doc
-----------------+----------------------------------------------
Spendid Chamber | The walls are frozen rivers of orange stone.
(1 row)
=# <b>SELECT title, doc FROM tdocs WHERE vector @@ to_tsquery('spendid:D')</b>
title | doc
-------+-----
(0 rows)
</pre>
<blockquote><i>
Our examples so far use tsearch2 to parse our documents into vectors.
When your application needs absolute control over vector content,
you will want to use direct type casting,
which is described in the next section.
</i></blockquote>
<h2><a name=casting>Casting Vectors and Queries</a></h2>
<blockquote><i>
While tsearch2 has powerful and flexible ways
to process documents and turn them into document vectors,
you will sometimes want to parse documents on your own
and place the results directly in vectors.
Here we show you how.
</i></blockquote>
In the preceding examples,
we used the <tt>to_tsvector()</tt> function
when we needed a document's text reduced to a document vector.
We saw that the function stripped whitespace and punctuation,
eliminated common words,
and altered suffixes to reduce words to a common form.
While these operations are often desirable,
and while in the sections below
we will gain precise control over this process,
there are occasions on which
you want to avoid the changes that <tt>to_tsvector()</tt> makes to text
and specify explicitly the words that you want in your vectors.
Or you may want to create queries directly
rather than through <tt>to_tsquery()</tt>.
<p>
For example,
you may have already developed your own routine
for reducing your documents to searchable lexemes,
and do not want your carefully generated terms altered
by passing them through <tt>to_tsvector()</tt>.
Or you might be developing and debugging parsing routines of your own
that you are not ready to load into the database.
In either case,
you will find that direct insertion is easily accomplished
if you follow a few simple rules.
<p>
Vectors are created directly
when you cast a string of whitespace-separated lexemes
to the <tt>tsvector</tt> type:
<pre>
=# <b>select 'the only exit is the way you came in'::tsvector</b>
tsvector
--------------------------------------------------
'in' 'is' 'the' 'way' 'you' 'came' 'exit' 'only'
(1 row)
</pre>
Notice that the conversion interpreted the string
simply as a list of lexemes to be included in the vector.
Their order was lost,
as was the number of times each lexeme appeared.
You must keep in mind that directly creating vectors with casting
is <i>not</i> an alternate means of parsing;
it is a way of directly entering lexemes into a vector <i>without</i> parsing.
<p>
Queries can also be created through casting,
if you separate lexemes with boolean operators
rather than with whitespace.
When creating your own vectors and queries,
remember that the search operator <tt>@@</tt>
finds only <i>exact</i> matches between query lexemes and vector lexemes
&mdash;
if they are not exactly the same string,
they will not be considered a match.
<p>
To include lexeme positions in your vector,
write the positions exactly the way tsearch2 displays them
when it prints vectors:
by following each lexeme with a colon
and a comma-separated list of integer positions.
If you list a lexeme more than once,
then all the positions listed for it are combined into a single list.
For example,
here are two ways of writing the same vector,
depending on whether you mention &lsquo;<tt>the</tt>&rsquo; twice
or combine its positions into a list yourself:
<pre>
=# <b>select 'the:1 only:2 exit:3 is:4 the:5 way:6 you:7 came:8 in:9'::tsvector</b>
tsvector
--------------------------------------------------------------------
'in':9 'is':4 'the':1,5 'way':6 'you':7 'came':8 'exit':3 'only':2
(1 row)
=# <b>select 'the:1,5 only:2 exit:3 is:4 way:6 you:7 came:8 in:9'::tsvector</b>
tsvector
--------------------------------------------------------------------
'in':9 'is':4 'the':1,5 'way':6 'you':7 'came':8 'exit':3 'only':2
(1 row)
</pre>
Things can get slightly tricky
if you want to include apostrophes, backslashes, or spaces
inside your lexemes
(wanting to include either of the latter would be unusual,
but they can be included if you follow the rules).
The main problem is that the apostrophe and backslash
are important <i>both</i> to PostgreSQL when it is interpreting a string,
<i>and</i> to the <tt>tsvector</tt> conversion function.
You may want to review section
<a href="http://www.postgresql.org/docs/view.php?version=7.3&amp;idoc=0&amp;file=sql-syntax.html#SQL-SYNTAX-STRINGS">1.1.2.1,
&ldquo;String Constants&rdquo;</a>
in the PostgreSQL documentation before proceeding.
<p>
When you cast strings directly into vectors:
<ul>
<li>The string is interpreted as a whitespace-separated list of lexemes,
any of which can be suffixed with a colon and a list of positions.
<li>A lexeme can be quoted by preceding it with an apostrophe,
in which case it runs until the next apostrophe;
otherwise a lexeme ends with the first whitespace or colon encountered.
<li>Any character preceded by a backslash,
including whitespace, the apostrophe, the colon, and the backslash itself,
loses its normal meaning and is treated as a letter.
Backslashes are effective
both inside and outside of apostrophe-quoted lexemes.
<li>A lexeme can be suffixed with a list of positions
by appending a colon and a comma-separated list of integers,
each of which can itself be followed by a letter
to designate a position weight
(position weights are <a href="#weights">described above</a>).
</ul>
Here are some example strings,
showing the lexeme you want to insert
together with the string that the <tt>::tsvector</tt> operator
needs to see,
and how you would type that string at the PostgreSQL prompt:
<table align=center>
<tr>
<td><i>For the lexeme...</i>
<td><i>you need the string...</i>
<td><i>which you can type as:</i>
<tr>
<td><tt>nugget</tt>
<td><tt>nugget</tt>
<td><tt>'nugget'</tt>
<tr>
<td><tt>won't</tt>
<td><tt>won't</tt>
<td><tt>'won''t'</tt>
<tr>
<td><tt>pinin'</tt>
<td><tt>pinin'</tt>
<td><tt>'pinin'''</tt>
<tr>
<td><tt>'bout</tt>
<td><tt>\'bout</tt>
<td><tt>'\\''bout'</tt>
<tr>
<td><tt>white mist</tt>
<td><tt>white\ mist</tt>
<td><tt>'white\\ mist'</tt>
<tr>
<td align=right><tt><i>or:</i></tt>
<td><tt>'white mist'</tt>
<td><tt>'''white mist'''</tt>
<tr>
<td><tt>won't budge</tt>
<td><tt>won\'t\ budge</tt>
<td><tt>'won\\''t\\ budge'</tt>
<tr>
<td align=right><tt><i>or:</i></tt>
<td><tt>'won\'t budge'</tt>
<td><tt>'''won\\''t budge'''</tt>
<tr>
<td><tt>back\slashed</tt>
<td><tt>back\\slashed</tt>
<td><tt>'back\\\\slashed'</tt>
</table>
Remember to use the quoted quoting shown at the right
only when typing in strings as part of a PostgreSQL query.
If you are providing strings through a library
that automatically quotes them
or provides them in binary form to PostgreSQL,
then you can use the strings in the middle instead &mdash;
suitably quoted in the language you are using, of course.
<p>
Position weights are <a href="#weights">described above</a>
and can be written exactly as they will be displayed
when you select a weighted vector:
<pre>
=# <b>select 'weighty:1,3A trivial:2B,4'::tsvector</b>
tsvector
-------------------------------
'trivial':2B,4 'weighty':1,3A
(1 row)
</pre>
<p>
Note that if you are composing SQL queries
in a scripting language like Perl or Python,
that itself considers quotes and backslashes special,
then you may have another quoting layer to deal with
on top of the two layers already shown above.
In such cases you may want to write a function
that performs the necessary quoting for you.
<blockquote><i>
Having seen how to create vectors of your own,
it is time to learn how the native tsearch2 parser
reduces documents to vectors.
</i></blockquote>
<h2><a name=parsing_lexing>Parsing and Lexing</a></h2>
<blockquote><i>
The previous section
described how you can bypass the parser provided by tsearch2
and populate your table of documents
with vectors of your own devising.
But for those interested in the native tsearch2 facilities,
we present here an overview of how it goes about
reducing documents to vectors.
</i></blockquote>
The <tt>to_tsvector()</tt> function reduces documents to vectors
in two stages.
First, a <i>parser</i> breaks the input document
into short sequences of text called <i>tokens</i>.
Each token is usually a word, space, or piece of punctuation,
though some parsers return larger and more exotic items
like HTML tags as single tokens.
Each token returned by the parser
is either discarded
or passed to a <i>dictionary</i> that converts it into a lexeme.
The resulting lexemes are collected into a vector and returned.
<p>
The choice of which parser and dictionaries <tt>to_tsvector()</tt> should use
is controlled by your choice of <i>configuration</i>.
The tsearch2 module comes with several configurations,
and you can define more of your own;
in fact the creation of a new configuration is illustrated below,
in the section on position weights.
<p>
To learn about parsing in more detail,
we will study this example:
<pre>
=# <b>select to_tsvector('default',
'The walls extend upward for well over 100 feet.')</b>
to_tsvector
----------------------------------------------------------
'100':8 'feet':9 'wall':2 'well':6 'extend':3 'upward':4
(1 row)
</pre>
Unlike the <tt>to_tsvector()</tt> calls used in the above examples,
this one specifies the <tt>'default'</tt> configuration explicitly.
When we called <tt>to_tsvector()</tt> in earlier examples
with only one argument,
it used the <i>current</i> configuration,
which is chosen automatically based on your <tt>LOCALE</tt>
if that locale is mentioned in the <tt>pg_ts_cfg</tt> table
(which is shown under the first bullet in the description below).
If your locale is not listed in the table,
your attempts to use the current configuration will return:
<pre>
ERROR: Can't find tsearch2 config by locale
</pre>
You can always change the current configuration manually
by calling the <tt>set_curcfg()</tt> function
described in the section on
<a href="tsearch2-ref.html#configurations">Configurations</a>
in the Reference.
<p>
Each configuration serves as an index into two different tables:
in <tt>pg_ts_cfg</tt> it determines
which parser will break our text into tokens,
and in <tt>pg_ts_cfgmap</tt>
it directs each token to a dictionary for processing.
The steps in detail are:
<ul>
<li class=big>
<p>First, our text is parsed,
using the parser listed for our configuration in the <tt>pg_ts_cfg</tt> table.
We are using the <tt>'default'</tt> configuration,
so the table tells us to use the <tt>'default'</tt> parser:
<pre>
=# <b>SELECT * FROM pg_ts_cfg WHERE ts_name = 'default'</b>
ts_name | prs_name | locale
---------+----------+--------
default | default | C
(1 row)
</pre>
So our text will be parsed as though we had called:
<pre>
=# <b>select * from parse('default',
'The walls extend upward for well over 100 feet.')</b>
</pre>
This breaks the text into a list of tokens
which are each labelled with an integer type:
<p align=center>
The<sub>1</sub>&diams;<sub>12</sub
>walls<sub>1</sub>&diams;<sub>12</sub
>extend<sub>1</sub>&diams;<sub>12</sub
>upward<sub>1</sub>&diams;<sub>12</sub
>for<sub>1</sub>&diams;<sub>12</sub
>well<sub>1</sub>&diams;<sub>12</sub
>over<sub>1</sub>&diams;<sub>12</sub
>100<sub>22</sub>&diams;<sub>12</sub
>feet<sub>1</sub>.<sub>12</sub>
<p>
Each word has been assigned type&nbsp;1;
each space (represented here by a diamond) and the period, type&nbsp;12;
and the number one hundred, type&nbsp;22.
We can retrieve the alias for each type
through the <tt>token_type</tt> function:
<pre>
=# <b>select * from token_type('default')
where tokid = 1 or tokid = 12 or tokid = 22</b>
tokid | alias | descr
-------+-------+------------------
1 | lword | Latin word
12 | blank | Space symbols
22 | uint | Unsigned integer
(3 rows)
</pre>
<li class=big>
Next, the tokens are assigned to dictionaries
by looking up their type aliases in <tt>pg_ts_cfgmap</tt>
to determine which dictionary should process each token.
Since we are using the <tt>'default'</tt> configuration:
<pre>
=# <b>select * from pg_ts_cfgmap where ts_name = 'default' and
(tok_alias = 'lword' or tok_alias = 'blank' or tok_alias = 'uint')</b>
ts_name | tok_alias | dict_name
---------+-----------+-----------
default | lword | {en_stem}
default | uint | {simple}
(2 rows)
</pre>
Since this map provides no dictionary for <tt>blank</tt> tokens,
the spaces and period are simply discarded,
leaving nine tokens,
which are then numbered by their position:
<p align=center>
The<sup>1</sup>
walls<sup>2</sup>
extend<sup>3</sup>
upward<sup>4</sup>
for<sup>5</sup>
well<sup>6</sup>
over<sup>7</sup>
100<sup>8</sup>
feet<sup>9</sup>
<li class=big>
Finally, the words are reduced to lexemes by their respective dictionaries.
The <tt>100</tt> is submitted to the <tt>simple</tt> dictionary,
which returns tokens unaltered except for making them lowercase:
<pre>
=# <b>select lexize('simple', '100')</b>
lexize
--------
{100}
(1 row)
</pre>
The other words are submitted to <tt>en_stem</tt>
which reduces each English word to a linguistic stem,
and then discards stems which belong to its list of stop words;
you can see the list of stop words
in the file whose path is in the <tt>dict_initoption</tt> field
of the <tt>pg_ts_dict</tt> table entry for <tt>en_stem</tt>.
The first three words of our text illustrate respectively
an <tt>en_stem</tt> stop word,
a word which <tt>en_stem</tt> alters by stemming,
and a word which <tt>en_stem</tt> leaves alone:
<pre>
=# <b>select lexize('en_stem', 'The')</b>
lexize
--------
{}
(1 row)
=# <b>select lexize('en_stem', 'walls')</b>
lexize
--------
{wall}
(1 row)
=# <b>select lexize('en_stem', 'extend')</b>
lexize
----------
{extend}
(1 row)
</pre>
Once <tt>en_stem</tt> is done discarding stop words and stemming the rest,
we are left with:
<p align=center>
wall<sup>2</sup>
extend<sup>3</sup>
upward<sup>4</sup>
well<sup>6</sup>
100<sup>8</sup>
feet<sup>9</sup>
<p>
Which is precisely the result of the example that began this section.
</ul>
Query words are stemmed by the <tt>to_tsquery()</tt> function
using the same scheme to determine the dictionary for each token,
with the difference that the query parser recognizes as special
the boolean operators that separate query words.
</body>
</html>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<link type="text/css" rel="stylesheet" href="/~megera/postgres/gist/tsearch/tsearch.css">
<title>tsearch2 reference</title>
</head>
<body>
<h1 align=center>The tsearch2 Reference</h1>
<p align=center>
Brandon Craig Rhodes<br>30 June 2003
<p>
This Reference documents the user types and functions
of the tsearch2 module for PostgreSQL.
An introduction to the module is provided
by the <a href="tsearch2-guide.html">tsearch2 Guide</a>,
a companion document to this one.
You can retrieve a beta copy of the tsearch2 module from the
<a href="http://www.sai.msu.su/~megera/postgres/gist/">GiST for PostgreSQL</a>
page &mdash; look under the section entitled <i>Development History</i>
for the current version.
<h2><a name="vq">Vectors and Queries</a></h2>
Vectors and queries both store lexemes,
but for different purposes.
A <tt>tsvector</tt> stores the lexemes
of the words that are parsed out of a document,
and can also remember the position of each word.
A <tt>tsquery</tt> specifies a boolean condition among lexemes.
<p>
Any of the following functions with a <tt><i>configuration</i></tt> argument
can use either an integer <tt>id</tt> or textual <tt>ts_name</tt>
to select a configuration;
if the option is omitted, then the current configuration is used.
For more information on the current configuration,
read the next section on Configurations.
<h3>Vector Operations</h3>
<dl>
<dt>
<tt>to_tsvector( <em>[</em><i>configuration</i>,<em>]</em>
<i>document</i> TEXT) RETURNS tsvector</tt>
<dd>
Parses a document into tokens,
reduces the tokens to lexemes,
and returns a <tt>tsvector</tt> which lists the lexemes
together with their positions in the document.
For the best description of this process,
see the section on <a href="tsearch2-guide.html#parsing_lexing">Parsing and Lexing</a>
in the accompanying tsearch2 Guide.
<dt>
<tt>strip(<i>vector</i> tsvector) RETURNS tsvector</tt>
<dd>
Return a vector which lists the same lexemes
as the given <tt><i>vector</i></tt>,
but which lacks any information
about where in the document each lexeme appeared.
While the returned vector is thus useless for relevance ranking,
it will usually be much smaller.
<dt>
<tt>setweight(<i>vector</i> tsvector, <i>letter</i>) RETURNS tsvector</tt>
<dd>
This function returns a copy of the input vector
in which every location has been labelled
with either the <tt><i>letter</i></tt>
<tt>'A'</tt>, <tt>'B'</tt>, or <tt>'C'</tt>,
or the default label <tt>'D'</tt>
(which is the default with which new vectors are created,
and as such is usually not displayed).
These labels are retained when vectors are concatenated,
allowing words from different parts of a document
to be weighted differently by ranking functions.
<dt>
<tt><i>vector1</i> || <i>vector2</i></tt>
<dt class=br>
<tt>concat(<i>vector1</i> tsvector, <i>vector2</i> tsvector)
RETURNS tsvector</tt>
<dd>
Returns a vector which combines the lexemes and position information
in the two vectors given as arguments.
Position weight labels (described in the previous paragraph)
are retained intact during the concatenation.
This has at least two uses.
First,
if some sections of your document
need to be parsed with different configurations than others,
you can parse them separately
and concatenate the resulting vectors into one.
Second,
you can weight words from some sections of your document
more heavily than those from others by:
parsing the sections into separate vectors;
assigning the vectors different position labels
with the <tt>setweight()</tt> function;
concatenating them into a single vector;
and then providing a <tt><i>weights</i></tt> argument
to the <tt>rank()</tt> function
that assigns different weights to positions with different labels.
<dt>
<tt>tsvector_size(<i>vector</i> tsvector) RETURNS INT4</tt>
<dd>
Returns the number of lexemes stored in the vector.
<dt>
<tt><i>text</i>::tsvector RETURNS tsvector</tt>
<dd>
Directly casting text to a <tt>tsvector</tt>
allows you to directly inject lexemes into a vector,
with whatever positions and position weights you choose to specify.
The <tt><i>text</i></tt> should be formatted
like the vector would be printed by the output of a <tt>SELECT</tt>.
See the <a href="tsearch2-guide.html#casting">Casting</a>
section in the Guide for details.
</dl>
<h3>Query Operations</h3>
<dl>
<dt>
<tt>to_tsquery( <em>[</em><i>configuration</i>,<em>]</em>
<i>querytext</i> text) RETURNS tsquery</tt>
<dd>
Parses a query,
which should be single words separated by the boolean operators
&ldquo;<tt>&amp;</tt>&rdquo;&nbsp;and,
&ldquo;<tt>|</tt>&rdquo;&nbsp;or,
and&nbsp;&ldquo;<tt>!</tt>&rdquo;&nbsp;not,
which can be grouped using parentheses.
Each word is reduced to a lexeme using the current
or specified configuration.
<dt>
<tt>querytree(<i>query</i> tsquery) RETURNS text</tt>
<dd>
This might return a textual representation of the given query.
<dt>
<tt><i>text</i>::tsquery RETURNS tsquery</tt>
<dd>
Directly casting text to a <tt>tsquery</tt>
allows you to directly inject lexemes into a query,
with whatever positions and position weight flags you choose to specify.
The <tt><i>text</i></tt> should be formatted
like the query would be printed by the output of a <tt>SELECT</tt>.
See the <a href="tsearch2-guide.html#casting">Casting</a>
section in the Guide for details.
</dl>
<h2><a name="configurations">Configurations</a></h2>
A configuration specifies all of the equipment necessary
to transform a document into a <tt>tsvector</tt>:
the parser that breaks its text into tokens,
and the dictionaries which then transform each token into a lexeme.
Every call to <tt>to_tsvector()</tt> (described above)
uses a configuration to perform its processing.
Three configurations come with tsearch2:
<ul>
<li><b>default</b> &mdash; Indexes words and numbers,
using the <i>en_stem</i> English Snowball stemmer for Latin-alphabet words
and the <i>simple</i> dictionary for all others.
<li><b>default_russian</b> &mdash; Indexes words and numbers,
using the <i>en_stem</i> English Snowball stemmer for Latin-alphabet words
and the <i>ru_stem</i> Russian Snowball dictionary for all others.
<li><b>simple</b> &mdash; Processes both words and numbers
with the <i>simple</i> dictionary,
which neither discards any stop words nor alters them.
</ul>
The tsearch2 module initially chooses your current configuration
by looking for your current locale in the <tt>locale</tt> field
of the <tt>pg_ts_cfg</tt> table described below.
You can manipulate the current configuration yourself with these functions:
<dl>
<dt>
<tt>set_curcfg( <i>id</i> INT <em>|</em> <i>ts_name</i> TEXT
) RETURNS VOID</tt>
<dd>
Set the current configuration used by <tt>to_tsvector</tt>
and <tt>to_tsquery</tt>.
<dt>
<tt>show_curcfg() RETURNS INT4</tt>
<dd>
Returns the integer <tt>id</tt> of the current configuration.
</dl>
<p>
Each configuration is defined by a record in the <tt>pg_ts_cfg</tt> table:
<pre>create table pg_ts_cfg (
id int not null primary key,
ts_name text not null,
prs_name text not null,
locale text
);</pre>
The <tt>id</tt> and <tt>ts_name</tt> are unique values
which identify the configuration;
the <tt>prs_name</tt> specifies which parser the configuration uses.
Once this parser has split document text into tokens,
the type of each resulting token &mdash;
or, more specifically, the type's <tt>lex_alias</tt>
as specified in the parser's <tt>lexem_type()</tt> table &mdash;
is searched for together with the configuration's <tt>ts_name</tt>
in the <tt>pg_ts_cfgmap</tt> table:
<pre>create table pg_ts_cfgmap (
ts_name text not null,
lex_alias text not null,
dict_name text[],
primary key (ts_name,lex_alias)
);</pre>
Those tokens whose types are not listed are discarded.
The remaining tokens are assigned integer positions,
starting with 1 for the first token in the document,
and turned into lexemes with the help of the dictionaries
whose names are given in the <tt>dict_name</tt> array for their type.
These dictionaries are tried in order,
stopping either with the first one to return a lexeme for the token,
or discarding the token if no dictionary returns a lexeme for it.
<h2><a name="parsers">Parsers</a></h2>
Each parser is defined by a record in the <tt>pg_ts_parser</tt> table:
<pre>create table pg_ts_parser (
prs_id int not null primary key,
prs_name text not null,
prs_start oid not null,
prs_getlexem oid not null,
prs_end oid not null,
prs_headline oid not null,
prs_lextype oid not null,
prs_comment text
);</pre>
The <tt>prs_id</tt> and <tt>prs_name</tt> uniquely identify the parser,
while <tt>prs_comment</tt> usually describes its name and version
for the reference of users.
The other items identify the low-level functions
which make the parser operate,
and are only of interest to someone writing a parser of their own.
<p>
The tsearch2 module comes with one parser named <tt>default</tt>
which is suitable for parsing most plain text and HTML documents.
<p>
Each <tt><i>parser</i></tt> argument below
must designate a parser with either an integer <tt><i>prs_id</i></tt>
or a textual <tt><i>prs_name</i></tt>;
the current parser is used when this argument is omitted.
<dl>
<dt>
<tt>CREATE FUNCTION set_curprs(<i>parser</i>) RETURNS VOID</tt>
<dd>
Selects a current parser
which will be used when any of the following functions
are called without a parser as an argument.
<dt>
<tt>CREATE FUNCTION lexem_type(
<em>[</em> <i>parser</i> <em>]</em>
) RETURNS SETOF lexemtype</tt>
<dd>
Returns a table which defines and describes
each kind of token the parser may produce as output.
For each token type the table gives the <tt>lexid</tt>
with which the parser will label each token of that type,
the <tt>alias</tt> which names the token type,
and a short description <tt>descr</tt> for the user to read.
<dt>
<tt>CREATE FUNCTION parse(
<em>[</em> <i>parser</i>, <em>]</em> <i>document</i> TEXT
) RETURNS SETOF lexemtype</tt>
<dd>
Parses the given document and returns a series of records,
one for each token produced by parsing.
Each token includes a <tt>lexid</tt> giving its type
and a <tt>lexem</tt> which gives its content.
</dl>
<h2><a name="dictionaries">Dictionaries</a></h2>
Dictionaries take textual tokens as input,
usually those produced by a parser,
and return lexemes which are usually some reduced form of the token.
Among the dictionaries which come installed with tsearch2 are:
<ul>
<li><b>simple</b> simply folds uppercase letters to lowercase
before returning the word.
<li><b>en_stem</b> runs an English Snowball stemmer on each word
that attempts to reduce the various forms of a verb or noun
to a single recognizable form.
<li><b>ru_stem</b> runs a Russian Snowball stemmer on each word.
</ul>
Each dictionary is defined by an entry in the <tt>pg_ts_dict</tt> table:
<pre>CREATE TABLE pg_ts_dict (
dict_id int not null primary key,
dict_name text not null,
dict_init oid,
dict_initoption text,
dict_lemmatize oid not null,
dict_comment text
);</pre>
The <tt>dict_id</tt> and <tt>dict_name</tt>
serve as unique identifiers for the dictionary.
The meaning of the <tt>dict_initoption</tt> varies among dictionaries,
but for the built-in Snowball dictionaries
it specifies a file from which stop words should be read.
The <tt>dict_comment</tt> is a human-readable description of the dictionary.
The other fields are internal function identifiers
useful only to developers trying to implement their own dictionaries.
<p>
The argument named <tt><i>dictionary</i></tt>
in each of the following functions
should be either an integer <tt>dict_id</tt> or a textual <tt>dict_name</tt>
identifying which dictionary should be used for the operation;
if omitted then the current dictionary is used.
<dl>
<dt>
<tt>CREATE FUNCTION set_curdict(<i>dictionary</i>) RETURNS VOID</tt>
<dd>
Selects a current dictionary for use by functions
that do not select a dictionary explicitly.
<dt>
<tt>CREATE FUNCTION lexize(
<em>[</em> <i>dictionary</i>, <em>]</em> <i>word</i> text)
RETURNS TEXT[]</tt>
<dd>
Reduces a single word to a lexeme.
Note that lexemes are arrays of zero or more strings,
since in some languages there might be several base words
from which an inflected form could arise.
</dl>
<h2><a name="ranking">Ranking</a></h2>
Ranking attempts to measure how relevant documents are to particular queries
by inspecting the number of times each search word appears in the document,
and whether different search terms occur near each other.
Note that this information is only available in unstripped vectors &mdash;
ranking functions will only return a useful result
for a <tt>tsvector</tt> which still has position information!
<p>
Both of these ranking functions
take an integer <i>normalization</i> option
that specifies whether a document's length should impact its rank.
This is often desirable,
since a hundred-word document with five instances of a search word
is probably more relevant than a thousand-word document with five instances.
The option can have the values:
<ul>
<li><tt>0</tt> (the default) ignores document length.
<li><tt>1</tt> divides the rank by the logarithm of the length.
<li><tt>2</tt> divides the rank by the length itself.
</ul>
The two ranking functions currently available are:
<dl>
<dt>
<tt>CREATE FUNCTION rank(<br>
<em>[</em> <i>weights</i> float4[], <em>]</em>
<i>vector</i> tsvector, <i>query</i> tsquery,
<em>[</em> <i>normalization</i> int4 <em>]</em><br>
) RETURNS float4</tt>
<dd>
This is the ranking function from the old version of OpenFTS,
and offers the ability to weight word instances more heavily
depending on how you have classified them.
The <i>weights</i> specify how heavily to weight each category of word:
<pre
>{<i>D-weight</i>, <i>C-weight</i>, <i>B-weight</i>, <i>A-weight</i>}</pre>
If no weights are provided, then these defaults are used:
<pre>{0.1, 0.2, 0.4, 1.0}</pre>
Often weights are used to mark words from special areas of the document,
like the title or an initial abstract,
and make them more or less important than words in the document body.
<dt>
<tt>CREATE FUNCTION rank_cd(<br>
<em>[</em> <i>K</i> int4, <em>]</em>
<i>vector</i> tsvector, <i>query</i> tsquery,
<em>[</em> <i>normalization</i> int4 <em>]</em><br>
) RETURNS float4</tt>
<dd>
This function computes the cover density ranking
for the given document <i>vector</i> and <i>query</i>,
as described in Clarke, Cormack, and Tudhope's
&ldquo;<a href="http://citeseer.nj.nec.com/clarke00relevance.html"
>Relevance Ranking for One to Three Term Queries</a>&rdquo;
in the 1999 <i>Information Processing and Management</i>.
The value <i>K</i> is one of the values from their formula,
and defaults to&nbsp;<i>K</i>=4.
The examples in their paper use <i>K</i>=16;
we can roughly describe the term
as stating how far apart two search terms can fall
before the formula begins penalizing them for lack of proximity.
</dl>
<h2><a name="headlines">Headlines</a></h2>
<dl>
<dt>
<tt>CREATE FUNCTION headline(<br>
<em>[</em> <i>id</i> int4, <em>|</em> <i>ts_name</i> text, <em>]</em>
<i>document</i> text, <i>query</i> tsquery,
<em>[</em> <i>options</i> text <em>]</em><br>
) RETURNS text</tt>
<dd>
Every form of the <tt>headline()</tt> function
accepts a <tt>document</tt> along with a <tt>query</tt>,
and returns one or more ellipsis-separated excerpts from the document
in which terms from the query are highlighted.
The configuration with which to parse the document
can be specified by either its <i>id</i> or <i>ts_name</i>;
if none is specified then the current configuration is used instead.
<p>
An <i>options</i> string if provided should be a comma-separated list
of one or more &lsquo;<i>option</i><tt>=</tt><i>value</i>&rsquo; pairs.
The available options are:
<ul>
<li><tt>StartSel</tt>, <tt>StopSel</tt> &mdash;
the strings with which query words appearing in the document
should be delimited to distinguish them from other excerpted words.
<li><tt>MaxWords</tt>, <tt>MinWords</tt> &mdash;
limits on the longest and shortest headlines you will accept.
<li><tt>ShortWord</tt> &mdash;
this prevents your headline from beginning or ending
with a word which has this many characters or fewer.
The default value of <tt>3</tt> should eliminate most English
conjunctions and articles.
</ul>
Any unspecified options receive these defaults:
<pre>
StartSel=&lt;b&gt;, StopSel=&lt;/b&gt;, MaxWords=35, MinWords=15, ShortWord=3
</pre>
</dl>
</body>
</html>
--
-- first, define the datatype. Turn off echoing so that expected file
-- does not depend on contents of seg.sql.
--
\set ECHO none
psql:tsearch2.sql:13: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_dict_pkey' for table 'pg_ts_dict'
psql:tsearch2.sql:145: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_parser_pkey' for table 'pg_ts_parser'
psql:tsearch2.sql:244: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfg_pkey' for table 'pg_ts_cfg'
psql:tsearch2.sql:251: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfgmap_pkey' for table 'pg_ts_cfgmap'
psql:tsearch2.sql:339: NOTICE: ProcedureCreate: type tsvector is not yet defined
psql:tsearch2.sql:344: NOTICE: Argument type "tsvector" is only a shell
psql:tsearch2.sql:398: NOTICE: ProcedureCreate: type tsquery is not yet defined
psql:tsearch2.sql:403: NOTICE: Argument type "tsquery" is only a shell
psql:tsearch2.sql:545: NOTICE: ProcedureCreate: type gtsvector is not yet defined
psql:tsearch2.sql:550: NOTICE: Argument type "gtsvector" is only a shell
--tsvector
SELECT '1'::tsvector;
tsvector
----------
'1'
(1 row)
SELECT '1 '::tsvector;
tsvector
----------
'1'
(1 row)
SELECT ' 1'::tsvector;
tsvector
----------
'1'
(1 row)
SELECT ' 1 '::tsvector;
tsvector
----------
'1'
(1 row)
SELECT '1 2'::tsvector;
tsvector
----------
'1' '2'
(1 row)
SELECT '\'1 2\''::tsvector;
tsvector
----------
'1 2'
(1 row)
SELECT '\'1 \\\'2\''::tsvector;
tsvector
----------
'1 \'2'
(1 row)
SELECT '\'1 \\\'2\'3'::tsvector;
tsvector
-------------
'3' '1 \'2'
(1 row)
SELECT '\'1 \\\'2\' 3'::tsvector;
tsvector
-------------
'3' '1 \'2'
(1 row)
SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
tsvector
------------------
'4' ' 3' '1 \'2'
(1 row)
select '\'w\':4A,3B,2C,1D,5 a:8';
?column?
-----------------------
'w':4A,3B,2C,1D,5 a:8
(1 row)
select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
?column?
----------------------------
'a':3A,4B 'b':2A 'ba':1237
(1 row)
select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
setweight
----------------------------------------------------------
'a':1C,3C 'w':5C,6C,12C,13C 'asd':1C 'zxc':81C,222C,567C
(1 row)
select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
strip
---------------
'a' 'w' 'asd'
(1 row)
--tsquery
SELECT '1'::tsquery;
tsquery
---------
'1'
(1 row)
SELECT '1 '::tsquery;
tsquery
---------
'1'
(1 row)
SELECT ' 1'::tsquery;
tsquery
---------
'1'
(1 row)
SELECT ' 1 '::tsquery;
tsquery
---------
'1'
(1 row)
SELECT '\'1 2\''::tsquery;
tsquery
---------
'1 2'
(1 row)
SELECT '\'1 \\\'2\''::tsquery;
tsquery
---------
'1 \'2'
(1 row)
SELECT '!1'::tsquery;
tsquery
---------
!'1'
(1 row)
SELECT '1|2'::tsquery;
tsquery
-----------
'1' | '2'
(1 row)
SELECT '1|!2'::tsquery;
tsquery
------------
'1' | !'2'
(1 row)
SELECT '!1|2'::tsquery;
tsquery
------------
!'1' | '2'
(1 row)
SELECT '!1|!2'::tsquery;
tsquery
-------------
!'1' | !'2'
(1 row)
SELECT '!(!1|!2)'::tsquery;
tsquery
------------------
!( !'1' | !'2' )
(1 row)
SELECT '!(!1|2)'::tsquery;
tsquery
-----------------
!( !'1' | '2' )
(1 row)
SELECT '!(1|!2)'::tsquery;
tsquery
-----------------
!( '1' | !'2' )
(1 row)
SELECT '!(1|2)'::tsquery;
tsquery
----------------
!( '1' | '2' )
(1 row)
SELECT '1&2'::tsquery;
tsquery
-----------
'1' & '2'
(1 row)
SELECT '!1&2'::tsquery;
tsquery
------------
!'1' & '2'
(1 row)
SELECT '1&!2'::tsquery;
tsquery
------------
'1' & !'2'
(1 row)
SELECT '!1&!2'::tsquery;
tsquery
-------------
!'1' & !'2'
(1 row)
SELECT '(1&2)'::tsquery;
tsquery
-----------
'1' & '2'
(1 row)
SELECT '1&(2)'::tsquery;
tsquery
-----------
'1' & '2'
(1 row)
SELECT '!(1)&2'::tsquery;
tsquery
------------
!'1' & '2'
(1 row)
SELECT '!(1&2)'::tsquery;
tsquery
----------------
!( '1' & '2' )
(1 row)
SELECT '1|2&3'::tsquery;
tsquery
-----------------
'1' | '2' & '3'
(1 row)
SELECT '1|(2&3)'::tsquery;
tsquery
-----------------
'1' | '2' & '3'
(1 row)
SELECT '(1|2)&3'::tsquery;
tsquery
---------------------
( '1' | '2' ) & '3'
(1 row)
SELECT '1|2&!3'::tsquery;
tsquery
------------------
'1' | '2' & !'3'
(1 row)
SELECT '1|!2&3'::tsquery;
tsquery
------------------
'1' | !'2' & '3'
(1 row)
SELECT '!1|2&3'::tsquery;
tsquery
------------------
!'1' | '2' & '3'
(1 row)
SELECT '!1|(2&3)'::tsquery;
tsquery
------------------
!'1' | '2' & '3'
(1 row)
SELECT '!(1|2)&3'::tsquery;
tsquery
----------------------
!( '1' | '2' ) & '3'
(1 row)
SELECT '(!1|2)&3'::tsquery;
tsquery
----------------------
( !'1' | '2' ) & '3'
(1 row)
SELECT '1|(2|(4|(5|6)))'::tsquery;
tsquery
-----------------------------------------
'1' | ( '2' | ( '4' | ( '5' | '6' ) ) )
(1 row)
SELECT '1|2|4|5|6'::tsquery;
tsquery
-----------------------------------------
( ( ( '1' | '2' ) | '4' ) | '5' ) | '6'
(1 row)
SELECT '1&(2&(4&(5&6)))'::tsquery;
tsquery
-----------------------------
'1' & '2' & '4' & '5' & '6'
(1 row)
SELECT '1&2&4&5&6'::tsquery;
tsquery
-----------------------------
'1' & '2' & '4' & '5' & '6'
(1 row)
SELECT '1&(2&(4&(5|6)))'::tsquery;
tsquery
---------------------------------
'1' & '2' & '4' & ( '5' | '6' )
(1 row)
SELECT '1&(2&(4&(5|!6)))'::tsquery;
tsquery
----------------------------------
'1' & '2' & '4' & ( '5' | !'6' )
(1 row)
SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
tsquery
------------------------------------------
'1' & '2' & ' 4' & ( '|5' | '6 \' !|&' )
(1 row)
SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
?column?
------------------------------------------
'the wether':dc & ' sKies ':BC & a:d b:a
(1 row)
select lexize('simple', 'ASD56 hsdkf');
lexize
-----------------
{"asd56 hsdkf"}
(1 row)
select lexize('en_stem', 'SKIES Problems identity');
lexize
--------------------------
{"skies problems ident"}
(1 row)
select * from token_type('default');
tokid | alias | descr
-------+--------------+-----------------------------------
1 | lword | Latin word
2 | nlword | Non-latin word
3 | word | Word
4 | email | Email
5 | url | URL
6 | host | Host
7 | sfloat | Scientific notation
8 | version | VERSION
9 | part_hword | Part of hyphenated word
10 | nlpart_hword | Non-latin part of hyphenated word
11 | lpart_hword | Latin part of hyphenated word
12 | blank | Space symbols
13 | tag | HTML Tag
14 | http | HTTP head
15 | hword | Hyphenated word
16 | lhword | Latin hyphenated word
17 | nlhword | Non-latin hyphenated word
18 | uri | URI
19 | file | File or path name
20 | float | Decimal notation
21 | int | Signed integer
22 | uint | Unsigned integer
23 | entity | HTML Entity
(23 rows)
select * from parse('default', '345 qwe@efd.r \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
<i <b> wow < jqw <> qwerty');
tokid | token
-------+--------------------------------------
22 | 345
12 |
4 | qwe@efd.r
12 |
12 | '
12 |
14 | http://
6 | www.com
12 | /
12 |
14 | http://
5 | aew.werc.ewr/?ad=qwe&dw
6 | aew.werc.ewr
18 | /?ad=qwe&dw
12 |
5 | 1aew.werc.ewr/?ad=qwe&dw
6 | 1aew.werc.ewr
18 | /?ad=qwe&dw
12 |
6 | 2aew.werc.ewr
12 |
14 | http://
5 | 3aew.werc.ewr/?ad=qwe&dw
6 | 3aew.werc.ewr
18 | /?ad=qwe&dw
12 |
14 | http://
6 | 4aew.werc.ewr
12 |
14 | http://
5 | 5aew.werc.ewr:8100/?
6 | 5aew.werc.ewr
18 | :8100/?
12 |
1 | ad
12 | =
1 | qwe
12 | &
1 | dw
12 |
5 | 6aew.werc.ewr:8100/?ad=qwe&dw
6 | 6aew.werc.ewr
18 | :8100/?ad=qwe&dw
12 |
5 | 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32
6 | 7aew.werc.ewr
18 | :8100/?ad=qwe&dw=%20%32
12 |
7 | +4.0e-10
12 |
1 | qwe
12 |
1 | qwe
12 |
1 | qwqwe
12 |
20 | 234.435
12 |
22 | 455
12 |
20 | 5.005
12 |
4 | teodor@stack.net
12 |
16 | qwe-wer
11 | qwe
12 | -
11 | wer
12 |
1 | asdf
12 |
13 |
1 | qwer
12 |
1 | jf
12 |
1 | sdjk
13 |
12 |
3 | ewr1
12 | >
12 |
3 | ewri2
12 |
13 |
12 |
19 | /usr/local/fff
12 |
19 | /awdf/dwqe/4325
12 |
19 | rewt/ewr
12 |
1 | wefjn
12 |
19 | /wqe-324/ewr
12 |
6 | gist.h
12 |
6 | gist.h.c
12 |
6 | gist.c
12 | .
12 |
1 | readline
12 |
20 | 4.2
12 |
20 | 4.2
12 | .
12 |
20 | 4.2
12 | ,
12 |
15 | readline-4
11 | readline
12 | -
20 | 4.2
12 |
15 | readline-4
11 | readline
12 | -
20 | 4.2
12 | .
12 |
22 | 234
12 |
13 |
12 |
1 | wow
12 |
12 | <
12 |
1 | jqw
12 |
12 | <
12 | >
12 |
1 | qwerty
(138 rows)
SELECT to_tsvector('default', '345 qwe@efd.r \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
<i <b> wow < jqw <> qwerty');
to_tsvector
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
'ad':18 'dw':20 'jf':40 '234':62 '345':1 '4.2':53,54,55,58,61 '455':32 'jqw':64 'qwe':19,28,29,36 'wer':37 'wow':63 'asdf':38 'ewr1':42 'qwer':39 'sdjk':41 '5.005':33 'ewri2':43 'qwqwe':30 'wefjn':47 'gist.c':51 'gist.h':49 'qwerti':65 '234.435':31 ':8100/?':17 'qwe-wer':35 'readlin':52,57,60 'www.com':3 '+4.0e-10':27 'gist.h.c':50 'rewt/ewr':46 'qwe@efd.r':2 'readline-4':56,59 '/?ad=qwe&dw':6,9,13 '/wqe-324/ewr':48 'aew.werc.ewr':5 '1aew.werc.ewr':8 '2aew.werc.ewr':10 '3aew.werc.ewr':12 '4aew.werc.ewr':14 '5aew.werc.ewr':16 '6aew.werc.ewr':22 '7aew.werc.ewr':25 '/usr/local/fff':44 '/awdf/dwqe/4325':45 ':8100/?ad=qwe&dw':23 'teodor@stack.net':34 '5aew.werc.ewr:8100/?':15 ':8100/?ad=qwe&dw=%20%32':26 'aew.werc.ewr/?ad=qwe&dw':4 '1aew.werc.ewr/?ad=qwe&dw':7 '3aew.werc.ewr/?ad=qwe&dw':11 '6aew.werc.ewr:8100/?ad=qwe&dw':21 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':24
(1 row)
SELECT length(to_tsvector('default', '345 qw'));
length
--------
2
(1 row)
SELECT length(to_tsvector('default', '345 qwe@efd.r \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
<i <b> wow < jqw <> qwerty'));
length
--------
53
(1 row)
select to_tsquery('default', 'qwe & sKies ');
to_tsquery
---------------
'qwe' & 'sky'
(1 row)
select to_tsquery('simple', 'qwe & sKies ');
to_tsquery
-----------------
'qwe' & 'skies'
(1 row)
select to_tsquery('default', '\'the wether\':dc & \' sKies \':BC ');
to_tsquery
------------------------
'wether':CD & 'sky':BC
(1 row)
select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
?column?
----------
t
(1 row)
select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
?column?
----------
t
(1 row)
select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
?column?
----------
t
(1 row)
select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
?column?
----------
f
(1 row)
select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
?column?
----------
t
(1 row)
CREATE TABLE test_tsvector( t text, a tsvector );
\copy test_tsvector from 'data/test_tsearch.data'
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
count
-------
158
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
count
-------
17
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
count
-------
6
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
count
-------
98
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
count
-------
23
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
count
-------
39
(1 row)
create index wowidx on test_tsvector using gist (a);
set enable_seqscan=off;
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
count
-------
158
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
count
-------
17
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
count
-------
6
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
count
-------
98
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
count
-------
23
(1 row)
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
count
-------
39
(1 row)
select set_curcfg('default');
set_curcfg
------------
(1 row)
CREATE TRIGGER tsvectorupdate
BEFORE UPDATE OR INSERT ON test_tsvector
FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
count
-------
0
(1 row)
INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
count
-------
1
(1 row)
UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
count
-------
0
(1 row)
drop trigger tsvectorupdate on test_tsvector;
create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
create trigger tsvectorupdate before update or insert on test_tsvector
for each row execute procedure tsearch2(a, wow, t);
insert into test_tsvector (t) values ('345 qwerty');
select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
count
-------
1
(1 row)
select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
count
-------
1
(1 row)
select rank(' a:1 s:2C d g'::tsvector, 'a | s');
rank
------
0.28
(1 row)
select rank(' a:1 s:2B d g'::tsvector, 'a | s');
rank
------
0.46
(1 row)
select rank(' a:1 s:2 d g'::tsvector, 'a | s');
rank
------
0.19
(1 row)
select rank(' a:1 s:2C d g'::tsvector, 'a & s');
rank
----------
0.140153
(1 row)
select rank(' a:1 s:2B d g'::tsvector, 'a & s');
rank
----------
0.198206
(1 row)
select rank(' a:1 s:2 d g'::tsvector, 'a & s');
rank
-----------
0.0991032
(1 row)
insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
word | ndoc | nentry
-----------+------+--------
qq | 109 | 109
qt | 102 | 102
qe | 100 | 100
qh | 98 | 98
qw | 98 | 98
qa | 97 | 97
ql | 94 | 94
qs | 94 | 94
qi | 92 | 92
qr | 92 | 92
qj | 91 | 91
qd | 87 | 87
qz | 87 | 87
qc | 86 | 86
qn | 86 | 86
qv | 85 | 85
qo | 84 | 84
qy | 84 | 84
wp | 84 | 84
qf | 81 | 81
qk | 80 | 80
wt | 80 | 80
qu | 79 | 79
qg | 78 | 78
wb | 78 | 78
qx | 77 | 77
wr | 77 | 77
ws | 73 | 73
wy | 73 | 73
wa | 72 | 72
wf | 70 | 70
wg | 70 | 70
wi | 70 | 70
wu | 70 | 70
wc | 69 | 69
wj | 69 | 69
qp | 68 | 68
wh | 68 | 68
wv | 68 | 68
qb | 66 | 66
eu | 65 | 65
we | 65 | 65
wl | 65 | 65
wq | 65 | 65
wk | 64 | 64
ee | 63 | 63
eo | 63 | 63
qm | 63 | 63
wn | 63 | 63
ef | 62 | 62
eh | 62 | 62
ex | 62 | 62
re | 62 | 62
rl | 62 | 62
rr | 62 | 62
eb | 61 | 61
ek | 61 | 61
ww | 61 | 61
ea | 60 | 60
ei | 60 | 60
em | 60 | 60
eq | 60 | 60
ew | 60 | 60
ro | 60 | 60
rw | 60 | 60
tl | 60 | 60
eg | 59 | 59
en | 59 | 59
ez | 59 | 59
rj | 59 | 59
ry | 59 | 59
tw | 59 | 59
tx | 59 | 59
ej | 58 | 58
es | 58 | 58
ra | 58 | 58
rd | 58 | 58
rg | 58 | 58
rx | 58 | 58
tb | 58 | 58
wd | 58 | 58
ed | 57 | 57
tc | 57 | 57
wx | 57 | 57
er | 56 | 56
wm | 56 | 56
wo | 56 | 56
yw | 56 | 56
ep | 55 | 55
rk | 55 | 55
rp | 55 | 55
rz | 55 | 55
ta | 55 | 55
rq | 54 | 54
yn | 54 | 54
ec | 53 | 53
el | 53 | 53
ru | 53 | 53
rv | 53 | 53
tz | 53 | 53
un | 53 | 53
wz | 53 | 53
ys | 53 | 53
oe | 52 | 52
tn | 52 | 52
tq | 52 | 52
ty | 52 | 52
uq | 52 | 52
yg | 52 | 52
ym | 52 | 52
oi | 51 | 51
to | 51 | 51
yi | 51 | 51
pn | 50 | 50
rb | 50 | 50
ri | 50 | 50
rn | 50 | 50
ti | 50 | 50
tv | 50 | 50
um | 50 | 50
ut | 50 | 50
ya | 50 | 50
et | 49 | 49
ix | 49 | 49
ox | 49 | 49
q3 | 49 | 49
yf | 49 | 49
yl | 49 | 49
yo | 49 | 49
yr | 49 | 49
ev | 48 | 48
ey | 48 | 48
ot | 48 | 48
rc | 48 | 48
rm | 48 | 48
th | 48 | 48
uo | 48 | 48
ia | 47 | 47
q1 | 47 | 47
rh | 47 | 47
yq | 47 | 47
yz | 47 | 47
av | 46 | 46
im | 46 | 46
os | 46 | 46
tk | 46 | 46
yy | 46 | 46
ir | 45 | 45
iv | 45 | 45
iw | 45 | 45
oj | 45 | 45
pl | 45 | 45
pv | 45 | 45
te | 45 | 45
tu | 45 | 45
uv | 45 | 45
ux | 45 | 45
yd | 45 | 45
yx | 45 | 45
ij | 44 | 44
pa | 44 | 44
se | 44 | 44
tg | 44 | 44
ue | 44 | 44
yb | 44 | 44
yt | 44 | 44
if | 43 | 43
ik | 43 | 43
in | 43 | 43
ph | 43 | 43
pj | 43 | 43
q5 | 43 | 43
rt | 43 | 43
ub | 43 | 43
ud | 43 | 43
uh | 43 | 43
uj | 43 | 43
w7 | 43 | 43
ye | 43 | 43
yv | 43 | 43
db | 42 | 42
do | 42 | 42
id | 42 | 42
ie | 42 | 42
ii | 42 | 42
of | 42 | 42
pr | 42 | 42
q4 | 42 | 42
rf | 42 | 42
td | 42 | 42
uk | 42 | 42
up | 42 | 42
yh | 42 | 42
yk | 42 | 42
io | 41 | 41
it | 41 | 41
pb | 41 | 41
q0 | 41 | 41
q7 | 41 | 41
rs | 41 | 41
tj | 41 | 41
ur | 41 | 41
ig | 40 | 40
iu | 40 | 40
iy | 40 | 40
od | 40 | 40
q6 | 40 | 40
tt | 40 | 40
ug | 40 | 40
ul | 40 | 40
us | 40 | 40
uu | 40 | 40
uz | 40 | 40
ah | 39 | 39
ar | 39 | 39
as | 39 | 39
dl | 39 | 39
dt | 39 | 39
hk | 39 | 39
iq | 39 | 39
is | 39 | 39
oc | 39 | 39
ov | 39 | 39
oy | 39 | 39
uf | 39 | 39
ui | 39 | 39
aa | 38 | 38
ad | 38 | 38
fh | 38 | 38
gm | 38 | 38
ic | 38 | 38
jd | 38 | 38
om | 38 | 38
or | 38 | 38
oz | 38 | 38
pm | 38 | 38
q8 | 38 | 38
sf | 38 | 38
sm | 38 | 38
sv | 38 | 38
uc | 38 | 38
ak | 37 | 37
aq | 37 | 37
di | 37 | 37
e4 | 37 | 37
fi | 37 | 37
fx | 37 | 37
ha | 37 | 37
hp | 37 | 37
ih | 37 | 37
og | 37 | 37
po | 37 | 37
pw | 37 | 37
sn | 37 | 37
su | 37 | 37
sw | 37 | 37
w6 | 37 | 37
yj | 37 | 37
yu | 37 | 37
ag | 36 | 36
am | 36 | 36
at | 36 | 36
e1 | 36 | 36
ff | 36 | 36
gx | 36 | 36
he | 36 | 36
hj | 36 | 36
ib | 36 | 36
iz | 36 | 36
lm | 36 | 36
ok | 36 | 36
pk | 36 | 36
pp | 36 | 36
pu | 36 | 36
sp | 36 | 36
tf | 36 | 36
tm | 36 | 36
ay | 35 | 35
dy | 35 | 35
fu | 35 | 35
ku | 35 | 35
lh | 35 | 35
lq | 35 | 35
o6 | 35 | 35
ob | 35 | 35
on | 35 | 35
op | 35 | 35
pd | 35 | 35
ps | 35 | 35
si | 35 | 35
sl | 35 | 35
sx | 35 | 35
tp | 35 | 35
tr | 35 | 35
w3 | 35 | 35
y1 | 35 | 35
al | 34 | 34
ap | 34 | 34
az | 34 | 34
dc | 34 | 34
dd | 34 | 34
dz | 34 | 34
e0 | 34 | 34
fj | 34 | 34
fp | 34 | 34
gd | 34 | 34
gg | 34 | 34
gk | 34 | 34
go | 34 | 34
ho | 34 | 34
jc | 34 | 34
oa | 34 | 34
oh | 34 | 34
oo | 34 | 34
pe | 34 | 34
px | 34 | 34
sd | 34 | 34
sq | 34 | 34
sy | 34 | 34
ab | 33 | 33
ae | 33 | 33
af | 33 | 33
aw | 33 | 33
e5 | 33 | 33
fk | 33 | 33
gu | 33 | 33
gy | 33 | 33
hb | 33 | 33
hm | 33 | 33
hy | 33 | 33
jl | 33 | 33
jr | 33 | 33
ls | 33 | 33
oq | 33 | 33
pt | 33 | 33
sa | 33 | 33
sh | 33 | 33
sj | 33 | 33
so | 33 | 33
sz | 33 | 33
t7 | 33 | 33
uw | 33 | 33
w8 | 33 | 33
y0 | 33 | 33
yp | 33 | 33
dh | 32 | 32
dp | 32 | 32
dq | 32 | 32
e7 | 32 | 32
fn | 32 | 32
fo | 32 | 32
fr | 32 | 32
ga | 32 | 32
gq | 32 | 32
hh | 32 | 32
il | 32 | 32
ip | 32 | 32
jv | 32 | 32
lc | 32 | 32
ol | 32 | 32
pc | 32 | 32
q9 | 32 | 32
ds | 31 | 31
e9 | 31 | 31
fd | 31 | 31
fe | 31 | 31
ft | 31 | 31
gs | 31 | 31
hl | 31 | 31
hs | 31 | 31
jb | 31 | 31
kc | 31 | 31
kw | 31 | 31
mj | 31 | 31
q2 | 31 | 31
r3 | 31 | 31
sb | 31 | 31
sk | 31 | 31
ts | 31 | 31
ua | 31 | 31
yc | 31 | 31
zw | 31 | 31
ao | 30 | 30
du | 30 | 30
fw | 30 | 30
gj | 30 | 30
hu | 30 | 30
kh | 30 | 30
kl | 30 | 30
kv | 30 | 30
ld | 30 | 30
lf | 30 | 30
pq | 30 | 30
py | 30 | 30
sc | 30 | 30
sr | 30 | 30
uy | 30 | 30
vg | 30 | 30
w2 | 30 | 30
xg | 30 | 30
xo | 30 | 30
au | 29 | 29
cx | 29 | 29
fv | 29 | 29
gh | 29 | 29
gl | 29 | 29
gt | 29 | 29
hw | 29 | 29
ji | 29 | 29
km | 29 | 29
la | 29 | 29
ou | 29 | 29
r0 | 29 | 29
w0 | 29 | 29
y9 | 29 | 29
zm | 29 | 29
zs | 29 | 29
zy | 29 | 29
ax | 28 | 28
cd | 28 | 28
dj | 28 | 28
dn | 28 | 28
dr | 28 | 28
ht | 28 | 28
jf | 28 | 28
lo | 28 | 28
lr | 28 | 28
na | 28 | 28
ng | 28 | 28
r8 | 28 | 28
ss | 28 | 28
xt | 28 | 28
y6 | 28 | 28
aj | 27 | 27
ca | 27 | 27
cg | 27 | 27
df | 27 | 27
dg | 27 | 27
dv | 27 | 27
gc | 27 | 27
gn | 27 | 27
gr | 27 | 27
hd | 27 | 27
i8 | 27 | 27
jn | 27 | 27
jt | 27 | 27
lp | 27 | 27
o9 | 27 | 27
ow | 27 | 27
r9 | 27 | 27
t8 | 27 | 27
u5 | 27 | 27
w4 | 27 | 27
xm | 27 | 27
zz | 27 | 27
a2 | 26 | 26
ac | 26 | 26
ai | 26 | 26
cm | 26 | 26
cu | 26 | 26
cw | 26 | 26
dk | 26 | 26
e2 | 26 | 26
fc | 26 | 26
fg | 26 | 26
fl | 26 | 26
fs | 26 | 26
ge | 26 | 26
gv | 26 | 26
hc | 26 | 26
hi | 26 | 26
hx | 26 | 26
jj | 26 | 26
jm | 26 | 26
kg | 26 | 26
kk | 26 | 26
kn | 26 | 26
ko | 26 | 26
kt | 26 | 26
ln | 26 | 26
mx | 26 | 26
pg | 26 | 26
r4 | 26 | 26
t6 | 26 | 26
u1 | 26 | 26
u4 | 26 | 26
vi | 26 | 26
vr | 26 | 26
w1 | 26 | 26
w9 | 26 | 26
xk | 26 | 26
xs | 26 | 26
zf | 26 | 26
bb | 25 | 25
dm | 25 | 25
dw | 25 | 25
e8 | 25 | 25
fb | 25 | 25
gw | 25 | 25
h8 | 25 | 25
hf | 25 | 25
hg | 25 | 25
hn | 25 | 25
hv | 25 | 25
i0 | 25 | 25
i3 | 25 | 25
jg | 25 | 25
jo | 25 | 25
jx | 25 | 25
kq | 25 | 25
lw | 25 | 25
lx | 25 | 25
o3 | 25 | 25
p7 | 25 | 25
pf | 25 | 25
pi | 25 | 25
pz | 25 | 25
r2 | 25 | 25
r5 | 25 | 25
t9 | 25 | 25
u7 | 25 | 25
ve | 25 | 25
vu | 25 | 25
y5 | 25 | 25
y8 | 25 | 25
zt | 25 | 25
an | 24 | 24
bj | 24 | 24
dx | 24 | 24
fm | 24 | 24
fz | 24 | 24
gb | 24 | 24
gi | 24 | 24
gp | 24 | 24
hr | 24 | 24
hz | 24 | 24
i5 | 24 | 24
jq | 24 | 24
kb | 24 | 24
ke | 24 | 24
kf | 24 | 24
kp | 24 | 24
lv | 24 | 24
lz | 24 | 24
o8 | 24 | 24
r1 | 24 | 24
s7 | 24 | 24
sg | 24 | 24
u3 | 24 | 24
vj | 24 | 24
vt | 24 | 24
w5 | 24 | 24
zj | 24 | 24
be | 23 | 23
bi | 23 | 23
bn | 23 | 23
cn | 23 | 23
cy | 23 | 23
da | 23 | 23
e6 | 23 | 23
fa | 23 | 23
js | 23 | 23
ki | 23 | 23
kz | 23 | 23
li | 23 | 23
mt | 23 | 23
mz | 23 | 23
nu | 23 | 23
o2 | 23 | 23
p5 | 23 | 23
p8 | 23 | 23
r7 | 23 | 23
t0 | 23 | 23
t1 | 23 | 23
t3 | 23 | 23
vm | 23 | 23
xh | 23 | 23
xx | 23 | 23
zp | 23 | 23
zr | 23 | 23
a3 | 22 | 22
bg | 22 | 22
de | 22 | 22
e3 | 22 | 22
fq | 22 | 22
i2 | 22 | 22
i7 | 22 | 22
ja | 22 | 22
jk | 22 | 22
jy | 22 | 22
kr | 22 | 22
kx | 22 | 22
ly | 22 | 22
nb | 22 | 22
nh | 22 | 22
ns | 22 | 22
s3 | 22 | 22
u2 | 22 | 22
vn | 22 | 22
xe | 22 | 22
y4 | 22 | 22
zh | 22 | 22
zo | 22 | 22
zq | 22 | 22
a1 | 21 | 21
bl | 21 | 21
bo | 21 | 21
cb | 21 | 21
ch | 21 | 21
co | 21 | 21
cq | 21 | 21
cv | 21 | 21
d7 | 21 | 21
g8 | 21 | 21
je | 21 | 21
jp | 21 | 21
jz | 21 | 21
lg | 21 | 21
me | 21 | 21
nc | 21 | 21
p4 | 21 | 21
st | 21 | 21
vb | 21 | 21
vw | 21 | 21
vz | 21 | 21
xj | 21 | 21
xq | 21 | 21
xu | 21 | 21
xy | 21 | 21
zb | 21 | 21
bv | 20 | 20
bz | 20 | 20
cj | 20 | 20
cp | 20 | 20
cs | 20 | 20
d8 | 20 | 20
ju | 20 | 20
k0 | 20 | 20
ks | 20 | 20
ky | 20 | 20
l1 | 20 | 20
lb | 20 | 20
lj | 20 | 20
lu | 20 | 20
nm | 20 | 20
nw | 20 | 20
nz | 20 | 20
o7 | 20 | 20
p6 | 20 | 20
vh | 20 | 20
vp | 20 | 20
vs | 20 | 20
xb | 20 | 20
xr | 20 | 20
z3 | 20 | 20
zv | 20 | 20
bq | 19 | 19
br | 19 | 19
by | 19 | 19
cl | 19 | 19
d2 | 19 | 19
f1 | 19 | 19
f4 | 19 | 19
gf | 19 | 19
hq | 19 | 19
k9 | 19 | 19
ka | 19 | 19
kd | 19 | 19
kj | 19 | 19
md | 19 | 19
mi | 19 | 19
ml | 19 | 19
my | 19 | 19
nj | 19 | 19
ny | 19 | 19
o1 | 19 | 19
s4 | 19 | 19
s8 | 19 | 19
t5 | 19 | 19
u0 | 19 | 19
xl | 19 | 19
zg | 19 | 19
zi | 19 | 19
a5 | 18 | 18
b9 | 18 | 18
bh | 18 | 18
bx | 18 | 18
d3 | 18 | 18
fy | 18 | 18
g2 | 18 | 18
i4 | 18 | 18
i6 | 18 | 18
i9 | 18 | 18
jw | 18 | 18
lk | 18 | 18
mb | 18 | 18
mv | 18 | 18
nd | 18 | 18
nr | 18 | 18
nt | 18 | 18
t2 | 18 | 18
xf | 18 | 18
xv | 18 | 18
zc | 18 | 18
zd | 18 | 18
a7 | 17 | 17
bc | 17 | 17
bd | 17 | 17
ce | 17 | 17
cf | 17 | 17
cr | 17 | 17
g9 | 17 | 17
j0 | 17 | 17
j5 | 17 | 17
mp | 17 | 17
mr | 17 | 17
mw | 17 | 17
nk | 17 | 17
no | 17 | 17
o0 | 17 | 17
o4 | 17 | 17
s0 | 17 | 17
s1 | 17 | 17
t4 | 17 | 17
u9 | 17 | 17
vf | 17 | 17
vx | 17 | 17
x3 | 17 | 17
xi | 17 | 17
xn | 17 | 17
xz | 17 | 17
zl | 17 | 17
zn | 17 | 17
a0 | 16 | 16
bu | 16 | 16
bw | 16 | 16
ci | 16 | 16
ck | 16 | 16
d0 | 16 | 16
d4 | 16 | 16
d6 | 16 | 16
f5 | 16 | 16
g1 | 16 | 16
gz | 16 | 16
h4 | 16 | 16
jh | 16 | 16
l4 | 16 | 16
lt | 16 | 16
mg | 16 | 16
mh | 16 | 16
mo | 16 | 16
ni | 16 | 16
nl | 16 | 16
nq | 16 | 16
p2 | 16 | 16
u8 | 16 | 16
v9 | 16 | 16
vl | 16 | 16
vo | 16 | 16
xp | 16 | 16
y3 | 16 | 16
y7 | 16 | 16
z7 | 16 | 16
za | 16 | 16
zx | 16 | 16
bf | 15 | 15
bp | 15 | 15
cc | 15 | 15
g0 | 15 | 15
j2 | 15 | 15
j9 | 15 | 15
l6 | 15 | 15
le | 15 | 15
ll | 15 | 15
m8 | 15 | 15
ma | 15 | 15
mu | 15 | 15
nf | 15 | 15
r6 | 15 | 15
s5 | 15 | 15
vd | 15 | 15
vk | 15 | 15
xa | 15 | 15
xw | 15 | 15
y2 | 15 | 15
z8 | 15 | 15
ze | 15 | 15
zu | 15 | 15
a6 | 14 | 14
bk | 14 | 14
bt | 14 | 14
c0 | 14 | 14
f8 | 14 | 14
g3 | 14 | 14
g4 | 14 | 14
g7 | 14 | 14
h6 | 14 | 14
h7 | 14 | 14
h9 | 14 | 14
i1 | 14 | 14
k1 | 14 | 14
k2 | 14 | 14
k6 | 14 | 14
k7 | 14 | 14
mc | 14 | 14
nn | 14 | 14
p9 | 14 | 14
u6 | 14 | 14
xd | 14 | 14
z6 | 14 | 14
zk | 14 | 14
a4 | 13 | 13
a9 | 13 | 13
bm | 13 | 13
cz | 13 | 13
f2 | 13 | 13
f3 | 13 | 13
f6 | 13 | 13
g6 | 13 | 13
h2 | 13 | 13
j1 | 13 | 13
k5 | 13 | 13
m1 | 13 | 13
mf | 13 | 13
mq | 13 | 13
np | 13 | 13
nx | 13 | 13
o5 | 13 | 13
p0 | 13 | 13
p1 | 13 | 13
s6 | 13 | 13
s9 | 13 | 13
v6 | 13 | 13
va | 13 | 13
vc | 13 | 13
xc | 13 | 13
z0 | 13 | 13
c9 | 12 | 12
d1 | 12 | 12
h0 | 12 | 12
h1 | 12 | 12
j8 | 12 | 12
k4 | 12 | 12
l5 | 12 | 12
l9 | 12 | 12
m2 | 12 | 12
m6 | 12 | 12
m9 | 12 | 12
n7 | 12 | 12
nv | 12 | 12
p3 | 12 | 12
vq | 12 | 12
vy | 12 | 12
x1 | 12 | 12
x2 | 12 | 12
z5 | 12 | 12
c1 | 11 | 11
c3 | 11 | 11
ct | 11 | 11
f9 | 11 | 11
g5 | 11 | 11
j6 | 11 | 11
l8 | 11 | 11
n1 | 11 | 11
v7 | 11 | 11
vv | 11 | 11
x5 | 11 | 11
x8 | 11 | 11
z2 | 11 | 11
b0 | 10 | 10
b2 | 10 | 10
b8 | 10 | 10
c6 | 10 | 10
f0 | 10 | 10
f7 | 10 | 10
h5 | 10 | 10
j3 | 10 | 10
j4 | 10 | 10
j7 | 10 | 10
l7 | 10 | 10
m0 | 10 | 10
m7 | 10 | 10
mm | 10 | 10
mn | 10 | 10
n8 | 10 | 10
v1 | 10 | 10
x0 | 10 | 10
x6 | 10 | 10
x7 | 10 | 10
x9 | 10 | 10
a8 | 9 | 9
b1 | 9 | 9
b4 | 9 | 9
b5 | 9 | 9
b6 | 9 | 9
ba | 9 | 9
bs | 9 | 9
c5 | 9 | 9
d5 | 9 | 9
k8 | 9 | 9
l0 | 9 | 9
m5 | 9 | 9
mk | 9 | 9
ms | 9 | 9
n3 | 9 | 9
n4 | 9 | 9
n6 | 9 | 9
ne | 9 | 9
v0 | 9 | 9
v3 | 9 | 9
v5 | 9 | 9
v8 | 9 | 9
b3 | 8 | 8
b7 | 8 | 8
c2 | 8 | 8
c7 | 8 | 8
c8 | 8 | 8
d9 | 8 | 8
k3 | 8 | 8
l3 | 8 | 8
m3 | 8 | 8
m4 | 8 | 8
n0 | 8 | 8
n5 | 8 | 8
v4 | 8 | 8
x4 | 8 | 8
z1 | 8 | 8
z9 | 8 | 8
l2 | 7 | 7
s2 | 7 | 7
z4 | 7 | 7
1l | 6 | 6
1o | 6 | 6
1t | 6 | 6
2e | 6 | 6
2o | 6 | 6
c4 | 6 | 6
h3 | 6 | 6
n2 | 6 | 6
n9 | 6 | 6
v2 | 6 | 6
2l | 5 | 5
2u | 5 | 5
3k | 5 | 5
4p | 5 | 5
18 | 4 | 4
1a | 4 | 4
1i | 4 | 4
2s | 4 | 4
3q | 4 | 4
3y | 4 | 4
5y | 4 | 4
1f | 3 | 3
1h | 3 | 3
1m | 3 | 3
1p | 3 | 3
1s | 3 | 3
1v | 3 | 3
1x | 3 | 3
27 | 3 | 3
2a | 3 | 3
2b | 3 | 3
2h | 3 | 3
2n | 3 | 3
2p | 3 | 3
2v | 3 | 3
2y | 3 | 3
3d | 3 | 3
3w | 3 | 3
3z | 3 | 3
4a | 3 | 3
4d | 3 | 3
4v | 3 | 3
4z | 3 | 3
5e | 3 | 3
5i | 3 | 3
5k | 3 | 3
5o | 3 | 3
5t | 3 | 3
6b | 3 | 3
6d | 3 | 3
6o | 3 | 3
6w | 3 | 3
7a | 3 | 3
7h | 3 | 3
7r | 3 | 3
93 | 3 | 3
10 | 2 | 2
12 | 2 | 2
15 | 2 | 2
16 | 2 | 2
19 | 2 | 2
1b | 2 | 2
1d | 2 | 2
1g | 2 | 2
1j | 2 | 2
1n | 2 | 2
1r | 2 | 2
1u | 2 | 2
1w | 2 | 2
1y | 2 | 2
20 | 2 | 2
25 | 2 | 2
2d | 2 | 2
2i | 2 | 2
2j | 2 | 2
2k | 2 | 2
2q | 2 | 2
2r | 2 | 2
2t | 2 | 2
2w | 2 | 2
2z | 2 | 2
3b | 2 | 2
3f | 2 | 2
3h | 2 | 2
3o | 2 | 2
3p | 2 | 2
3r | 2 | 2
3s | 2 | 2
3v | 2 | 2
42 | 2 | 2
43 | 2 | 2
4f | 2 | 2
4g | 2 | 2
4h | 2 | 2
4j | 2 | 2
4m | 2 | 2
4r | 2 | 2
4s | 2 | 2
4t | 2 | 2
4u | 2 | 2
5c | 2 | 2
5f | 2 | 2
5h | 2 | 2
5p | 2 | 2
5q | 2 | 2
5z | 2 | 2
6a | 2 | 2
6h | 2 | 2
6q | 2 | 2
6r | 2 | 2
6t | 2 | 2
6y | 2 | 2
70 | 2 | 2
7c | 2 | 2
7g | 2 | 2
7k | 2 | 2
7o | 2 | 2
7u | 2 | 2
8j | 2 | 2
8w | 2 | 2
9f | 2 | 2
9y | 2 | 2
copyright | 2 | 2
foo | 1 | 3
bar | 1 | 2
0e | 1 | 1
0h | 1 | 1
0p | 1 | 1
0w | 1 | 1
0z | 1 | 1
11 | 1 | 1
13 | 1 | 1
14 | 1 | 1
17 | 1 | 1
1k | 1 | 1
1q | 1 | 1
1z | 1 | 1
24 | 1 | 1
26 | 1 | 1
28 | 1 | 1
2f | 1 | 1
30 | 1 | 1
345 | 1 | 1
37 | 1 | 1
39 | 1 | 1
3a | 1 | 1
3e | 1 | 1
3g | 1 | 1
3i | 1 | 1
3m | 1 | 1
3t | 1 | 1
3u | 1 | 1
40 | 1 | 1
41 | 1 | 1
44 | 1 | 1
45 | 1 | 1
48 | 1 | 1
4b | 1 | 1
4c | 1 | 1
4i | 1 | 1
4k | 1 | 1
4n | 1 | 1
4o | 1 | 1
4q | 1 | 1
4w | 1 | 1
4y | 1 | 1
51 | 1 | 1
55 | 1 | 1
56 | 1 | 1
5a | 1 | 1
5d | 1 | 1
5g | 1 | 1
5j | 1 | 1
5l | 1 | 1
5s | 1 | 1
5u | 1 | 1
5x | 1 | 1
64 | 1 | 1
68 | 1 | 1
6c | 1 | 1
6f | 1 | 1
6g | 1 | 1
6i | 1 | 1
6k | 1 | 1
6n | 1 | 1
6p | 1 | 1
6s | 1 | 1
6u | 1 | 1
6x | 1 | 1
72 | 1 | 1
7f | 1 | 1
7j | 1 | 1
7n | 1 | 1
7p | 1 | 1
7w | 1 | 1
7y | 1 | 1
7z | 1 | 1
80 | 1 | 1
82 | 1 | 1
85 | 1 | 1
8d | 1 | 1
8i | 1 | 1
8l | 1 | 1
8n | 1 | 1
8p | 1 | 1
8t | 1 | 1
8x | 1 | 1
95 | 1 | 1
97 | 1 | 1
9a | 1 | 1
9e | 1 | 1
9h | 1 | 1
9r | 1 | 1
9w | 1 | 1
qwerti | 1 | 1
(1146 rows)
select reset_tsearch();
NOTICE: TSearch cache cleaned
reset_tsearch
---------------
(1 row)
select to_tsquery('default', 'skies & books');
to_tsquery
----------------
'sky' & 'book'
(1 row)
select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
A thousand years to trace
The granite features of this cliff
In crag and scarp and base.
It took the sea an hour one night
An hour of storm to place
The sculpture of these granite seams,
Upon a woman s face. E. J. Pratt (1882 1964)
'), to_tsquery('sea&thousand&years'));
rank_cd
---------
1.2
(1 row)
select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
A thousand years to trace
The granite features of this cliff
In crag and scarp and base.
It took the sea an hour one night
An hour of storm to place
The sculpture of these granite seams,
Upon a woman s face. E. J. Pratt (1882 1964)
'), to_tsquery('granite&sea'));
rank_cd
----------
0.880303
(1 row)
select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
A thousand years to trace
The granite features of this cliff
In crag and scarp and base.
It took the sea an hour one night
An hour of storm to place
The sculpture of these granite seams,
Upon a woman s face. E. J. Pratt (1882 1964)
'), to_tsquery('sea'));
rank_cd
---------
2
(1 row)
select get_covers(to_tsvector('Erosion It took the sea a thousand years,
A thousand years to trace
The granite features of this cliff
In crag and scarp and base.
It took the sea an hour one night
An hour of storm to place
The sculpture of these granite seams,
Upon a woman s face. E. J. Pratt (1882 1964)
'), to_tsquery('sea&thousand&years'));
get_covers
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
eros took {1 sea thousand year }1 {2 thousand year trace granit featur cliff crag scarp base took sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964
(1 row)
select get_covers(to_tsvector('Erosion It took the sea a thousand years,
A thousand years to trace
The granite features of this cliff
In crag and scarp and base.
It took the sea an hour one night
An hour of storm to place
The sculpture of these granite seams,
Upon a woman s face. E. J. Pratt (1882 1964)
'), to_tsquery('granite&sea'));
get_covers
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
eros took {1 sea thousand year thousand year trace {2 granit }1 featur cliff crag scarp base took {3 sea }2 hour one night hour storm place sculptur granit }3 seam upon woman face e j pratt 1882 1964
(1 row)
select get_covers(to_tsvector('Erosion It took the sea a thousand years,
A thousand years to trace
The granite features of this cliff
In crag and scarp and base.
It took the sea an hour one night
An hour of storm to place
The sculpture of these granite seams,
Upon a woman s face. E. J. Pratt (1882 1964)
'), to_tsquery('sea'));
get_covers
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
eros took {1 sea }1 thousand year thousand year trace granit featur cliff crag scarp base took {2 sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964
(1 row)
select headline('Erosion It took the sea a thousand years,
A thousand years to trace
The granite features of this cliff
In crag and scarp and base.
It took the sea an hour one night
An hour of storm to place
The sculpture of these granite seams,
Upon a woman s face. E. J. Pratt (1882 1964)
', to_tsquery('sea&thousand&years'));
headline
-----------------------------------------------------------------------------------------------------------------------
<b>sea</b> a <b>thousand</b> <b>years</b>,
A <b>thousand</b> <b>years</b> to trace
The granite features of this cliff
(1 row)
select headline('Erosion It took the sea a thousand years,
A thousand years to trace
The granite features of this cliff
In crag and scarp and base.
It took the sea an hour one night
An hour of storm to place
The sculpture of these granite seams,
Upon a woman s face. E. J. Pratt (1882 1964)
', to_tsquery('granite&sea'));
headline
----------------------------------------------------------------------------------------------
<b>sea</b> an hour one night
An hour of storm to place
The sculpture of these <b>granite</b>
(1 row)
select headline('Erosion It took the sea a thousand years,
A thousand years to trace
The granite features of this cliff
In crag and scarp and base.
It took the sea an hour one night
An hour of storm to place
The sculpture of these granite seams,
Upon a woman s face. E. J. Pratt (1882 1964)
', to_tsquery('sea'));
headline
-------------------------------------------------------------------------------------------
<b>sea</b> a thousand years,
A thousand years to trace
The granite features of this cliff
(1 row)
# Makefile template for a generated tsearch2 dictionary module.
# config.sh copies this file into the new contrib directory, filling in the
# directory, module-name and object-list placeholders and rewriting the
# include-flags line below.
subdir = contrib/CFG_DIR
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
# Shared library to build, and the objects it is linked from.
MODULE_big = dict_CFG_MODNAME
OBJS = CFG_OFILE
# SQL script and README shipped with the module.
DATA_built = dict_CFG_MODNAME.sql
DOCS = README.CFG_MODNAME
# Left empty here: config.sh replaces this whole line with the -I paths
# pointing into ../tsearch2.
PG_CPPFLAGS =
# Link against the tsearch2 archive in the sibling contrib directory.
SHLIB_LINK = ../tsearch2/libtsearch2.a
include $(top_srcdir)/contrib/contrib-global.mk
Gendict - generate dictionary templates for contrib/tsearch2 module.
This utility helps people create dictionaries for the contrib/tsearch2
module. In particular, it has built-in support for Snowball stemmers.
The programming API for tsearch2 dictionaries is described in the tsearch2
documentation.
Prerequisites:
* PostgreSQL 7.3 or above.
* The tsearch2 module sources, already compiled.
* Rights to install contrib modules.
Usage:
run config.sh without parameters to see options and arguments
Usage:
./config.sh -n DICTNAME ( [ -s [ -p PREFIX ] ] | [ -c CFILES ] [ -h HFILES ] [ -i ] ) [ -v ] [ -d DIR ] [ -C COMMENT ]
-v - be verbose
-d DIR - name of directory in PGSQL_SRC/contrib (default dict_DICTNAME)
-C COMMENT - dictionary comment
Generate Snowball stemmer:
./config.sh -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]
-s - generate Snowball wrapper
-p - prefix of Snowball's function, (default DICTNAME)
Generate template dictionary:
./config.sh -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]
-c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.
These files will be used in Makefile.
-h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.
These files will be used in Makefile and subinclude.h
-i - dictionary has init method
Example 1:
Create Portuguese stemmer
0. cd PGSQL_SRC/contrib/tsearch2/gendict
1. Obtain stem.{c,h} files for Portuguese
wget http://snowball.tartarus.org/portuguese/stem.c
wget http://snowball.tartarus.org/portuguese/stem.h
2. Create template files for Portuguese
./config.sh -n pt -s -p portuguese -v -C'Snowball stemmer for Portuguese'
Note that the argument of the -p option must be *the same* as the name of the
stemming function in stem.c (without the _stem suffix).
A bunch of files will be generated and placed in PGSQL_SRC/contrib/dict_pt
directory.
3. Compile and install dictionary
cd PGSQL_SRC/contrib/dict_pt
make
make install
4. Test it
Sample portuguese words with the stemmed forms are available
from http://snowball.tartarus.org/portuguese/stemmer.html
createdb testdict
psql testdict < /usr/local/pgsql/share/contrib/tsearch2.sql
psql testdict < /usr/local/pgsql/share/contrib/dict_pt.sql
psql -d testdict -c "select lexize('pt','bobagem');"
lexize
---------
{bobag}
(1 row)
Here is the resulting entry in the pg_ts_dict table:
psql -d testdict -c "select * from pg_ts_dict where dict_name='pt';"
dict_name | dict_init | dict_initoption | dict_lexize | dict_comment
-----------+-----------+-----------------+-------------+---------------------------------
pt | 7177806 | | 7159330 | Snowball stemmer for Portuguese
(1 row)
Note that the dictionary and its corresponding entry in the tsearch2
configuration are now installed, and you may modify the entry using
plain SQL commands, for example to specify stop words.
Example 2:
a) Simple template dictionary with init method
./config.sh -n wow -v -i -C WOW
b) Create simple template dict (without init method):
./config.sh -n wow -v -C WOW
The same as above, but the dictionary will not have an init method.
Dictionaries obtained in a) and b) are fully working and ready
for use:
a) lowercases the input word and removes it if it is a stop word
b) recognizes any word
c) Simple template dictionary with source files (with init method):
./config.sh -n wow -v -i -c a.c -h a.h -C WOW
Source files ( a.c ) must be placed in contrib/tsearch2/gendict directory.
These files will be used in Makefile.
Header files ( a.h ), must be placed in contrib/tsearch2/gendict directory.
These files will be used in Makefile and subinclude.h
d) Simple template dictionary with source files (without init method):
./config.sh -n wow -v -c a.c -h a.h -C WOW
The same as above, but the dictionary will not have an init method.
After that you have sources in PGSQL_SRC/contrib/dict_wow and
you may edit them to create actual dictionary.
Please, check Tsearch2 home page (http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/)
for additional information about "Gendict tutorial" and dictionaries.
\ No newline at end of file
#!/bin/sh
# Print the command-line synopsis of this script on standard output and
# terminate the script with exit status 1.  Called when no dictionary name
# was given or when getopts reports an unknown option.
usage () {
	echo Usage:
	echo "$0 -n DICTNAME ( [ -s [ -p PREFIX ] ] | [ -c CFILES ] [ -h HFILES ] [ -i ] ) [ -v ] [ -d DIR ] [ -C COMMENT ]"
	echo ' -v - be verbose'
	echo ' -d DIR - name of directory in PGSQL_SRC/contrib (default dict_DICTNAME)'
	echo ' -C COMMENT - dictionary comment'
	echo Generate Snowball stemmer:
	echo "$0 -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ]"
	echo ' -s - generate Snowball wrapper'
	echo " -p - prefix of Snowball's function, (default DICTNAME)"
	echo Generate template dictionary:
	echo "$0 -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ]"
	echo ' -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.'
	echo ' These files will be used in Makefile.'
	echo ' -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.'
	echo ' These files will be used in Makefile and subinclude.h'
	echo ' -i - dictionary has init method'
	exit 1
}
# Option defaults.  String-valued options start out empty; the three
# switches are kept as the words "yes" / "no".
dictname= cfile= hfile= dir= comment= prefix=
stemmode=no verbose=no hasinit=no

# Walk the command line; any option getopts does not know prints the
# synopsis and terminates the script.
while getopts n:c:C:h:d:p:vis opt
do
	case $opt in
		n) dictname="$OPTARG" ;;
		d) dir="$OPTARG" ;;
		C) comment="$OPTARG" ;;
		p) prefix="$OPTARG" ;;
		c) cfile="$OPTARG" ;;
		h) hfile="$OPTARG" ;;
		s) stemmode=yes ;;
		i) hasinit=yes ;;
		v) verbose=yes ;;
		\?) usage ;;
	esac
done
# A dictionary name is mandatory; it is always used in lower case.
# (Quoted so that glob characters in the name are not expanded.)
[ ${#dictname} -eq 0 ] && usage
dictname=$(echo "$dictname" | tr '[:upper:]' '[:lower:]')

# Snowball wrappers always have an init method and are always built from
# stem.c / stem.h; the function prefix defaults to the dictionary name.
if [ "$stemmode" = "yes" ] ; then
	[ ${#prefix} -eq 0 ] && prefix=$dictname
	hasinit=yes
	cfile="stem.c"
	hfile="stem.h"
fi

# Default target directory.
[ ${#dir} -eq 0 ] && dir="dict_$dictname"

# The comment is pasted into the SQL script either as the keyword null or as
# a single-quoted literal.  Embedded apostrophes are doubled so that a
# comment such as "Snowball's stemmer" still yields a valid SQL string.
if [ ${#comment} -eq 0 ]; then
	comment=null
else
	comment="'$(printf '%s\n' "$comment" | sed "s/'/''/g")'"
fi

# Object list for the Makefile: every source file with its ".c" suffix
# replaced by ".o" (only a real ".c" suffix is rewritten), followed by the
# object of the generated wrapper or template.
ofile=
for f in $cfile
do
	f=$(echo "$f" | sed 's#\.c$#.o#')
	ofile="$ofile $f"
done
if [ "$stemmode" = "yes" ] ; then
	ofile="$ofile dict_snowball.o"
else
	ofile="$ofile dict_tmpl.o"
fi
# With -v, report every setting that will be used for generation.
case $verbose in
yes)
	echo "Dictname: '"$dictname"'"
	echo "Snowball stemmer:" $stemmode
	echo "Has init method:" $hasinit
	if [ $stemmode = "yes" ]; then
		echo "Function prefix:" $prefix
	fi
	echo "Source files:" $cfile
	echo "Header files:" $hfile
	echo "Object files:" $ofile
	echo "Comment:" $comment
	echo "Directory:" ../../$dir
	;;
esac
# Make sure the target directory (two levels up from here) exists,
# creating it if necessary; give up if it cannot be created.
if [ $verbose = "yes" ]; then
	echo -n 'Build directory... '
fi
[ -d ../../$dir ] || mkdir ../../$dir || {
	echo "Can't create directory ../../$dir"
	exit 1
}
if [ $verbose = "yes" ]; then
	echo ok
fi
# Generate the module's Makefile from Makefile.IN in two passes: first fill
# in the directory, module name and object list, then replace the
# PG_CPPFLAGS line with the include paths the dictionary needs.
if [ $verbose = "yes" ]; then
	echo -n 'Build Makefile... '
fi
sed -e s#CFG_DIR#$dir# -e s#CFG_MODNAME#$dictname# -e "s#CFG_OFILE#$ofile#" < Makefile.IN > ../../$dir/Makefile.tmp
if [ $stemmode = "yes" ] ; then
	cppflags='-I../tsearch2/snowball -I../tsearch2'
else
	cppflags='-I../tsearch2'
fi
sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = $cppflags#" < ../../$dir/Makefile.tmp > ../../$dir/Makefile
rm ../../$dir/Makefile.tmp
if [ $verbose = "yes" ]; then
	echo ok
fi
# Generate dict_<name>.sql.in from sql.IN: fill in the module name and the
# comment, then keep (strip the marker from) or blank the marker-prefixed
# lines according to whether the dictionary has an init method and whether
# it is a Snowball wrapper.
[ $verbose = "yes" ] && echo -n Build dict_$dictname'.sql.in... '
sqlin="../../$dir/dict_$dictname.sql.in"
# The comment is user-supplied text that ends up on the right-hand side of a
# sed "s#...#...#" command.  Escape backslash, "&" (which sed would expand to
# the matched text) and the "#" delimiter so they are inserted literally.
sedcomment=$(printf '%s\n' "$comment" | sed 's/[\\&#]/\\&/g')
if [ $hasinit = "yes" ]; then
	sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$sedcomment#" | sed 's#^HASINIT##' | sed 's#^NOINIT.*$##' > "$sqlin.tmp"
	if [ $stemmode = "yes" ] ; then
		sed 's#^ISSNOWBALL##' < "$sqlin.tmp" | sed 's#^NOSNOWBALL.*$##' > "$sqlin"
	else
		sed 's#^NOSNOWBALL##' < "$sqlin.tmp" | sed 's#^ISSNOWBALL.*$##' > "$sqlin"
	fi
	rm "$sqlin.tmp"
else
	# No init method: this is never a Snowball wrapper, because Snowball
	# mode forces an init method earlier in the script.
	sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$sedcomment#" | sed 's#^NOINIT##' | sed 's#^HASINIT.*$##' | sed 's#^NOSNOWBALL##' | sed 's#^ISSNOWBALL.*$##' > "$sqlin"
fi
[ $verbose = "yes" ] && echo ok
# Copy the user's source files and then the header files into the target
# directory; stop at the first list that cannot be copied completely.
if [ -n "$cfile" ] || [ -n "$hfile" ] ; then
	if [ $verbose = "yes" ]; then
		echo -n 'Copy source and header files... '
	fi
	for list in "$cfile" "$hfile"
	do
		[ -n "$list" ] || continue
		cp $list ../../$dir || {
			echo "Cant cp all or one of files: $list"
			exit 1
		}
	done
	if [ $verbose = "yes" ]; then
		echo ok
	fi
fi
# Generate subinclude.h: one #include line per user-supplied header file.
[ $verbose = "yes" ] && echo -n 'Build sub-include header... '
# Start from an empty file.  ":" produces no output, so the redirection
# just truncates/creates the file.  "echo -n" is not used here because an
# echo without -n support would write a literal "-n" line into the header.
: > ../../$dir/subinclude.h
for i in $hfile
do
	echo "#include \"$i\"" >> ../../$dir/subinclude.h
done
[ $verbose = "yes" ] && echo ok
# Generate the C source of the dictionary: either the Snowball wrapper
# (module name and function prefix filled in) or the plain template, where
# the init-only lines are kept or blanked depending on -i.
case $stemmode in
yes)
	if [ $verbose = "yes" ]; then
		echo -n 'Build Snowball stemmer... '
	fi
	sed -e s#CFG_MODNAME#$dictname#g -e s#CFG_PREFIX#$prefix#g < dict_snowball.c.IN > ../../$dir/dict_snowball.c
	;;
*)
	if [ $verbose = "yes" ]; then
		echo -n 'Build dictinonary... '
	fi
	sed s#CFG_MODNAME#$dictname#g < dict_tmpl.c.IN > ../../$dir/dict_tmpl.c.tmp
	if [ $hasinit = "yes" ]; then
		sed -e 's#^HASINIT##' -e 's#^NOINIT.*$##' < ../../$dir/dict_tmpl.c.tmp > ../../$dir/dict_tmpl.c
	else
		sed -e 's#^HASINIT.*$##' -e 's#^NOINIT##' < ../../$dir/dict_tmpl.c.tmp > ../../$dir/dict_tmpl.c
	fi
	rm ../../$dir/dict_tmpl.c.tmp
	;;
esac
if [ $verbose = "yes" ]; then
	echo ok
fi
# Write a one-line README for the generated module and report completion.
if [ $verbose = "yes" ]; then
	echo -n "Build README.$dictname... "
fi
if [ $stemmode = "yes" ] ; then
	readme_text="Autogenerated Snowball's wrapper for $prefix"
else
	readme_text="Autogenerated template for $dictname"
fi
echo "$readme_text" > ../../$dir/README.$dictname
if [ $verbose = "yes" ]; then
	echo ok
fi
echo All is done
/*
* example of Snowball dictionary
* http://snowball.tartarus.org/
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include <stdlib.h>
#include <string.h>
#include "postgres.h"
#include "dict.h"
#include "common.h"
#include "snowball/header.h"
#include "subinclude.h"
/*
 * Per-dictionary state built by the init function in this file.
 * NOTE(review): the SQL template registers snb_lexize, which is not defined
 * in this file, as the lexize function of Snowball dictionaries, so this
 * layout presumably has to match what that function expects -- confirm
 * before reordering or adding members.
 */
typedef struct {
	struct SN_env *z;	/* stemmer environment obtained from the stemmer's create_env function */
	StopList stoplist;	/* stop words read from the init argument, if one was given */
	int (*stem)(struct SN_env * z);	/* the stemmer's stem function */
} DictSnowball;
PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);

/*
 * Dictionary init method.
 *
 * Argument 0 (text, may be NULL) is handed to readstoplist() to load the
 * stop-word list.  Returns a pointer to a malloc'ed DictSnowball holding
 * the stop list, a freshly created stemmer environment and the stem
 * function.  Raises an error if either allocation fails.
 */
Datum
dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
	DictSnowball *d = (DictSnowball*)malloc( sizeof(DictSnowball) );

	if ( !d )
		elog(ERROR, "No memory");
	memset(d,0,sizeof(DictSnowball));
	d->stoplist.wordop=lowerstr;

	if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {
		text *in = PG_GETARG_TEXT_P(0);

		readstoplist(in, &(d->stoplist));
		sortstoplist(&(d->stoplist));
		PG_FREE_IF_COPY(in, 0);
	}

	d->z = CFG_PREFIX_create_env();
	if (!d->z) {
		freestoplist(&(d->stoplist));
		/*
		 * d was obtained with malloc, so nothing else will release it once
		 * the error is raised; free it here to avoid leaking it.
		 */
		free(d);
		elog(ERROR,"No memory");
	}
	d->stem=CFG_PREFIX_stem;

	PG_RETURN_POINTER(d);
}
/*
* example of dictionary
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include "postgres.h"
#include "dict.h"
#include "common.h"
#include "subinclude.h"
/*
 * Dictionary state.  These lines carry the init marker, so config.sh only
 * keeps them when the dictionary is generated with an init method.
 */
HASINIT typedef struct {
HASINIT 	StopList stoplist;	/* stop words loaded by the init method */
HASINIT } DictExample;
/*
 * Init method of the template dictionary (kept only when the dictionary is
 * generated with an init method).  Allocates the dictionary state with
 * malloc, loads the stop-word list from argument 0 when one is supplied,
 * and returns a pointer to the state.
 */
HASINIT PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME);
HASINIT Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS);
HASINIT Datum
HASINIT dinit_CFG_MODNAME(PG_FUNCTION_ARGS) {
HASINIT 	DictExample *d = (DictExample*)malloc( sizeof(DictExample) );
HASINIT
HASINIT 	if ( !d )
HASINIT 		elog(ERROR, "No memory");
HASINIT 	memset(d,0,sizeof(DictExample));
HASINIT
HASINIT 	d->stoplist.wordop=lowerstr;	/* presumably the normalisation applied to stop words -- confirm in common.h */
HASINIT
HASINIT 	/* Your INIT code */
HASINIT
HASINIT 	if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) {	/* an init option was supplied */
HASINIT 		text *in = PG_GETARG_TEXT_P(0);
HASINIT 		readstoplist(in, &(d->stoplist));
HASINIT 		sortstoplist(&(d->stoplist));
HASINIT 		PG_FREE_IF_COPY(in, 0);
HASINIT 	}
HASINIT
HASINIT 	PG_RETURN_POINTER(d);
HASINIT }
/*
 * Lexize method of the template dictionary.
 *
 * Argument 1 points at the input word and argument 2 is passed to
 * pnstrdup() as its length.  Returns a palloc'ed, NULL-terminated array of
 * at most one string.  When the dictionary has an init method, an empty
 * word or a word found in the stop list yields an empty array; otherwise
 * the copy of the input word is always returned.
 */
PG_FUNCTION_INFO_V1(dlexize_CFG_MODNAME);
Datum dlexize_CFG_MODNAME(PG_FUNCTION_ARGS);
Datum
dlexize_CFG_MODNAME(PG_FUNCTION_ARGS) {
HASINIT 	DictExample *d = (DictExample*)PG_GETARG_POINTER(0);
	char *in = (char*)PG_GETARG_POINTER(1);
	char *txt = pnstrdup(in, PG_GETARG_INT32(2));
	char **res=palloc(sizeof(char*)*2);	/* room for one result plus the NULL terminator */
	/* Your dictionary-specific lexize code goes here */
HASINIT 	if ( *txt=='\0' || searchstoplist(&(d->stoplist),txt) ) {
HASINIT 		pfree(txt);
HASINIT 		res[0]=NULL;
HASINIT 	} else
	res[0]=txt;
	res[1]=NULL;
	PG_RETURN_POINTER(res);
}
-- Registration script template for a generated tsearch2 dictionary.
-- Lines that begin with an upper-case marker word are either kept (with the
-- marker removed) or blanked by config.sh, depending on whether the
-- dictionary has an init method and whether it is a Snowball wrapper.
SET search_path = public;
BEGIN;
-- Init method; kept only for dictionaries that have one.
HASINIT create function dinit_CFG_MODNAME(text)
HASINIT returns internal
HASINIT as 'MODULE_PATHNAME'
HASINIT language 'C';
-- Lexize method of the generated module; kept only for non-Snowball dictionaries.
NOSNOWBALL create function dlexize_CFG_MODNAME(internal,internal,int4)
NOSNOWBALL returns internal
NOSNOWBALL as 'MODULE_PATHNAME'
NOSNOWBALL language 'C'
NOSNOWBALL with (isstrict);
-- Register the dictionary: name, init function (or null), init option
-- (always null here), lexize function, comment.
insert into pg_ts_dict select
'CFG_MODNAME',
HASINIT (select oid from pg_proc where proname='dinit_CFG_MODNAME'),
NOINIT null,
null,
ISSNOWBALL (select oid from pg_proc where proname='snb_lexize'),
NOSNOWBALL (select oid from pg_proc where proname='dlexize_CFG_MODNAME'),
CFG_COMMENT
;
END;
#include "postgres.h"
#include <float.h>
#include "access/gist.h"
#include "access/itup.h"
#include "access/rtree.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "storage/bufpage.h"
#include "access/tuptoaster.h"
#include "tsvector.h"
#include "query.h"
#include "gistidx.h"
#include "crc32.h"
/*
 * Declarations of the SQL-callable functions defined in this file:
 * the gtsvector type's input/output functions and the GiST support
 * functions (compress, decompress, consistent, union, same, penalty,
 * picksplit).
 */
PG_FUNCTION_INFO_V1(gtsvector_in);
Datum gtsvector_in(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(gtsvector_out);
Datum gtsvector_out(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(gtsvector_compress);
Datum gtsvector_compress(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(gtsvector_decompress);
Datum gtsvector_decompress(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(gtsvector_consistent);
Datum gtsvector_consistent(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(gtsvector_union);
Datum gtsvector_union(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(gtsvector_same);
Datum gtsvector_same(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(gtsvector_penalty);
Datum gtsvector_penalty(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(gtsvector_picksplit);
Datum gtsvector_picksplit(PG_FUNCTION_ARGS);
/*
 * Key of the pos'th entry of an entry vector: the vector's data area is
 * treated as an array of GISTENTRY and the entry's key datum is returned
 * as a GISTTYPE pointer.
 */
#define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer(((GISTENTRY *) VARDATA(vec))[(pos)].key))
/*
 * Sum of GETBITBYTE(val, 0) .. GETBITBYTE(val, 7); presumably the number of
 * bits set in the byte val (GETBITBYTE is defined outside this file --
 * confirm).
 */
#define SUMBIT(val) ( \
	GETBITBYTE(val,0) + \
	GETBITBYTE(val,1) + \
	GETBITBYTE(val,2) + \
	GETBITBYTE(val,3) + \
	GETBITBYTE(val,4) + \
	GETBITBYTE(val,5) + \
	GETBITBYTE(val,6) + \
	GETBITBYTE(val,7) \
)
Datum
gtsvector_in(PG_FUNCTION_ARGS)
{
elog(ERROR, "Not implemented");
PG_RETURN_DATUM(0);
}
Datum
gtsvector_out(PG_FUNCTION_ARGS)
{
elog(ERROR, "Not implemented");
PG_RETURN_DATUM(0);
}
static int
compareint(const void *a, const void *b)
{
if (*((int4 *) a) == *((int4 *) b))
return 0;
return (*((int4 *) a) > *((int4 *) b)) ? 1 : -1;
}
/*
 * Sort the l-element array a in ascending order and remove duplicates in
 * place.  Returns the number of distinct values, which occupy the front of
 * the array afterwards.
 *
 * An empty array yields 0.  (Returning 1 for l == 0 would make callers
 * believe the array holds one element that was never written.)
 */
static int
uniqueint(int4 *a, int4 l)
{
	int4	   *ptr,
			   *res;

	if (l <= 1)
		return (l < 0) ? 0 : l;

	qsort((void *) a, l, sizeof(int4), compareint);

	/* res points at the last value kept so far; ptr scans the rest */
	res = a;
	for (ptr = a + 1; ptr < a + l; ptr++)
	{
		if (*ptr != *res)
			*(++res) = *ptr;
	}

	return res + 1 - a;
}
/*
 * Build a signature from an array key: clear the whole bit vector, then
 * apply HASH to it once for every element of the key's array.
 */
static void
makesign(BITVECP sign, GISTTYPE * a)
{
	int4		nelem = ARRNELEM(a);
	int4	   *values = GETARR(a);
	int4		idx = 0;

	MemSet((void *) sign, 0, sizeof(BITVEC));

	while (idx < nelem)
	{
		HASH(sign, values[idx]);
		idx++;
	}
}
/*
 * GiST compress method.
 *
 * Leaf entries (entry->leafkey set) arrive holding a tsvector.  It is
 * turned into an array key containing the sorted, de-duplicated crc32_sz
 * values of its words; if that key is larger than TOAST_INDEX_TARGET it is
 * replaced by a signature key built with makesign().
 *
 * Other entries that hold a signature key not yet flagged all-true are
 * returned unchanged unless every signature byte is 0xff, in which case
 * they are replaced by a key flagged SIGNKEY | ALLISTRUE.
 *
 * In all remaining cases the incoming entry is returned as is.
 */
Datum
gtsvector_compress(PG_FUNCTION_ARGS)
{
	GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
	GISTENTRY *retval = entry;
	if (entry->leafkey)
	{ /* tsvector */
		GISTTYPE *res;
		tsvector *toastedval = (tsvector *) DatumGetPointer(entry->key);
		tsvector *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
		int4 len;
		int4 *arr;
		WordEntry *ptr = ARRPTR(val);
		char *words = STRPTR(val);
		/* allocate an array key with one slot per word entry */
		len = CALCGTSIZE(ARRKEY, val->size);
		res = (GISTTYPE *) palloc(len);
		res->len = len;
		res->flag = ARRKEY;
		arr = GETARR(res);
		len = val->size;
		/* store the checksum of every word */
		while (len--)
		{
			*arr = crc32_sz((uint8 *) &words[ptr->pos], ptr->len);
			arr++;
			ptr++;
		}
		/* sort the checksums and drop duplicates */
		len = uniqueint(GETARR(res), val->size);
		if (len != val->size)
		{
			/*
			 * there is a collision of hash-function; len is always less
			 * than val->size
			 */
			len = CALCGTSIZE(ARRKEY, len);
			res = (GISTTYPE *) repalloc((void *) res, len);
			res->len = len;
		}
		/* release the detoasted copy, if PG_DETOAST_DATUM made one */
		if (val != toastedval)
			pfree(val);
		/* make signature, if array is too long */
		if (res->len > TOAST_INDEX_TARGET)
		{
			GISTTYPE *ressign;
			len = CALCGTSIZE(SIGNKEY, 0);
			ressign = (GISTTYPE *) palloc(len);
			ressign->len = len;
			ressign->flag = SIGNKEY;
			makesign(GETSIGN(ressign), res);
			pfree(res);
			res = ressign;
		}
		retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
		gistentryinit(*retval, PointerGetDatum(res),
					  entry->rel, entry->page,
					  entry->offset, res->len, FALSE);
	}
	else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
			 !ISALLTRUE(DatumGetPointer(entry->key)))
	{
		int4 i,
			len;
		GISTTYPE *res;
		BITVECP sign = GETSIGN(DatumGetPointer(entry->key));
		/* any byte that is not 0xff: keep the entry unchanged (LOOPBYTE presumably iterates i over the signature bytes) */
		LOOPBYTE(
			if ((sign[i] & 0xff) != 0xff)
				PG_RETURN_POINTER(retval);
		);
		/* every byte is 0xff: replace by a key that only carries the all-true flag */
		len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
		res = (GISTTYPE *) palloc(len);
		res->len = len;
		res->flag = SIGNKEY | ALLISTRUE;
		retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
		gistentryinit(*retval, PointerGetDatum(res),
					  entry->rel, entry->page,
					  entry->offset, res->len, FALSE);
	}
	PG_RETURN_POINTER(retval);
}
Datum
gtsvector_decompress(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
GISTTYPE *key = (GISTTYPE *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
if (key != (GISTTYPE *) DatumGetPointer(entry->key))
{
GISTENTRY *retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
gistentryinit(*retval, PointerGetDatum(key),
entry->rel, entry->page,
entry->offset, key->len, FALSE);
PG_RETURN_POINTER(retval);
}
PG_RETURN_POINTER(entry);
}
/*
 * Half-open range [arrb, arre) of int4 values that checkcondition_arr()
 * searches.
 */
typedef struct
{
	int4 *arrb;	/* first element of the range */
	int4 *arre;	/* one past the last element */
} CHKVAL;
/*
 * Binary search for val->val in the int4 range described by the CHKVAL
 * that checkval points to.  Returns true when the value is present.
 */
static bool
checkcondition_arr(void *checkval, ITEM * val)
{
	CHKVAL	   *range = (CHKVAL *) checkval;
	int4	   *lo = range->arrb;
	int4	   *hi = range->arre;

	/* the value, if present, always lies in [lo, hi) */
	while (lo < hi)
	{
		int4	   *mid = lo + (hi - lo) / 2;

		if (*mid < val->val)
			lo = mid + 1;
		else if (*mid == val->val)
			return (true);
		else
			hi = mid;
	}

	return (false);
}
/*
 * Signature counterpart of checkcondition_arr(): checkval is used as a bit
 * vector and the result is the bit selected by HASHVAL(val->val).
 * NOTE(review): presumably a set bit only means the value may be present --
 * confirm against the HASH/HASHVAL definitions.
 */
static bool
checkcondition_bit(void *checkval, ITEM * val)
{
	return GETBIT(checkval, HASHVAL(val->val));
}
Datum
gtsvector_consistent(PG_FUNCTION_ARGS)
{
QUERYTYPE *query = (QUERYTYPE *) PG_GETARG_POINTER(1);
GISTTYPE *key = (GISTTYPE *) DatumGetPointer(
((GISTENTRY *) PG_GETARG_POINTER(0))->key
);
if (!query->size)
PG_RETURN_BOOL(false);
if (ISSIGNKEY(key))
{
if (ISALLTRUE(key))
PG_RETURN_BOOL(true);
PG_RETURN_BOOL(TS_execute(
GETQUERY(query),
(void *) GETSIGN(key), false,
checkcondition_bit
));
}
else
{ /* only leaf pages */
CHKVAL chkval;
chkval.arrb = GETARR(key);
chkval.arre = chkval.arrb + ARRNELEM(key);
PG_RETURN_BOOL(TS_execute(
GETQUERY(query),
(void *) &chkval, true,
checkcondition_arr
));
}
}
/*
 * Merge the key add into the signature sbase.  Array keys are hashed in
 * element by element and signature keys are OR-ed in byte by byte.
 * Returns 1 when add is an all-true signature (sbase is then left
 * untouched), otherwise 0.
 */
static int4
unionkey(BITVECP sbase, GISTTYPE * add)
{
	int4		i;

	if (!ISSIGNKEY(add))
	{
		int4	   *values = GETARR(add);

		for (i = 0; i < ARRNELEM(add); i++)
			HASH(sbase, values[i]);
		return 0;
	}

	if (ISALLTRUE(add))
		return 1;

	{
		BITVECP		sadd = GETSIGN(add);

		LOOPBYTE(
			sbase[i] |= sadd[i];
		);
	}
	return 0;
}
/*
 * GiST union method.  Merges all keys of the entry vector (argument 0)
 * into one signature key and stores that key's size through argument 1.
 * As soon as one input is an all-true key the result is an all-true key
 * without signature bits.
 */
Datum
gtsvector_union(PG_FUNCTION_ARGS)
{
	bytea	   *entryvec = (bytea *) PG_GETARG_POINTER(0);
	int		   *size = (int *) PG_GETARG_POINTER(1);
	int4		nentries = (VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY);
	int4		flag = SIGNKEY;
	int4		keylen;
	int4		i;
	BITVEC		base;
	GISTTYPE   *result;

	MemSet((void *) base, 0, sizeof(BITVEC));

	for (i = 0; i < nentries; i++)
	{
		if (unionkey(base, GETENTRY(entryvec, i)))
		{
			/* an all-true input makes the whole union all-true */
			flag |= ALLISTRUE;
			break;
		}
	}

	keylen = CALCGTSIZE(flag, 0);
	result = (GISTTYPE *) palloc(keylen);
	result->len = keylen;
	*size = keylen;
	result->flag = flag;

	if (!ISALLTRUE(result))
		memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));

	PG_RETURN_POINTER(result);
}
/*
 * GiST same method.  Stores in *result (argument 2) whether keys a and b
 * are equal.  The kind of comparison is chosen from a alone: signature
 * keys are equal when both are all-true or when neither is and their
 * signatures match byte for byte; array keys are equal when they have the
 * same length and the same elements.
 */
Datum
gtsvector_same(PG_FUNCTION_ARGS)
{
	GISTTYPE   *a = (GISTTYPE *) PG_GETARG_POINTER(0);
	GISTTYPE   *b = (GISTTYPE *) PG_GETARG_POINTER(1);
	bool	   *result = (bool *) PG_GETARG_POINTER(2);

	if (!ISSIGNKEY(a))
	{
		/* a and b are array keys */
		int4		lena = ARRNELEM(a);
		int4		lenb = ARRNELEM(b);

		if (lena != lenb)
			*result = false;
		else
		{
			int4	   *ptra = GETARR(a);
			int4	   *ptrb = GETARR(b);
			int4		i = 0;

			/* advance past the common prefix */
			while (i < lena && ptra[i] == ptrb[i])
				i++;
			*result = (i == lena) ? true : false;
		}
	}
	else if (ISALLTRUE(a) || ISALLTRUE(b))
	{
		/* equal only if both are all-true (b is a signature key as well) */
		*result = (ISALLTRUE(a) && ISALLTRUE(b)) ? true : false;
	}
	else
	{
		int4		i;
		BITVECP		sa = GETSIGN(a);
		BITVECP		sb = GETSIGN(b);

		*result = true;
		LOOPBYTE(
			if (sa[i] != sb[i])
			{
				*result = false;
				break;
			}
		);
	}

	PG_RETURN_POINTER(result);
}
/*
 * Add up SUMBIT() of every byte of the signature, advancing sign one byte
 * per iteration; presumably the number of bits set in the signature
 * (depends on GETBITBYTE and LOOPBYTE, defined outside this file --
 * confirm).
 */
static int4
sizebitvec(BITVECP sign)
{
	int4 size = 0,
		i;
	LOOPBYTE(
		size += SUMBIT(*(char *) sign);
		sign = (BITVECP) (((char *) sign) + 1);	/* step to the next byte */
	);
	return size;
}
/*
 * Distance between two signatures: the number of bit positions visited by
 * LOOPBIT at which a and b differ.
 */
static int
hemdistsign(BITVECP a, BITVECP b)
{
	int			i;
	int			dist = 0;

	LOOPBIT(
		dist += (GETBIT(a, i) != GETBIT(b, i)) ? 1 : 0;
	);

	return dist;
}
/*
 * Distance between two signature keys.  An all-true key is handled without
 * looking at its signature: two all-true keys are at distance 0, and an
 * all-true key against an ordinary one is at SIGLENBIT minus the bit count
 * of the ordinary key's signature.
 */
static int
hemdist(GISTTYPE *a, GISTTYPE *b)
{
	if (ISALLTRUE(a))
		return ISALLTRUE(b) ? 0 : SIGLENBIT-sizebitvec(GETSIGN(b));

	if (ISALLTRUE(b))
		return SIGLENBIT-sizebitvec(GETSIGN(a));

	return hemdistsign(GETSIGN(a), GETSIGN(b));
}
/*
 * GiST penalty method.  Stores in *penalty (argument 2) the cost of adding
 * the new entry (argument 1) below the original entry (argument 0), whose
 * key is always a signature key.  The cost is the distance between the two
 * signatures; an array key is converted to a signature first, and against
 * an all-true original it is scaled by 1 / (SIGLENBIT + 1).
 */
Datum
gtsvector_penalty(PG_FUNCTION_ARGS)
{
	GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0);
	GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
	float	   *penalty = (float *) PG_GETARG_POINTER(2);
	GISTTYPE   *origval = (GISTTYPE *) DatumGetPointer(origentry->key);
	GISTTYPE   *newval = (GISTTYPE *) DatumGetPointer(newentry->key);

	if (!ISARRKEY(newval))
	{
		*penalty = hemdist(origval, newval);
	}
	else
	{
		BITVEC		sign;

		makesign(sign, newval);

		if (ISALLTRUE(origval))
			*penalty = ((float) (SIGLENBIT-sizebitvec(sign))) / (float) (SIGLENBIT+1);
		else
			*penalty = hemdistsign(sign, GETSIGN(origval));
	}

	PG_RETURN_POINTER(penalty);
}
/*
 * Signature form of one key as filled in by fillcache(): either the
 * all-true flag or a full signature.
 */
typedef struct
{
	bool allistrue;	/* key is an all-true signature; sign is not filled in then */
	BITVEC sign;	/* signature copied from, or computed for, the key */
} CACHESIGN;
/*
 * Fill a cache item from a key: array keys are hashed into a signature,
 * all-true keys only set the flag, and ordinary signature keys have their
 * signature copied.
 */
static void
fillcache(CACHESIGN * item, GISTTYPE * key)
{
	if (ISARRKEY(key))
	{
		item->allistrue = false;
		makesign(item->sign, key);
		return;
	}

	if (ISALLTRUE(key))
	{
		item->allistrue = true;
		return;
	}

	item->allistrue = false;
	memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
}
/* Evaluates to -((a - b)^3) * c as a double. */
#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
/* One entry of the cost vector that comparecost() orders. */
typedef struct
{
	OffsetNumber pos;	/* offset of the entry the cost belongs to */
	int4 cost;	/* sort key used by comparecost() */
} SPLITCOST;
static int
comparecost(const void *a, const void *b)
{
if (((SPLITCOST *) a)->cost == ((SPLITCOST *) b)->cost)
return 0;
else
return (((SPLITCOST *) a)->cost > ((SPLITCOST *) b)->cost) ? 1 : -1;
}
/*
 * Distance between two cached signatures; same rules as hemdist(), but
 * working on CACHESIGN items instead of keys.
 */
static int
hemdistcache(CACHESIGN *a, CACHESIGN *b)
{
	if (a->allistrue)
		return b->allistrue ? 0 : SIGLENBIT-sizebitvec(b->sign);

	if (b->allistrue)
		return SIGLENBIT-sizebitvec(a->sign);

	return hemdistsign(a->sign, b->sign);
}
/*
 * GiST picksplit support function.
 *
 * Argument 0 is the entry vector of the page being split, argument 1 the
 * GIST_SPLITVEC to fill in (also the return value).
 *
 * Steps:
 *   1. cache a signature for every entry;
 *   2. pick as seeds the pair of entries with the largest distance;
 *   3. build the initial left/right union keys from the seeds;
 *   4. sort the entries by |dist(seed_1) - dist(seed_2)| and assign each
 *      one to the closer side (biased by WISH_F on the current side
 *      sizes), OR-ing its signature into that side's union key.
 */
Datum
gtsvector_picksplit(PG_FUNCTION_ARGS)
{
    bytea      *entryvec = (bytea *) PG_GETARG_POINTER(0);
    GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
    OffsetNumber k,
                j;
    GISTTYPE   *datum_l,
               *datum_r;
    BITVECP     union_l,
                union_r;
    int4        size_alpha,
                size_beta;
    int4        size_waste,
                waste = -1;
    int4        nbytes;
    OffsetNumber seed_1 = 0,
                seed_2 = 0;
    OffsetNumber *left,
               *right;
    OffsetNumber maxoff;
    BITVECP     ptr;
    int         i;              /* used by LOOPBYTE */
    CACHESIGN  *cache;
    SPLITCOST  *costvector;

    maxoff = ((VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY)) - 2;
    nbytes = (maxoff + 2) * sizeof(OffsetNumber);
    v->spl_left = (OffsetNumber *) palloc(nbytes);
    v->spl_right = (OffsetNumber *) palloc(nbytes);

    cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
    fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber));

    /* find the two most distant entries; the cache is filled on the first pass */
    for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
    {
        for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
        {
            if (k == FirstOffsetNumber)
                fillcache(&cache[j], GETENTRY(entryvec, j));

            size_waste = hemdistcache(&(cache[j]), &(cache[k]));
            if (size_waste > waste)
            {
                waste = size_waste;
                seed_1 = k;
                seed_2 = j;
            }
        }
    }

    left = v->spl_left;
    v->spl_nleft = 0;
    right = v->spl_right;
    v->spl_nright = 0;

    /* no pair was examined: fall back to the first two entries */
    if (seed_1 == 0 || seed_2 == 0)
    {
        seed_1 = 1;
        seed_2 = 2;
    }

    /* form initial .. */
    if (cache[seed_1].allistrue)
    {
        datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
        datum_l->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
        datum_l->flag = SIGNKEY | ALLISTRUE;
    }
    else
    {
        datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
        datum_l->len = CALCGTSIZE(SIGNKEY, 0);
        datum_l->flag = SIGNKEY;
        memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
    }
    if (cache[seed_2].allistrue)
    {
        datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
        datum_r->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
        datum_r->flag = SIGNKEY | ALLISTRUE;
    }
    else
    {
        datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0));
        datum_r->len = CALCGTSIZE(SIGNKEY, 0);
        datum_r->flag = SIGNKEY;
        memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
    }

    union_l = GETSIGN(datum_l);
    union_r = GETSIGN(datum_r);

    /* from here on the last entry of the vector takes part as well */
    maxoff = OffsetNumberNext(maxoff);
    fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));

    /* sort before ... */
    costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
    for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
    {
        costvector[j - 1].pos = j;
        size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
        size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]));
        costvector[j - 1].cost = abs(size_alpha - size_beta);
    }
    qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);

    for (k = 0; k < maxoff; k++)
    {
        j = costvector[k].pos;

        /* the seeds go to their own sides unconditionally */
        if (j == seed_1)
        {
            *left++ = j;
            v->spl_nleft++;
            continue;
        }
        else if (j == seed_2)
        {
            *right++ = j;
            v->spl_nright++;
            continue;
        }

        /*
         * Distance of entry j to each side.  cache[j].sign is already a raw
         * signature, so it must be used directly: GETSIGN() skips a
         * GISTTYPE header and would read GTHDRSIZE bytes past the
         * signature.
         */
        if (ISALLTRUE(datum_l) || cache[j].allistrue)
        {
            if (ISALLTRUE(datum_l) && cache[j].allistrue)
                size_alpha = 0;
            else
                size_alpha = SIGLENBIT - sizebitvec(
                    (cache[j].allistrue) ? GETSIGN(datum_l) : cache[j].sign
                    );
        }
        else
        {
            size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l));
        }

        if (ISALLTRUE(datum_r) || cache[j].allistrue)
        {
            if (ISALLTRUE(datum_r) && cache[j].allistrue)
                size_beta = 0;
            else
                size_beta = SIGLENBIT - sizebitvec(
                    (cache[j].allistrue) ? GETSIGN(datum_r) : cache[j].sign
                    );
        }
        else
        {
            size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r));
        }

        if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1))
        {
            /* entry goes left: merge its signature into the left union */
            if (ISALLTRUE(datum_l) || cache[j].allistrue)
            {
                if (!ISALLTRUE(datum_l))
                    MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
            }
            else
            {
                ptr = cache[j].sign;
                LOOPBYTE(
                         union_l[i] |= ptr[i];
                    );
            }
            *left++ = j;
            v->spl_nleft++;
        }
        else
        {
            /* entry goes right: merge its signature into the right union */
            if (ISALLTRUE(datum_r) || cache[j].allistrue)
            {
                if (!ISALLTRUE(datum_r))
                    MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
            }
            else
            {
                ptr = cache[j].sign;
                LOOPBYTE(
                         union_r[i] |= ptr[i];
                    );
            }
            *right++ = j;
            v->spl_nright++;
        }
    }

    /* write FirstOffsetNumber into the slot after the last entry of each list */
    *right = *left = FirstOffsetNumber;

    pfree(costvector);
    pfree(cache);

    v->spl_ldatum = PointerGetDatum(datum_l);
    v->spl_rdatum = PointerGetDatum(datum_r);

    PG_RETURN_POINTER(v);
}
#ifndef __GISTIDX_H__
#define __GISTIDX_H__
/*
#define GISTIDX_DEBUG
*/
/*
 * signature defines
 */
#define BITBYTE 8
#define SIGLENINT  63           /* >121 => key will toast, so it will not
                                 * work !!! */
#define SIGLEN  ( sizeof(int4)*SIGLENINT )      /* signature length in bytes */
#define SIGLENBIT (SIGLEN*BITBYTE)              /* signature length in bits */

typedef char BITVEC[SIGLEN];
typedef char *BITVECP;

/*
 * Run statement 'a' for every byte / bit index of a signature.  Both macros
 * use a variable named 'i' that the caller must declare.
 */
#define LOOPBYTE(a) \
        for(i=0;i<SIGLEN;i++) {\
                a;\
        }
#define LOOPBIT(a) \
        for(i=0;i<SIGLENBIT;i++) {\
                a;\
        }

/* bit access; every macro argument is parenthesized so expressions are safe */
#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
#define GETBITBYTE(x,i) ( ((char)(x)) >> (i) & 0x01 )
#define CLRBIT(x,i)   GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
#define SETBIT(x,i)   GETBYTE(x,i) |=  ( 0x01 << ( (i) % BITBYTE ) )
#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )

#define abs(a)                  ((a) <  (0) ? -(a) : (a))
#define min(a,b)                ((a) <  (b) ? (a) : (b))

/* map a value to a bit number of the signature and set that bit */
#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
#define HASH(sign, val) SETBIT((sign), HASHVAL(val))

/*
 * type of index key
 */
typedef struct
{
    int4        len;            /* total size in bytes, as computed by CALCGTSIZE */
    int4        flag;           /* combination of ARRKEY / SIGNKEY / ALLISTRUE */
    char        data[1];        /* int4 array or signature, see GETARR / GETSIGN */
} GISTTYPE;

#define ARRKEY          0x01
#define SIGNKEY         0x02
#define ALLISTRUE       0x04

#define ISARRKEY(x) ( ((GISTTYPE*)(x))->flag & ARRKEY )
#define ISSIGNKEY(x)    ( ((GISTTYPE*)(x))->flag & SIGNKEY )
#define ISALLTRUE(x)    ( ((GISTTYPE*)(x))->flag & ALLISTRUE )

#define GTHDRSIZE   ( sizeof(int4)*2  )
/* key size: header plus an int4 array, a signature, or nothing for ALLISTRUE */
#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int4)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )

/* payload accessors; 'x' must point to a GISTTYPE, not to a bare signature */
#define GETSIGN(x)  ( (BITVECP)( (char*)(x)+GTHDRSIZE ) )
#define GETARR(x)   ( (int4*)( (char*)(x)+GTHDRSIZE ) )
#define ARRNELEM(x) ( ( ((GISTTYPE*)(x))->len - GTHDRSIZE )/sizeof(int4) )
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "postgres.h"
#include "spell.h"
/* longest word (in bytes) NormalizeWord() accepts; also sizes the scratch buffers of CheckSuffix()/CheckPrefix() */
#define MAXNORMLEN 56
/* case-insensitive test whether x starts with y; evaluates to 0 on match */
#define STRNCASECMP(x,y) (strncasecmp(x,y,strlen(y)))
static int cmpspell(const void *s1,const void *s2){
return(strcmp(((const SPELL*)s1)->word,((const SPELL*)s2)->word));
}
/*
 * Convert a NUL-terminated string to lower case in place, byte by byte,
 * using tolower().
 */
static void
strlower(char *str)
{
    unsigned char *cursor;

    for (cursor = (unsigned char *) str; *cursor != '\0'; cursor++)
        *cursor = tolower(*cursor);
}
/*
 * Backward string comparison for suffix tree operations.
 *
 * The strings are compared from their last characters towards the first.
 * If one string is a proper suffix of the other, the shorter one sorts
 * first.  Returns -1, 0 or 1.
 */
static int
strbcmp(const char *s1, const char *s2)
{
    int         i1 = strlen(s1) - 1;
    int         i2 = strlen(s2) - 1;

    for (; i1 >= 0 && i2 >= 0; i1--, i2--)
    {
        if (s1[i1] != s2[i2])
            return (s1[i1] < s2[i2]) ? -1 : 1;
    }

    if (i1 == i2)
        return 0;
    return (i1 < i2) ? -1 : 1;
}
/*
 * Backward comparison of at most 'count' trailing characters of s1 and s2.
 *
 * Returns 0 when the last 'count' characters agree; otherwise -1 or 1
 * depending on the first differing character (seen from the end) or, if
 * one string runs out first, on which one is shorter.
 */
static int
strbncmp(const char *s1, const char *s2, size_t count)
{
    int         i1 = strlen(s1) - 1;
    int         i2 = strlen(s2) - 1;
    int         remaining = count;

    while (i1 >= 0 && i2 >= 0 && remaining > 0)
    {
        if (s1[i1] != s2[i2])
            return (s1[i1] < s2[i2]) ? -1 : 1;
        i1--;
        i2--;
        remaining--;
    }

    if (remaining == 0)
        return 0;
    if (i1 == i2)
        return 0;
    return (i1 < i2) ? -1 : 1;
}
static int
cmpaffix(const void *s1,const void *s2){
if (((const AFFIX*)s1)->type < ((const AFFIX*)s2)->type) return -1;
if (((const AFFIX*)s1)->type > ((const AFFIX*)s2)->type) return 1;
if (((const AFFIX*)s1)->type == 'p')
return(strcmp(((const AFFIX*)s1)->repl,((const AFFIX*)s2)->repl));
else
return(strbcmp(((const AFFIX*)s1)->repl,((const AFFIX*)s2)->repl));
}
/*
 * Append one word with its affix flags to Conf->Spell.
 *
 * The array grows in steps of 1024*20 entries.  'word' is duplicated with
 * strdup(); 'flag' is copied into the fixed-size flag buffer and truncated
 * if it does not fit.  Raises elog(ERROR) when memory is exhausted.
 * Always returns 0.
 */
int
AddSpell(IspellDict * Conf, const char *word, const char *flag)
{
    SPELL      *entry;

    if (Conf->nspell >= Conf->mspell)
    {
        int         newsize = Conf->mspell + 1024 * 20;
        SPELL      *grown;

        /*
         * Keep the old array and size until the allocation has succeeded, so
         * that Conf stays consistent if elog(ERROR) is raised.
         */
        if (Conf->mspell)
            grown = (SPELL *) realloc(Conf->Spell, newsize * sizeof(SPELL));
        else
            grown = (SPELL *) malloc(newsize * sizeof(SPELL));

        if (grown == NULL)
            elog(ERROR, "No memory for AddSpell");

        Conf->Spell = grown;
        Conf->mspell = newsize;
    }

    entry = &Conf->Spell[Conf->nspell];

    entry->word = strdup(word);
    if (!entry->word)
        elog(ERROR, "No memory for AddSpell");

    /*
     * strncpy() does not terminate the destination when the source fills it
     * completely; the flag string is later scanned with strchr(), so make
     * sure it is always NUL-terminated.
     */
    strncpy(entry->flag, flag, sizeof(entry->flag) - 1);
    entry->flag[sizeof(entry->flag) - 1] = '\0';

    Conf->nspell++;
    return (0);
}
/*
 * Load an ispell word list from 'filename' into Conf.
 *
 * Every line has the form "word" or "word/FLAGS".  The flag string ends at
 * the first character that is not an ASCII letter.  The word is lowercased,
 * cut at the first CR or LF and passed to AddSpell() with its flags (an
 * empty string when the line has none).
 *
 * Returns 1 if the file cannot be opened, 0 otherwise.
 */
int
ImportDictionary(IspellDict * Conf, const char *filename)
{
    unsigned char line[BUFSIZ];
    FILE       *dict;

    dict = fopen(filename, "r");
    if (!dict)
        return (1);

    while (fgets(line, sizeof(line), dict))
    {
        unsigned char *cur;
        const unsigned char *flag = "";

        cur = strchr(line, '/');
        if (cur)
        {
            /* split the line at '/': word before it, flags after it */
            *cur = 0;
            cur++;
            flag = cur;

            for (; *cur; cur++)
            {
                int         is_upper = (*cur >= 'A') && (*cur <= 'Z');
                int         is_lower = (*cur >= 'a') && (*cur <= 'z');

                if (!is_upper && !is_lower)
                {
                    *cur = 0;
                    break;
                }
            }
        }

        strlower(line);

        /* strip line terminators from the word */
        for (cur = line; *cur; cur++)
        {
            if (*cur == '\r' || *cur == '\n')
                *cur = 0;
        }

        AddSpell(Conf, line, flag);
    }

    fclose(dict);
    return (0);
}
/*
 * Look up 'word' in the sorted Conf->Spell array.
 *
 * The search is restricted to the index range recorded in Conf->SpellTree
 * for the first byte of the word.  If 'affixflag' is non-zero, a matching
 * entry must also contain that character in its flag string.
 *
 * Returns a pointer to the matching entry, or NULL if there is none.
 */
static SPELL *
FindWord(IspellDict * Conf, const char *word, int affixflag) {
int l,c,r,resc,resl,resr, i;
i = (int)(*word) & 255;
l = Conf->SpellTree.Left[i];
r = Conf->SpellTree.Right[i];
/* Left[] is -1 when no dictionary word starts with this byte */
if (l == -1) return (NULL);
while(l<=r){
/* probe the middle and both ends of the current range */
c = (l + r) >> 1;
resc = strcmp(Conf->Spell[c].word, word);
if( (resc == 0) &&
((affixflag == 0) || (strchr(Conf->Spell[c].flag, affixflag) != NULL)) ) {
return(&Conf->Spell[c]);
}
resl = strcmp(Conf->Spell[l].word, word);
if( (resl == 0) &&
((affixflag == 0) || (strchr(Conf->Spell[l].flag, affixflag) != NULL)) ) {
return(&Conf->Spell[l]);
}
resr = strcmp(Conf->Spell[r].word, word);
if( (resr == 0) &&
((affixflag == 0) || (strchr(Conf->Spell[r].flag, affixflag) != NULL)) ) {
return(&Conf->Spell[r]);
}
/*
 * Narrow the range: halve it according to the middle probe and also step
 * past the end that was just checked.  When the middle word is equal but
 * its flags did not match, only the two ends move inwards.
 */
if(resc < 0){
l = c + 1;
r--;
} else if(resc > 0){
r = c - 1;
l++;
} else {
l++;
r--;
}
}
return(NULL);
}
/*
 * Append one affix rule to Conf->Affix.
 *
 *   flag  affix flag character the rule belongs to
 *   mask  regular expression source; stored anchored with a trailing '$'
 *         for type 's' and with a leading '^' otherwise
 *   find, repl  the two affix strings of the rule
 *   type  's' for suffix rules; ImportAffixes() passes 'p' for prefixes
 *
 * The AFFIX buffers are fixed-size, so a rule whose strings do not fit is
 * rejected with elog(ERROR) instead of overflowing them.  elog(ERROR) is
 * also raised when memory is exhausted.  Always returns 0.
 */
int
AddAffix(IspellDict * Conf, int flag, const char *mask, const char *find, const char *repl, int type)
{
    AFFIX      *affix;

    /* mask needs room for one anchor character and the terminating NUL */
    if (strlen(mask) + 1 >= sizeof(affix->mask) ||
        strlen(find) >= sizeof(affix->find) ||
        strlen(repl) >= sizeof(affix->repl))
        elog(ERROR, "Affix rule is too long");

    if (Conf->naffixes >= Conf->maffixes)
    {
        int         newsize = Conf->maffixes + 16;
        AFFIX      *grown;

        /* keep the old array and size until the allocation has succeeded */
        if (Conf->maffixes)
            grown = (AFFIX *) realloc((void *) Conf->Affix, newsize * sizeof(AFFIX));
        else
            grown = (AFFIX *) malloc(newsize * sizeof(AFFIX));

        if (grown == NULL)
            elog(ERROR, "No memory for AddAffix");

        Conf->Affix = grown;
        Conf->maffixes = newsize;
    }

    affix = &Conf->Affix[Conf->naffixes];

    if (type == 's')
        sprintf(affix->mask, "%s$", mask);
    else
        sprintf(affix->mask, "^%s", mask);

    affix->compile = 1;         /* regex is compiled lazily on first use */
    affix->flag = flag;
    affix->type = type;
    strcpy(affix->find, find);
    strcpy(affix->repl, repl);
    affix->replen = strlen(repl);

    Conf->naffixes++;
    return (0);
}
/*
 * Copy 'src' to 'dist', dropping every space, '-' and tab character.
 * The result is NUL-terminated; returns 'dist'.
 */
static char *
remove_spaces(char *dist, char *src)
{
    char       *out = dist;
    char       *in;

    for (in = src; *in != '\0'; in++)
    {
        if (*in == ' ' || *in == '-' || *in == '\t')
            continue;
        *out++ = *in;
    }
    *out = '\0';

    return (dist);
}
/*
 * Load an ispell affix file from 'filename' into Conf.
 *
 * Lines starting with "suffixes" / "prefixes" (case-insensitive) switch the
 * current section, and lines starting with "flag " set the flag character
 * for the rules that follow.  Inside a section every other line is
 * stripped of its '#' comment, lowercased and parsed as
 * "mask > find , repl" (or "mask > repl"); spaces, tabs and '-' are removed
 * from each part before it is handed to AddAffix().
 *
 * Returns 1 if the file cannot be opened, 0 otherwise.
 */
int
ImportAffixes(IspellDict * Conf, const char *filename)
{
    unsigned char str[BUFSIZ];
    unsigned char flag = 0;
    unsigned char mask[BUFSIZ] = "";
    unsigned char find[BUFSIZ] = "";
    unsigned char repl[BUFSIZ] = "";
    unsigned char *s;
    int         i;
    int         suffixes = 0;
    int         prefixes = 0;
    FILE       *affix;

    if (!(affix = fopen(filename, "r")))
        return (1);

    while (fgets(str, sizeof(str), affix))
    {
        if (!STRNCASECMP(str, "suffixes"))
        {
            suffixes = 1;
            prefixes = 0;
            continue;
        }
        if (!STRNCASECMP(str, "prefixes"))
        {
            suffixes = 0;
            prefixes = 1;
            continue;
        }
        if (!STRNCASECMP(str, "flag "))
        {
            s = str + 5;

            /*
             * Skip '*' and spaces in front of the flag character.  strchr()
             * also matches the terminating NUL of its first argument, so
             * the end of the line has to be tested explicitly; otherwise
             * the scan runs past the buffer.
             */
            while (*s && strchr("* ", *s))
                s++;
            flag = *s;
            continue;
        }

        /* ignore everything outside a suffixes/prefixes section */
        if ((!suffixes) && (!prefixes))
            continue;

        if ((s = strchr(str, '#')))
            *s = 0;
        if (!*str)
            continue;

        strlower(str);
        strcpy(mask, "");
        strcpy(find, "");
        strcpy(repl, "");
        i = sscanf(str, "%[^>\n]>%[^,\n],%[^\n]", mask, find, repl);

        /* str has been parsed already and is reused as scratch space */
        remove_spaces(str, repl);
        strcpy(repl, str);
        remove_spaces(str, find);
        strcpy(find, str);
        remove_spaces(str, mask);
        strcpy(mask, str);

        switch (i)
        {
            case 3:
                break;
            case 2:
                /* "mask > repl": the single string is the replacement */
                if (*find != '\0')
                {
                    strcpy(repl, find);
                    strcpy(find, "");
                }
                break;
            default:
                continue;
        }

        AddAffix(Conf, (int) flag, mask, find, repl, suffixes ? 's' : 'p');
    }

    fclose(affix);
    return (0);
}
/*
 * Sort the word list and build Conf->SpellTree.
 *
 * After sorting, Left[b] and Right[b] hold the first and last index of the
 * words whose first byte is b; Left[b] is -1 when there is no such word.
 */
void
SortDictionary(IspellDict * Conf)
{
    int         prevByte = -1;
    int         firstByte;
    size_t      idx;

    qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL), cmpspell);

    for (idx = 0; idx < 256; idx++)
        Conf->SpellTree.Left[idx] = -1;

    for (idx = 0; idx < Conf->nspell; idx++)
    {
        firstByte = (unsigned char) Conf->Spell[idx].word[0];

        if (firstByte != prevByte)
        {
            /* a new run of words starts here */
            Conf->SpellTree.Left[firstByte] = idx;
            prevByte = firstByte;
        }
        Conf->SpellTree.Right[firstByte] = idx;
    }
}
/*
 * Sort the affix rules and build Conf->PrefixTree / Conf->SuffixTree.
 *
 * Rules of type 'p' are indexed by the first byte of repl, all other rules
 * by the last byte of repl (0 when repl is empty).  Left[b]/Right[b] hold
 * the first and last rule index recorded for byte b, or -1 if there is
 * none.
 */
void
SortAffixes(IspellDict * Conf)
{
    int         lastPrefixKey = -1;
    int         lastSuffixKey = -1;
    int         key;
    AFFIX      *rule;
    size_t      n;

    if (Conf->naffixes > 1)
        qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix);

    for (n = 0; n < 256; n++)
    {
        Conf->PrefixTree.Right[n] = -1;
        Conf->PrefixTree.Left[n] = -1;
        Conf->SuffixTree.Right[n] = -1;
        Conf->SuffixTree.Left[n] = -1;
    }

    for (n = 0; n < Conf->naffixes; n++)
    {
        rule = &(((AFFIX *) Conf->Affix)[n]);

        if (rule->type == 'p')
        {
            key = (unsigned char) rule->repl[0];
            if (key != lastPrefixKey)
            {
                Conf->PrefixTree.Left[key] = n;
                lastPrefixKey = key;
            }
            Conf->PrefixTree.Right[key] = n;
        }
        else
        {
            if (rule->replen)
                key = (unsigned char) rule->repl[rule->replen - 1];
            else
                key = 0;

            if (key != lastSuffixKey)
            {
                Conf->SuffixTree.Left[key] = n;
                lastSuffixKey = key;
            }
            Conf->SuffixTree.Right[key] = n;
        }
    }
}
/*
 * Try to undo suffix rule 'Affix' on 'word' (of length 'len').
 *
 * *res receives the result of comparing the end of the word with
 * Affix->repl.  If they match, the ending is replaced by Affix->find, the
 * candidate is checked against the rule's regular expression (compiled on
 * first use) and looked up in the dictionary with the rule's flag.
 *
 * Returns a palloc'd copy of the candidate on success, NULL otherwise.
 */
static char *
CheckSuffix(const char *word, size_t len, AFFIX *Affix, int *res, IspellDict *Conf)
{
    regmatch_t  matches[2];     /* workaround for apache&linux */
    char        candidate[2 * MAXNORMLEN] = "";
    int         rc;

    *res = strbncmp(word, Affix->repl, Affix->replen);
    if (*res != 0)
        return NULL;

    /* build the candidate: word without repl, followed by find */
    strcpy(candidate, word);
    strcpy(candidate + len - Affix->replen, Affix->find);

    if (Affix->compile)
    {
        rc = regcomp(&(Affix->reg), Affix->mask, REG_EXTENDED | REG_ICASE | REG_NOSUB);
        if (rc)
        {
            /* regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE); */
            regfree(&(Affix->reg));
            return (NULL);
        }
        Affix->compile = 0;
    }

    rc = regexec(&(Affix->reg), candidate, 1, matches, 0);
    if (rc != 0)
        return NULL;

    if (!FindWord(Conf, candidate, Affix->flag))
        return NULL;

    return pstrdup(candidate);
}
/* sizes the regmatch_t array of CheckPrefix() (NS*2 elements) */
#define NS 1
/* number of slots in the array of normal forms built by NormalizeWord(), including the NULL terminator */
#define MAX_NORM 512
/*
 * Try to undo prefix rule 'Affix' on 'word'.
 *
 * If the word starts with Affix->repl, the prefix is replaced by
 * Affix->find and the candidate is checked against the rule's regular
 * expression (compiled on first use).  On a regex match:
 *   - the candidate is appended to the forms list if it is found in the
 *     dictionary with the rule's flag;
 *   - the first suffix rule recorded in Conf->SuffixTree.Left[pi] is tried
 *     on the candidate and its result is appended as well.
 *
 * 'forms' is the start of the result array and '*cur' its next free slot;
 * the array is kept NULL-terminated and never grows beyond MAX_NORM slots.
 *
 * Returns the strncmp() result when the prefix does not match, 0 otherwise.
 */
static int
CheckPrefix(const char *word, size_t len, AFFIX *Affix, IspellDict *Conf, int pi,
char **forms, char ***cur ) {
regmatch_t subs[NS*2];
char newword[2*MAXNORMLEN] = "";
int err, ls, res, lres;
size_t newlen;
AFFIX *CAffix = Conf->Affix;
res = strncmp(word, Affix->repl, Affix->replen);
if (res != 0) {
return res;
}
/* candidate = find + word without the repl prefix */
strcpy(newword, Affix->find);
strcat(newword, word+Affix->replen);
if (Affix->compile) {
err = regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB);
if(err){
/*regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE);*/
regfree(&(Affix->reg));
return (0);
}
Affix->compile = 0;
}
if(!(err=regexec(&(Affix->reg),newword,1,subs,0))){
SPELL * curspell;
if((curspell=FindWord(Conf, newword, Affix->flag))){
/* keep one slot free for the terminating NULL */
if ((*cur - forms) < (MAX_NORM-1)) {
**cur = pstrdup(newword);
(*cur)++; **cur = NULL;
}
}
newlen = strlen(newword);
ls = Conf->SuffixTree.Left[pi];
if ( ls>=0 && ((*cur - forms) < (MAX_NORM-1)) ) {
/* CheckSuffix() returns NULL on failure, which leaves the list terminated */
**cur = CheckSuffix(newword, newlen, &CAffix[ls], &lres, Conf);
if (**cur) {
(*cur)++; **cur = NULL;
}
}
}
return 0;
}
/*
 * Find the dictionary (normal) forms of 'word'.
 *
 * The word is lowercased in place.  The result is a palloc'd,
 * NULL-terminated array of palloc'd strings holding the word itself (if it
 * is in the dictionary) and every form obtained by undoing prefix and
 * suffix rules.  At most MAX_NORM-1 forms are returned.
 *
 * Returns NULL when the word is empty, longer than MAXNORMLEN, or has no
 * normal form.
 */
char **
NormalizeWord(IspellDict * Conf, char *word)
{
    /* regmatch_t subs[NS]; */
    size_t      len;
    char      **forms;
    char      **cur;
    AFFIX      *Affix;
    int         ri,
                pi,
                ipi,
                lp,
                rp,
                cp,
                ls,
                rs;
    int         lres,
                rres,
                cres = 0;
    SPELL      *spell;

    len = strlen(word);

    /*
     * An empty word has no last character: indexing word[len - 1] would read
     * before the buffer, and a resulting pi of 0 would make the "ipi += pi"
     * loop below never terminate.
     */
    if (len == 0 || len > MAXNORMLEN)
        return (NULL);

    strlower(word);

    forms = (char **) palloc(MAX_NORM * sizeof(char *));
    cur = forms;
    *cur = NULL;

    ri = (int) (*word) & 255;               /* first byte: prefix tree key */
    pi = (int) (word[len - 1]) & 255;       /* last byte: suffix tree key */
    Affix = (AFFIX *) Conf->Affix;

    /* Check that the word itself is normal form */
    if ((spell = FindWord(Conf, word, 0)))
    {
        *cur = pstrdup(word);
        cur++;
        *cur = NULL;
    }

    /*
     * Find all other NORMAL forms of the 'word'.  The loop body runs twice:
     * with suffix key 0 (rules with an empty repl) and with key pi.
     */
    for (ipi = 0; ipi <= pi; ipi += pi)
    {
        /* check prefix */
        lp = Conf->PrefixTree.Left[ri];
        rp = Conf->PrefixTree.Right[ri];
        while (lp >= 0 && lp <= rp)
        {
            cp = (lp + rp) >> 1;
            cres = 0;
            if ((cur - forms) < (MAX_NORM - 1))
            {
                cres = CheckPrefix(word, len, &Affix[cp], Conf, ipi, forms, &cur);
            }
            if ((lp < cp) && ((cur - forms) < (MAX_NORM - 1)))
            {
                lres = CheckPrefix(word, len, &Affix[lp], Conf, ipi, forms, &cur);
            }
            if ((rp > cp) && ((cur - forms) < (MAX_NORM - 1)))
            {
                rres = CheckPrefix(word, len, &Affix[rp], Conf, ipi, forms, &cur);
            }

            /* narrow the range by the middle probe and step past both ends */
            if (cres < 0)
            {
                rp = cp - 1;
                lp++;
            }
            else if (cres > 0)
            {
                lp = cp + 1;
                rp--;
            }
            else
            {
                lp++;
                rp--;
            }
        }

        /* check suffix: every rule of the range is tried, from both ends */
        ls = Conf->SuffixTree.Left[ipi];
        rs = Conf->SuffixTree.Right[ipi];
        while (ls >= 0 && ls <= rs)
        {
            if (((cur - forms) < (MAX_NORM - 1)))
            {
                *cur = CheckSuffix(word, len, &Affix[ls], &lres, Conf);
                if (*cur)
                {
                    cur++;
                    *cur = NULL;
                }
            }
            if ((rs > ls) && ((cur - forms) < (MAX_NORM - 1)))
            {
                *cur = CheckSuffix(word, len, &Affix[rs], &rres, Conf);
                if (*cur)
                {
                    cur++;
                    *cur = NULL;
                }
            }
            ls++;
            rs--;
        }                       /* end while */
    }                           /* for ipi */

    if (cur == forms)
    {
        pfree(forms);
        return (NULL);
    }
    return (forms);
}
/*
 * Release everything owned by an IspellDict and reset it to all zeroes.
 *
 * Compiled regular expressions (rules whose 'compile' flag has been
 * cleared), the words duplicated by AddSpell() and both arrays are freed.
 */
void
FreeIspell(IspellDict *Conf)
{
    int         i;
    AFFIX      *Affix = (AFFIX *) Conf->Affix;

    for (i = 0; i < Conf->naffixes; i++)
    {
        if (Affix[i].compile == 0)
            regfree(&(Affix[i].reg));
    }

    /* one strdup'ed word per Spell entry: the bound is nspell, not naffixes */
    for (i = 0; i < Conf->nspell; i++)
        free(Conf->Spell[i].word);

    free(Conf->Affix);
    free(Conf->Spell);
    memset((void *) Conf, 0, sizeof(IspellDict));
    return;
}
#ifndef __SPELL_H__
#define __SPELL_H__
#include <sys/types.h>
#include <regex.h>
/* One dictionary word together with its affix flags. */
typedef struct spell_struct {
char * word; /* the word; allocated with strdup() by AddSpell() */
char flag[10]; /* affix flag characters accepted by this word */
} SPELL;
/* One affix rule, filled in by AddAffix(). */
typedef struct aff_struct {
char flag; /* affix flag character the rule belongs to */
char type; /* 'p' for prefix rules; ImportAffixes() uses 's' for suffix rules */
char mask[33]; /* regular expression source, anchored with '^' or '$' */
char find[16]; /* text put in place of repl to build a candidate word */
char repl[16]; /* affix text matched at the start/end of the inflected word */
regex_t reg; /* compiled form of mask, valid once compile is 0 */
size_t replen; /* strlen(repl) */
char compile; /* 1 while mask still has to be compiled */
} AFFIX;
/* Per-byte index ranges into a sorted array; -1 marks an empty range. */
typedef struct Tree_struct {
int Left[256], Right[256];
} Tree_struct;
typedef struct {
int maffixes; /* allocated size of Affix */
int naffixes; /* number of rules stored in Affix */
AFFIX * Affix;
int nspell; /* number of words stored in Spell */
int mspell; /* allocated size of Spell */
SPELL *Spell;
Tree_struct SpellTree; /* ranges of Spell by first byte of the word */
Tree_struct PrefixTree; /* ranges of prefix rules by first byte of repl */
Tree_struct SuffixTree; /* ranges of suffix rules by last byte of repl */
} IspellDict;
char ** NormalizeWord(IspellDict * Conf,char *word);
int ImportAffixes(IspellDict * Conf, const char *filename);
int ImportDictionary(IspellDict * Conf,const char *filename);
int AddSpell(IspellDict * Conf,const char * word,const char *flag);
int AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type);
void SortDictionary(IspellDict * Conf);
void SortAffixes(IspellDict * Conf);
void FreeIspell (IspellDict *Conf);
#endif
/*
* Simple config parser
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "postgres.h"
#include "dict.h"
#include "common.h"
/* states of the parse_cfgdict() state machine */
#define CS_WAITKEY 0 /* before a key */
#define CS_INKEY 1 /* inside a key */
#define CS_WAITEQ 2 /* after a key, before '=' */
#define CS_WAITVALUE 3 /* after '=', before a value */
#define CS_INVALUE 4 /* inside a double-quoted value */
#define CS_IN2VALUE 5 /* inside an unquoted value */
#define CS_WAITDELIM 6 /* after a value, before ',' */
#define CS_INESC 7 /* after a backslash; returns to CS_INVALUE */
#define CS_IN2ESC 8 /* after a backslash; returns to CS_IN2VALUE */
/*
 * Return a palloc'd, NUL-terminated copy of the first 'len' bytes of 'ptr'
 * with backslash escapes resolved: a backslash is dropped and the
 * character after it is copied literally.
 */
static char *
nstrdup(char *ptr, int len)
{
    char       *res = palloc(len + 1),
               *cptr;

    memcpy(res, ptr, len);
    res[len] = '\0';

    /* unescape in place: cptr writes, ptr reads and is never behind cptr */
    cptr = ptr = res;
    while (*ptr)
    {
        /*
         * A backslash that is the last character has nothing to escape; it
         * is kept as is.  Skipping it would move ptr onto the terminator
         * and the increment below would then step past the end of the
         * buffer.
         */
        if (*ptr == '\\' && ptr[1] != '\0')
            ptr++;
        *cptr = *ptr;
        ptr++;
        cptr++;
    }
    *cptr = '\0';

    return res;
}
/*
 * Parse a dictionary option string of the form
 *
 *     key = value, key2 = "quoted value", ...
 *
 * into a palloc'd array of Map entries stored in *m.  The array is zeroed,
 * so the entry after the last parsed pair has NULL key and value.  Keys
 * consist of alphabetic characters; values are either double-quoted or run
 * up to the next whitespace or comma.  A backslash escapes the following
 * character in both kinds of value.
 *
 * Raises elog(ERROR) on a syntax error.
 */
void
parse_cfgdict(text *in, Map **m)
{
    Map        *mptr;
    char       *ptr = VARDATA(in),
               *begin = NULL;
    int         num = 0;        /* int: a char counter overflows on long inputs */
    int         state = CS_WAITKEY;

    /* the number of commas bounds the number of key/value pairs */
    while (ptr - VARDATA(in) < VARSIZE(in) - VARHDRSZ)
    {
        if (*ptr == ',')
            num++;
        ptr++;
    }

    *m = mptr = (Map *) palloc(sizeof(Map) * (num + 2));
    memset(mptr, 0, sizeof(Map) * (num + 2));

    ptr = VARDATA(in);
    while (ptr - VARDATA(in) < VARSIZE(in) - VARHDRSZ)
    {
        /* <ctype.h> functions require an unsigned char value */
        unsigned char ch = (unsigned char) *ptr;

        if (state == CS_WAITKEY)
        {
            if (isalpha(ch))
            {
                begin = ptr;
                state = CS_INKEY;
            }
            else if (!isspace(ch))
                elog(ERROR, "Syntax error in position %d near '%c'", (int) (ptr - VARDATA(in)), *ptr);
        }
        else if (state == CS_INKEY)
        {
            if (isspace(ch))
            {
                mptr->key = nstrdup(begin, ptr - begin);
                state = CS_WAITEQ;
            }
            else if (*ptr == '=')
            {
                mptr->key = nstrdup(begin, ptr - begin);
                state = CS_WAITVALUE;
            }
            else if (!isalpha(ch))
                elog(ERROR, "Syntax error in position %d near '%c'", (int) (ptr - VARDATA(in)), *ptr);
        }
        else if (state == CS_WAITEQ)
        {
            if (*ptr == '=')
                state = CS_WAITVALUE;
            else if (!isspace(ch))
                elog(ERROR, "Syntax error in position %d near '%c'", (int) (ptr - VARDATA(in)), *ptr);
        }
        else if (state == CS_WAITVALUE)
        {
            if (*ptr == '"')
            {
                begin = ptr + 1;
                state = CS_INVALUE;
            }
            else if (!isspace(ch))
            {
                begin = ptr;
                state = CS_IN2VALUE;
            }
        }
        else if (state == CS_INVALUE)
        {
            if (*ptr == '"')
            {
                mptr->value = nstrdup(begin, ptr - begin);
                mptr++;
                state = CS_WAITDELIM;
            }
            else if (*ptr == '\\')
                state = CS_INESC;
        }
        else if (state == CS_IN2VALUE)
        {
            if (isspace(ch) || *ptr == ',')
            {
                mptr->value = nstrdup(begin, ptr - begin);
                mptr++;
                state = (*ptr == ',') ? CS_WAITKEY : CS_WAITDELIM;
            }
            else if (*ptr == '\\')
            {
                /*
                 * Use the escape state that returns to CS_IN2VALUE.  Going
                 * through CS_INESC would continue in CS_INVALUE and make the
                 * unquoted value wait for a closing double quote.
                 */
                state = CS_IN2ESC;
            }
        }
        else if (state == CS_WAITDELIM)
        {
            if (*ptr == ',')
                state = CS_WAITKEY;
            else if (!isspace(ch))
                elog(ERROR, "Syntax error in position %d near '%c'", (int) (ptr - VARDATA(in)), *ptr);
        }
        else if (state == CS_INESC)
        {
            /* escaped character of a quoted value: taken literally */
            state = CS_INVALUE;
        }
        else if (state == CS_IN2ESC)
        {
            /* escaped character of an unquoted value: taken literally */
            state = CS_IN2VALUE;
        }
        else
            elog(ERROR, "Bad parser state: %d at position %d near '%c'", state, (int) (ptr - VARDATA(in)), *ptr);

        ptr++;
    }

    if (state == CS_IN2VALUE)
    {
        /* an unquoted value may be terminated by the end of the input */
        mptr->value = nstrdup(begin, ptr - begin);
        mptr++;
    }
    else if (!(state == CS_WAITDELIM || state == CS_WAITKEY))
        elog(ERROR, "Unexpected end of line");
}
/*
 * IO definitions for tsquery and mtsquery. These types
 * are identical, but when parsing mtsquery the text parser
 * is used and morphology is applied as well.
 * Internal structure:
 * query tree, then string with original value.
 * The query tree is stored in plain (flattened) form. This means that in the
 * array of nodes the right child is always the next item and the left
 * child is at position item+item->left
 * Teodor Sigaev <teodor@sigaev.ru>
 */
#include "postgres.h"
#include <float.h>
#include <ctype.h>
#include "access/gist.h"
#include "access/itup.h"
#include "access/rtree.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "storage/bufpage.h"
#include "ts_cfg.h"
#include "tsvector.h"
#include "crc32.h"
#include "query.h"
#include "rewrite.h"
#include "common.h"
/* PG_FUNCTION_INFO_V1 declarations and prototypes of the Datum-returning functions listed here */
PG_FUNCTION_INFO_V1(tsquery_in);
Datum tsquery_in(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(tsquery_out);
Datum tsquery_out(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(exectsq);
Datum exectsq(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(rexectsq);
Datum rexectsq(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(tsquerytree);
Datum tsquerytree(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(to_tsquery);
Datum to_tsquery(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(to_tsquery_name);
Datum to_tsquery_name(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(to_tsquery_current);
Datum to_tsquery_current(PG_FUNCTION_ARGS);
/* token types returned by gettoken_query(); VAL, OPR and VALTRUE are also stored as node types */
#define END 0 /* end of the query string */
#define ERR 1 /* syntax error */
#define VAL 2 /* operand */
#define OPR 3 /* operator: '!', '&' or '|' */
#define OPEN 4 /* '(' */
#define CLOSE 5 /* ')' */
#define VALTRUE 6 /* for stop words */
#define VALFALSE 7
/* parser's states */
#define WAITOPERAND 1
#define WAITOPERATOR 2
/*
 * node of query tree, also used
 * for storing polish notation in parser
 */
typedef struct NODE
{
int2 weight; /* weight bitmask as produced by get_weight(); 0 for operators */
int2 type; /* VAL, OPR or VALTRUE */
int4 val; /* operator character, or crc32 of the operand text */
int2 distance; /* offset of the operand text in the operand buffer */
int2 length; /* length of the operand text */
struct NODE *next; /* previously pushed node (list is in reverse order) */
} NODE;
/* Working state of the query parser (see queryin()). */
typedef struct
{
char *buf; /* current position in the query string */
int4 state; /* WAITOPERAND or WAITOPERATOR */
int4 count; /* number of currently open parentheses */
/* reverse polish notation in list (for temporary usage) */
NODE *str;
/* number in str */
int4 num;
/* user-friendly operand */
int4 lenop; /* allocated size of op */
int4 sumlen; /* bytes of op in use, including the NUL after each operand */
char *op; /* buffer holding the operand texts */
char *curop; /* next free byte in op */
/* state for value's parser */
TI_IN_STATE valstate;
/* tscfg */
int cfg_id;
} QPRS_STATE;
/*
 * Parse an optional weight suffix ":ABCD" at 'buf'.
 *
 * *weight is set to a bitmask: A = bit 3, B = bit 2, C = bit 1, D = bit 0
 * (letters are case-insensitive), or to 0 when 'buf' does not start with
 * ':'.  Returns a pointer to the first character that is not part of the
 * suffix.
 */
static char *
get_weight(char *buf, int2 *weight)
{
    *weight = 0;

    if (*buf != ':')
        return buf;

    for (buf++; *buf; buf++)
    {
        int         letter = tolower(*buf);

        if (letter == 'a')
            *weight |= 1 << 3;
        else if (letter == 'b')
            *weight |= 1 << 2;
        else if (letter == 'c')
            *weight |= 1 << 1;
        else if (letter == 'd')
            *weight |= 1;
        else
            break;              /* not a weight letter: suffix ends here */
    }

    return buf;
}
/*
 * get token from query string
 *
 * Returns the token type (END, ERR, VAL, OPR, OPEN or CLOSE).
 *   OPR: *val is set to the operator character.
 *   VAL: *strval / *lenval describe the operand text held in
 *        state->valstate, and *weight receives its optional weight mask.
 * Spaces between tokens are skipped.  state->count tracks the nesting
 * depth of parentheses.
 */
static int4
gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
{
while (1)
{
switch (state->state)
{
case WAITOPERAND:
if (*(state->buf) == '!')
{
(state->buf)++;
*val = (int4) '!';
return OPR;
}
else if (*(state->buf) == '(')
{
state->count++;
(state->buf)++;
return OPEN;
} else if ( *(state->buf) == ':' ) {
elog(ERROR,"Error at start of operand");
} else if (*(state->buf) != ' ') {
/* hand the operand over to the tsvector value parser */
state->valstate.prsbuf = state->buf;
state->state = WAITOPERATOR;
if (gettoken_tsvector(&(state->valstate)))
{
*strval = state->valstate.word;
*lenval = state->valstate.curpos - state->valstate.word;
/* continue after the operand and its optional ":weights" suffix */
state->buf = get_weight(state->valstate.prsbuf, weight);
return VAL;
}
else
elog(ERROR, "No operand");
}
break;
case WAITOPERATOR:
if (*(state->buf) == '&' || *(state->buf) == '|')
{
state->state = WAITOPERAND;
*val = (int4) *(state->buf);
(state->buf)++;
return OPR;
}
else if (*(state->buf) == ')')
{
(state->buf)++;
state->count--;
/* more ')' than '(' is an error */
return (state->count < 0) ? ERR : CLOSE;
}
else if (*(state->buf) == '\0')
/* end of input with unclosed parentheses is an error */
return (state->count) ? ERR : END;
else if (*(state->buf) != ' ')
return ERR;
break;
default:
return ERR;
break;
}
/* only reached for a space: skip it */
(state->buf)++;
}
return END;
}
/*
 * Push a new node onto the parser's list, which holds the query in
 * reverse polish notation (newest node first).
 *
 * Raises elog(ERROR) when 'distance' or 'lenval' exceed the limits
 * MAXSTRPOS / MAXSTRLEN.
 */
static void
pushquery(QPRS_STATE * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight)
{
    NODE       *node = (NODE *) palloc(sizeof(NODE));

    node->weight = weight;
    node->type = type;
    node->val = val;

    if (distance >= MAXSTRPOS)
        elog(ERROR, "Value is too big");
    if (lenval >= MAXSTRLEN)
        elog(ERROR, "Operand is too long");

    node->distance = distance;
    node->length = lenval;

    /* link in front of the list */
    node->next = state->str;
    state->str = node;
    state->num++;
}
/*
 * Operand callback used for tsquery parsing: the operand is stored
 * exactly as written.
 *
 * A node carrying the crc32 of the text is pushed, and the text itself is
 * appended, NUL-terminated, to the operand buffer (which is doubled in
 * size until it fits).
 */
static void
pushval_asis(QPRS_STATE * state, int type, char *strval, int lenval, int2 weight)
{
    int4        used;

    if (lenval >= MAXSTRLEN)
        elog(ERROR, "Word is too long");

    pushquery(state, type, crc32_sz((uint8 *) strval, lenval),
              state->curop - state->op, lenval, weight);

    /* number of bytes already stored; unchanged by the reallocation below */
    used = state->curop - state->op;

    while (used + lenval + 1 >= state->lenop)
    {
        state->lenop *= 2;
        state->op = (char *) repalloc((void *) state->op, state->lenop);
        state->curop = state->op + used;
    }

    memcpy((void *) state->curop, (void *) strval, lenval);
    state->curop += lenval;
    *(state->curop) = '\0';
    state->curop++;

    state->sumlen += lenval + 1;
    return;
}
/*
 * This function is used for morph parsing
 *
 * The operand text is run through parsetext_v2() with the configuration
 * state->cfg_id.  Every resulting word is pushed as its own operand and
 * the words are joined with '&'.  If no word is produced, a VALTRUE
 * placeholder is pushed instead.
 */
static void
pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 weight)
{
int4 count = 0;
PRSTEXT prs;
prs.lenwords = 32;
prs.curwords = 0;
prs.pos = 0;
prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
parsetext_v2(findcfg(state->cfg_id), &prs, strval, lenval);
for(count=0;count<prs.curwords;count++) {
pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
pfree( prs.words[count].word );
/* every word after the first is AND-ed with what was pushed before */
if (count)
pushquery(state, OPR, (int4) '&', 0, 0, 0 );
}
pfree(prs.words);
/* XXX */
if ( prs.curwords==0 )
pushval_asis(state, VALTRUE, 0, 0, 0);
}
/* capacity of the operator stack used by makepol() */
#define STACKDEPTH 32
/*
 * make polish notation of query
 *
 * Reads tokens with gettoken_query() and pushes operands (through the
 * 'pushval' callback) and operators (through pushquery()) so that
 * state->str ends up holding the query in polish notation.  Pending
 * operators are kept on a local stack; a parenthesized subexpression is
 * handled by a recursive call that returns at the matching ')'.
 *
 * Returns END on success; a syntax error raises elog(ERROR).
 */
static int4
makepol(QPRS_STATE * state, void (*pushval) (QPRS_STATE *, int, char *, int, int2))
{
int4 val,
type;
int4 lenval;
char *strval;
int4 stack[STACKDEPTH];
int4 lenstack = 0;
int2 weight;
while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
{
switch (type)
{
case VAL:
(*pushval) (state, VAL, strval, lenval, weight);
/* emit the '&' and '!' operators waiting on top of the stack */
while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
stack[lenstack - 1] == (int4) '!'))
{
lenstack--;
pushquery(state, OPR, stack[lenstack], 0, 0, 0);
}
break;
case OPR:
/* a '|' is emitted at once when the stack is non-empty; otherwise the operator is stacked */
if (lenstack && val == (int4) '|')
pushquery(state, OPR, val, 0, 0, 0);
else
{
if (lenstack == STACKDEPTH)
elog(ERROR, "Stack too short");
stack[lenstack] = val;
lenstack++;
}
break;
case OPEN:
if (makepol(state, pushval) == ERR)
return ERR;
/* after the subexpression, emit at most one waiting '&' or '!' */
if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
stack[lenstack - 1] == (int4) '!'))
{
lenstack--;
pushquery(state, OPR, stack[lenstack], 0, 0, 0);
}
break;
case CLOSE:
/* end of this nesting level: flush the remaining operators */
while (lenstack)
{
lenstack--;
pushquery(state, OPR, stack[lenstack], 0, 0, 0);
};
return END;
break;
case ERR:
default:
elog(ERROR, "Syntax error");
return ERR;
}
}
/* end of input: flush the remaining operators */
while (lenstack)
{
lenstack--;
pushquery(state, OPR, stack[lenstack], 0, 0, 0);
};
return END;
}
/* Data needed to test query operands against one tsvector (filled in by exectsq()). */
typedef struct
{
WordEntry *arrb; /* first word entry of the tsvector */
WordEntry *arre; /* one past the last word entry */
char *values; /* string area of the tsvector */
char *operand; /* operand texts of the query */
} CHKVAL;
/*
 * Compare a tsvector word entry with a query operand.
 *
 * Values of different length are ordered by length; values of equal length
 * are compared with strncmp().
 */
static int4
ValCompare(CHKVAL * chkval, WordEntry * ptr, ITEM * item)
{
    if (ptr->len != item->length)
        return (ptr->len > item->length) ? 1 : -1;

    return strncmp(&(chkval->values[ptr->pos]),
                   &(chkval->operand[item->distance]),
                   item->length);
}
/*
 * Check weight info.
 *
 * The position data of a word starts at the short-aligned end of its text:
 * a uint16 counter followed by that many WordEntryPos items.  Returns true
 * if the weight of at least one position is selected in item->weight.
 */
static bool
checkclass_str(CHKVAL * chkval, WordEntry * val, ITEM * item)
{
    char       *posdata = chkval->values + val->pos + SHORTALIGN(val->len);
    uint16      npos = *((uint16 *) posdata);
    WordEntryPos *pos = (WordEntryPos *) (posdata + sizeof(uint16));
    uint16      k;

    for (k = 0; k < npos; k++)
    {
        if (item->weight & (1 << pos[k].weight))
            return true;
    }
    return false;
}
/*
 * Is the operand 'val' present in the tsvector described by 'checkval'?
 *
 * Binary search over the word entries.  When the operand carries a weight
 * mask and the matching word has position data, the weights must match as
 * well.
 */
static bool
checkcondition_str(void *checkval, ITEM * val)
{
    CHKVAL     *chk = (CHKVAL *) checkval;
    WordEntry  *lo = chk->arrb;
    WordEntry  *hi = chk->arre;

    /* Loop invariant: lo <= val < hi */
    while (lo < hi)
    {
        WordEntry  *mid = lo + (hi - lo) / 2;
        int         cmp = ValCompare(chk, mid, val);

        if (cmp < 0)
            lo = mid + 1;
        else if (cmp > 0)
            hi = mid;
        else
        {
            if (val->weight && mid->haspos)
                return checkclass_str(chk, mid, val);
            return true;
        }
    }

    return (false);
}
/*
 * Evaluate the boolean query starting at 'curitem'.
 *
 * Operands are tested with the 'chkcond' callback.  For an operator the
 * right operand is the next item and the left operand is at
 * curitem + curitem->left.  When 'calcnot' is false every '!' subtree
 * counts as true.
 */
bool
TS_execute(ITEM * curitem, void *checkval, bool calcnot, bool (*chkcond) (void *checkval, ITEM * val))
{
    if (curitem->type == VAL)
        return (*chkcond) (checkval, curitem);

    if (curitem->val == (int4) '!')
    {
        if (!calcnot)
            return true;
        return TS_execute(curitem + 1, checkval, calcnot, chkcond) ? false : true;
    }

    if (curitem->val == (int4) '&')
    {
        /* the left operand is evaluated first; right only if left holds */
        if (!TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
            return false;
        return TS_execute(curitem + 1, checkval, calcnot, chkcond);
    }

    /* |-operator: right operand is needed only if the left one fails */
    if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
        return true;
    return TS_execute(curitem + 1, checkval, calcnot, chkcond);
}
/*
 * Boolean operations.
 *
 * rexectsq() is exectsq() with its two arguments swapped.
 */
Datum
rexectsq(PG_FUNCTION_ARGS)
{
    Datum       first = PG_GETARG_DATUM(0);
    Datum       second = PG_GETARG_DATUM(1);

    return DirectFunctionCall2(exectsq, second, first);
}
/*
 * Match a tsvector (argument 0) against a query (argument 1).
 *
 * Returns false when either of them is empty, otherwise the result of
 * evaluating the query with TS_execute() and checkcondition_str().
 */
Datum
exectsq(PG_FUNCTION_ARGS)
{
    tsvector   *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
    QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
    bool        result = false;

    if (val->size && query->size)
    {
        CHKVAL      chkval;

        chkval.arrb = ARRPTR(val);
        chkval.arre = chkval.arrb + val->size;
        chkval.values = STRPTR(val);
        chkval.operand = GETOPERAND(query);

        result = TS_execute(GETQUERY(query), &chkval, true, checkcondition_str);
    }

    PG_FREE_IF_COPY(val, 0);
    PG_FREE_IF_COPY(query, 1);
    PG_RETURN_BOOL(result);
}
/*
 * Find the left operand of every operator in the polish notation array.
 *
 * Walks the subtree rooted at ptr[*pos] and sets 'left' of each item:
 * 0 for operands, 1 for '!', and for binary operators the distance to the
 * item that follows the right subtree.  On return *pos is the index just
 * past the subtree.
 */
static void
findoprnd(ITEM * ptr, int4 *pos)
{
    ITEM       *node = &ptr[*pos];
    int4        start = *pos;

#ifdef BS_DEBUG
    elog(DEBUG3, (ptr[*pos].type == OPR) ?
         "%d  %c" : "%d  %d ", *pos, ptr[*pos].val);
#endif

    (*pos)++;

    if (node->type == VAL || node->type == VALTRUE)
    {
        node->left = 0;
        return;
    }

    if (node->val == (int4) '!')
    {
        node->left = 1;
        findoprnd(ptr, pos);
        return;
    }

    /* binary operator: right subtree first, the left one starts after it */
    findoprnd(ptr, pos);
    node->left = *pos - start;
    findoprnd(ptr, pos);
}
/*
 * input
 *
 * Parse the query string 'buf' into a palloc'd QUERYTYPE.
 *   pushval  callback used to store each operand (pushval_asis or
 *            pushval_morph)
 *   cfg_id   configuration id made available to the callback
 * Raises elog(ERROR) for an empty query.
 */
static QUERYTYPE *
queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id)
{
QPRS_STATE state;
int4 i;
QUERYTYPE *query;
int4 commonlen;
ITEM *ptr;
NODE *tmp;
int4 pos = 0;
#ifdef BS_DEBUG
char pbuf[16384],
*cur;
#endif
/* init state */
state.buf = buf;
state.state = WAITOPERAND;
state.count = 0;
state.num = 0;
state.str = NULL;
state.cfg_id=cfg_id;
/* init value parser's state */
state.valstate.oprisdelim = true;
state.valstate.len = 32;
state.valstate.word = (char *) palloc(state.valstate.len);
/* init list of operand */
state.sumlen = 0;
state.lenop = 64;
state.curop = state.op = (char *) palloc(state.lenop);
*(state.curop) = '\0';
/* parse query & make polish notation (postfix, but in reverse order) */
makepol(&state, pushval);
pfree(state.valstate.word);
if (!state.num)
elog(ERROR, "Empty query");
/* make finish struct */
commonlen = COMPUTESIZE(state.num, state.sumlen);
query = (QUERYTYPE *) palloc(commonlen);
query->len = commonlen;
query->size = state.num;
ptr = GETQUERY(query);
/* set item in polish notation */
/* the list is walked from its head, i.e. from the node pushed last, and each node is freed after copying */
for (i = 0; i < state.num; i++)
{
ptr[i].weight = state.str->weight;
ptr[i].type = state.str->type;
ptr[i].val = state.str->val;
ptr[i].distance = state.str->distance;
ptr[i].length = state.str->length;
tmp = state.str->next;
pfree(state.str);
state.str = tmp;
}
/* set user friendly-operand view */
memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
pfree(state.op);
/* set left operand's position for every operator */
pos = 0;
findoprnd(ptr, &pos);
#ifdef BS_DEBUG
cur = pbuf;
*cur = '\0';
for (i = 0; i < query->size; i++)
{
if (ptr[i].type == OPR)
sprintf(cur, "%c(%d) ", ptr[i].val, ptr[i].left);
else
sprintf(cur, "%d(%s) ", ptr[i].val, GETOPERAND(query) + ptr[i].distance);
cur = strchr(cur, '\0');
}
elog(DEBUG3, "POR: %s", pbuf);
#endif
return query;
}
/*
* in without morphology
*/
Datum
tsquery_in(PG_FUNCTION_ARGS)
{
PG_RETURN_POINTER(queryin((char *) PG_GETARG_POINTER(0), pushval_asis, 0));
}
/*
 * out function
 */
/* State of the infix printer: position in the query and the growing output buffer. */
typedef struct
{
ITEM *curpol; /* current item of the query */
char *buf; /* start of the output buffer */
char *cur; /* current write position in buf */
char *op; /* operand texts of the query */
int4 buflen; /* allocated size of buf */
} INFIX;
/*
 * Make sure the INFIX buffer "inf" has room for "addsize" more bytes plus a
 * terminating NUL, doubling the allocation as often as needed.  inf->cur is
 * re-based onto the (possibly moved) buffer after every repalloc.
 *
 * Both arguments are parenthesized and the body is wrapped in
 * do { } while (0), so the macro expands correctly for any argument
 * expression and acts as one statement (e.g. under an unbraced if/else).
 */
#define RESIZEBUF(inf,addsize) \
do { \
	while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
	{ \
		int4 len = (inf)->cur - (inf)->buf; \
		(inf)->buflen *= 2; \
		(inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
		(inf)->cur = (inf)->buf + len; \
	} \
} while (0)
/*
 * Print the sub-tree that starts at in->curpol in infix (human-readable)
 * form.  Text is appended at in->cur (the buffer grows as needed) and
 * in->curpol is advanced past the printed sub-tree.
 *
 * "first" is true only for the outermost expression: an OR expression is
 * wrapped in "( ... )" unless it is the outermost one.
 */
static void
infix(INFIX * in, bool first)
{
	if (in->curpol->type == VAL)
	{
		char	   *src = in->op + in->curpol->distance;
		int			bit;

		/* room for: every char escaped, two quotes, ':' and four weight letters */
		RESIZEBUF(in, in->curpol->length * 2 + 2 + 5);

		*(in->cur)++ = '\'';
		for (; *src; src++)
		{
			/* a quote inside the operand is escaped with a backslash */
			if (*src == '\'')
				*(in->cur)++ = '\\';
			*(in->cur)++ = *src;
		}
		*(in->cur)++ = '\'';

		if (in->curpol->weight)
		{
			/* weight bits 3..0 are printed as the letters A..D */
			*(in->cur)++ = ':';
			for (bit = 3; bit >= 0; bit--)
				if (in->curpol->weight & (1 << bit))
					*(in->cur)++ = "ABCD"[3 - bit];
		}
		*(in->cur) = '\0';
		in->curpol++;
		return;
	}

	if (in->curpol->val == (int4) '!')
	{
		bool		paren;

		RESIZEBUF(in, 1);
		*(in->cur)++ = '!';
		*(in->cur) = '\0';
		in->curpol++;

		/* an operator under NOT is printed in parentheses */
		paren = (in->curpol->type == OPR);
		if (paren)
		{
			RESIZEBUF(in, 2);
			sprintf(in->cur, "( ");
			in->cur = strchr(in->cur, '\0');
		}
		infix(in, paren);
		if (paren)
		{
			RESIZEBUF(in, 2);
			sprintf(in->cur, " )");
			in->cur = strchr(in->cur, '\0');
		}
		return;
	}

	/* binary operator */
	{
		int4		oper = in->curpol->val;
		bool		paren = (oper == (int4) '|' && !first);
		INFIX		right;

		in->curpol++;
		if (paren)
		{
			RESIZEBUF(in, 2);
			sprintf(in->cur, "( ");
			in->cur = strchr(in->cur, '\0');
		}

		/* the right operand comes first in storage: print it to a scratch buffer */
		right.curpol = in->curpol;
		right.op = in->op;
		right.buflen = 16;
		right.buf = (char *) palloc(sizeof(char) * right.buflen);
		right.cur = right.buf;
		infix(&right, false);

		/* the left operand follows; print it straight into the output */
		in->curpol = right.curpol;
		infix(in, false);

		/* then the operator and the saved right operand */
		RESIZEBUF(in, 3 + (right.cur - right.buf));
		sprintf(in->cur, " %c %s", oper, right.buf);
		in->cur = strchr(in->cur, '\0');
		pfree(right.buf);

		if (paren)
		{
			RESIZEBUF(in, 2);
			sprintf(in->cur, " )");
			in->cur = strchr(in->cur, '\0');
		}
	}
}
/*
 * Output function for tsquery: render the stored query as infix text.
 * A query without items (size == 0) is rendered as the empty string.
 */
Datum
tsquery_out(PG_FUNCTION_ARGS)
{
	QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
	INFIX		out;

	if (query->size == 0)
	{
		char	   *empty = palloc(1);

		empty[0] = '\0';
		PG_RETURN_POINTER(empty);
	}

	out.buflen = 32;
	out.buf = (char *) palloc(sizeof(char) * out.buflen);
	out.cur = out.buf;
	out.cur[0] = '\0';
	out.curpol = GETQUERY(query);
	out.op = GETOPERAND(query);
	infix(&out, true);

	PG_FREE_IF_COPY(query, 0);
	PG_RETURN_POINTER(out.buf);
}
/*
 * Debug function: show, as text, the query with every NOT sub-expression
 * removed (the form that will be executed in non-leaf pages of the index).
 * Returns an empty text for an empty query and "T" when nothing is left
 * after the NOT clean-up.
 */
Datum
tsquerytree(PG_FUNCTION_ARGS)
{
	QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
	text	   *res;
	ITEM	   *cleaned;
	int4		len;

	if (query->size == 0)
	{
		res = (text *) palloc(VARHDRSZ);
		VARATT_SIZEP(res) = VARHDRSZ;
		PG_RETURN_POINTER(res);
	}

	cleaned = clean_NOT_v2(GETQUERY(query), &len);
	if (cleaned)
	{
		INFIX		out;

		out.curpol = cleaned;
		out.buflen = 32;
		out.buf = (char *) palloc(sizeof(char) * out.buflen);
		out.cur = out.buf;
		out.cur[0] = '\0';
		out.op = GETOPERAND(query);
		infix(&out, true);

		/* copy the printed text (without its NUL) into a new text value */
		res = (text *) palloc(out.cur - out.buf + VARHDRSZ);
		VARATT_SIZEP(res) = out.cur - out.buf + VARHDRSZ;
		strncpy(VARDATA(res), out.buf, out.cur - out.buf);
		pfree(cleaned);
	}
	else
	{
		res = (text *) palloc(1 + VARHDRSZ);
		VARATT_SIZEP(res) = 1 + VARHDRSZ;
		*((char *) VARDATA(res)) = 'T';
	}

	PG_FREE_IF_COPY(query, 0);
	PG_RETURN_POINTER(res);
}
/*
 * to_tsquery(cfg_id int4, query text): parse the query with morphology
 * (pushval_morph) under the given configuration id, then drop the fake
 * values with clean_fakeval_v2().  If nothing is left, an empty query
 * (size 0) is returned.
 */
Datum
to_tsquery(PG_FUNCTION_ARGS)
{
	text	   *in = PG_GETARG_TEXT_P(1);
	char	   *querystr = text2char(in);
	QUERYTYPE  *query;
	ITEM	   *cleaned;
	int4		nitems;

	PG_FREE_IF_COPY(in, 1);

	query = queryin(querystr, pushval_morph, PG_GETARG_INT32(0));
	cleaned = clean_fakeval_v2(GETQUERY(query), &nitems);
	if (cleaned)
	{
		/* overwrite the head of the item array with the cleaned items */
		memcpy((void *) GETQUERY(query), (void *) cleaned, nitems * sizeof(ITEM));
		pfree(cleaned);
	}
	else
	{
		query->len = HDRSIZEQT;
		query->size = 0;
	}
	PG_RETURN_POINTER(query);
}
/*
 * to_tsquery(cfg_name text, query text): resolve the configuration name to
 * its id with name2id_cfg() and delegate to to_tsquery().
 */
Datum
to_tsquery_name(PG_FUNCTION_ARGS)
{
	text	   *name = PG_GETARG_TEXT_P(0);
	Datum		res = DirectFunctionCall2(
									  to_tsquery,
									  Int32GetDatum(name2id_cfg(name)),
									  PG_GETARG_DATUM(1)
	);

	/*
	 * name was fetched from argument 0, so that is the slot it must be
	 * compared with; checking slot 1 would pfree it even when it is not a
	 * detoasted copy.
	 */
	PG_FREE_IF_COPY(name, 0);
	PG_RETURN_DATUM(res);
}
/*
 * to_tsquery(query text): delegate to to_tsquery() using the id returned by
 * get_currcfg().
 */
Datum
to_tsquery_current(PG_FUNCTION_ARGS)
{
	Datum		cfg = Int32GetDatum(get_currcfg());
	Datum		res = DirectFunctionCall2(to_tsquery, cfg, PG_GETARG_DATUM(0));

	PG_RETURN_DATUM(res);
}
#ifndef __QUERY_H__
#define __QUERY_H__
/*
#define BS_DEBUG
*/
/*
 * item in polish notation with back link
 * to left operand
 */
typedef struct ITEM
{
/* kind of item; the code walking a query compares it with VAL and OPR */
int8 type;
/* weight restriction, printed as ":ABCD": bit 3 = A, bit 2 = B, bit 1 = C, bit 0 = D */
int8 weight;
/* for a binary operator: offset, in items, from the operator to its left operand */
int2 left;
/* for an operator: the operator character ('!', '&' or '|') */
int4 val;
/* user-friendly value, must correlate with WordEntry */
uint32
unused:1,
/* length of the operand text */
length:11,
/* offset of the operand text from the start of the operand area */
distance:20;
} ITEM;
/*
 *Storage:
 * (len)(size)(array of ITEM)(array of operand in user-friendly form)
 */
typedef struct
{
/* total size of the value in bytes (set from COMPUTESIZE when the query is built) */
int4 len;
/* number of ITEMs stored in data */
int4 size;
/* start of the variable-length part: the ITEM array followed by the operand text */
char data[1];
} QUERYTYPE;
/* size of the fixed QUERYTYPE header: the len and size fields */
#define HDRSIZEQT ( 2*sizeof(int4) )
/* bytes needed for a query with "size" items and "lenofoperand" bytes of operand text */
#define COMPUTESIZE(size,lenofoperand) ( HDRSIZEQT + (size) * sizeof(ITEM) + (lenofoperand) )
/* pointer to the ITEM array of query x (fully parenthesized so it can be indexed or dereferenced directly) */
#define GETQUERY(x) ( (ITEM*)( (char*)(x)+HDRSIZEQT ) )
/* pointer to the operand text of query x, which follows the ITEM array */
#define GETOPERAND(x) ( (char*)GETQUERY(x) + ((QUERYTYPE*)(x))->size * sizeof(ITEM) )
/* true if character x is one of the query operator or parenthesis characters */
#define ISOPERATOR(x) ( (x)=='!' || (x)=='&' || (x)=='|' || (x)=='(' || (x)==')' )
/*
 * Kinds of query tokens/items.  VAL and OPR are the values compared with
 * ITEM.type by the code that walks a query.
 * NOTE(review): END, ERR, OPEN and CLOSE are presumably produced only by the
 * query tokenizer, which is not visible here -- confirm there.
 */
#define END 0
#define ERR 1
#define VAL 2
#define OPR 3
#define OPEN 4
#define CLOSE 5
#define VALTRUE 6 /* for stop words */
#define VALFALSE 7
/*
 * Evaluate the query starting at curitem.  chkcond is called with checkval
 * and an ITEM and returns a bool.
 * NOTE(review): chkcond presumably decides whether a single operand matches,
 * and calcnot presumably controls whether NOT is really evaluated -- the
 * implementation is not visible here, confirm against it.
 */
bool TS_execute(ITEM * curitem, void *checkval,
bool calcnot, bool (*chkcond) (void *checkval, ITEM * val));
#endif
/*
* Relevance (ranking) functions
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include "postgres.h"
#include <math.h>
#include "access/gist.h"
#include "access/itup.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/builtins.h"
#include "fmgr.h"
#include "funcapi.h"
#include "storage/bufpage.h"
#include "executor/spi.h"
#include "commands/trigger.h"
#include "nodes/pg_list.h"
#include "catalog/namespace.h"
#include "utils/array.h"
#include "tsvector.h"
#include "query.h"
#include "common.h"
/* fmgr registration and prototypes of the SQL-callable functions defined in this file */
PG_FUNCTION_INFO_V1(rank);
Datum rank(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(rank_def);
Datum rank_def(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(rank_cd);
Datum rank_cd(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(rank_cd_def);
Datum rank_cd_def(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(get_covers);
Datum get_covers(PG_FUNCTION_ARGS);
/* default weights: used by rank_def() and as the fallback for negative entries of the array given to rank() */
static float weights[]={0.1, 0.2, 0.4, 1.0};
/* weight of the position entry "wep"; relies on a float array named "w" being in scope where the macro is used */
#define wpos(wep) ( w[ ((WordEntryPos*)(wep))->weight ] )
/* normalization method used when the caller passes none (method 0 leaves the rank unchanged) */
#define DEF_NORM_METHOD 0
/*
 * Weight of a word collocation for two words that are "w" positions apart.
 * Beyond 100 positions the weight is a tiny constant; otherwise it falls off
 * with the distance.
 */
static float4
word_distance(int4 w)
{
	return (w > 100)
		? 1e-30
		: 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2));
}
/*
 * Length of the document described by tsvector t: the sum, over all word
 * entries, of the number of stored positions, counting an entry without
 * positions as one.
 */
static int
cnt_length(tsvector * t)
{
	WordEntry  *entry;
	WordEntry  *stop = (WordEntry *) STRPTR(t);
	int			total = 0;

	for (entry = ARRPTR(t); entry < stop; entry++)
	{
		int			npos = POSDATALEN(t, entry);

		total += (npos == 0) ? 1 : npos;
	}
	return total;
}
/*
 * Compare a tsvector word entry with a query operand: shorter sorts first,
 * equal lengths are decided by strncmp over the operand length.
 * eval/qval are the bases of the tsvector's and the query's string areas.
 */
static int4
WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item)
{
	if (ptr->len != item->length)
		return (ptr->len > item->length) ? 1 : -1;

	return strncmp(eval + ptr->pos, qval + item->distance, item->length);
}
/*
 * Binary search in the word entries of tsvector t for the operand of query
 * item "item" (a member of query q).  Returns the matching entry, or NULL
 * if the word is not in t.
 */
static WordEntry *
find_wordentry(tsvector * t, QUERYTYPE * q, ITEM * item)
{
	WordEntry  *lo = ARRPTR(t);
	WordEntry  *hi = (WordEntry *) STRPTR(t);

	/* the search window is [lo, hi) */
	while (lo < hi)
	{
		WordEntry  *mid = lo + (hi - lo) / 2;
		int			cmp = WordECompareITEM(STRPTR(t), GETOPERAND(q), mid, item);

		if (cmp == 0)
			return mid;
		if (cmp < 0)
			lo = mid + 1;
		else
			hi = mid;
	}
	return NULL;
}
/*
 * Stand-in position data for a word entry that has no positions.  The
 * ranking functions overwrite the first element with a uint16 count
 * (lengthof(POSNULL)-1) and treat the following element as the only
 * position, mirroring how they read real position data.
 */
static WordEntryPos POSNULL[]={
{0,0},
{0,MAXENTRYPOS-1}
};
/*
 * Rank for an AND query: every pair of distinct query words found in t
 * contributes a weight built from both position weights and the distance
 * between the positions; contributions are combined as
 * 1 - (1 - res) * (1 - cur).  Returns a negative value if no pair
 * contributed.
 *
 * w is the weight table used through the wpos() macro.
 */
static float
calc_rank_and(float *w, tsvector * t, QUERYTYPE * q)
{
	ITEM	   *item = GETQUERY(q);
	uint16	  **pos;
	float		res = -1.0;
	int			i,
				k,
				l,
				p;

	/* pos[i] is the position data of query item i, or NULL if not found */
	pos = (uint16 **) palloc(sizeof(uint16 *) * q->size);
	memset(pos, 0, sizeof(uint16 **) * q->size);
	*(uint16 *) POSNULL = lengthof(POSNULL) - 1;

	for (i = 0; i < q->size; i++)
	{
		WordEntry  *entry;
		WordEntryPos *post;
		int4		dimt;

		if (item[i].type != VAL)
			continue;
		entry = find_wordentry(t, q, &(item[i]));
		if (!entry)
			continue;

		pos[i] = entry->haspos ? (uint16 *) _POSDATAPTR(t, entry) : (uint16 *) POSNULL;
		dimt = *(uint16 *) (pos[i]);
		post = (WordEntryPos *) (pos[i] + 1);

		/* pair this word with every earlier query word that was found */
		for (k = 0; k < i; k++)
		{
			WordEntryPos *ct;
			int4		lenct;
			bool		nopos;

			if (!pos[k])
				continue;
			lenct = *(uint16 *) (pos[k]);
			ct = (WordEntryPos *) (pos[k] + 1);
			nopos = (pos[i] == (uint16 *) POSNULL || pos[k] == (uint16 *) POSNULL);

			for (l = 0; l < dimt; l++)
			{
				for (p = 0; p < lenct; p++)
				{
					int4		dist = abs(post[l].pos - ct[p].pos);
					float		curw;

					if (dist == 0)
					{
						/* same position only counts when one side has no position data */
						if (!nopos)
							continue;
						dist = MAXENTRYPOS;
					}
					curw = sqrt(wpos(&(post[l])) * wpos(&(ct[p])) * word_distance(dist));
					res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
				}
			}
		}
	}
	pfree(pos);
	return res;
}
/*
 * Rank for a non-AND query: every position of every query word found in t
 * contributes its position weight; contributions are combined as
 * 1 - (1 - res) * (1 - weight).  Returns a negative value if no query word
 * was found.
 *
 * w is the weight table used through the wpos() macro.
 */
static float
calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
{
	ITEM	   *item = GETQUERY(q);
	float		res = -1.0;
	int4		i;

	*(uint16 *) POSNULL = lengthof(POSNULL) - 1;

	for (i = 0; i < q->size; i++)
	{
		WordEntry  *entry;
		WordEntryPos *post;
		int4		dimt,
					j;

		if (item[i].type != VAL)
			continue;
		entry = find_wordentry(t, q, &(item[i]));
		if (!entry)
			continue;

		if (entry->haspos)
		{
			dimt = POSDATALEN(t, entry);
			post = POSDATAPTR(t, entry);
		}
		else
		{
			/* no positions stored: use the stand-in position */
			dimt = *(uint16 *) POSNULL;
			post = POSNULL + 1;
		}

		for (j = 0; j < dimt; j++)
		{
			float		posw = wpos(&(post[j]));

			res = (res < 0) ? posw : 1.0 - (1.0 - res) * (1.0 - posw);
		}
	}
	return res;
}
/*
 * Rank tsvector t against query q with weight table w.
 *
 * An AND at the top of the query selects calc_rank_and(), anything else
 * calc_rank_or().  A negative result (nothing matched) becomes 1e-20.
 * The result is then normalized according to "method":
 *	 0 - no normalization
 *	 1 - divide by the logarithm of the document length
 *	 2 - divide by the document length
 * Any other method raises an error.  Returns 0 for an empty tsvector or
 * an empty query.
 */
static float
calc_rank(float *w, tsvector * t, QUERYTYPE * q, int4 method)
{
	ITEM	   *item = GETQUERY(q);
	float		res = 0.0;
	int			doclen;

	if (!t->size || !q->size)
		return 0.0;

	res = (item->type != VAL && item->val == (int4) '&') ?
		calc_rank_and(w, t, q) : calc_rank_or(w, t, q);

	if (res < 0)
		res = 1e-20;

	switch (method)
	{
		case 0:
			break;
		case 1:
			/*
			 * log(1) is 0, so a one-position document would divide by zero;
			 * leave the rank unnormalized in that case.
			 */
			doclen = cnt_length(t);
			if (doclen > 1)
				res /= log((float) doclen);
			break;
		case 2:
			res /= (float) cnt_length(t);
			break;
		default:
			elog(ERROR, "Unknown normalization method: %d", method);
	}
	return res;
}
Datum
rank(PG_FUNCTION_ARGS) {
ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
int method=DEF_NORM_METHOD;
float res=0.0;
float ws[ lengthof(weights) ];
int i;
if ( ARR_NDIM(win) != 1 )
elog(ERROR,"Array of weight is not one dimentional");
if ( ARRNELEMS(win) < lengthof(weights) )
elog(ERROR,"Array of weight is too short");
for(i=0;i<lengthof(weights);i++) {
ws[ i ] = ( ((float4*)ARR_DATA_PTR(win))[i] >= 0 ) ? ((float4*)ARR_DATA_PTR(win))[i] : weights[i];
if ( ws[ i ] > 1.0 )
elog(ERROR,"Weight out of range");
}
if ( PG_NARGS() == 4 )
method=PG_GETARG_INT32(3);
res=calc_rank(ws, txt, query, method);
PG_FREE_IF_COPY(win, 0);
PG_FREE_IF_COPY(txt, 1);
PG_FREE_IF_COPY(query, 2);
PG_RETURN_FLOAT4(res);
}
/*
 * rank(tsvector, tsquery [, method int4]): rank with the default weights.
 */
Datum
rank_def(PG_FUNCTION_ARGS)
{
	tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
	QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
	int			method = (PG_NARGS() == 3) ? PG_GETARG_INT32(2) : DEF_NORM_METHOD;
	float		res = calc_rank(weights, txt, query, method);

	PG_FREE_IF_COPY(txt, 0);
	PG_FREE_IF_COPY(query, 1);
	PG_RETURN_FLOAT4(res);
}
/* One occurrence of a query word in the document. */
typedef struct {
/* the query item whose word occurs here */
ITEM *item;
/* word position of the occurrence in the document */
int32 pos;
} DocRepresentation;
static int
compareDocR(const void *a, const void *b) {
if ( ((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos )
return 1;
return ( ((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos ) ? 1 : -1;
}
/* A slice of a document representation, handed to checkcondition_DR() as its checkval. */
typedef struct {
/* first entry of the slice */
DocRepresentation *doc;
/* number of entries in the slice */
int len;
} ChkDocR;
/*
 * Condition callback for TS_execute(): true if query item "val" occurs in
 * the document slice described by checkval (a ChkDocR).
 */
static bool
checkcondition_DR(void *checkval, ITEM * val)
{
	ChkDocR    *chk = (ChkDocR *) checkval;
	int			i;

	for (i = 0; i < chk->len; i++)
	{
		if (chk->doc[i].item == val)
			return true;
	}
	return false;
}
/*
 * Look for the next cover: a run of entries of the position-sorted document
 * representation doc[0..len-1], starting no earlier than index *pos, on
 * which the query evaluates to true.
 *
 * On success returns true with *p and *q set to the first and last word
 * position of the cover, and *pos moved forward so that the next call
 * continues the search.  *q is read on entry (to recognize a repeated
 * result), so the caller must initialize it before the first call.
 * Returns false when no further cover is found.
 */
static bool
Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *q) {
int i;
/* f will point at the left-most entry of the candidate cover; its initial value is only a placeholder */
DocRepresentation *ptr,*f=(DocRepresentation*)0xffffffff;
ITEM *item=GETQUERY(query);
/* index in doc of the entry that defines the right edge */
int lastpos=*pos;
/* right edge passed in by the caller, kept to detect a repeated result */
int oldq=*q;
*p=0x7fffffff;
*q=0;
/*
 * Pass 1: for each operand of the query find its first occurrence at or
 * after doc[*pos]; the right edge *q is the largest position among those
 * first occurrences.
 */
for(i=0; i<query->size; i++) {
if ( item->type != VAL ) {
item++;
continue;
}
ptr = doc + *pos;
while(ptr-doc<len) {
if ( ptr->item == item ) {
if ( ptr->pos > *q ) {
*q = ptr->pos;
lastpos= ptr - doc;
}
break;
}
ptr++;
}
item++;
}
/* no operand occurs from *pos on */
if (*q==0 )
return false;
if (*q==oldq) { /* already check this pos */
(*pos)++;
return Cover(doc, len, query, pos,p,q);
}
/*
 * Pass 2: for each operand find its nearest occurrence scanning backwards
 * from the right edge down to doc[*pos]; the left edge *p is the smallest
 * position among them and f the entry holding it.
 */
item=GETQUERY(query);
for(i=0; i<query->size; i++) {
if ( item->type != VAL ) {
item++;
continue;
}
ptr = doc + lastpos;
while(ptr>=doc+*pos) {
if ( ptr->item == item ) {
if ( ptr->pos < *p ) {
*p = ptr->pos;
f=ptr;
}
break;
}
ptr--;
}
item++;
}
if ( *p<=*q ) {
/* evaluate the query on the entries f .. doc[lastpos] only */
ChkDocR ch = { f, (doc + lastpos)-f+1 };
/* the next search starts right after the left edge */
*pos = f-doc+1;
if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_DR) ) {
/*elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q);*/
return true;
} else
return Cover(doc, len, query, pos,p,q);
}
return false;
}
/*
 * Build the document representation for a query: one entry per stored
 * position of every query word found in txt, sorted by position.
 * The number of entries is returned in *doclen.  Returns NULL (with
 * *doclen == 0) if no query word occurs in txt.
 */
static DocRepresentation *
get_docrep(tsvector * txt, QUERYTYPE * query, int *doclen)
{
	ITEM	   *item = GETQUERY(query);
	int			alloced = query->size * 4;
	int			used = 0;
	int4		i;
	DocRepresentation *doc;

	*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
	doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * alloced);

	for (i = 0; i < query->size; i++)
	{
		WordEntry  *entry;
		WordEntryPos *post;
		int4		dimt,
					j;

		if (item[i].type != VAL)
			continue;
		entry = find_wordentry(txt, query, &(item[i]));
		if (!entry)
			continue;

		if (entry->haspos)
		{
			dimt = POSDATALEN(txt, entry);
			post = POSDATAPTR(txt, entry);
		}
		else
		{
			/* no positions stored: use the stand-in position */
			dimt = *(uint16 *) POSNULL;
			post = POSNULL + 1;
		}

		/* grow the array until the new positions fit */
		while (used + dimt >= alloced)
		{
			alloced *= 2;
			doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * alloced);
		}

		for (j = 0; j < dimt; j++)
		{
			doc[used].item = &(item[i]);
			doc[used].pos = post[j].pos;
			used++;
		}
	}

	*doclen = used;
	if (used == 0)
	{
		pfree(doc);
		return NULL;
	}
	if (used > 1)
		qsort((void *) doc, used, sizeof(DocRepresentation), compareDocR);
	return doc;
}
Datum
rank_cd(PG_FUNCTION_ARGS) {
int K = PG_GETARG_INT32(0);
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
int method=DEF_NORM_METHOD;
DocRepresentation *doc;
float res=0.0;
int p=0,q=0,len,cur;
doc = get_docrep(txt, query, &len);
if ( !doc ) {
PG_FREE_IF_COPY(txt, 1);
PG_FREE_IF_COPY(query, 2);
PG_RETURN_FLOAT4(0.0);
}
cur=0;
if (K<=0)
K=4;
while( Cover(doc, len, query, &cur, &p, &q) )
res += ( q-p+1 > K ) ? ((float)K)/((float)(q-p+1)) : 1.0;
if ( PG_NARGS() == 4 )
method=PG_GETARG_INT32(3);
switch(method) {
case 0: break;
case 1: res /= log((float)cnt_length(txt)); break;
case 2: res /= (float)cnt_length(txt); break;
default:
elog(ERROR,"Unknown normalization method: %d",method);
}
pfree(doc);
PG_FREE_IF_COPY(txt, 1);
PG_FREE_IF_COPY(query, 2);
PG_RETURN_FLOAT4(res);
}
/*
 * rank_cd(tsvector, tsquery [, method int4]): rank_cd() with K = -1, which
 * makes rank_cd() use its default K.
 */
Datum
rank_cd_def(PG_FUNCTION_ARGS)
{
	Datum		method = (PG_NARGS() == 3) ? PG_GETARG_DATUM(2) : Int32GetDatum(DEF_NORM_METHOD);

	PG_RETURN_DATUM(DirectFunctionCall4(rank_cd,
										Int32GetDatum(-1),
										PG_GETARG_DATUM(0),
										PG_GETARG_DATUM(1),
										method));
}
/**************debug*************/
/* One word occurrence of the document, used by get_covers() to print the text with cover marks. */
typedef struct {
/* the word text inside the tsvector (not NUL-terminated; see len) */
char *w;
/* length of the word text */
int2 len;
/* position of this occurrence in the document */
int2 pos;
/* number of the cover that starts at this word, 0 if none */
int2 start;
/* number of the cover that ends at this word, 0 if none */
int2 finish;
} DocWord;
static int
compareDocWord(const void *a, const void *b) {
if ( ((DocWord *) a)->pos == ((DocWord *) b)->pos )
return 1;
return ( ((DocWord *) a)->pos > ((DocWord *) b)->pos ) ? 1 : -1;
}
Datum
get_covers(PG_FUNCTION_ARGS) {
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
WordEntry *pptr=ARRPTR(txt);
int i,dlen=0,j,cur=0,len=0,rlen;
DocWord *dw,*dwptr;
text *out;
char *cptr;
DocRepresentation *doc;
int pos=0,p,q,olddwpos=0;
int ncover=1;
doc = get_docrep(txt, query, &rlen);
if ( !doc ) {
out=palloc(VARHDRSZ);
VARATT_SIZEP(out) = VARHDRSZ;
PG_FREE_IF_COPY(txt,0);
PG_FREE_IF_COPY(query,1);
PG_RETURN_POINTER(out);
}
for(i=0;i<txt->size;i++) {
if (!pptr[i].haspos)
elog(ERROR,"No pos info");
dlen += POSDATALEN(txt,&(pptr[i]));
}
dwptr=dw=palloc(sizeof(DocWord)*dlen);
memset(dw,0,sizeof(DocWord)*dlen);
for(i=0;i<txt->size;i++) {
WordEntryPos *posdata = POSDATAPTR(txt,&(pptr[i]));
for(j=0;j<POSDATALEN(txt,&(pptr[i]));j++) {
dw[cur].w=STRPTR(txt)+pptr[i].pos;
dw[cur].len=pptr[i].len;
dw[cur].pos=posdata[j].pos;
cur++;
}
len+=(pptr[i].len + 1) * (int)POSDATALEN(txt,&(pptr[i]));
}
qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
while( Cover(doc, rlen, query, &pos, &p, &q) ) {
dwptr=dw+olddwpos;
while(dwptr->pos < p && dwptr-dw<dlen)
dwptr++;
olddwpos=dwptr-dw;
dwptr->start=ncover;
while(dwptr->pos < q+1 && dwptr-dw<dlen)
dwptr++;
(dwptr-1)->finish=ncover;
len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
ncover++;
}
out=palloc(VARHDRSZ+len);
cptr=((char*)out)+VARHDRSZ;
dwptr=dw;
while( dwptr-dw < dlen) {
if ( dwptr->start ) {
sprintf(cptr,"{%d ",dwptr->start);
cptr=strchr(cptr,'\0');
}
memcpy(cptr,dwptr->w,dwptr->len);
cptr+=dwptr->len;
*cptr=' ';
cptr++;
if ( dwptr->finish ) {
sprintf(cptr,"}%d ",dwptr->finish);
cptr=strchr(cptr,'\0');
}
dwptr++;
}
VARATT_SIZEP(out) = cptr - ((char*)out);
pfree(dw);
pfree(doc);
PG_FREE_IF_COPY(txt,0);
PG_FREE_IF_COPY(query,1);
PG_RETURN_POINTER(out);
}
/*
* Rewrite routines of query tree
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include "postgres.h"
#include <float.h>
#include "access/gist.h"
#include "access/itup.h"
#include "access/rtree.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "storage/bufpage.h"
#include "query.h"
#include "rewrite.h"
/* Node of the query tree built by maketree() from the polish-notation item array. */
typedef struct NODE
{
/* left operand; NULL for values and for the unary '!' operator */
struct NODE *left;
/* right operand (the only operand of '!'); NULL for values */
struct NODE *right;
/* the query item this node stands for (points into the item array, not a copy) */
ITEM *valnode;
} NODE;
/*
 * Build a tree of palloc'd NODEs from the plain (polish notation) form of a
 * query that starts at "in".
 */
static NODE *
maketree(ITEM * in)
{
	NODE	   *node = (NODE *) palloc(sizeof(NODE));

	node->valnode = in;
	node->left = NULL;
	node->right = NULL;

	if (in->type != OPR)
		return node;

	/* the right operand immediately follows its operator */
	node->right = maketree(in + 1);

	/* '!' is unary; other operators reach their left operand through the back link */
	if (in->val != (int4) '!')
		node->left = maketree(in + in->left);

	return node;
}
/* Output state used while a NODE tree is flattened back into an ITEM array. */
typedef struct
{
/* the palloc'd ITEM array being filled */
ITEM *ptr;
/* number of ITEMs allocated in ptr */
int4 len;
/* number of ITEMs written so far (index of the next free slot) */
int4 cur;
} PLAINTREE;
/*
 * Append the sub-tree rooted at "node" to the plain item array in "state"
 * (operator, then right operand, then left operand), recomputing the
 * operators' left links, and free the tree nodes along the way.
 */
static void
plainnode(PLAINTREE * state, NODE * node)
{
	int4		self;

	/* double the array when it is full */
	if (state->cur == state->len)
	{
		state->len *= 2;
		state->ptr = (ITEM *) repalloc((void *) state->ptr, state->len * sizeof(ITEM));
	}

	self = state->cur;
	memcpy((void *) &(state->ptr[self]), (void *) node->valnode, sizeof(ITEM));
	state->cur++;

	if (node->valnode->type != VAL)
	{
		if (node->valnode->val == (int4) '!')
		{
			state->ptr[self].left = 1;
			plainnode(state, node->right);
		}
		else
		{
			plainnode(state, node->right);
			/* the left operand starts right after the whole right sub-tree */
			state->ptr[self].left = state->cur - self;
			plainnode(state, node->left);
		}
	}
	pfree(node);
}
/*
 * Convert the tree rooted at "root" back to the plain item-array form.
 * Returns the palloc'd array with its item count in *len; returns NULL
 * with *len == 0 if root is NULL or is neither a value nor an operator.
 * The tree nodes are freed by the conversion.
 */
static ITEM *
plaintree(NODE * root, int4 *len)
{
	PLAINTREE	pl;

	pl.cur = 0;
	pl.len = 16;
	pl.ptr = NULL;

	if (root && (root->valnode->type == VAL || root->valnode->type == OPR))
	{
		pl.ptr = (ITEM *) palloc(pl.len * sizeof(ITEM));
		plainnode(&pl, root);
	}

	*len = pl.cur;
	return pl.ptr;
}
/*
 * Free the tree rooted at "node", children first.  A NULL node is accepted.
 */
static void
freetree(NODE * node)
{
	if (node == NULL)
		return;

	freetree(node->left);
	freetree(node->right);
	pfree(node);
}
/*
 * Remove every '!' sub-expression from the tree.
 * This is useful for debugging; otherwise the resulting view is the one
 * used when searching in the index, where operator ! always returns TRUE.
 *
 * Returns the cleaned tree, or NULL if nothing is left: an OR disappears
 * as soon as one side disappears, an AND is replaced by whichever side
 * survives.
 */
static NODE *
clean_NOT_intree(NODE * node)
{
	NODE	   *kept;

	if (node->valnode->type == VAL)
		return node;

	if (node->valnode->val == (int4) '!')
	{
		freetree(node);
		return NULL;
	}

	if (node->valnode->val == (int4) '|')
	{
		/* the right side is only cleaned if the left side survived */
		node->left = clean_NOT_intree(node->left);
		if (node->left != NULL)
			node->right = clean_NOT_intree(node->right);

		if (node->left == NULL || node->right == NULL)
		{
			freetree(node);
			return NULL;
		}
		return node;
	}

	/* operator & */
	node->left = clean_NOT_intree(node->left);
	node->right = clean_NOT_intree(node->right);
	if (node->left != NULL && node->right != NULL)
		return node;

	/* at most one side is left; it replaces the operator (NULL if none) */
	kept = (node->left != NULL) ? node->left : node->right;
	pfree(node);
	return kept;
}
/*
 * Return a palloc'd plain copy of the query at ptr with every '!'
 * sub-expression removed; *len receives the number of items.  Returns NULL
 * if nothing is left.
 */
ITEM *
clean_NOT_v2(ITEM * ptr, int4 *len)
{
	NODE	   *cleaned = clean_NOT_intree(maketree(ptr));

	return plaintree(cleaned, len);
}
/*
 * Result reported by clean_fakeval_intree() for a sub-tree that was removed:
 * V_TRUE / V_FALSE is the constant it reduced to; V_UNKNOWN means the
 * sub-tree was kept.
 */
#define V_UNKNOWN 0
#define V_TRUE 1
#define V_FALSE 2
/*
 * Clean the query tree from values that are always present in the text
 * (stop words, stored as VALTRUE items).
 *
 * Returns the cleaned sub-tree.  If the whole sub-tree reduces to a
 * constant, it is freed, NULL is returned and *result is set to V_TRUE or
 * V_FALSE; otherwise *result is left untouched.
 */
static NODE *
clean_fakeval_intree(NODE * node, char *result)
{
	char		lresult = V_UNKNOWN,
				rresult = V_UNKNOWN;
	char		absorbing,
				neutral;
	NODE	   *kept;

	if (node->valnode->type == VAL)
		return node;

	if (node->valnode->type == VALTRUE)
	{
		pfree(node);
		*result = V_TRUE;
		return NULL;
	}

	if (node->valnode->val == (int4) '!')
	{
		node->right = clean_fakeval_intree(node->right, &rresult);
		if (node->right)
			return node;

		/* the operand became a constant: NOT inverts it */
		*result = (rresult == V_TRUE) ? V_FALSE : V_TRUE;
		freetree(node);
		return NULL;
	}

	/*
	 * Binary operator.  For '|' a TRUE side decides the result and a FALSE
	 * side can be dropped; for '&' it is the other way round.
	 */
	if (node->valnode->val == (int4) '|')
	{
		absorbing = V_TRUE;
		neutral = V_FALSE;
	}
	else
	{
		absorbing = V_FALSE;
		neutral = V_TRUE;
	}

	node->left = clean_fakeval_intree(node->left, &lresult);
	node->right = clean_fakeval_intree(node->right, &rresult);

	if (lresult == absorbing || rresult == absorbing)
	{
		freetree(node);
		*result = absorbing;
		return NULL;
	}
	if (lresult == neutral && rresult == neutral)
	{
		freetree(node);
		*result = neutral;
		return NULL;
	}

	/* one side may have vanished: the other one replaces the operator */
	kept = node;
	if (lresult == neutral)
	{
		kept = node->right;
		pfree(node);
	}
	else if (rresult == neutral)
	{
		kept = node->left;
		pfree(node);
	}
	return kept;
}
/*
 * Return a palloc'd plain copy of the query at ptr with the stop-word
 * values removed; *len receives the number of items.  If the whole query
 * reduces to a constant, a NOTICE is issued, *len is set to 0 and NULL is
 * returned.
 */
ITEM *
clean_fakeval_v2(ITEM * ptr, int4 *len)
{
	char		result = V_UNKNOWN;
	NODE	   *cleaned = clean_fakeval_intree(maketree(ptr), &result);

	if (result == V_UNKNOWN)
		return plaintree(cleaned, len);

	elog(NOTICE, "Query contains only stopword(s) or doesn't contain lexem(s), ignored");
	*len = 0;
	return NULL;
}
#ifndef __REWRITE_H__
#define __REWRITE_H__
/* plain copy of the query at ptr without its '!' sub-expressions; item count in *len, NULL if nothing is left */
ITEM *clean_NOT_v2(ITEM * ptr, int4 *len);
/* plain copy of the query at ptr without stop-word values; item count in *len, NULL if the query reduces to a constant */
ITEM *clean_fakeval_v2(ITEM * ptr, int4 *len);
#endif
/*
* simple but fast map from str to Oid
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include "postgres.h"
#include "snmap.h"
#include "common.h"
static int
compareSNMapEntry(const void *a, const void *b) {
return strcmp( ((SNMapEntry*)a)->key, ((SNMapEntry*)b)->key );
}
/*
 * Add the pair (key, value) to the map.  The key is copied with strdup();
 * the list is grown with realloc() when full and kept sorted by key so that
 * findSNMap() can use bsearch().  Raises an error when out of memory.
 */
void
addSNMap(SNMap * map, char *key, Oid value)
{
	SNMapEntry *slot;

	if (map->len >= map->reallen)
	{
		/* start with 16 slots, then double */
		int			newcap = (map->reallen) ? 2 * map->reallen : 16;
		SNMapEntry *grown = (SNMapEntry *) realloc(map->list, sizeof(SNMapEntry) * newcap);

		if (!grown)
			elog(ERROR, "No memory");
		map->reallen = newcap;
		map->list = grown;
	}

	slot = &(map->list[map->len]);
	slot->key = strdup(key);
	if (!slot->key)
		elog(ERROR, "No memory");
	slot->value = value;
	map->len++;

	if (map->len > 1)
		qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);
}
/* Same as addSNMap(), with the key given as a text value. */
void
addSNMap_t(SNMap * map, text *key, Oid value)
{
	char	   *ckey = text2char(key);

	addSNMap(map, ckey, value);
	pfree(ckey);
}
/*
 * Look up key in the map.  Returns the stored value, or 0 if the map is
 * empty or the key is not present.
 */
Oid
findSNMap(SNMap * map, char *key)
{
	SNMapEntry	probe;
	SNMapEntry *hit;

	if (map->len == 0 || !map->list)
		return 0;

	probe.key = key;
	probe.value = 0;
	hit = (SNMapEntry *) bsearch(&probe, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry);

	return (hit) ? hit->value : 0;
}
/*
 * Same as findSNMap(), with the key given as a text value.
 * The result is held in an Oid (not an int) so the value is passed through
 * without a round trip through a signed type.
 */
Oid
findSNMap_t(SNMap * map, text *key)
{
	char	   *k = text2char(key);
	Oid			res;

	res = findSNMap(map, k);
	pfree(k);
	return res;
}
/*
 * Release all keys and the list of the map, then reset the map structure
 * to all zeroes (an empty map).
 */
void
freeSNMap(SNMap * map)
{
	if (map->list)
	{
		SNMapEntry *entry = map->list;

		for (; map->len; entry++, map->len--)
		{
			if (entry->key)
				free(entry->key);
		}
		free(map->list);
	}
	memset(map, 0, sizeof(SNMap));
}
#ifndef __SNMAP_H__
#define __SNMAP_H__
#include "postgres.h"
/* One (key, value) pair of the map. */
typedef struct {
/* key string, a private copy made with strdup() by addSNMap() */
char *key;
/* value stored for the key */
Oid value;
} SNMapEntry;
/* Map from string to Oid, kept as an array sorted by key. */
typedef struct {
/* number of entries in use */
int len;
/* number of entries allocated in list */
int reallen;
/* the entries, grown with realloc(); NULL in an empty (zeroed) map */
SNMapEntry *list;
} SNMap;
/* add a pair; the key is copied */
void addSNMap( SNMap *map, char *key, Oid value );
/* add a pair with the key given as text */
void addSNMap_t( SNMap *map, text *key, Oid value );
/* value stored for key, or 0 if the key is not present */
Oid findSNMap( SNMap *map, char *key );
/* as findSNMap, with the key given as text */
Oid findSNMap_t( SNMap *map, text *key );
/* free all entries and reset the map to empty */
void freeSNMap( SNMap *map );
#endif
#include "header.h"
/*
 * Allocate a stemmer environment with S_size string slots, I_size integers
 * and B_size symbols of flag storage (each array only if its size is
 * non-zero).  The current-word buffer p and every string slot are created
 * with create_s().
 *
 * Returns NULL if any calloc() fails; whatever had been allocated up to
 * that point is released first.
 */
extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
{
    int i;
    struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));

    if (z == NULL) return NULL;

    /* get all raw storage first, so that a failure only needs free() */
    if (S_size)
    {
        z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
        if (z->S == NULL) goto fail;
    }
    if (I_size)
    {
        z->I = (int *) calloc(I_size, sizeof(int));
        if (z->I == NULL) goto fail;
    }
    if (B_size)
    {
        z->B = (symbol *) calloc(B_size, sizeof(symbol));
        if (z->B == NULL) goto fail;
    }

    z->p = create_s();
    for (i = 0; i < S_size; i++) z->S[i] = create_s();

    z->S_size = S_size;
    z->I_size = I_size;
    z->B_size = B_size;
    return z;

fail:
    /* members that were never allocated are still NULL from calloc(); free(NULL) is a no-op */
    free(z->S);
    free(z->I);
    free(z->B);
    free(z);
    return NULL;
}
/*
 * Release an environment created by SN_create_env(): every string slot and
 * the slot array, the integer and flag arrays, the current-word buffer and
 * finally the environment itself.
 */
extern void SN_close_env(struct SN_env * z)
{
    if (z->S_size)
    {
        int i = 0;

        while (i < z->S_size) lose_s(z->S[i++]);
        free(z->S);
    }
    if (z->I_size) free(z->I);
    if (z->B_size) free(z->B);
    if (z->p) lose_s(z->p);
    free(z);
}
/*
 * Make the "size" symbols at s the current word of z: the range from 0 up
 * to z->l is replaced through replace_s(), then the cursor z->c is reset
 * to 0.
 */
extern void SN_set_current(struct SN_env * z, int size, const symbol * s)
{
    const int old_limit = z->l;

    replace_s(z, 0, old_limit, size, s);
    z->c = 0;
}
/* The character type the stemmers work on. */
typedef unsigned char symbol;
/* Or replace 'char' above with 'short' for 16 bit characters.
More precisely, replace 'char' with whatever type guarantees the
character width you need. Note however that sizeof(symbol) should divide
HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
there is an alignment problem. In the unlikely event of a problem here,
consult Martin Porter.
*/
/* Working environment of a stemmer, created by SN_create_env(). */
struct SN_env {
/* the current word; a string obtained from create_s() */
symbol * p;
/* c is reset to 0 and l is passed as the end of the replaced range by SN_set_current().
   NOTE(review): presumably c/a/l/lb/bra/ket are cursor, limit and slice marks of the stemmer -- confirm against the Snowball runtime. */
int c; int a; int l; int lb; int bra; int ket;
/* number of elements in S, I and B respectively */
int S_size; int I_size; int B_size;
/* S_size strings, each obtained from create_s() */
symbol * * S;
/* I_size integers, zero-initialized */
int * I;
/* B_size symbols, zero-initialized */
symbol * B;
};
/* allocate an environment with the given numbers of strings, integers and flag symbols */
extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
/* free an environment and everything it owns */
extern void SN_close_env(struct SN_env * z);
/* load the size symbols at s as the current word and reset the cursor */
extern void SN_set_current(struct SN_env * z, int size, const symbol * s);
/* This file was generated automatically by the Snowball to ANSI C compiler */
#include "header.h"
/*
 * Declarations of the English stemmer.  english_stem, english_create_env
 * and english_close_env are the exported functions; the static r_*
 * routines are internal to this file.
 */
extern int english_stem(struct SN_env * z);
static int r_exception2(struct SN_env * z);
static int r_exception1(struct SN_env * z);
static int r_Step_5(struct SN_env * z);
static int r_Step_4(struct SN_env * z);
static int r_Step_3(struct SN_env * z);
static int r_Step_2(struct SN_env * z);
static int r_Step_1c(struct SN_env * z);
static int r_Step_1b(struct SN_env * z);
static int r_Step_1a(struct SN_env * z);
static int r_R2(struct SN_env * z);
static int r_R1(struct SN_env * z);
static int r_shortv(struct SN_env * z);
static int r_mark_regions(struct SN_env * z);
static int r_postlude(struct SN_env * z);
static int r_prelude(struct SN_env * z);
extern struct SN_env * english_create_env(void);
extern void english_close_env(struct SN_env * z);
/*
 * Table a_0: the single string "gener".  In each entry the first field is
 * the string length and the second the string itself.
 * NOTE(review): the remaining three fields are defined by struct among in
 * header.h, which is not visible here -- confirm their meaning there.
 */
static symbol s_0_0[5] = { 'g', 'e', 'n', 'e', 'r' };
static struct among a_0[1] =
{
/* 0 */ { 5, s_0_0, -1, -1, 0}
};
/*
 * Table a_1: the strings "ied", "s", "ies", "sses", "ss" and "us".  In each
 * entry the first field is the string length and the second the string.
 * NOTE(review): the remaining three fields are defined by struct among in
 * header.h, which is not visible here -- confirm their meaning there.
 */
static symbol s_1_0[3] = { 'i', 'e', 'd' };
static symbol s_1_1[1] = { 's' };
static symbol s_1_2[3] = { 'i', 'e', 's' };
static symbol s_1_3[4] = { 's', 's', 'e', 's' };
static symbol s_1_4[2] = { 's', 's' };
static symbol s_1_5[2] = { 'u', 's' };
static struct among a_1[6] =
{
/* 0 */ { 3, s_1_0, -1, 2, 0},
/* 1 */ { 1, s_1_1, -1, 3, 0},
/* 2 */ { 3, s_1_2, 1, 2, 0},
/* 3 */ { 4, s_1_3, 1, 1, 0},
/* 4 */ { 2, s_1_4, 1, -1, 0},
/* 5 */ { 2, s_1_5, 1, -1, 0}
};
/*
 * Table a_2: the empty string (entry 0, length 0 and no string) followed by
 * twelve two-letter strings ("bb", "dd", "ff", "gg", "bl", "mm", "nn",
 * "pp", "rr", "at", "tt", "iz").  In each entry the first field is the
 * string length and the second the string.
 * NOTE(review): the remaining three fields are defined by struct among in
 * header.h, which is not visible here -- confirm their meaning there.
 */
static symbol s_2_1[2] = { 'b', 'b' };
static symbol s_2_2[2] = { 'd', 'd' };
static symbol s_2_3[2] = { 'f', 'f' };
static symbol s_2_4[2] = { 'g', 'g' };
static symbol s_2_5[2] = { 'b', 'l' };
static symbol s_2_6[2] = { 'm', 'm' };
static symbol s_2_7[2] = { 'n', 'n' };
static symbol s_2_8[2] = { 'p', 'p' };
static symbol s_2_9[2] = { 'r', 'r' };
static symbol s_2_10[2] = { 'a', 't' };
static symbol s_2_11[2] = { 't', 't' };
static symbol s_2_12[2] = { 'i', 'z' };
static struct among a_2[13] =
{
/* 0 */ { 0, 0, -1, 3, 0},
/* 1 */ { 2, s_2_1, 0, 2, 0},
/* 2 */ { 2, s_2_2, 0, 2, 0},
/* 3 */ { 2, s_2_3, 0, 2, 0},
/* 4 */ { 2, s_2_4, 0, 2, 0},
/* 5 */ { 2, s_2_5, 0, 1, 0},
/* 6 */ { 2, s_2_6, 0, 2, 0},
/* 7 */ { 2, s_2_7, 0, 2, 0},
/* 8 */ { 2, s_2_8, 0, 2, 0},
/* 9 */ { 2, s_2_9, 0, 2, 0},
/* 10 */ { 2, s_2_10, 0, 1, 0},
/* 11 */ { 2, s_2_11, 0, 2, 0},
/* 12 */ { 2, s_2_12, 0, 1, 0}
};
/*
 * Table a_3: the strings "ed", "eed", "ing", "edly", "eedly" and "ingly".
 * In each entry the first field is the string length and the second the
 * string.
 * NOTE(review): the remaining three fields are defined by struct among in
 * header.h, which is not visible here -- confirm their meaning there.
 */
static symbol s_3_0[2] = { 'e', 'd' };
static symbol s_3_1[3] = { 'e', 'e', 'd' };
static symbol s_3_2[3] = { 'i', 'n', 'g' };
static symbol s_3_3[4] = { 'e', 'd', 'l', 'y' };
static symbol s_3_4[5] = { 'e', 'e', 'd', 'l', 'y' };
static symbol s_3_5[5] = { 'i', 'n', 'g', 'l', 'y' };
static struct among a_3[6] =
{
/* 0 */ { 2, s_3_0, -1, 2, 0},
/* 1 */ { 3, s_3_1, 0, 1, 0},
/* 2 */ { 3, s_3_2, -1, 2, 0},
/* 3 */ { 4, s_3_3, -1, 2, 0},
/* 4 */ { 5, s_3_4, 3, 1, 0},
/* 5 */ { 5, s_3_5, -1, 2, 0}
};
/*
 * Table a_4: twenty-four strings from "anci" to "ousness".  In each entry
 * the first field is the string length and the second the string.
 * NOTE(review): the remaining three fields are defined by struct among in
 * header.h, which is not visible here -- confirm their meaning there.
 */
static symbol s_4_0[4] = { 'a', 'n', 'c', 'i' };
static symbol s_4_1[4] = { 'e', 'n', 'c', 'i' };
static symbol s_4_2[3] = { 'o', 'g', 'i' };
static symbol s_4_3[2] = { 'l', 'i' };
static symbol s_4_4[3] = { 'b', 'l', 'i' };
static symbol s_4_5[4] = { 'a', 'b', 'l', 'i' };
static symbol s_4_6[4] = { 'a', 'l', 'l', 'i' };
static symbol s_4_7[5] = { 'f', 'u', 'l', 'l', 'i' };
static symbol s_4_8[6] = { 'l', 'e', 's', 's', 'l', 'i' };
static symbol s_4_9[5] = { 'o', 'u', 's', 'l', 'i' };
static symbol s_4_10[5] = { 'e', 'n', 't', 'l', 'i' };
static symbol s_4_11[5] = { 'a', 'l', 'i', 't', 'i' };
static symbol s_4_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' };
static symbol s_4_13[5] = { 'i', 'v', 'i', 't', 'i' };
static symbol s_4_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
static symbol s_4_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
static symbol s_4_16[5] = { 'a', 'l', 'i', 's', 'm' };
static symbol s_4_17[5] = { 'a', 't', 'i', 'o', 'n' };
static symbol s_4_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' };
static symbol s_4_19[4] = { 'i', 'z', 'e', 'r' };
static symbol s_4_20[4] = { 'a', 't', 'o', 'r' };
static symbol s_4_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' };
static symbol s_4_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' };
static symbol s_4_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' };
static struct among a_4[24] =
{
/* 0 */ { 4, s_4_0, -1, 3, 0},
/* 1 */ { 4, s_4_1, -1, 2, 0},
/* 2 */ { 3, s_4_2, -1, 13, 0},
/* 3 */ { 2, s_4_3, -1, 16, 0},
/* 4 */ { 3, s_4_4, 3, 12, 0},
/* 5 */ { 4, s_4_5, 4, 4, 0},
/* 6 */ { 4, s_4_6, 3, 8, 0},
/* 7 */ { 5, s_4_7, 3, 14, 0},
/* 8 */ { 6, s_4_8, 3, 15, 0},
/* 9 */ { 5, s_4_9, 3, 10, 0},
/* 10 */ { 5, s_4_10, 3, 5, 0},
/* 11 */ { 5, s_4_11, -1, 8, 0},
/* 12 */ { 6, s_4_12, -1, 12, 0},
/* 13 */ { 5, s_4_13, -1, 11, 0},
/* 14 */ { 6, s_4_14, -1, 1, 0},
/* 15 */ { 7, s_4_15, 14, 7, 0},
/* 16 */ { 5, s_4_16, -1, 8, 0},
/* 17 */ { 5, s_4_17, -1, 7, 0},
/* 18 */ { 7, s_4_18, 17, 6, 0},
/* 19 */ { 4, s_4_19, -1, 6, 0},
/* 20 */ { 4, s_4_20, -1, 7, 0},
/* 21 */ { 7, s_4_21, -1, 11, 0},
/* 22 */ { 7, s_4_22, -1, 9, 0},
/* 23 */ { 7, s_4_23, -1, 10, 0}
};
/*
 * Table a_5: the strings "icate", "ative", "alize", "iciti", "ical",
 * "tional", "ational", "ful" and "ness".  In each entry the first field is
 * the string length and the second the string.
 * NOTE(review): the remaining three fields are defined by struct among in
 * header.h, which is not visible here -- confirm their meaning there.
 */
static symbol s_5_0[5] = { 'i', 'c', 'a', 't', 'e' };
static symbol s_5_1[5] = { 'a', 't', 'i', 'v', 'e' };
static symbol s_5_2[5] = { 'a', 'l', 'i', 'z', 'e' };
static symbol s_5_3[5] = { 'i', 'c', 'i', 't', 'i' };
static symbol s_5_4[4] = { 'i', 'c', 'a', 'l' };
static symbol s_5_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' };
static symbol s_5_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' };
static symbol s_5_7[3] = { 'f', 'u', 'l' };
static symbol s_5_8[4] = { 'n', 'e', 's', 's' };
static struct among a_5[9] =
{
/* 0 */ { 5, s_5_0, -1, 4, 0},
/* 1 */ { 5, s_5_1, -1, 6, 0},
/* 2 */ { 5, s_5_2, -1, 3, 0},
/* 3 */ { 5, s_5_3, -1, 4, 0},
/* 4 */ { 4, s_5_4, -1, 4, 0},
/* 5 */ { 6, s_5_5, -1, 1, 0},
/* 6 */ { 7, s_5_6, 5, 2, 0},
/* 7 */ { 3, s_5_7, -1, 5, 0},
/* 8 */ { 4, s_5_8, -1, 5, 0}
};
/*
 * Table a_6: eighteen strings from "ic" to "ement".  In each entry the
 * first field is the string length and the second the string.
 * NOTE(review): the remaining three fields are defined by struct among in
 * header.h, which is not visible here -- confirm their meaning there.
 */
static symbol s_6_0[2] = { 'i', 'c' };
static symbol s_6_1[4] = { 'a', 'n', 'c', 'e' };
static symbol s_6_2[4] = { 'e', 'n', 'c', 'e' };
static symbol s_6_3[4] = { 'a', 'b', 'l', 'e' };
static symbol s_6_4[4] = { 'i', 'b', 'l', 'e' };
static symbol s_6_5[3] = { 'a', 't', 'e' };
static symbol s_6_6[3] = { 'i', 'v', 'e' };
static symbol s_6_7[3] = { 'i', 'z', 'e' };
static symbol s_6_8[3] = { 'i', 't', 'i' };
static symbol s_6_9[2] = { 'a', 'l' };
static symbol s_6_10[3] = { 'i', 's', 'm' };
static symbol s_6_11[3] = { 'i', 'o', 'n' };
static symbol s_6_12[2] = { 'e', 'r' };
static symbol s_6_13[3] = { 'o', 'u', 's' };
static symbol s_6_14[3] = { 'a', 'n', 't' };
static symbol s_6_15[3] = { 'e', 'n', 't' };
static symbol s_6_16[4] = { 'm', 'e', 'n', 't' };
static symbol s_6_17[5] = { 'e', 'm', 'e', 'n', 't' };
static struct among a_6[18] =
{
/* 0 */ { 2, s_6_0, -1, 1, 0},
/* 1 */ { 4, s_6_1, -1, 1, 0},
/* 2 */ { 4, s_6_2, -1, 1, 0},
/* 3 */ { 4, s_6_3, -1, 1, 0},
/* 4 */ { 4, s_6_4, -1, 1, 0},
/* 5 */ { 3, s_6_5, -1, 1, 0},
/* 6 */ { 3, s_6_6, -1, 1, 0},
/* 7 */ { 3, s_6_7, -1, 1, 0},
/* 8 */ { 3, s_6_8, -1, 1, 0},
/* 9 */ { 2, s_6_9, -1, 1, 0},
/* 10 */ { 3, s_6_10, -1, 1, 0},
/* 11 */ { 3, s_6_11, -1, 2, 0},
/* 12 */ { 2, s_6_12, -1, 1, 0},
/* 13 */ { 3, s_6_13, -1, 1, 0},
/* 14 */ { 3, s_6_14, -1, 1, 0},
/* 15 */ { 3, s_6_15, -1, 1, 0},
/* 16 */ { 4, s_6_16, 15, 1, 0},
/* 17 */ { 5, s_6_17, 16, 1, 0}
};
/*
 * Final-letter table searched (backwards) by r_Step_5 through
 * find_among_b(z, a_7, 2): result 1 for a trailing 'e', result 2 for a
 * trailing 'l'.
 */
static symbol s_7_0[1] = { 'e' };
static symbol s_7_1[1] = { 'l' };
static struct among a_7[2] =
{
/* 0 */ { 1, s_7_0, -1, 1, 0},
/* 1 */ { 1, s_7_1, -1, 2, 0}
};
/*
 * Whole-word exceptions checked by r_exception2 through
 * find_among_b(z, a_8, 8).  Every row carries result -1; r_exception2 only
 * tests the lookup for non-zero, so the particular value is not inspected
 * there.
 */
static symbol s_8_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' };
static symbol s_8_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' };
static symbol s_8_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' };
static symbol s_8_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' };
static symbol s_8_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' };
static symbol s_8_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' };
static symbol s_8_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' };
static symbol s_8_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' };
static struct among a_8[8] =
{
/* 0 */ { 7, s_8_0, -1, -1, 0},
/* 1 */ { 7, s_8_1, -1, -1, 0},
/* 2 */ { 6, s_8_2, -1, -1, 0},
/* 3 */ { 7, s_8_3, -1, -1, 0},
/* 4 */ { 6, s_8_4, -1, -1, 0},
/* 5 */ { 7, s_8_5, -1, -1, 0},
/* 6 */ { 7, s_8_6, -1, -1, 0},
/* 7 */ { 6, s_8_7, -1, -1, 0}
};
/*
 * Whole-word exceptions looked up (forwards) by r_exception1 through
 * find_among(z, a_9, 18).  Rows with a positive result select one of the
 * replacement strings in r_exception1's switch (e.g. "skis" -> result 1);
 * rows with result -1 match no switch case there, so the word is accepted
 * unchanged.
 */
static symbol s_9_0[5] = { 'a', 'n', 'd', 'e', 's' };
static symbol s_9_1[5] = { 'a', 't', 'l', 'a', 's' };
static symbol s_9_2[4] = { 'b', 'i', 'a', 's' };
static symbol s_9_3[6] = { 'c', 'o', 's', 'm', 'o', 's' };
static symbol s_9_4[5] = { 'd', 'y', 'i', 'n', 'g' };
static symbol s_9_5[5] = { 'e', 'a', 'r', 'l', 'y' };
static symbol s_9_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' };
static symbol s_9_7[4] = { 'h', 'o', 'w', 'e' };
static symbol s_9_8[4] = { 'i', 'd', 'l', 'y' };
static symbol s_9_9[5] = { 'l', 'y', 'i', 'n', 'g' };
static symbol s_9_10[4] = { 'n', 'e', 'w', 's' };
static symbol s_9_11[4] = { 'o', 'n', 'l', 'y' };
static symbol s_9_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' };
static symbol s_9_13[5] = { 's', 'k', 'i', 'e', 's' };
static symbol s_9_14[4] = { 's', 'k', 'i', 's' };
static symbol s_9_15[3] = { 's', 'k', 'y' };
static symbol s_9_16[5] = { 't', 'y', 'i', 'n', 'g' };
static symbol s_9_17[4] = { 'u', 'g', 'l', 'y' };
static struct among a_9[18] =
{
/* 0 */ { 5, s_9_0, -1, -1, 0},
/* 1 */ { 5, s_9_1, -1, -1, 0},
/* 2 */ { 4, s_9_2, -1, -1, 0},
/* 3 */ { 6, s_9_3, -1, -1, 0},
/* 4 */ { 5, s_9_4, -1, 3, 0},
/* 5 */ { 5, s_9_5, -1, 9, 0},
/* 6 */ { 6, s_9_6, -1, 7, 0},
/* 7 */ { 4, s_9_7, -1, -1, 0},
/* 8 */ { 4, s_9_8, -1, 6, 0},
/* 9 */ { 5, s_9_9, -1, 4, 0},
/* 10 */ { 4, s_9_10, -1, -1, 0},
/* 11 */ { 4, s_9_11, -1, 10, 0},
/* 12 */ { 6, s_9_12, -1, 11, 0},
/* 13 */ { 5, s_9_13, -1, 2, 0},
/* 14 */ { 4, s_9_14, -1, 1, 0},
/* 15 */ { 3, s_9_15, -1, -1, 0},
/* 16 */ { 5, s_9_16, -1, 5, 0},
/* 17 */ { 4, s_9_17, -1, 8, 0}
};
/*
 * Character-class data handed to the in_grouping / out_grouping helpers.
 * Callers in this file pass g_v with the range 97..121, g_v_WXY with
 * 89..121 and g_valid_LI with 99..116.
 * NOTE(review): these look like bitmaps indexed from the range minimum
 * (g_v would then decode to a e i o u y) - confirm against the
 * in_grouping implementation, which is not in this file.
 */
static unsigned char g_v[] = { 17, 65, 16, 1 };
static unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 };
static unsigned char g_valid_LI[] = { 55, 141, 2 };
/*
 * Literal strings used by the English stemmer routines, either as text to
 * match (eq_s / eq_s_b) or as replacement/insert text (slice_from_s /
 * insert_s).  Users visible in this file:
 *   s_0  .. s_3   r_prelude
 *   s_4  .. s_6   r_Step_1a
 *   s_7  .. s_9   r_Step_1b
 *   s_10 .. s_12  r_Step_1c
 *   s_13 .. s_28  r_Step_2
 *   s_29 .. s_32  r_Step_3
 *   s_33 .. s_34  r_Step_4
 *   s_35          r_Step_5
 *   s_36 .. s_46  r_exception1
 *   s_47 .. s_48  r_postlude
 * The arrays are not NUL-terminated; callers always pass the length.
 */
static symbol s_0[] = { 'y' };
static symbol s_1[] = { 'Y' };
static symbol s_2[] = { 'y' };
static symbol s_3[] = { 'Y' };
static symbol s_4[] = { 's', 's' };
static symbol s_5[] = { 'i', 'e' };
static symbol s_6[] = { 'i' };
static symbol s_7[] = { 'e', 'e' };
static symbol s_8[] = { 'e' };
static symbol s_9[] = { 'e' };
static symbol s_10[] = { 'y' };
static symbol s_11[] = { 'Y' };
static symbol s_12[] = { 'i' };
static symbol s_13[] = { 't', 'i', 'o', 'n' };
static symbol s_14[] = { 'e', 'n', 'c', 'e' };
static symbol s_15[] = { 'a', 'n', 'c', 'e' };
static symbol s_16[] = { 'a', 'b', 'l', 'e' };
static symbol s_17[] = { 'e', 'n', 't' };
static symbol s_18[] = { 'i', 'z', 'e' };
static symbol s_19[] = { 'a', 't', 'e' };
static symbol s_20[] = { 'a', 'l' };
static symbol s_21[] = { 'f', 'u', 'l' };
static symbol s_22[] = { 'o', 'u', 's' };
static symbol s_23[] = { 'i', 'v', 'e' };
static symbol s_24[] = { 'b', 'l', 'e' };
static symbol s_25[] = { 'l' };
static symbol s_26[] = { 'o', 'g' };
static symbol s_27[] = { 'f', 'u', 'l' };
static symbol s_28[] = { 'l', 'e', 's', 's' };
static symbol s_29[] = { 't', 'i', 'o', 'n' };
static symbol s_30[] = { 'a', 't', 'e' };
static symbol s_31[] = { 'a', 'l' };
static symbol s_32[] = { 'i', 'c' };
static symbol s_33[] = { 's' };
static symbol s_34[] = { 't' };
static symbol s_35[] = { 'l' };
static symbol s_36[] = { 's', 'k', 'i' };
static symbol s_37[] = { 's', 'k', 'y' };
static symbol s_38[] = { 'd', 'i', 'e' };
static symbol s_39[] = { 'l', 'i', 'e' };
static symbol s_40[] = { 't', 'i', 'e' };
static symbol s_41[] = { 'i', 'd', 'l' };
static symbol s_42[] = { 'g', 'e', 'n', 't', 'l' };
static symbol s_43[] = { 'u', 'g', 'l', 'i' };
static symbol s_44[] = { 'e', 'a', 'r', 'l', 'i' };
static symbol s_45[] = { 'o', 'n', 'l', 'i' };
static symbol s_46[] = { 's', 'i', 'n', 'g', 'l' };
static symbol s_47[] = { 'Y' };
static symbol s_48[] = { 'y' };
/*
 * Prelude: mark every 'y' that acts as a consonant by rewriting it to 'Y'.
 *
 *  - a 'y' at the cursor that is followed by a g_v character is rewritten;
 *  - then, repeatedly, the first 'y' found directly after a g_v character
 *    is rewritten.
 *
 * z->B[0] (the Y_found flag) is cleared on entry and set whenever a
 * rewrite happens.  The cursor is left where it was on entry.
 * Always returns 1.
 */
static int r_prelude(struct SN_env * z) {
    int origin = z->c;

    z->B[0] = 0;

    /* Leading 'y' followed by a g_v character. */
    z->bra = z->c;
    if (eq_s(z, 1, s_0)) {
        z->ket = z->c;
        if (in_grouping(z, g_v, 97, 121)) {
            slice_from_s(z, 1, s_1);
            z->B[0] = 1;
        }
    }
    z->c = origin;

    /* Repeat: scan forward for <g_v char> 'y' and rewrite that 'y'. */
    for (;;) {
        int pass_start = z->c;
        int found = 0;

        for (;;) {
            int probe = z->c;

            if (in_grouping(z, g_v, 97, 121)) {
                z->bra = z->c;
                if (eq_s(z, 1, s_2)) {
                    z->ket = z->c;
                    z->c = probe;
                    found = 1;
                    break;
                }
            }
            z->c = probe;
            if (z->c >= z->l) break;
            z->c++;
        }

        if (!found) {
            z->c = pass_start;
            break;
        }
        slice_from_s(z, 1, s_3);
        z->B[0] = 1;
    }

    z->c = origin;
    return 1;
}
/*
 * Compute the two region marks z->I[0] (p1) and z->I[1] (p2).
 *
 * Both default to z->l.  p1 is the position after a prefix found in a_0,
 * or otherwise after the first g_v character followed by a non-g_v
 * character; p2 is the position after the next such pair.  If the end of
 * the text is reached while scanning, the marks assigned so far are kept.
 * The cursor is restored before returning; always returns 1.
 */
static int r_mark_regions(struct SN_env * z) {
    int origin = z->c;

    z->I[0] = z->l;
    z->I[1] = z->l;

    if (!find_among(z, a_0, 1)) {
        /* No special prefix: go past a g_v char, then past a non-g_v char. */
        z->c = origin;
        while (!in_grouping(z, g_v, 97, 121)) {
            if (z->c >= z->l) goto restore;
            z->c++;
        }
        while (!out_grouping(z, g_v, 97, 121)) {
            if (z->c >= z->l) goto restore;
            z->c++;
        }
    }
    z->I[0] = z->c;                    /* p1 */

    while (!in_grouping(z, g_v, 97, 121)) {
        if (z->c >= z->l) goto restore;
        z->c++;
    }
    while (!out_grouping(z, g_v, 97, 121)) {
        if (z->c >= z->l) goto restore;
        z->c++;
    }
    z->I[1] = z->c;                    /* p2 */

restore:
    z->c = origin;
    return 1;
}
/*
 * Backward test for a "short syllable" ending at the cursor.
 *
 * Succeeds (returns 1) when, reading backwards, either
 *   - a character outside g_v_WXY, then a g_v character, then a character
 *     outside g_v are found, or
 *   - a character outside g_v, then a g_v character are found and the
 *     cursor has then reached the backward limit z->lb.
 * Returns 0 otherwise.  The cursor is left wherever matching stopped.
 */
static int r_shortv(struct SN_env * z) {
    int back = z->l - z->c;

    if (out_grouping_b(z, g_v_WXY, 89, 121)
        && in_grouping_b(z, g_v, 97, 121)
        && out_grouping_b(z, g_v, 97, 121))
        return 1;

    /* First alternative failed: rewind and try the word-initial form. */
    z->c = z->l - back;
    if (!out_grouping_b(z, g_v, 97, 121)) return 0;
    if (!in_grouping_b(z, g_v, 97, 121)) return 0;
    return (z->c > z->lb) ? 0 : 1;
}
/* Returns 1 when the cursor is at or beyond mark z->I[0] (p1), else 0. */
static int r_R1(struct SN_env * z) {
    return (z->I[0] <= z->c) ? 1 : 0;
}
/* Returns 1 when the cursor is at or beyond mark z->I[1] (p2), else 0. */
static int r_R2(struct SN_env * z) {
    return (z->I[1] <= z->c) ? 1 : 0;
}
/*
 * Step 1a: handle the suffix found (backwards) in a_1.
 *
 *   result 1: replace the suffix with "ss";
 *   result 2: replace with "ie" when exactly one character precedes the
 *             suffix, otherwise with "i";
 *   result 3: delete the suffix, provided a g_v character occurs somewhere
 *             before the character immediately preceding it.
 *
 * Returns 0 when no suffix matches or the result-3 condition fails,
 * 1 otherwise.
 */
static int r_Step_1a(struct SN_env * z) {
    int which;

    z->ket = z->c;
    which = find_among_b(z, a_1, 6);
    if (which == 0) return 0;
    z->bra = z->c;

    if (which == 1) {
        slice_from_s(z, 2, s_4);
    } else if (which == 2) {
        int back = z->l - z->c;
        int single_char_stem = 0;

        /* Step back one character and see whether that hits the limit. */
        if (z->c > z->lb) {
            z->c--;
            single_char_stem = (z->c <= z->lb);
        }
        if (single_char_stem) {
            slice_from_s(z, 2, s_5);
        } else {
            z->c = z->l - back;
            slice_from_s(z, 1, s_6);
        }
    } else if (which == 3) {
        if (z->c <= z->lb) return 0;
        z->c--;
        /* Scan backwards until a g_v character has been consumed. */
        while (!in_grouping_b(z, g_v, 97, 121)) {
            if (z->c <= z->lb) return 0;
            z->c--;
        }
        slice_del(z);
    }
    return 1;
}
/*
 * Step 1b: handle the suffix found (backwards) in a_3.
 *
 *   result 1: if r_R1 holds, replace the suffix with "ee";
 *   result 2: if a g_v character precedes the suffix, delete it and then
 *             look at the new ending in a_2:
 *               1 -> append 'e';
 *               2 -> delete the last character;
 *               3 -> append 'e' when the cursor sits exactly at z->I[0]
 *                    and r_shortv succeeds.
 *
 * Returns 0 as soon as any lookup or condition fails, 1 otherwise.
 */
static int r_Step_1b(struct SN_env * z) {
    int suffix;
    int ending;
    int back;
    int here;

    z->ket = z->c;
    suffix = find_among_b(z, a_3, 6);
    if (suffix == 0) return 0;
    z->bra = z->c;

    if (suffix == 1) {
        if (!r_R1(z)) return 0;
        slice_from_s(z, 2, s_7);
        return 1;
    }
    if (suffix != 2) return 1;

    /* Test only: some g_v character must precede the suffix. */
    back = z->l - z->c;
    while (!in_grouping_b(z, g_v, 97, 121)) {
        if (z->c <= z->lb) return 0;
        z->c--;
    }
    z->c = z->l - back;
    slice_del(z);

    /* Test only: classify the ending left after the deletion.
     * z->l changed in slice_del, so the offset is recomputed. */
    back = z->l - z->c;
    ending = find_among_b(z, a_2, 13);
    if (ending == 0) return 0;
    z->c = z->l - back;

    if (ending == 1) {
        here = z->c;
        insert_s(z, z->c, z->c, 1, s_8);
        z->c = here;
    } else if (ending == 2) {
        z->ket = z->c;
        if (z->c <= z->lb) return 0;
        z->c--;
        z->bra = z->c;
        slice_del(z);
    } else if (ending == 3) {
        if (z->c != z->I[0]) return 0;
        back = z->l - z->c;
        if (!r_shortv(z)) return 0;
        z->c = z->l - back;
        here = z->c;
        insert_s(z, z->c, z->c, 1, s_9);
        z->c = here;
    }
    return 1;
}
/*
 * Step 1c: replace a final 'y' or 'Y' with 'i' when the preceding
 * character is outside g_v and is not the first character inside the
 * backward limit.  Returns 1 on replacement, 0 otherwise.
 */
static int r_Step_1c(struct SN_env * z) {
    int back;

    z->ket = z->c;
    back = z->l - z->c;
    if (!eq_s_b(z, 1, s_10)) {
        z->c = z->l - back;
        if (!eq_s_b(z, 1, s_11)) return 0;
    }
    z->bra = z->c;

    if (!out_grouping_b(z, g_v, 97, 121)) return 0;
    /* The character just consumed must not be at the limit. */
    if (z->c <= z->lb) return 0;

    slice_from_s(z, 1, s_12);
    return 1;
}
/*
 * Step 2: find a suffix (backwards) in a_4 and, if it lies in the region
 * accepted by r_R1, rewrite it according to the lookup result.
 *
 * Results 1..15 replace the suffix with a fixed string; result 13
 * additionally requires a preceding 'l'.  Result 16 deletes the suffix
 * when the preceding character belongs to g_valid_LI.
 * Returns 0 when nothing matches or a condition fails, 1 otherwise.
 */
static int r_Step_2(struct SN_env * z) {
    int which;

    z->ket = z->c;
    which = find_among_b(z, a_4, 24);
    if (which == 0) return 0;
    z->bra = z->c;
    if (!r_R1(z)) return 0;

    /* Extra context checks for the two conditional cases. */
    if (which == 13 && !eq_s_b(z, 1, s_25)) return 0;
    if (which == 16 && !in_grouping_b(z, g_valid_LI, 99, 116)) return 0;

    switch (which) {
    case 1:  slice_from_s(z, 4, s_13); break;   /* tion */
    case 2:  slice_from_s(z, 4, s_14); break;   /* ence */
    case 3:  slice_from_s(z, 4, s_15); break;   /* ance */
    case 4:  slice_from_s(z, 4, s_16); break;   /* able */
    case 5:  slice_from_s(z, 3, s_17); break;   /* ent  */
    case 6:  slice_from_s(z, 3, s_18); break;   /* ize  */
    case 7:  slice_from_s(z, 3, s_19); break;   /* ate  */
    case 8:  slice_from_s(z, 2, s_20); break;   /* al   */
    case 9:  slice_from_s(z, 3, s_21); break;   /* ful  */
    case 10: slice_from_s(z, 3, s_22); break;   /* ous  */
    case 11: slice_from_s(z, 3, s_23); break;   /* ive  */
    case 12: slice_from_s(z, 3, s_24); break;   /* ble  */
    case 13: slice_from_s(z, 2, s_26); break;   /* og   */
    case 14: slice_from_s(z, 3, s_27); break;   /* ful  */
    case 15: slice_from_s(z, 4, s_28); break;   /* less */
    case 16: slice_del(z); break;
    }
    return 1;
}
/*
 * Step 3: find a suffix (backwards) in a_5 and, if it lies in the region
 * accepted by r_R1, rewrite it.
 *
 * Results 1..4 replace the suffix with "tion", "ate", "al" or "ic";
 * result 5 deletes it; result 6 deletes it only when r_R2 also holds.
 * Returns 0 when nothing matches or a condition fails, 1 otherwise.
 */
static int r_Step_3(struct SN_env * z) {
    int which;

    z->ket = z->c;
    which = find_among_b(z, a_5, 9);
    if (which == 0) return 0;
    z->bra = z->c;
    if (!r_R1(z)) return 0;

    if (which == 6 && !r_R2(z)) return 0;

    switch (which) {
    case 1: slice_from_s(z, 4, s_29); break;
    case 2: slice_from_s(z, 3, s_30); break;
    case 3: slice_from_s(z, 2, s_31); break;
    case 4: slice_from_s(z, 2, s_32); break;
    case 5:
    case 6: slice_del(z); break;
    }
    return 1;
}
/*
 * Step 4: find a suffix (backwards) in a_6 and delete it when it lies in
 * the region accepted by r_R2.  For lookup result 2 the suffix must also
 * be preceded by 's' or 't'.
 * Returns 0 when nothing matches or a condition fails, 1 otherwise.
 */
static int r_Step_4(struct SN_env * z) {
    int which;

    z->ket = z->c;
    which = find_among_b(z, a_6, 18);
    if (which == 0) return 0;
    z->bra = z->c;
    if (!r_R2(z)) return 0;

    if (which == 2) {
        int back = z->l - z->c;

        if (!eq_s_b(z, 1, s_33)) {
            z->c = z->l - back;
            if (!eq_s_b(z, 1, s_34)) return 0;
        }
    }
    if (which == 1 || which == 2)
        slice_del(z);
    return 1;
}
/*
 * Step 5: drop a final 'e' or 'l' (looked up in a_7).
 *
 *   result 1 ('e'): delete when r_R2 holds, or when r_R1 holds and
 *                   r_shortv does NOT succeed before it;
 *   result 2 ('l'): delete when r_R2 holds and another 'l' precedes it.
 *
 * Returns 0 when nothing matches or the conditions fail, 1 otherwise.
 */
static int r_Step_5(struct SN_env * z) {
    int which;

    z->ket = z->c;
    which = find_among_b(z, a_7, 2);
    if (which == 0) return 0;
    z->bra = z->c;

    if (which == 1) {
        int back = z->l - z->c;

        if (!r_R2(z)) {
            z->c = z->l - back;
            if (!r_R1(z)) return 0;
            /* Negative test: a short syllable here blocks the deletion. */
            back = z->l - z->c;
            if (r_shortv(z)) return 0;
            z->c = z->l - back;
        }
        slice_del(z);
    } else if (which == 2) {
        if (!r_R2(z) || !eq_s_b(z, 1, s_35)) return 0;
        slice_del(z);
    }
    return 1;
}
/*
 * Returns 1 when the text between the backward limit and the cursor is
 * exactly one of the words in a_8 (a match that reaches z->lb), else 0.
 */
static int r_exception2(struct SN_env * z) {
    z->ket = z->c;
    if (find_among_b(z, a_8, 8) == 0) return 0;
    z->bra = z->c;
    /* The match must consume everything down to the limit. */
    return (z->c > z->lb) ? 0 : 1;
}
/*
 * Whole-word exception list (a_9), matched forwards from the cursor.
 *
 * Succeeds only when a listed word is matched and the match ends at z->l.
 * Positive lookup results replace the word with a fixed stem; any other
 * non-zero result leaves the word untouched.
 * Returns 1 when the word was recognised, 0 otherwise.
 */
static int r_exception1(struct SN_env * z) {
    int which;

    z->bra = z->c;
    which = find_among(z, a_9, 18);
    if (which == 0) return 0;
    z->ket = z->c;
    if (z->c < z->l) return 0;          /* must match the whole word */

    switch (which) {
    case 1:  slice_from_s(z, 3, s_36); break;   /* ski   */
    case 2:  slice_from_s(z, 3, s_37); break;   /* sky   */
    case 3:  slice_from_s(z, 3, s_38); break;   /* die   */
    case 4:  slice_from_s(z, 3, s_39); break;   /* lie   */
    case 5:  slice_from_s(z, 3, s_40); break;   /* tie   */
    case 6:  slice_from_s(z, 3, s_41); break;   /* idl   */
    case 7:  slice_from_s(z, 5, s_42); break;   /* gentl */
    case 8:  slice_from_s(z, 4, s_43); break;   /* ugli  */
    case 9:  slice_from_s(z, 5, s_44); break;   /* earli */
    case 10: slice_from_s(z, 4, s_45); break;   /* onli  */
    case 11: slice_from_s(z, 5, s_46); break;   /* singl */
    }
    return 1;
}
/*
 * Postlude: when the Y_found flag z->B[0] is set, turn every 'Y' from the
 * cursor onwards back into 'y'.  Returns 0 immediately if the flag is
 * clear, 1 otherwise.
 */
static int r_postlude(struct SN_env * z) {
    if (!z->B[0]) return 0;

    for (;;) {
        int pass_start = z->c;
        int found = 0;

        /* Advance until a 'Y' starts at the cursor or the text ends. */
        for (;;) {
            int probe = z->c;

            z->bra = z->c;
            if (eq_s(z, 1, s_47)) {
                z->ket = z->c;
                z->c = probe;
                found = 1;
                break;
            }
            z->c = probe;
            if (z->c >= z->l) break;
            z->c++;
        }

        if (!found) {
            z->c = pass_start;
            break;
        }
        slice_from_s(z, 1, s_48);
    }
    return 1;
}
/*
 * Entry point of the English stemmer; stems the word held in z in place.
 *
 * Order of work:
 *   1. r_exception1 - whole-word exceptions; if it succeeds we are done.
 *   2. Words for which cursor + 3 would pass z->l are rejected (return 0).
 *   3. r_prelude and r_mark_regions run forwards from the start.
 *   4. The direction is switched to backwards (z->lb = start, cursor = end)
 *      and r_Step_1a runs; unless r_exception2 recognises the word,
 *      r_Step_1b .. r_Step_5 follow.
 *   5. The cursor returns to z->lb and r_postlude runs forwards.
 *
 * The result of each individual step is ignored; after each one the
 * cursor is put back relative to the (possibly changed) end z->l.
 * Returns 1, except for the short-word rejection described above.
 */
extern int english_stem(struct SN_env * z) {
    int origin = z->c;
    int back;
    int resume;

    if (r_exception1(z)) return 1;
    z->c = origin;

    /* Test only: there must be room to hop 3 characters forward. */
    if (z->c + 3 < 0 || z->c + 3 > z->l) return 0;

    r_prelude(z);
    z->c = origin;

    r_mark_regions(z);
    z->c = origin;

    /* Switch to backward processing. */
    z->lb = z->c;
    z->c = z->l;

    back = z->l - z->c;
    r_Step_1a(z);
    z->c = z->l - back;

    back = z->l - z->c;
    if (!r_exception2(z)) {
        z->c = z->l - back;

        back = z->l - z->c;
        r_Step_1b(z);
        z->c = z->l - back;

        back = z->l - z->c;
        r_Step_1c(z);
        z->c = z->l - back;

        back = z->l - z->c;
        r_Step_2(z);
        z->c = z->l - back;

        back = z->l - z->c;
        r_Step_3(z);
        z->c = z->l - back;

        back = z->l - z->c;
        r_Step_4(z);
        z->c = z->l - back;

        back = z->l - z->c;
        r_Step_5(z);
        z->c = z->l - back;
    }

    /* Back to forward processing for the postlude. */
    z->c = z->lb;
    resume = z->c;
    r_postlude(z);
    z->c = resume;

    return 1;
}
/*
 * Allocate a stemmer environment for the English stemmer.
 * NOTE(review): the arguments (0, 2, 1) presumably size the string,
 * integer and boolean variable arrays - this file uses z->I[0..1] and
 * z->B[0] - confirm against SN_create_env.
 */
extern struct SN_env * english_create_env(void) {
    struct SN_env * env = SN_create_env(0, 2, 1);
    return env;
}
/* Release an environment obtained from english_create_env. */
extern void english_close_env(struct SN_env * z) {
    SN_close_env(z);
}
/* This file was generated automatically by the Snowball to ANSI C compiler */
/*
 * Public interface of the English stemmer:
 *   english_create_env - allocate the environment the stemmer works on;
 *   english_close_env  - release such an environment;
 *   english_stem       - stem the word currently held in the environment.
 */
extern struct SN_env * english_create_env(void);
extern void english_close_env(struct SN_env * z);
extern int english_stem(struct SN_env * z);
#include <limits.h>
#include "api.h"
/* Integer limits under the names used by the generated stemmers. */
#define MAXINT INT_MAX
#define MININT INT_MIN

/*
 * A symbol buffer is preceded by a header of two ints; p points just past
 * that header:
 *   ((int *)p)[-2]  capacity  (CAPACITY)
 *   ((int *)p)[-1]  size      (SIZE / SET_SIZE)
 *
 * HEAD is the header size in bytes.  It is fully parenthesized so that it
 * expands safely inside any expression (e.g. `x / HEAD`, `x % HEAD`);
 * the unparenthesized form would associate as `(x / 2) * sizeof(int)`.
 */
#define HEAD (2*sizeof(int))

/* SIZE and CAPACITY expand to lvalues, so they can be read or assigned. */
#define SIZE(p) ((int *)(p))[-1]
/* Argument and body are parenthesized so SET_SIZE is a single expression. */
#define SET_SIZE(p, n) (((int *)(p))[-1] = (n))
#define CAPACITY(p) ((int *)(p))[-2]
/*
 * One row of a suffix/prefix lookup table passed to find_among() or
 * find_among_b().  The generated stemmers initialize rows positionally as
 * { s_size, s, substring_i, result, function }.
 */
struct among
{ int s_size; /* number of chars in string */
symbol * s; /* search string */
int substring_i;/* index to longest matching substring */
int result; /* result of the lookup */
int (* function)(struct SN_env *); /* optional callback; 0 in the tables visible in this file - NOTE(review): exact use is defined by find_among, confirm there */
};
/*
 * Runtime support used by the generated stemmers.  The implementations
 * live elsewhere; the notes below only record how the stemmers in this
 * code base call them.  Functions with a _b suffix are the ones the
 * stemmers call after switching to backward processing (cursor moving
 * from z->l towards z->lb).
 */

/* Symbol buffer allocation / release. */
extern symbol * create_s(void);
extern void lose_s(symbol * p);

/* Character-class tests at the cursor; callers pass a class table plus the
 * min/max character codes the table covers, and treat a non-zero return
 * as "matched". */
extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max);
extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max);
extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);

/* Character-range tests at the cursor. */
extern int in_range(struct SN_env * z, int min, int max);
extern int in_range_b(struct SN_env * z, int min, int max);
extern int out_range(struct SN_env * z, int min, int max);
extern int out_range_b(struct SN_env * z, int min, int max);

/* Literal string comparison at the cursor (s_size symbols from s, or a
 * whole symbol buffer p). */
extern int eq_s(struct SN_env * z, int s_size, symbol * s);
extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
extern int eq_v(struct SN_env * z, symbol * p);
extern int eq_v_b(struct SN_env * z, symbol * p);

/* Table lookup over v_size rows; the stemmers treat 0 as "no match" and
 * otherwise switch on the returned value. */
extern int find_among(struct SN_env * z, struct among * v, int v_size);
extern int find_among_b(struct SN_env * z, struct among * v, int v_size);

/* Buffer editing.  The slice_* functions act on the region delimited by
 * z->bra and z->ket, which the stemmers set before calling them. */
extern symbol * increase_size(symbol * p, int n);
extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s);
extern void slice_from_s(struct SN_env * z, int s_size, symbol * s);
extern void slice_from_v(struct SN_env * z, symbol * p);
extern void slice_del(struct SN_env * z);
extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s);
extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p);
extern symbol * slice_to(struct SN_env * z, symbol * p);
extern symbol * assign_to(struct SN_env * z, symbol * p);

/* Debugging aid. */
extern void debug(struct SN_env * z, int number, int line_count);
/* This file was generated automatically by the Snowball to ANSI C compiler */
#include "header.h"
/* Public entry point of the Russian stemmer. */
extern int russian_stem(struct SN_env * z);
/* Forward declarations of the stemmer's internal routines, so they can be
 * defined in any order further down. */
static int r_tidy_up(struct SN_env * z);
static int r_derivational(struct SN_env * z);
static int r_noun(struct SN_env * z);
static int r_verb(struct SN_env * z);
static int r_reflexive(struct SN_env * z);
static int r_adjectival(struct SN_env * z);
static int r_adjective(struct SN_env * z);
static int r_perfective_gerund(struct SN_env * z);
static int r_R2(struct SN_env * z);
static int r_mark_regions(struct SN_env * z);
/* Environment allocation / release for the Russian stemmer. */
extern struct SN_env * russian_create_env(void);
extern void russian_close_env(struct SN_env * z);
/*
 * Ending table searched (backwards) by r_perfective_gerund through
 * find_among_b(z, a_0, 9).  Rows are { s_size, s, substring_i, result,
 * function }.  Result 1 endings are deleted only after a preceding
 * s_0/s_1 character; result 2 endings are deleted unconditionally.
 * NOTE(review): the symbol values (192..221) look like KOI8-R encoded
 * Cyrillic letters - confirm the expected input encoding.
 */
static symbol s_0_0[3] = { 215, 219, 201 };
static symbol s_0_1[4] = { 201, 215, 219, 201 };
static symbol s_0_2[4] = { 217, 215, 219, 201 };
static symbol s_0_3[1] = { 215 };
static symbol s_0_4[2] = { 201, 215 };
static symbol s_0_5[2] = { 217, 215 };
static symbol s_0_6[5] = { 215, 219, 201, 211, 216 };
static symbol s_0_7[6] = { 201, 215, 219, 201, 211, 216 };
static symbol s_0_8[6] = { 217, 215, 219, 201, 211, 216 };
static struct among a_0[9] =
{
/* 0 */ { 3, s_0_0, -1, 1, 0},
/* 1 */ { 4, s_0_1, 0, 2, 0},
/* 2 */ { 4, s_0_2, 0, 2, 0},
/* 3 */ { 1, s_0_3, -1, 1, 0},
/* 4 */ { 2, s_0_4, 3, 2, 0},
/* 5 */ { 2, s_0_5, 3, 2, 0},
/* 6 */ { 5, s_0_6, -1, 1, 0},
/* 7 */ { 6, s_0_7, 6, 2, 0},
/* 8 */ { 6, s_0_8, 6, 2, 0}
};
/*
 * Ending table searched (backwards) by r_adjective through
 * find_among_b(z, a_1, 26).  Every row has result 1, which r_adjective
 * handles by deleting the matched ending.
 */
static symbol s_1_0[2] = { 192, 192 };
static symbol s_1_1[2] = { 197, 192 };
static symbol s_1_2[2] = { 207, 192 };
static symbol s_1_3[2] = { 213, 192 };
static symbol s_1_4[2] = { 197, 197 };
static symbol s_1_5[2] = { 201, 197 };
static symbol s_1_6[2] = { 207, 197 };
static symbol s_1_7[2] = { 217, 197 };
static symbol s_1_8[2] = { 201, 200 };
static symbol s_1_9[2] = { 217, 200 };
static symbol s_1_10[3] = { 201, 205, 201 };
static symbol s_1_11[3] = { 217, 205, 201 };
static symbol s_1_12[2] = { 197, 202 };
static symbol s_1_13[2] = { 201, 202 };
static symbol s_1_14[2] = { 207, 202 };
static symbol s_1_15[2] = { 217, 202 };
static symbol s_1_16[2] = { 197, 205 };
static symbol s_1_17[2] = { 201, 205 };
static symbol s_1_18[2] = { 207, 205 };
static symbol s_1_19[2] = { 217, 205 };
static symbol s_1_20[3] = { 197, 199, 207 };
static symbol s_1_21[3] = { 207, 199, 207 };
static symbol s_1_22[2] = { 193, 209 };
static symbol s_1_23[2] = { 209, 209 };
static symbol s_1_24[3] = { 197, 205, 213 };
static symbol s_1_25[3] = { 207, 205, 213 };
static struct among a_1[26] =
{
/* 0 */ { 2, s_1_0, -1, 1, 0},
/* 1 */ { 2, s_1_1, -1, 1, 0},
/* 2 */ { 2, s_1_2, -1, 1, 0},
/* 3 */ { 2, s_1_3, -1, 1, 0},
/* 4 */ { 2, s_1_4, -1, 1, 0},
/* 5 */ { 2, s_1_5, -1, 1, 0},
/* 6 */ { 2, s_1_6, -1, 1, 0},
/* 7 */ { 2, s_1_7, -1, 1, 0},
/* 8 */ { 2, s_1_8, -1, 1, 0},
/* 9 */ { 2, s_1_9, -1, 1, 0},
/* 10 */ { 3, s_1_10, -1, 1, 0},
/* 11 */ { 3, s_1_11, -1, 1, 0},
/* 12 */ { 2, s_1_12, -1, 1, 0},
/* 13 */ { 2, s_1_13, -1, 1, 0},
/* 14 */ { 2, s_1_14, -1, 1, 0},
/* 15 */ { 2, s_1_15, -1, 1, 0},
/* 16 */ { 2, s_1_16, -1, 1, 0},
/* 17 */ { 2, s_1_17, -1, 1, 0},
/* 18 */ { 2, s_1_18, -1, 1, 0},
/* 19 */ { 2, s_1_19, -1, 1, 0},
/* 20 */ { 3, s_1_20, -1, 1, 0},
/* 21 */ { 3, s_1_21, -1, 1, 0},
/* 22 */ { 2, s_1_22, -1, 1, 0},
/* 23 */ { 2, s_1_23, -1, 1, 0},
/* 24 */ { 3, s_1_24, -1, 1, 0},
/* 25 */ { 3, s_1_25, -1, 1, 0}
};
/*
 * Ending table searched (backwards) by r_adjectival through
 * find_among_b(z, a_2, 8), after r_adjective has removed an adjective
 * ending.  Result 1 endings are deleted only after a preceding s_2/s_3
 * character; result 2 endings are deleted unconditionally.
 */
static symbol s_2_0[2] = { 197, 205 };
static symbol s_2_1[2] = { 206, 206 };
static symbol s_2_2[2] = { 215, 219 };
static symbol s_2_3[3] = { 201, 215, 219 };
static symbol s_2_4[3] = { 217, 215, 219 };
static symbol s_2_5[1] = { 221 };
static symbol s_2_6[2] = { 192, 221 };
static symbol s_2_7[3] = { 213, 192, 221 };
static struct among a_2[8] =
{
/* 0 */ { 2, s_2_0, -1, 1, 0},
/* 1 */ { 2, s_2_1, -1, 1, 0},
/* 2 */ { 2, s_2_2, -1, 1, 0},
/* 3 */ { 3, s_2_3, 2, 2, 0},
/* 4 */ { 3, s_2_4, 2, 2, 0},
/* 5 */ { 1, s_2_5, -1, 1, 0},
/* 6 */ { 2, s_2_6, 5, 1, 0},
/* 7 */ { 3, s_2_7, 6, 2, 0}
};
/*
 * Ending table searched (backwards) by r_reflexive through
 * find_among_b(z, a_3, 2); both rows have result 1 (delete the ending).
 */
static symbol s_3_0[2] = { 211, 209 };
static symbol s_3_1[2] = { 211, 216 };
static struct among a_3[2] =
{
/* 0 */ { 2, s_3_0, -1, 1, 0},
/* 1 */ { 2, s_3_1, -1, 1, 0}
};
/*
 * Ending table searched (backwards) by r_verb through
 * find_among_b(z, a_4, 46).  Result 1 endings are deleted only after a
 * preceding s_4/s_5 character; result 2 endings are deleted
 * unconditionally.  substring_i links a row to a shorter row whose string
 * is a suffix of it (e.g. row 1 -> row 0).
 */
static symbol s_4_0[1] = { 192 };
static symbol s_4_1[2] = { 213, 192 };
static symbol s_4_2[2] = { 204, 193 };
static symbol s_4_3[3] = { 201, 204, 193 };
static symbol s_4_4[3] = { 217, 204, 193 };
static symbol s_4_5[2] = { 206, 193 };
static symbol s_4_6[3] = { 197, 206, 193 };
static symbol s_4_7[3] = { 197, 212, 197 };
static symbol s_4_8[3] = { 201, 212, 197 };
static symbol s_4_9[3] = { 202, 212, 197 };
static symbol s_4_10[4] = { 197, 202, 212, 197 };
static symbol s_4_11[4] = { 213, 202, 212, 197 };
static symbol s_4_12[2] = { 204, 201 };
static symbol s_4_13[3] = { 201, 204, 201 };
static symbol s_4_14[3] = { 217, 204, 201 };
static symbol s_4_15[1] = { 202 };
static symbol s_4_16[2] = { 197, 202 };
static symbol s_4_17[2] = { 213, 202 };
static symbol s_4_18[1] = { 204 };
static symbol s_4_19[2] = { 201, 204 };
static symbol s_4_20[2] = { 217, 204 };
static symbol s_4_21[2] = { 197, 205 };
static symbol s_4_22[2] = { 201, 205 };
static symbol s_4_23[2] = { 217, 205 };
static symbol s_4_24[1] = { 206 };
static symbol s_4_25[2] = { 197, 206 };
static symbol s_4_26[2] = { 204, 207 };
static symbol s_4_27[3] = { 201, 204, 207 };
static symbol s_4_28[3] = { 217, 204, 207 };
static symbol s_4_29[2] = { 206, 207 };
static symbol s_4_30[3] = { 197, 206, 207 };
static symbol s_4_31[3] = { 206, 206, 207 };
static symbol s_4_32[2] = { 192, 212 };
static symbol s_4_33[3] = { 213, 192, 212 };
static symbol s_4_34[2] = { 197, 212 };
static symbol s_4_35[3] = { 213, 197, 212 };
static symbol s_4_36[2] = { 201, 212 };
static symbol s_4_37[2] = { 209, 212 };
static symbol s_4_38[2] = { 217, 212 };
static symbol s_4_39[2] = { 212, 216 };
static symbol s_4_40[3] = { 201, 212, 216 };
static symbol s_4_41[3] = { 217, 212, 216 };
static symbol s_4_42[3] = { 197, 219, 216 };
static symbol s_4_43[3] = { 201, 219, 216 };
static symbol s_4_44[2] = { 206, 217 };
static symbol s_4_45[3] = { 197, 206, 217 };
static struct among a_4[46] =
{
/* 0 */ { 1, s_4_0, -1, 2, 0},
/* 1 */ { 2, s_4_1, 0, 2, 0},
/* 2 */ { 2, s_4_2, -1, 1, 0},
/* 3 */ { 3, s_4_3, 2, 2, 0},
/* 4 */ { 3, s_4_4, 2, 2, 0},
/* 5 */ { 2, s_4_5, -1, 1, 0},
/* 6 */ { 3, s_4_6, 5, 2, 0},
/* 7 */ { 3, s_4_7, -1, 1, 0},
/* 8 */ { 3, s_4_8, -1, 2, 0},
/* 9 */ { 3, s_4_9, -1, 1, 0},
/* 10 */ { 4, s_4_10, 9, 2, 0},
/* 11 */ { 4, s_4_11, 9, 2, 0},
/* 12 */ { 2, s_4_12, -1, 1, 0},
/* 13 */ { 3, s_4_13, 12, 2, 0},
/* 14 */ { 3, s_4_14, 12, 2, 0},
/* 15 */ { 1, s_4_15, -1, 1, 0},
/* 16 */ { 2, s_4_16, 15, 2, 0},
/* 17 */ { 2, s_4_17, 15, 2, 0},
/* 18 */ { 1, s_4_18, -1, 1, 0},
/* 19 */ { 2, s_4_19, 18, 2, 0},
/* 20 */ { 2, s_4_20, 18, 2, 0},
/* 21 */ { 2, s_4_21, -1, 1, 0},
/* 22 */ { 2, s_4_22, -1, 2, 0},
/* 23 */ { 2, s_4_23, -1, 2, 0},
/* 24 */ { 1, s_4_24, -1, 1, 0},
/* 25 */ { 2, s_4_25, 24, 2, 0},
/* 26 */ { 2, s_4_26, -1, 1, 0},
/* 27 */ { 3, s_4_27, 26, 2, 0},
/* 28 */ { 3, s_4_28, 26, 2, 0},
/* 29 */ { 2, s_4_29, -1, 1, 0},
/* 30 */ { 3, s_4_30, 29, 2, 0},
/* 31 */ { 3, s_4_31, 29, 1, 0},
/* 32 */ { 2, s_4_32, -1, 1, 0},
/* 33 */ { 3, s_4_33, 32, 2, 0},
/* 34 */ { 2, s_4_34, -1, 1, 0},
/* 35 */ { 3, s_4_35, 34, 2, 0},
/* 36 */ { 2, s_4_36, -1, 2, 0},
/* 37 */ { 2, s_4_37, -1, 2, 0},
/* 38 */ { 2, s_4_38, -1, 2, 0},
/* 39 */ { 2, s_4_39, -1, 1, 0},
/* 40 */ { 3, s_4_40, 39, 2, 0},
/* 41 */ { 3, s_4_41, 39, 2, 0},
/* 42 */ { 3, s_4_42, -1, 1, 0},
/* 43 */ { 3, s_4_43, -1, 2, 0},
/* 44 */ { 2, s_4_44, -1, 1, 0},
/* 45 */ { 3, s_4_45, 44, 2, 0}
};
/*
 * Ending table searched (backwards) by r_noun through
 * find_among_b(z, a_5, 36).  Every row has result 1; substring_i links a
 * row to a shorter row whose string is a suffix of it.
 */
static symbol s_5_0[1] = { 192 };
static symbol s_5_1[2] = { 201, 192 };
static symbol s_5_2[2] = { 216, 192 };
static symbol s_5_3[1] = { 193 };
static symbol s_5_4[1] = { 197 };
static symbol s_5_5[2] = { 201, 197 };
static symbol s_5_6[2] = { 216, 197 };
static symbol s_5_7[2] = { 193, 200 };
static symbol s_5_8[2] = { 209, 200 };
static symbol s_5_9[3] = { 201, 209, 200 };
static symbol s_5_10[1] = { 201 };
static symbol s_5_11[2] = { 197, 201 };
static symbol s_5_12[2] = { 201, 201 };
static symbol s_5_13[3] = { 193, 205, 201 };
static symbol s_5_14[3] = { 209, 205, 201 };
static symbol s_5_15[4] = { 201, 209, 205, 201 };
static symbol s_5_16[1] = { 202 };
static symbol s_5_17[2] = { 197, 202 };
static symbol s_5_18[3] = { 201, 197, 202 };
static symbol s_5_19[2] = { 201, 202 };
static symbol s_5_20[2] = { 207, 202 };
static symbol s_5_21[2] = { 193, 205 };
static symbol s_5_22[2] = { 197, 205 };
static symbol s_5_23[3] = { 201, 197, 205 };
static symbol s_5_24[2] = { 207, 205 };
static symbol s_5_25[2] = { 209, 205 };
static symbol s_5_26[3] = { 201, 209, 205 };
static symbol s_5_27[1] = { 207 };
static symbol s_5_28[1] = { 209 };
static symbol s_5_29[2] = { 201, 209 };
static symbol s_5_30[2] = { 216, 209 };
static symbol s_5_31[1] = { 213 };
static symbol s_5_32[2] = { 197, 215 };
static symbol s_5_33[2] = { 207, 215 };
static symbol s_5_34[1] = { 216 };
static symbol s_5_35[1] = { 217 };
static struct among a_5[36] =
{
/* 0 */ { 1, s_5_0, -1, 1, 0},
/* 1 */ { 2, s_5_1, 0, 1, 0},
/* 2 */ { 2, s_5_2, 0, 1, 0},
/* 3 */ { 1, s_5_3, -1, 1, 0},
/* 4 */ { 1, s_5_4, -1, 1, 0},
/* 5 */ { 2, s_5_5, 4, 1, 0},
/* 6 */ { 2, s_5_6, 4, 1, 0},
/* 7 */ { 2, s_5_7, -1, 1, 0},
/* 8 */ { 2, s_5_8, -1, 1, 0},
/* 9 */ { 3, s_5_9, 8, 1, 0},
/* 10 */ { 1, s_5_10, -1, 1, 0},
/* 11 */ { 2, s_5_11, 10, 1, 0},
/* 12 */ { 2, s_5_12, 10, 1, 0},
/* 13 */ { 3, s_5_13, 10, 1, 0},
/* 14 */ { 3, s_5_14, 10, 1, 0},
/* 15 */ { 4, s_5_15, 14, 1, 0},
/* 16 */ { 1, s_5_16, -1, 1, 0},
/* 17 */ { 2, s_5_17, 16, 1, 0},
/* 18 */ { 3, s_5_18, 17, 1, 0},
/* 19 */ { 2, s_5_19, 16, 1, 0},
/* 20 */ { 2, s_5_20, 16, 1, 0},
/* 21 */ { 2, s_5_21, -1, 1, 0},
/* 22 */ { 2, s_5_22, -1, 1, 0},
/* 23 */ { 3, s_5_23, 22, 1, 0},
/* 24 */ { 2, s_5_24, -1, 1, 0},
/* 25 */ { 2, s_5_25, -1, 1, 0},
/* 26 */ { 3, s_5_26, 25, 1, 0},
/* 27 */ { 1, s_5_27, -1, 1, 0},
/* 28 */ { 1, s_5_28, -1, 1, 0},
/* 29 */ { 2, s_5_29, 28, 1, 0},
/* 30 */ { 2, s_5_30, 28, 1, 0},
/* 31 */ { 1, s_5_31, -1, 1, 0},
/* 32 */ { 2, s_5_32, -1, 1, 0},
/* 33 */ { 2, s_5_33, -1, 1, 0},
/* 34 */ { 1, s_5_34, -1, 1, 0},
/* 35 */ { 1, s_5_35, -1, 1, 0}
};
/*
 * Two-row ending table; both rows have result 1.
 * NOTE(review): its user is outside this part of the file - presumably
 * r_derivational; confirm.
 */
static symbol s_6_0[3] = { 207, 211, 212 };
static symbol s_6_1[4] = { 207, 211, 212, 216 };
static struct among a_6[2] =
{
/* 0 */ { 3, s_6_0, -1, 1, 0},
/* 1 */ { 4, s_6_1, -1, 1, 0}
};
/*
 * Four-row ending table with results 1, 2, 3 and 1.
 * NOTE(review): its user is outside this part of the file - presumably
 * r_tidy_up; confirm.
 */
static symbol s_7_0[4] = { 197, 202, 219, 197 };
static symbol s_7_1[1] = { 206 };
static symbol s_7_2[1] = { 216 };
static symbol s_7_3[3] = { 197, 202, 219 };
static struct among a_7[4] =
{
/* 0 */ { 4, s_7_0, -1, 1, 0},
/* 1 */ { 1, s_7_1, -1, 2, 0},
/* 2 */ { 1, s_7_2, -1, 3, 0},
/* 3 */ { 3, s_7_3, -1, 1, 0}
};
/*
 * Character-class data for the Russian stemmer; r_mark_regions passes it
 * to in_grouping / out_grouping with the range 192..220.
 * NOTE(review): presumably the vowel set as a bitmap over that range -
 * confirm against the in_grouping implementation.
 */
static unsigned char g_v[] = { 35, 130, 34, 18 };
/*
 * Single-symbol literals matched with eq_s_b.  Users visible in this part
 * of the file: s_0/s_1 in r_perfective_gerund, s_2/s_3 in r_adjectival,
 * s_4/s_5 in r_verb.  s_6 .. s_9 are used further down the file.
 */
static symbol s_0[] = { 193 };
static symbol s_1[] = { 209 };
static symbol s_2[] = { 193 };
static symbol s_3[] = { 209 };
static symbol s_4[] = { 193 };
static symbol s_5[] = { 209 };
static symbol s_6[] = { 206 };
static symbol s_7[] = { 206 };
static symbol s_8[] = { 206 };
static symbol s_9[] = { 201 };
/*
 * Compute the region marks z->I[0] (pV) and z->I[1] (p2).
 *
 * Both default to z->l.  Scanning forwards, pV is the position just after
 * the first g_v character; p2 is the position reached after then going
 * past a non-g_v character, a g_v character and another non-g_v
 * character.  If the text ends during the scan, the marks assigned so far
 * are kept.  The cursor is restored before returning; always returns 1.
 */
static int r_mark_regions(struct SN_env * z) {
    int origin = z->c;
    int stage;

    z->I[0] = z->l;
    z->I[1] = z->l;

    /* Stages 0 and 2 go past a g_v char, stages 1 and 3 past a non-g_v one. */
    for (stage = 0; stage < 4; stage++) {
        for (;;) {
            int matched = (stage & 1)
                ? out_grouping(z, g_v, 192, 220)
                : in_grouping(z, g_v, 192, 220);

            if (matched) break;
            if (z->c >= z->l) {
                z->c = origin;
                return 1;
            }
            z->c++;
        }
        if (stage == 0)
            z->I[0] = z->c;            /* pV */
    }
    z->I[1] = z->c;                    /* p2 */

    z->c = origin;
    return 1;
}
/* Returns 1 when the cursor is at or beyond mark z->I[1] (p2), else 0. */
static int r_R2(struct SN_env * z) {
    return (z->I[1] <= z->c) ? 1 : 0;
}
/*
 * Remove a perfective-gerund ending found (backwards) in a_0.
 *
 *   result 1: delete the ending only when it is preceded by s_0 or s_1;
 *   result 2: delete the ending unconditionally.
 *
 * Returns 0 when no ending matches or the result-1 condition fails,
 * 1 otherwise.
 */
static int r_perfective_gerund(struct SN_env * z) {
    int which;

    z->ket = z->c;
    which = find_among_b(z, a_0, 9);
    if (which == 0) return 0;
    z->bra = z->c;

    if (which == 1) {
        int back = z->l - z->c;

        if (!eq_s_b(z, 1, s_0)) {
            z->c = z->l - back;
            if (!eq_s_b(z, 1, s_1)) return 0;
        }
    }
    if (which == 1 || which == 2)
        slice_del(z);
    return 1;
}
/* Looks backwards for one of the 26 endings in a_1; when one is found with
   result 1 the ending is deleted.  Returns 0 if nothing matched, else 1. */
static int r_adjective(struct SN_env * z) {
    int matched;

    z->ket = z->c;
    matched = find_among_b(z, a_1, 26);
    if (matched == 0) return 0;
    z->bra = z->c;

    if (matched == 1) slice_del(z);
    return 1;
}
/* Removes an adjective ending (r_adjective) and then optionally ("try")
   one of the 8 endings in a_2.  Fails (returns 0) only when r_adjective
   fails; the optional second step never makes the routine fail. */
static int r_adjectival(struct SN_env * z) {
int among_var;
if (!r_adjective(z)) return 0; /* call adjective, line 141 */
{ int m = z->l - z->c; /* try, line 148 */
z->ket = z->c; /* [, line 149 */
among_var = find_among_b(z, a_2, 8); /* substring, line 149 */
/* nothing matched: restore the cursor and leave the optional step */
if (!(among_var)) { z->c = z->l - m; goto lab0; }
z->bra = z->c; /* ], line 149 */
switch(among_var) {
case 0: { z->c = z->l - m; goto lab0; }
case 1:
/* result 1: delete only when preceded by s_2 or, failing that, s_3 */
{ int m = z->l - z->c; /* or, line 154 */
if (!(eq_s_b(z, 1, s_2))) goto lab2;
goto lab1;
lab2:
z->c = z->l - m;
/* NOTE(review): the m used in the restore below is the inner one
   declared for the "or" (it shadows the m of the enclosing "try"), so
   the cursor returns to the position after the a_2 match rather than
   to the start of the try -- confirm this is what the generator
   intended. */
if (!(eq_s_b(z, 1, s_3))) { z->c = z->l - m; goto lab0; }
}
lab1:
slice_del(z); /* delete, line 154 */
break;
case 2:
/* result 2: delete unconditionally */
slice_del(z); /* delete, line 161 */
break;
}
lab0:
;
}
return 1;
}
/* Looks backwards for one of the 2 endings in a_3; when one is found with
   result 1 the ending is deleted.  Returns 0 if nothing matched, else 1. */
static int r_reflexive(struct SN_env * z) {
    int matched;

    z->ket = z->c;
    matched = find_among_b(z, a_3, 2);
    if (matched == 0) return 0;
    z->bra = z->c;

    if (matched == 1) slice_del(z);
    return 1;
}
/*
 * Looks backwards for one of the 46 endings in a_4 and deletes it.
 *
 * For result 1 the ending is only deleted when it is preceded by the symbol
 * s_4 or, failing that, s_5; otherwise the routine fails.  For result 2 the
 * ending is deleted unconditionally.  Returns 1 on success, 0 on failure.
 */
static int r_verb(struct SN_env * z) {
    int matched;
    int saved;

    z->ket = z->c;
    matched = find_among_b(z, a_4, 46);
    if (matched == 0) return 0;
    z->bra = z->c;

    if (matched == 1) {
        saved = z->l - z->c;
        if (!eq_s_b(z, 1, s_4)) {
            /* first alternative failed: rewind and try the second */
            z->c = z->l - saved;
            if (!eq_s_b(z, 1, s_5)) return 0;
        }
        slice_del(z);
    } else if (matched == 2) {
        slice_del(z);
    }
    return 1;
}
/* Looks backwards for one of the 36 endings in a_5; when one is found with
   result 1 the ending is deleted.  Returns 0 if nothing matched, else 1. */
static int r_noun(struct SN_env * z) {
    int matched;

    z->ket = z->c;
    matched = find_among_b(z, a_5, 36);
    if (matched == 0) return 0;
    z->bra = z->c;

    if (matched == 1) slice_del(z);
    return 1;
}
/* Looks backwards for one of the 2 endings in a_6.  The ending is deleted
   only when, after the match, the cursor satisfies r_R2().  Returns 0 when
   nothing matched or the r_R2() test fails, else 1. */
static int r_derivational(struct SN_env * z) {
    int matched;

    z->ket = z->c;
    matched = find_among_b(z, a_6, 2);
    if (matched == 0) return 0;
    z->bra = z->c;

    if (!r_R2(z)) return 0;
    if (matched == 1) slice_del(z);
    return 1;
}
/*
 * Final clean-up step driven by the 4 endings in a_7.
 *
 *   result 1: delete the ending, then require the symbol s_6 followed
 *             (backwards) by s_7 and delete the slice spanning s_6;
 *   result 2: delete the ending only when it is preceded by s_8;
 *   result 3: delete the ending.
 *
 * Returns 0 when nothing matched or a required symbol is missing, else 1.
 */
static int r_tidy_up(struct SN_env * z) {
    int matched;

    z->ket = z->c;
    matched = find_among_b(z, a_7, 4);
    if (matched == 0) return 0;
    z->bra = z->c;

    if (matched == 1) {
        slice_del(z);
        z->ket = z->c;
        if (!eq_s_b(z, 1, s_6)) return 0;
        z->bra = z->c;
        if (!eq_s_b(z, 1, s_7)) return 0;
        slice_del(z);
    } else if (matched == 2) {
        if (!eq_s_b(z, 1, s_8)) return 0;
        slice_del(z);
    } else if (matched == 3) {
        slice_del(z);
    }
    return 1;
}
/* Entry point of the stemmer: stems the word held in z in place.
   Returns 0 only when the cursor lies before the mark I[0] after the
   switch to backward mode; otherwise returns 1. */
extern int russian_stem(struct SN_env * z) {
/* compute the region marks I[0]/I[1]; the cursor is restored afterwards */
{ int c = z->c; /* do, line 240 */
if (!r_mark_regions(z)) goto lab0; /* call mark_regions, line 240 */
lab0:
z->c = c;
}
/* from here on the text is processed backwards, starting at its end */
z->lb = z->c; z->c = z->l; /* backwards, line 241 */
{ int m = z->l - z->c; /* setlimit, line 241 */
int m3;
if (z->c < z->I[0]) return 0;
/* temporarily raise the backward limit lb to the mark I[0];
   the previous limit is kept in m3 and put back at the end */
z->c = z->I[0]; /* tomark, line 241 */
m3 = z->lb; z->lb = z->c;
z->c = z->l - m;
/* step 1: either a perfective gerund ending, or an optional reflexive
   ending followed by the first that succeeds of adjectival, verb, noun */
{ int m = z->l - z->c; /* do, line 242 */
{ int m = z->l - z->c; /* or, line 243 */
if (!r_perfective_gerund(z)) goto lab3; /* call perfective_gerund, line 243 */
goto lab2;
lab3:
z->c = z->l - m;
{ int m = z->l - z->c; /* try, line 244 */
if (!r_reflexive(z)) { z->c = z->l - m; goto lab4; } /* call reflexive, line 244 */
lab4:
;
}
{ int m = z->l - z->c; /* or, line 245 */
if (!r_adjectival(z)) goto lab6; /* call adjectival, line 245 */
goto lab5;
lab6:
z->c = z->l - m;
if (!r_verb(z)) goto lab7; /* call verb, line 245 */
goto lab5;
lab7:
z->c = z->l - m;
if (!r_noun(z)) goto lab1; /* call noun, line 245 */
}
lab5:
;
}
lab2:
lab1:
z->c = z->l - m;
}
/* step 2: optionally delete a trailing s_9 symbol */
{ int m = z->l - z->c; /* try, line 248 */
z->ket = z->c; /* [, line 248 */
if (!(eq_s_b(z, 1, s_9))) { z->c = z->l - m; goto lab8; }
z->bra = z->c; /* ], line 248 */
slice_del(z); /* delete, line 248 */
lab8:
;
}
/* step 3: derivational ending; the cursor is restored whatever the result */
{ int m = z->l - z->c; /* do, line 251 */
if (!r_derivational(z)) goto lab9; /* call derivational, line 251 */
lab9:
z->c = z->l - m;
}
/* step 4: tidy up; the cursor is restored whatever the result */
{ int m = z->l - z->c; /* do, line 252 */
if (!r_tidy_up(z)) goto lab10; /* call tidy_up, line 252 */
lab10:
z->c = z->l - m;
}
/* put back the backward limit saved before the setlimit */
z->lb = m3;
}
/* leave backward mode: the cursor goes back to the (restored) limit */
z->c = z->lb;
return 1;
}
/* Allocates a stemmer environment through SN_create_env(0, 2, 0).
   NOTE(review): the 2 presumably sizes the integer array z->I, of which this
   stemmer uses I[0] and I[1] -- confirm against SN_create_env. */
extern struct SN_env * russian_create_env(void)
{
    return SN_create_env(0, 2, 0);
}
/* Releases an environment obtained from russian_create_env() by handing it
   to SN_close_env(). */
extern void russian_close_env(struct SN_env * z)
{
    SN_close_env(z);
}
/* This file was generated automatically by the Snowball to ANSI C compiler */
/* Creates the environment the Russian stemmer works on. */
extern struct SN_env * russian_create_env(void);
/* Releases an environment created by russian_create_env(). */
extern void russian_close_env(struct SN_env * z);
/* Stems the word currently held in z in place; returns 0 or 1. */
extern int russian_stem(struct SN_env * z);
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "header.h"
#define unless(C) if(!(C))
#define CREATE_SIZE 1
/*
 * Allocates a new symbol string with capacity and size CREATE_SIZE.
 *
 * The allocation holds a header of HEAD bytes followed by CREATE_SIZE + 1
 * symbols; the returned pointer addresses the first symbol, just past the
 * header.  Returns NULL when memory cannot be obtained (previously the
 * header was written through an invalid pointer in that case).
 */
extern symbol * create_s(void)
{
    symbol * p;
    char * mem = (char *) malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));

    if (mem == NULL) return NULL;   /* never do arithmetic on a failed malloc */

    p = (symbol *) (HEAD + mem);
    CAPACITY(p) = CREATE_SIZE;
    SET_SIZE(p, CREATE_SIZE);
    return p;
}
/*
 * Frees a symbol string obtained from create_s() or increase_size().
 *
 * p points just past the HEAD-byte header, so the address handed to free()
 * is p - HEAD.  A NULL p is ignored; without that guard NULL - HEAD, an
 * invalid pointer, would be passed to free().
 */
extern void lose_s(symbol * p)
{
    if (p == NULL) return;
    free((char *) p - HEAD);
}
/*
 * Forward test: is the symbol at the cursor a member of grouping s?
 *
 * s is a bitmap whose bit 0 stands for the symbol value min; values outside
 * min..max are never members.  On success the cursor advances by one and 1
 * is returned; at the end of the text, or for a non-member, 0 is returned
 * and the cursor is left alone.
 */
extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max)
{
    int ch;

    if (z->c >= z->l) return 0;
    ch = z->p[z->c];
    if (ch > max || ch < min) return 0;
    ch -= min;
    if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
    z->c++;
    return 1;
}
/*
 * Backward test: is the symbol just before the cursor a member of grouping s?
 *
 * s is a bitmap whose bit 0 stands for the symbol value min; values outside
 * min..max are never members.  On success the cursor moves back by one and
 * 1 is returned; at the backward limit z->lb, or for a non-member, 0 is
 * returned and the cursor is left alone.
 */
extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
{
    int ch;

    if (z->c <= z->lb) return 0;
    ch = z->p[z->c - 1];
    if (ch > max || ch < min) return 0;
    ch -= min;
    if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
    z->c--;
    return 1;
}
/*
 * Forward test: is the symbol at the cursor NOT a member of grouping s?
 *
 * Symbols outside min..max, or whose bit is clear in the bitmap s, count as
 * non-members.  On success the cursor advances by one and 1 is returned; at
 * the end of the text, or for a member, 0 is returned and the cursor is
 * left alone.
 */
extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max)
{
    int ch;

    if (z->c >= z->l) return 0;
    ch = z->p[z->c];
    if (ch <= max && ch >= min)
    {
        int bit = ch - min;
        if ((s[bit >> 3] & (0X1 << (bit & 0X7))) != 0) return 0;
    }
    z->c++;
    return 1;
}
/*
 * Backward test: is the symbol just before the cursor NOT a member of
 * grouping s?
 *
 * Symbols outside min..max, or whose bit is clear in the bitmap s, count as
 * non-members.  On success the cursor moves back by one and 1 is returned;
 * at the backward limit z->lb, or for a member, 0 is returned and the
 * cursor is left alone.
 */
extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
{
    int ch;

    if (z->c <= z->lb) return 0;
    ch = z->p[z->c - 1];
    if (ch <= max && ch >= min)
    {
        int bit = ch - min;
        if ((s[bit >> 3] & (0X1 << (bit & 0X7))) != 0) return 0;
    }
    z->c--;
    return 1;
}
/* Forward test: succeeds (returns 1 and advances the cursor by one) when the
   symbol at the cursor lies in min..max; returns 0 at the end of the text
   or when the symbol is out of range. */
extern int in_range(struct SN_env * z, int min, int max)
{
    int ch;

    if (z->c >= z->l) return 0;
    ch = z->p[z->c];
    if (ch < min || ch > max) return 0;
    z->c++;
    return 1;
}
/* Backward test: succeeds (returns 1 and moves the cursor back by one) when
   the symbol just before the cursor lies in min..max; returns 0 at the
   backward limit z->lb or when the symbol is out of range. */
extern int in_range_b(struct SN_env * z, int min, int max)
{
    int ch;

    if (z->c <= z->lb) return 0;
    ch = z->p[z->c - 1];
    if (ch < min || ch > max) return 0;
    z->c--;
    return 1;
}
/* Forward test: succeeds (returns 1 and advances the cursor by one) when the
   symbol at the cursor lies outside min..max; returns 0 at the end of the
   text or when the symbol is inside the range. */
extern int out_range(struct SN_env * z, int min, int max)
{
    int ch;

    if (z->c >= z->l) return 0;
    ch = z->p[z->c];
    if (ch >= min && ch <= max) return 0;
    z->c++;
    return 1;
}
/* Backward test: succeeds (returns 1 and moves the cursor back by one) when
   the symbol just before the cursor lies outside min..max; returns 0 at the
   backward limit z->lb or when the symbol is inside the range. */
extern int out_range_b(struct SN_env * z, int min, int max)
{
    int ch;

    if (z->c <= z->lb) return 0;
    ch = z->p[z->c - 1];
    if (ch >= min && ch <= max) return 0;
    z->c--;
    return 1;
}
/* Forward literal match: when the s_size symbols at the cursor equal s, the
   cursor advances past them and 1 is returned; otherwise (including when
   fewer than s_size symbols remain before z->l) 0 is returned. */
extern int eq_s(struct SN_env * z, int s_size, symbol * s)
{
    int remaining = z->l - z->c;

    if (remaining < s_size) return 0;
    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
    z->c += s_size;
    return 1;
}
/* Backward literal match: when the s_size symbols ending at the cursor equal
   s, the cursor moves back over them and 1 is returned; otherwise
   (including when fewer than s_size symbols lie between z->lb and the
   cursor) 0 is returned. */
extern int eq_s_b(struct SN_env * z, int s_size, symbol * s)
{
    int available = z->c - z->lb;

    if (available < s_size) return 0;
    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
    z->c -= s_size;
    return 1;
}
/* Forward match against the symbol string p, whose length is taken from
   SIZE(p); see eq_s() for the result and cursor movement. */
extern int eq_v(struct SN_env * z, symbol * p)
{
    int p_size = SIZE(p);

    return eq_s(z, p_size, p);
}
/* Backward match against the symbol string p, whose length is taken from
   SIZE(p); see eq_s_b() for the result and cursor movement. */
extern int eq_v_b(struct SN_env * z, symbol * p)
{
    int p_size = SIZE(p);

    return eq_s_b(z, p_size, p);
}
/* Forward table lookup: compares the text at the cursor with the v_size
   entries of v and returns the result field of the matching entry, or 0
   when no entry matches.
   The first loop is a binary search (it relies on v being ordered
   consistently with the symbol-by-symbol comparison used here); the second
   loop walks the substring_i chain from the entry found until it reaches an
   entry that was matched in full.  On a match the cursor is placed just
   past the matched string. */
extern int find_among(struct SN_env * z, struct among * v, int v_size)
{
/* i..j is the current search interval in v */
int i = 0;
int j = v_size;
int c = z->c; int l = z->l;
/* q: the text starting at the cursor */
symbol * q = z->p + c;
struct among * w;
/* number of leading symbols already known to agree with entry i / entry j */
int common_i = 0;
int common_j = 0;
int first_key_inspected = 0;
while(1)
{ int k = i + ((j - i) >> 1);
int diff = 0;
int common = common_i < common_j ? common_i : common_j; /* smaller */
w = v + k;
/* compare entry k with the text, skipping the symbols already known to
   agree; running out of text counts as "text is smaller" */
{ int i; for (i = common; i < w->s_size; i++)
{ if (c + common == l) { diff = -1; break; }
diff = q[common] - w->s[i];
if (diff != 0) break;
common++;
}
}
if (diff < 0) { j = k; common_j = common; }
else { i = k; common_i = common; }
if (j - i <= 1)
{ if (i > 0) break; /* v->s has been inspected */
if (j == i) break; /* only one item in v */
/* - but now we need to go round once more to get
v->s inspected. This looks messy, but is actually
the optimal approach. */
if (first_key_inspected) break;
first_key_inspected = 1;
}
}
while(1)
{ w = v + i;
/* entry i matched in full when all of its s_size symbols agreed */
if (common_i >= w->s_size)
{ z->c = c + w->s_size;
if (w->function == 0) return w->result;
/* the entry carries a condition routine: accept only if it succeeds,
   and re-establish the cursor it may have moved */
{ int res = w->function(z);
z->c = c + w->s_size;
if (res) return w->result;
}
}
/* fall back to the entry linked through substring_i; a negative link
   ends the chain */
i = w->substring_i;
if (i < 0) return 0;
}
}
/* find_among_b is for backwards processing. Same comments apply */
/* Backward table lookup: compares the text ending at the cursor, read from
   right to left, with the v_size entries of v (each entry is likewise
   compared from its last symbol to its first).  Returns the result field of
   the matching entry, or 0 when no entry matches; on a match the cursor is
   placed just before the matched string.  The backward limit z->lb plays
   the role that z->l plays in find_among. */
extern int find_among_b(struct SN_env * z, struct among * v, int v_size)
{
/* i..j is the current search interval in v */
int i = 0;
int j = v_size;
int c = z->c; int lb = z->lb;
/* q: the symbol just before the cursor; q[-n] is n symbols further back */
symbol * q = z->p + c - 1;
struct among * w;
/* number of trailing symbols already known to agree with entry i / entry j */
int common_i = 0;
int common_j = 0;
int first_key_inspected = 0;
while(1)
{ int k = i + ((j - i) >> 1);
int diff = 0;
int common = common_i < common_j ? common_i : common_j;
w = v + k;
/* compare entry k with the text from the end, skipping the symbols
   already known to agree; reaching lb counts as "text is smaller" */
{ int i; for (i = w->s_size - 1 - common; i >= 0; i--)
{ if (c - common == lb) { diff = -1; break; }
diff = q[- common] - w->s[i];
if (diff != 0) break;
common++;
}
}
if (diff < 0) { j = k; common_j = common; }
else { i = k; common_i = common; }
/* same termination rule as in find_among: entry 0 gets one extra
   inspection before the search stops */
if (j - i <= 1)
{ if (i > 0) break;
if (j == i) break;
if (first_key_inspected) break;
first_key_inspected = 1;
}
}
while(1)
{ w = v + i;
/* entry i matched in full when all of its s_size symbols agreed */
if (common_i >= w->s_size)
{ z->c = c - w->s_size;
if (w->function == 0) return w->result;
/* the entry carries a condition routine: accept only if it succeeds,
   and re-establish the cursor it may have moved */
{ int res = w->function(z);
z->c = c - w->s_size;
if (res) return w->result;
}
}
/* fall back to the entry linked through substring_i; a negative link
   ends the chain */
i = w->substring_i;
if (i < 0) return 0;
}
}
/*
 * Replaces the symbol string p by a larger one with capacity n + 20.
 *
 * CAPACITY(p) symbols are copied into the new string, p is released with
 * lose_s() and the new string is returned.  Only the capacity field of the
 * new header is set here.
 * NOTE(review): the result of malloc() is not checked.
 */
extern symbol * increase_size(symbol * p, int n)
{
    int grown = n + 20;
    char * mem = (char *) malloc(HEAD + (grown + 1) * sizeof(symbol));
    symbol * q = (symbol *) (HEAD + mem);

    CAPACITY(q) = grown;
    memmove(q, p, CAPACITY(p) * sizeof(symbol));
    lose_s(p);
    return q;
}
/*
 * Replaces the symbols in z->p between c_bra and c_ket by the s_size
 * symbols at s.
 *
 * When the replacement has a different length, the tail of the string is
 * shifted (growing z->p first if its capacity is too small), the stored size
 * and z->l are adjusted, and the cursor is moved along with the tail -- or
 * pulled back to c_bra when it was inside the replaced slice.  Returns the
 * change in length (new length minus old length of the slice).
 */
extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s)
{
    int old_len = SIZE(z->p);
    int adjustment = s_size - (c_ket - c_bra);

    if (adjustment != 0)
    {
        int new_len = adjustment + old_len;

        if (new_len > CAPACITY(z->p))
            z->p = increase_size(z->p, new_len);
        memmove(z->p + c_ket + adjustment, z->p + c_ket, (old_len - c_ket) * sizeof(symbol));
        SET_SIZE(z->p, new_len);
        z->l += adjustment;
        if (z->c >= c_ket)
            z->c += adjustment;
        else if (z->c > c_bra)
            z->c = c_bra;
    }
    if (s_size != 0)
        memmove(z->p + c_bra, s, s_size * sizeof(symbol));
    return adjustment;
}
/* Sanity check run before a slice operation: requires
   0 <= bra <= ket <= l <= SIZE(p).  When the check fails a message and a
   dump of the environment are printed and the process exits with status 1. */
static void slice_check(struct SN_env * z)
{
    int ok = 0 <= z->bra &&
             z->bra <= z->ket &&
             z->ket <= z->l &&
             z->l <= SIZE(z->p);    /* this last test could be removed */

    if (ok) return;

    fprintf(stderr, "faulty slice operation:\n");
    debug(z, -1, 0);
    exit(1);
}
/* Replaces the current slice (z->bra .. z->ket) by the s_size symbols at s,
   after validating the slice with slice_check(). */
extern void slice_from_s(struct SN_env * z, int s_size, symbol * s)
{
    slice_check(z);
    (void) replace_s(z, z->bra, z->ket, s_size, s);
}
/* Replaces the current slice by the symbol string p, whose length is taken
   from SIZE(p). */
extern void slice_from_v(struct SN_env * z, symbol * p)
{
    int p_size = SIZE(p);

    slice_from_s(z, p_size, p);
}
extern void slice_del(struct SN_env * z)
{ slice_from_s(z, 0, 0);
}
/* Replaces the symbols between bra and ket by the s_size symbols at s and
   shifts the slice marks z->bra / z->ket by the resulting change in length
   when they lie at or after bra. */
extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
{
    int delta = replace_s(z, bra, ket, s_size, s);

    if (z->bra >= bra) z->bra += delta;
    if (z->ket >= bra) z->ket += delta;
}
/* Same as insert_s(), with the replacement given as a symbol string p whose
   length is taken from SIZE(p). */
extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p)
{
    int delta = replace_s(z, bra, ket, SIZE(p), p);

    if (z->bra >= bra) z->bra += delta;
    if (z->ket >= bra) z->ket += delta;
}
/* Copies the current slice (z->bra .. z->ket) into the symbol string p,
   enlarging p when its capacity is too small, and returns the (possibly
   reallocated) string.  The slice is validated with slice_check() first. */
extern symbol * slice_to(struct SN_env * z, symbol * p)
{
    int len;

    slice_check(z);
    len = z->ket - z->bra;
    if (len > CAPACITY(p)) p = increase_size(p, len);
    memmove(p, z->p + z->bra, len * sizeof(symbol));
    SET_SIZE(p, len);
    return p;
}
/* Copies the first z->l symbols of z->p into the symbol string p, enlarging
   p when its capacity is too small, and returns the (possibly reallocated)
   string. */
extern symbol * assign_to(struct SN_env * z, symbol * p)
{
    int total = z->l;

    if (total > CAPACITY(p)) p = increase_size(p, total);
    memmove(p, z->p, total * sizeof(symbol));
    SET_SIZE(p, total);
    return p;
}
/*
 * Prints the text of z on stdout with its positions marked:
 * '{' = lb, '[' = bra, '|' = cursor, ']' = ket, '}' = l.
 * Zero symbols are shown as '#'.  When number is not negative the line is
 * prefixed with number, line_count and the stored size of the string.
 */
extern void debug(struct SN_env * z, int number, int line_count)
{
    int pos;
    int size = SIZE(z->p);

    if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,size);
    for (pos = 0; pos <= size; pos++)
    {
        if (pos == z->lb) printf("{");
        if (pos == z->bra) printf("[");
        if (pos == z->c) printf("|");
        if (pos == z->ket) printf("]");
        if (pos == z->l) printf("}");
        if (pos < size)
        {
            int ch = z->p[pos];
            printf("%c", (ch == 0) ? '#' : ch);
        }
    }
    printf("'\n");
}
--
-- first, define the datatype. Turn off echoing so that expected file
-- does not depend on contents of seg.sql.
--
\set ECHO none
\i tsearch2.sql
\set ECHO all
--tsvector
SELECT '1'::tsvector;
SELECT '1 '::tsvector;
SELECT ' 1'::tsvector;
SELECT ' 1 '::tsvector;
SELECT '1 2'::tsvector;
SELECT '\'1 2\''::tsvector;
SELECT '\'1 \\\'2\''::tsvector;
SELECT '\'1 \\\'2\'3'::tsvector;
SELECT '\'1 \\\'2\' 3'::tsvector;
SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector;
select '\'w\':4A,3B,2C,1D,5 a:8';
select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
--tsquery
SELECT '1'::tsquery;
SELECT '1 '::tsquery;
SELECT ' 1'::tsquery;
SELECT ' 1 '::tsquery;
SELECT '\'1 2\''::tsquery;
SELECT '\'1 \\\'2\''::tsquery;
SELECT '!1'::tsquery;
SELECT '1|2'::tsquery;
SELECT '1|!2'::tsquery;
SELECT '!1|2'::tsquery;
SELECT '!1|!2'::tsquery;
SELECT '!(!1|!2)'::tsquery;
SELECT '!(!1|2)'::tsquery;
SELECT '!(1|!2)'::tsquery;
SELECT '!(1|2)'::tsquery;
SELECT '1&2'::tsquery;
SELECT '!1&2'::tsquery;
SELECT '1&!2'::tsquery;
SELECT '!1&!2'::tsquery;
SELECT '(1&2)'::tsquery;
SELECT '1&(2)'::tsquery;
SELECT '!(1)&2'::tsquery;
SELECT '!(1&2)'::tsquery;
SELECT '1|2&3'::tsquery;
SELECT '1|(2&3)'::tsquery;
SELECT '(1|2)&3'::tsquery;
SELECT '1|2&!3'::tsquery;
SELECT '1|!2&3'::tsquery;
SELECT '!1|2&3'::tsquery;
SELECT '!1|(2&3)'::tsquery;
SELECT '!(1|2)&3'::tsquery;
SELECT '(!1|2)&3'::tsquery;
SELECT '1|(2|(4|(5|6)))'::tsquery;
SELECT '1|2|4|5|6'::tsquery;
SELECT '1&(2&(4&(5&6)))'::tsquery;
SELECT '1&2&4&5&6'::tsquery;
SELECT '1&(2&(4&(5|6)))'::tsquery;
SELECT '1&(2&(4&(5|!6)))'::tsquery;
SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery;
SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a';
select lexize('simple', 'ASD56 hsdkf');
select lexize('en_stem', 'SKIES Problems identity');
select * from token_type('default');
select * from parse('default', '345 qwe@efd.r \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
<i <b> wow < jqw <> qwerty');
SELECT to_tsvector('default', '345 qwe@efd.r \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
<i <b> wow < jqw <> qwerty');
SELECT length(to_tsvector('default', '345 qw'));
SELECT length(to_tsvector('default', '345 qwe@efd.r \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234
<i <b> wow < jqw <> qwerty'));
select to_tsquery('default', 'qwe & sKies ');
select to_tsquery('simple', 'qwe & sKies ');
select to_tsquery('default', '\'the wether\':dc & \' sKies \':BC ');
select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca';
select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B';
select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A';
select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C';
select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB';
CREATE TABLE test_tsvector( t text, a tsvector );
\copy test_tsvector from 'data/test_tsearch.data'
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
create index wowidx on test_tsvector using gist (a);
set enable_seqscan=off;
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh';
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
select set_curcfg('default');
CREATE TRIGGER tsvectorupdate
BEFORE UPDATE OR INSERT ON test_tsvector
FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t);
SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
INSERT INTO test_tsvector (t) VALUES ('345 qwerty');
SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
UPDATE test_tsvector SET t = null WHERE t = '345 qwerty';
SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
drop trigger tsvectorupdate on test_tsvector;
create function wow(text) returns text as 'select $1 || \' copyright\'; ' language sql;
create trigger tsvectorupdate before update or insert on test_tsvector
for each row execute procedure tsearch2(a, wow, t);
insert into test_tsvector (t) values ('345 qwerty');
select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty');
select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
select rank(' a:1 s:2C d g'::tsvector, 'a | s');
select rank(' a:1 s:2B d g'::tsvector, 'a | s');
select rank(' a:1 s:2 d g'::tsvector, 'a | s');
select rank(' a:1 s:2C d g'::tsvector, 'a & s');
select rank(' a:1 s:2B d g'::tsvector, 'a & s');
select rank(' a:1 s:2 d g'::tsvector, 'a & s');
insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
select reset_tsearch();
select to_tsquery('default', 'skies & books');
select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
A thousand years to trace
The granite features of this cliff
In crag and scarp and base.
It took the sea an hour one night
An hour of storm to place
The sculpture of these granite seams,
Upon a woman s face. E. J. Pratt (1882 1964)
'), to_tsquery('sea&thousand&years'));
select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
A thousand years to trace
The granite features of this cliff
In crag and scarp and base.
It took the sea an hour one night
An hour of storm to place
The sculpture of these granite seams,
Upon a woman s face. E. J. Pratt (1882 1964)
'), to_tsquery('granite&sea'));
select rank_cd(to_tsvector('Erosion It took the sea a thousand years,
A thousand years to trace
The granite features of this cliff
In crag and scarp and base.
It took the sea an hour one night
An hour of storm to place
The sculpture of these granite seams,
Upon a woman s face. E. J. Pratt (1882 1964)
'), to_tsquery('sea'));
select get_covers(to_tsvector('Erosion It took the sea a thousand years,
A thousand years to trace
The granite features of this cliff
In crag and scarp and base.
It took the sea an hour one night
An hour of storm to place
The sculpture of these granite seams,
Upon a woman s face. E. J. Pratt (1882 1964)
'), to_tsquery('sea&thousand&years'));
select get_covers(to_tsvector('Erosion It took the sea a thousand years,
A thousand years to trace
The granite features of this cliff
In crag and scarp and base.
It took the sea an hour one night
An hour of storm to place
The sculpture of these granite seams,
Upon a woman s face. E. J. Pratt (1882 1964)
'), to_tsquery('granite&sea'));
select get_covers(to_tsvector('Erosion It took the sea a thousand years,
A thousand years to trace
The granite features of this cliff
In crag and scarp and base.
It took the sea an hour one night
An hour of storm to place
The sculpture of these granite seams,
Upon a woman s face. E. J. Pratt (1882 1964)
'), to_tsquery('sea'));
select headline('Erosion It took the sea a thousand years,
A thousand years to trace
The granite features of this cliff
In crag and scarp and base.
It took the sea an hour one night
An hour of storm to place
The sculpture of these granite seams,
Upon a woman s face. E. J. Pratt (1882 1964)
', to_tsquery('sea&thousand&years'));
select headline('Erosion It took the sea a thousand years,
A thousand years to trace
The granite features of this cliff
In crag and scarp and base.
It took the sea an hour one night
An hour of storm to place
The sculpture of these granite seams,
Upon a woman s face. E. J. Pratt (1882 1964)
', to_tsquery('granite&sea'));
select headline('Erosion It took the sea a thousand years,
A thousand years to trace
The granite features of this cliff
In crag and scarp and base.
It took the sea an hour one night
An hour of storm to place
The sculpture of these granite seams,
Upon a woman s face. E. J. Pratt (1882 1964)
', to_tsquery('sea'));
/*
* stopword library
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "postgres.h"
#include "common.h"
#include "dict.h"
#define STOPBUFLEN 4096
/*
 * Lower-cases the NUL-terminated string str in place with tolower(), one
 * byte at a time, and returns str.
 */
char*
lowerstr(char *str) {
	unsigned char *cur;

	for ( cur=(unsigned char*)str; *cur; cur++ )
		*cur = tolower(*cur);
	return str;
}
/*
 * Releases everything owned by a stop list and zeroes the structure.
 *
 * Every one of the s->len entries is freed, then the pointer array itself.
 * The array is freed exactly once and only after the last entry has been
 * read: the previous code freed it inside the loop, which released it
 * repeatedly and kept reading entries from freed memory, and never released
 * it at all when the list was empty.  The length is tested before an entry
 * is touched, so nothing past the last entry is read.
 */
void
freestoplist(StopList *s) {
	char **ptr=s->stop;

	if ( ptr ) {
		while( s->len >0 ) {
			free(*ptr);	/* free(NULL) is harmless */
			ptr++; s->len--;
		}
		free(s->stop);
	}
	memset(s,0,sizeof(StopList));
}
/*
 * Loads a stop-word file into s.
 *
 * in holds the file name; when it is NULL or empty the list is left empty
 * (s->len = 0, s->stop = NULL).  Each non-empty line becomes one entry,
 * duplicated with strdup() and, when s->wordop is set, passed through it.
 * Errors (file cannot be opened, out of memory) are reported with
 * elog(ERROR).
 *
 * Fixes over the previous version:
 *  - only a trailing newline is stripped, so the last line of a file that
 *    does not end in a newline keeps its final character, and an empty
 *    buffer no longer writes to buf[-1];
 *  - on memory exhaustion the entries collected so far are released here
 *    (they used to leak, and the cleanup read s->stop before it had been
 *    assigned).
 */
void
readstoplist(text *in, StopList *s) {
	char **stop=NULL;

	s->len=0;
	if ( in && VARSIZE(in) - VARHDRSZ > 0 ) {
		char *filename=text2char(in);
		FILE *hin=NULL;
		char buf[STOPBUFLEN];
		int reallen=0;
		int nomem=0;
		int i;

		if ( (hin=fopen(filename,"r")) == NULL )
			elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno));

		while( fgets(buf,STOPBUFLEN,hin) ) {
			size_t linelen=strlen(buf);

			/* drop the line terminator, if this chunk has one */
			if ( linelen>0 && buf[linelen-1]=='\n' )
				buf[linelen-1] = '\0';
			if ( *buf=='\0' ) continue;

			/* grow the pointer array: 16 entries first, then doubling */
			if ( s->len>= reallen ) {
				char **tmp;
				reallen=(reallen) ? reallen*2 : 16;
				tmp=(char**)realloc((void*)stop, sizeof(char*)*reallen);
				if (!tmp) {
					nomem=1;
					break;
				}
				stop=tmp;
			}

			stop[s->len]=strdup(buf);
			if ( !stop[s->len] ) {
				nomem=1;
				break;
			}
			if ( s->wordop )
				stop[s->len]=(s->wordop)(stop[s->len]);

			(s->len)++;
		}
		fclose(hin);

		if ( nomem ) {
			/* release what was collected before reporting the error */
			for(i=0;i<s->len;i++)
				free(stop[i]);
			if ( stop )
				free(stop);
			s->len=0;
			s->stop=NULL;
			elog(ERROR,"Not enough memory");
		}
		pfree(filename);
	}
	s->stop=stop;
}
/* qsort()/bsearch() comparator for arrays of C strings: a and b point to
   the array elements (char *), which are compared with strcmp(). */
static int
comparestr(const void *a, const void *b) {
	char *left = *(char**)a;
	char *right = *(char**)b;

	return strcmp( left, right );
}
/* Sorts the entries of a stop list with comparestr() so that
   searchstoplist() can use bsearch(); empty lists are left untouched. */
void
sortstoplist(StopList *s) {
	if ( !s->stop || s->len<=0 )
		return;
	qsort(s->stop, s->len, sizeof(char*), comparestr);
}
/*
 * Reports whether key is in the stop list.
 *
 * When s->wordop is set, key is first passed through it (the result replaces
 * key for the lookup).  The lookup is a bsearch() with comparestr(), so the
 * list must have been sorted with sortstoplist().
 */
bool
searchstoplist(StopList *s, char *key) {
	if ( s->wordop )
		key=(*(s->wordop))(key);

	if ( !s->stop || s->len<=0 )
		return false;

	return ( bsearch(&key, s->stop, s->len, sizeof(char*), comparestr) != NULL ) ? true : false;
}
i
me
my
myself
we
our
ours
ourselves
you
your
yours
yourself
yourselves
he
him
his
himself
she
her
hers
herself
it
its
itself
they
them
their
theirs
themselves
what
which
who
whom
this
that
these
those
am
is
are
was
were
be
been
being
have
has
had
having
do
does
did
doing
a
an
the
and
but
if
or
because
as
until
while
of
at
by
for
with
about
against
between
into
through
during
before
after
above
below
to
from
up
down
in
out
on
off
over
under
again
further
then
once
here
there
when
where
why
how
all
any
both
each
few
more
most
other
some
such
no
nor
not
only
own
same
so
than
too
very
s
t
can
will
just
don
should
now
/*
* interface functions to tscfg
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <locale.h>
#include "postgres.h"
#include "fmgr.h"
#include "utils/array.h"
#include "catalog/pg_type.h"
#include "executor/spi.h"
#include "ts_cfg.h"
#include "dict.h"
#include "wparser.h"
#include "snmap.h"
#include "common.h"
#include "tsvector.h"
/*********top interface**********/
/* Saved SPI plans, prepared on first use and then reused:
   plan_getcfg and plan_getmap by init_cfg(), plan_name2id by name2id_cfg().
   NOTE(review): plan_getcfg_bylocale is not used in the part of the file
   reviewed here. */
static void *plan_getcfg_bylocale=NULL;
static void *plan_getcfg=NULL;
static void *plan_getmap=NULL;
static void *plan_name2id=NULL;
/* NOTE(review): presumably the id of the configuration selected as current;
   it is not referenced in the part of the file reviewed here. */
static Oid current_cfg_id=0;
/*
 * Fills *cfg with the configuration whose pg_ts_cfg oid is id.
 *
 * Steps:
 *  1. read the parser name (prs_name) of the configuration;
 *  2. read, per token type, the list of dictionary names from pg_ts_cfgmap
 *     (rows arrive ordered by token id, highest first, so the first row
 *     determines the size of cfg->map);
 *  3. after the SPI session is closed, translate the parser name and every
 *     dictionary name into ids with name2id_prs() / name2id_dict().
 *
 * cfg->map and the per-token dict_id arrays are malloc()ed; the temporary
 * name copies live in TopMemoryContext until they are translated and freed.
 * Errors are reported through ts_error(ERROR, ...).
 *
 * Changes over the previous version: the dict_id allocation is checked
 * before it is cleared, and the pointer <-> Datum conversions for the
 * dictionary array use the macro of the right direction.
 */
void
init_cfg(Oid id, TSCfgInfo *cfg) {
	Oid arg[2]={ OIDOID, OIDOID };
	bool isnull;
	Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ;
	int stat,i,j;
	text *ptr;
	text *prsname=NULL;
	MemoryContext oldcontext;

	memset(cfg,0,sizeof(TSCfgInfo));
	SPI_connect();

	/* step 1: parser name of the configuration */
	if ( !plan_getcfg ) {
		plan_getcfg = SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) );
		if ( !plan_getcfg )
			ts_error(ERROR, "SPI_prepare() failed");
	}

	stat = SPI_execp(plan_getcfg, pars, " ", 1);
	if ( stat < 0 )
		ts_error (ERROR, "SPI_execp return %d", stat);
	if ( SPI_processed > 0 ) {
		prsname = (text*) DatumGetPointer(
			SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)
		);
		/* the copy must outlive the SPI session */
		oldcontext = MemoryContextSwitchTo(TopMemoryContext);
		prsname = ptextdup( prsname );
		MemoryContextSwitchTo(oldcontext);

		cfg->id=id;
	} else
		ts_error(ERROR, "No tsearch cfg with id %d", id);

	/* step 2: token type -> dictionary names */
	arg[0]=TEXTOID;
	if ( !plan_getmap ) {
		plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) );
		if ( !plan_getmap )
			ts_error(ERROR, "SPI_prepare() failed");
	}

	pars[0]=PointerGetDatum( prsname );
	stat = SPI_execp(plan_getmap, pars, " ", 0);
	if ( stat < 0 )
		ts_error (ERROR, "SPI_execp return %d", stat);
	if ( SPI_processed <= 0 )
		ts_error(ERROR, "No parser with id %d", id);

	for(i=0;i<SPI_processed;i++) {
		int lexid = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull));
		ArrayType *toasted_a = (ArrayType*)DatumGetPointer(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull));
		ArrayType *a;

		/* first row carries the highest token id: size the map from it */
		if ( !cfg->map ) {
			cfg->len=lexid+1;
			cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len );
			if ( !cfg->map )
				ts_error(ERROR,"No memory");
			memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len );
		}

		/* isnull now refers to the dictionary-name column */
		if (isnull)
			continue;

		a=(ArrayType*)PG_DETOAST_DATUM( PointerGetDatum(toasted_a) );

		if ( ARR_NDIM(a) != 1 )
			ts_error(ERROR,"Wrong dimension");
		if ( ARRNELEMS(a) < 1 )
			continue;

		cfg->map[lexid].len=ARRNELEMS(a);
		cfg->map[lexid].dict_id=(Datum*)malloc( sizeof(Datum)*cfg->map[lexid].len );
		if ( !cfg->map[lexid].dict_id )
			ts_error(ERROR,"No memory");
		memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len );

		/* keep copies of the names; they are turned into ids in step 3 */
		ptr=(text*)ARR_DATA_PTR(a);
		oldcontext = MemoryContextSwitchTo(TopMemoryContext);
		for(j=0;j<cfg->map[lexid].len;j++) {
			cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr));
			ptr=NEXTVAL(ptr);
		}
		MemoryContextSwitchTo(oldcontext);

		if ( a != toasted_a )
			pfree(a);
	}

	SPI_finish();

	/* step 3: names -> ids, done outside the SPI session */
	cfg->prs_id = name2id_prs( prsname );
	pfree(prsname);
	for(i=0;i<cfg->len;i++) {
		for(j=0;j<cfg->map[i].len;j++) {
			ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] );
			cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) );
			pfree(ptr);
		}
	}
}
/* Process-local cache of loaded configurations, used by findcfg(),
   name2id_cfg() and reset_cfg(). */
typedef struct {
/* entry returned by the most recent lookup (checked first by findcfg) */
TSCfgInfo *last_cfg;
/* number of entries in use in list */
int len;
/* number of entries allocated in list */
int reallen;
/* entries, kept sorted by id with comparecfg() so bsearch() can be used */
TSCfgInfo *list;
/* cache consulted by name2id_cfg() to map a configuration name to its id */
SNMap name2id_map;
} CFGList;
/* the single cache instance; starts out empty */
static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}};
void
reset_cfg(void) {
freeSNMap( &(CList.name2id_map) );
if ( CList.list ) {
int i,j;
for(i=0;i<CList.len;i++)
if ( CList.list[i].map ) {
for(j=0;j<CList.list[i].len;j++)
if ( CList.list[i].map[j].dict_id )
free(CList.list[i].map[j].dict_id);
free( CList.list[i].map );
}
free(CList.list);
}
memset(&CList,0,sizeof(CFGList));
}
static int
comparecfg(const void *a, const void *b) {
return ((TSCfgInfo*)a)->id - ((TSCfgInfo*)b)->id;
}
/*
 * Returns the cached configuration with the given id, loading it with
 * init_cfg() on first use.
 *
 * Lookup order: the entry returned last time, then a binary search of the
 * sorted cache, then a fresh load.  A freshly loaded entry is appended, the
 * cache is re-sorted and the lookup is repeated to find the entry's final
 * position.
 *
 * The new slot is only made reachable (through CList.len and, on the
 * repeated lookup, CList.last_cfg) after init_cfg() has returned.
 * Previously CList.last_cfg pointed at the slot while it was being filled,
 * so an error raised inside init_cfg() after it had stored the id left a
 * half-initialised entry that the fast path would return on the next call.
 */
TSCfgInfo *
findcfg(Oid id) {
	TSCfgInfo *slot;

	/* last used cfg */
	if ( CList.last_cfg && CList.last_cfg->id==id )
		return CList.last_cfg;

	/* already used cfg */
	if ( CList.len != 0 ) {
		TSCfgInfo key;
		key.id=id;
		CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
		if ( CList.last_cfg != NULL )
			return CList.last_cfg;
	}

	/* last chance: load it, growing the list first when it is full */
	if ( CList.len==CList.reallen ) {
		TSCfgInfo *tmp;
		int reallen = ( CList.reallen ) ? 2*CList.reallen : 16;
		tmp=(TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*reallen);
		if ( !tmp )
			ts_error(ERROR,"No memory");
		CList.reallen=reallen;
		CList.list=tmp;
	}
	slot=&(CList.list[CList.len]);
	CList.last_cfg=NULL;	/* nothing may point at the slot while it is filled */
	init_cfg(id, slot);
	CList.len++;
	qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg);
	return findcfg(id); /* qsort changed order!! */
}
/*
 * Translates a configuration name (pg_ts_cfg.ts_name) into its oid.
 *
 * The name->id cache in CList is consulted first; on a miss the oid is read
 * through SPI and added to the cache.  A missing configuration or a null
 * oid is reported with elog(ERROR).
 */
Oid
name2id_cfg(text *name) {
	Oid arg[1]={ TEXTOID };
	bool isnull;
	Datum pars[1]={ PointerGetDatum(name) };
	int stat;
	Oid id;

	/* cached? */
	id=findSNMap_t( &(CList.name2id_map), name );
	if ( id )
		return id;

	SPI_connect();
	if ( !plan_name2id ) {
		plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, arg ) );
		if ( !plan_name2id ) {
			elog(ERROR, "SPI_prepare() failed");
		}
	}

	stat = SPI_execp(plan_name2id, pars, " ", 1);
	if ( stat < 0 ) {
		elog (ERROR, "SPI_execp return %d", stat);
	}

	if ( SPI_processed <= 0 ) {
		elog(ERROR, "No tsearch config");
	} else {
		id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
		if ( isnull ) {
			elog(ERROR, "Null id for tsearch config");
		}
	}
	SPI_finish();

	/* remember the answer for the next call */
	addSNMap_t( &(CList.name2id_map), name, id );
	return id;
}
/*
 * Parses buf (buflen bytes) with the parser of cfg and appends the
 * normalised lexemes to prs.
 *
 * For every token the parser returns, the dictionaries mapped to its type
 * are tried in order; the first one that returns a result decides (an empty
 * result drops the token as a stop word, but it still takes up a position).
 * Tokens whose type has no mapping are skipped.  Every lexeme of the result
 * is stored in prs->words with the current position; the strings themselves
 * are kept, only the array returned by the dictionary is freed.
 *
 * Change over the previous version: the lexeme length, an integer, is passed
 * to the dictionary with Int32GetDatum() instead of PointerGetDatum().
 */
void
parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) {
	int type, lenlemm, i;
	char *lemm=NULL;
	WParserInfo *prsobj = findprs(cfg->prs_id);

	/* start the parser on the text */
	prsobj->prs=(void*)DatumGetPointer(
		FunctionCall2(
			&(prsobj->start_info),
			PointerGetDatum(buf),
			Int32GetDatum(buflen)
		)
	);

	/* a token type of 0 ends the stream */
	while( ( type=DatumGetInt32(FunctionCall3(
			&(prsobj->getlexeme_info),
			PointerGetDatum(prsobj->prs),
			PointerGetDatum(&lemm),
			PointerGetDatum(&lenlemm))) ) != 0 ) {

		if ( lenlemm >= MAXSTRLEN )
			elog(ERROR, "Word is too long");

		if ( type >= cfg->len ) /* skip this type of lexem */
			continue;

		for(i=0;i<cfg->map[type].len;i++) {
			DictInfo *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
			char **norms, **ptr;

			norms = ptr = (char**)DatumGetPointer(
				FunctionCall3(
					&(dict->lexize_info),
					PointerGetDatum(dict->dictionary),
					PointerGetDatum(lemm),
					Int32GetDatum(lenlemm)
				)
			);
			if ( !norms ) /* dictionary doesn't know this lexem */
				continue;

			prs->pos++; /*set pos*/

			while( *ptr ) {
				if (prs->curwords == prs->lenwords) {
					prs->lenwords *= 2;
					prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD));
				}

				prs->words[prs->curwords].len = strlen(*ptr);
				prs->words[prs->curwords].word = *ptr;
				prs->words[prs->curwords].alen = 0;
				prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
				ptr++;
				prs->curwords++;
			}
			pfree(norms);
			break; /* lexem already normalized or is stop word*/
		}
	}

	/* let the parser release its state */
	FunctionCall1(
		&(prsobj->end_info),
		PointerGetDatum(prsobj->prs)
	);
}
/*
 * Append one token (buf, buflen bytes, parser token type `type`) to the
 * headline word array, doubling the array while it has no free slot.
 * The token bytes are copied into freshly palloc'd storage.
 */
static void
hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) {
	HLWORD *slot;

	/* make sure index curwords is inside the allocated array */
	while (prs->curwords >= prs->lenwords) {
		prs->lenwords *= 2;
		prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
	}

	slot = &(prs->words[prs->curwords]);

	/* start from an all-zero entry, then fill in the known fields */
	memset( slot, 0, sizeof(HLWORD) );
	slot->type = (uint8)type;
	slot->len = buflen;
	slot->word = palloc(buflen);
	/* exactly buflen bytes are copied; no terminator is added */
	memcpy(slot->word, buf, buflen);

	prs->curwords++;
}
/*
 * Mark the most recently added headline word with every query operand that
 * equals the normalized form buf (buflen bytes).
 *
 * The first matching operand is stored in the last word's item field; each
 * further match appends a copy of that word flagged as repeated, pointing
 * at the additional operand.
 *
 * NOTE(review): relies on at least one word already being present in prs
 * (curwords > 0); the caller adds the word before calling this function.
 */
static void
hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) {
	int i;
	ITEM *item=GETQUERY(query);
	HLWORD *word;

	/* reserve room for the worst case: one extra entry per query item */
	while (prs->curwords + query->size >= prs->lenwords) {
		prs->lenwords *= 2;
		prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD));
	}

	/*
	 * Take the address of the last word only after the array may have been
	 * moved by repalloc above; taking it earlier leaves a dangling pointer.
	 */
	word=&( prs->words[prs->curwords-1] );

	for(i=0; i<query->size; i++) {
		if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) {
			if ( word->item ) {
				/* word already bound to an operand: add a repeated copy */
				memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) );
				prs->words[prs->curwords].item=item;
				prs->words[prs->curwords].repeated=1;
				prs->curwords++;
			} else
				word->item=item;
		}
		item++;
	}
}
/*
 * hlparsetext
 *	Parse buf for headline generation: every token produced by the
 *	parser is appended to prs (hladdword), and each normalized form of
 *	the token is matched against the query operands (hlfinditem).
 *
 * cfg    - configuration: parser id plus per-token-type dictionary lists
 * prs    - headline word accumulator
 * query  - query whose operands are looked up
 * buf    - text to parse
 * buflen - length passed to the parser's start function
 *
 * Raises ERROR "Word is too long" when a lexeme is MAXSTRLEN or longer.
 */
void
hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) {
	int type, lenlemm, i;
	char *lemm=NULL;
	WParserInfo *prsobj = findprs(cfg->prs_id);

	/* start the parser; its state is kept in prsobj->prs */
	prsobj->prs=(void*)DatumGetPointer(
		FunctionCall2(
			&(prsobj->start_info),
			PointerGetDatum(buf),
			Int32GetDatum(buflen)
		)
	);

	/* fetch lexemes until the parser reports token type 0 */
	while( ( type=DatumGetInt32(FunctionCall3(
			&(prsobj->getlexeme_info),
			PointerGetDatum(prsobj->prs),
			PointerGetDatum(&lemm),
			PointerGetDatum(&lenlemm))) ) != 0 ) {

		if ( lenlemm >= MAXSTRLEN )
			elog(ERROR, "Word is too long");

		/* every token is recorded, even when its type has no dictionaries */
		hladdword(prs,lemm,lenlemm,type);

		if ( type >= cfg->len )
			continue;

		for(i=0;i<cfg->map[type].len;i++) {
			DictInfo *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) );
			char **norms, **ptr;

			/*
			 * The lexeme length is an int value, not an address, so it is
			 * passed with Int32GetDatum (the lexize functions are declared
			 * with an int4 third argument).
			 */
			norms = ptr = (char**)DatumGetPointer(
				FunctionCall3(
					&(dict->lexize_info),
					PointerGetDatum(dict->dictionary),
					PointerGetDatum(lemm),
					Int32GetDatum(lenlemm)
				)
			);
			if ( !norms ) /* dictionary doesn't know this lexem */
				continue;

			/* norms is NULL-terminated; each string is freed once matched */
			while( *ptr ) {
				hlfinditem(prs,query,*ptr,strlen(*ptr));
				pfree(*ptr);
				ptr++;
			}
			pfree(norms);
			break; /* lexem already normalized or is stop word*/
		}
	}

	/* let the parser release its state */
	FunctionCall1(
		&(prsobj->end_info),
		PointerGetDatum(prsobj->prs)
	);
}
/*
 * genhl
 *	Assemble the headline text from the words collected in prs.
 *
 * Words flagged `in` (and neither `skip` nor `repeated`) are emitted:
 * a `replace` word becomes a single space, any other word is copied and,
 * when `selected`, wrapped in prs->startsel / prs->stopsel.
 * The word strings of all non-repeated entries are pfree'd on the way.
 *
 * Returns a palloc'd text value whose length field covers the bytes written.
 */
text*
genhl(HLPRSTEXT * prs) {
text *out;
int len=128;
char *ptr;
HLWORD *wrd=prs->words;
out = (text*)palloc( len );
/* output starts right after the varlena header */
ptr=((char*)out) + VARHDRSZ;
while( wrd - prs->words < prs->curwords ) {
/* grow the buffer until the word plus both markers is guaranteed to fit */
while ( wrd->len + prs->stopsellen + prs->startsellen + (ptr - ((char*)out)) >= len ) {
/* remember the write offset: repalloc may move the buffer */
int dist = ptr - ((char*)out);
len*= 2;
out = (text *) repalloc(out, len);
ptr=((char*)out) + dist;
}
if ( wrd->in && !wrd->skip && !wrd->repeated ) {
if ( wrd->replace ) {
*ptr=' ';
ptr++;
} else {
if (wrd->selected) {
memcpy(ptr,prs->startsel,prs->startsellen);
ptr+=prs->startsellen;
}
memcpy(ptr,wrd->word,wrd->len);
ptr+=wrd->len;
if (wrd->selected) {
memcpy(ptr,prs->stopsel,prs->stopsellen);
ptr+=prs->stopsellen;
}
}
}
/* repeated entries share the word pointer of the entry they were copied from */
if ( !wrd->repeated )
pfree(wrd->word);
wrd++;
}
/* total size including the header, since ptr started at out + VARHDRSZ */
VARATT_SIZEP(out)=ptr - ((char*)out);
return out;
}
/*
 * get_currcfg
 *	Return the id of the current configuration.  When none has been
 *	chosen yet (current_cfg_id is not positive) the configuration whose
 *	locale column equals the current LC_CTYPE locale is looked up in
 *	pg_ts_cfg through SPI and remembered in current_cfg_id.
 *
 * Raises ERROR when the plan cannot be prepared, the query fails, or no
 * configuration matches the locale.
 */
int
get_currcfg(void) {
	Oid argtypes[1]={ TEXTOID };
	Datum values[1];
	const char *locname;
	bool isnull;
	int rc;

	/* fast path: already resolved */
	if ( current_cfg_id > 0 )
		return current_cfg_id;

	SPI_connect();

	/* the lookup plan is prepared once and saved for later calls */
	if ( !plan_getcfg_bylocale ) {
		plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, argtypes ) );
		if ( !plan_getcfg_bylocale )
			elog(ERROR, "SPI_prepare() failed");
	}

	/* passing NULL only queries the locale name */
	locname = setlocale(LC_CTYPE, NULL);
	values[0] = PointerGetDatum( char2text((char*)locname) );

	rc = SPI_execp(plan_getcfg_bylocale, values, " ", 1);
	if ( rc < 0 )
		elog (ERROR, "SPI_execp return %d", rc);

	if ( !(SPI_processed > 0) )
		elog(ERROR,"Can't find tsearch config by locale");

	current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );

	pfree(DatumGetPointer(values[0]));
	SPI_finish();
	return current_cfg_id;
}
PG_FUNCTION_INFO_V1(set_curcfg);
Datum set_curcfg(PG_FUNCTION_ARGS);
Datum
set_curcfg(PG_FUNCTION_ARGS) {
findcfg(PG_GETARG_OID(0));
current_cfg_id=PG_GETARG_OID(0);
PG_RETURN_VOID();
}
PG_FUNCTION_INFO_V1(set_curcfg_byname);
Datum set_curcfg_byname(PG_FUNCTION_ARGS);
Datum
set_curcfg_byname(PG_FUNCTION_ARGS) {
text *name=PG_GETARG_TEXT_P(0);
DirectFunctionCall1(
set_curcfg,
ObjectIdGetDatum( name2id_cfg(name) )
);
PG_FREE_IF_COPY(name, 0);
PG_RETURN_VOID();
}
PG_FUNCTION_INFO_V1(show_curcfg);
Datum show_curcfg(PG_FUNCTION_ARGS);
/*
 * show_curcfg()
 *	SQL-callable wrapper returning the value of get_currcfg() as an oid.
 */
Datum
show_curcfg(PG_FUNCTION_ARGS) {
	int cfgid = get_currcfg();

	PG_RETURN_OID( cfgid );
}
PG_FUNCTION_INFO_V1(reset_tsearch);
Datum reset_tsearch(PG_FUNCTION_ARGS);
/*
 * reset_tsearch()
 *	SQL-callable debugging helper: emits the NOTICE below via ts_error()
 *	and returns void.
 *
 * NOTE(review): nothing is reset in this function itself; presumably
 * ts_error() drops the cached objects before reporting -- confirm against
 * its definition.
 */
Datum
reset_tsearch(PG_FUNCTION_ARGS) {
ts_error(NOTICE,"TSearch cache cleaned");
PG_RETURN_VOID();
}
/*
 * Configuration lookup plus the word accumulators used when parsing text
 * for tsvector construction (PRSTEXT) and for headlines (HLPRSTEXT).
 */
#ifndef __TS_CFG_H__
#define __TS_CFG_H__
#include "postgres.h"
#include "query.h"
/* Dictionaries attached to one token type: dict_id[0..len-1], read with DatumGetObjectId */
typedef struct {
int len;
Datum *dict_id;
} ListDictionary;
/* One configuration: its parser and a dictionary list per token type (map is indexed by type, valid below len) */
typedef struct {
Oid id;
Oid prs_id;
int len;
ListDictionary *map;
} TSCfgInfo;
Oid name2id_cfg(text *name);
TSCfgInfo * findcfg(Oid id);
void init_cfg(Oid id, TSCfgInfo *cfg);
void reset_cfg(void);
/*
 * One normalized word produced by parsetext_v2: `word` points at `len`
 * bytes; the parser stores a single position in pos.pos and sets alen to 0.
 * NOTE(review): pos.apos / alen look like a position array and its
 * allocated length used by later processing -- confirm at the use sites.
 */
typedef struct {
uint16 len;
union {
uint16 pos;
uint16 *apos;
} pos;
char *word;
uint32 alen;
} WORD;
/* Growable array of WORD: lenwords slots allocated, curwords in use, pos is the running word position */
typedef struct {
WORD *words;
int4 lenwords;
int4 curwords;
int4 pos;
} PRSTEXT;
/*
 * One token of the text a headline is generated from.  genhl() emits a
 * token when `in` is set and `skip`/`repeated` are clear; `replace` makes
 * it a single space and `selected` wraps it in the start/stop markers.
 * `item` is the query operand the token matched, if any.
 */
typedef struct {
uint16 len;
uint8 selected:1,
in:1,
skip:1,
replace:1,
repeated:1;
uint8 type;
char *word;
ITEM *item;
} HLWORD;
/* Growable array of HLWORD plus the highlight markers and their byte lengths */
typedef struct {
HLWORD *words;
int4 lenwords;
int4 curwords;
char *startsel;
char *stopsel;
int2 startsellen;
int2 stopsellen;
} HLPRSTEXT;
void hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen);
text* genhl(HLPRSTEXT * prs);
void parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen);
int get_currcfg(void);
#endif
/*
* stat functions
*/
#include "tsvector.h"
#include "ts_stat.h"
#include "funcapi.h"
#include "catalog/pg_type.h"
#include "executor/spi.h"
#include "common.h"
PG_FUNCTION_INFO_V1(tsstat_in);
Datum tsstat_in(PG_FUNCTION_ARGS);
/*
 * Input function for tsstat: the argument is ignored and an empty
 * accumulator (header only, zero entries) is returned.
 */
Datum
tsstat_in(PG_FUNCTION_ARGS) {
	tsstat *empty = palloc(STATHDRSIZE);

	empty->size = 0;
	empty->len = STATHDRSIZE;
	PG_RETURN_POINTER(empty);
}
PG_FUNCTION_INFO_V1(tsstat_out);
Datum tsstat_out(PG_FUNCTION_ARGS);
/*
 * Output function for tsstat: not implemented, always raises ERROR.
 */
Datum
tsstat_out(PG_FUNCTION_ARGS) {
elog(ERROR,"Unimplemented");
/* only reached if elog(ERROR) returns */
PG_RETURN_NULL();
}
/*
 * Grow (or create) an array of WordEntry pointers.
 * A NULL array or a zero *len yields a fresh array of 8 slots; otherwise
 * the capacity is doubled.  *len is updated to the new capacity and the
 * (possibly moved) array is returned.
 */
static WordEntry**
SEI_realloc( WordEntry** in, uint32 *len ) {
	if ( in != NULL && *len != 0 ) {
		*len *= 2;
		return repalloc( in, sizeof(WordEntry*)* (*len) );
	}

	*len=8;
	return palloc( sizeof(WordEntry*)* (*len) );
}
/*
 * Order a statistics entry against a tsvector word: shorter strings sort
 * first, equal lengths are ordered by strncmp over that length.
 * Returns <0, 0 or >0.
 */
static int
compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, tsvector *txt) {
	const char *statword;
	const char *vecword;

	if ( a->len != b->len )
		return ( a->len > b->len ) ? 1 : -1;

	statword = STATSTRPTR(stat) + a->pos;
	vecword = STRPTR(txt) + b->pos;
	return strncmp( statword, vecword, a->len );
}
/*
 * formstat
 *	Build a new tsstat holding every entry of `stat` plus `len` new words.
 *
 * stat  - existing statistics (read only here)
 * txt   - tsvector the new words belong to
 * entry - array of `len` pointers to WordEntry items of txt to be added
 * len   - number of pointers in entry
 *
 * The old string area is copied unchanged to the start of the new string
 * area and the new words are appended after it, so `pos` values of old
 * entries stay valid.  Each new word starts with ndoc = 1 and nentry =
 * its position count (1 when it has none).
 *
 * NOTE(review): assumes entry[] is ordered consistently with
 * compareStatWord() and contains no word already present in stat, which
 * is how ts_accum() builds it -- confirm for other callers.
 *
 * Returns a freshly palloc'd tsstat; stat is not freed.
 */
static tsstat*
formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) {
tsstat *newstat;
uint32 totallen, nentry;
uint32 slen=0;
WordEntry **ptr=entry;
char *curptr;
StatEntry *sptr,*nptr;
/* total bytes of string data contributed by the new words */
while(ptr-entry<len) {
slen += (*ptr)->len;
ptr++;
}
nentry=stat->size + len;
slen+=STATSTRSIZE(stat);
totallen=CALCSTATSIZE(nentry,slen);
newstat=palloc(totallen);
newstat->len=totallen;
newstat->size=nentry;
/* size must be set before STATSTRPTR(newstat): the macro reads it */
memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
/* new strings are appended after the copied old ones */
curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat);
ptr=entry;
sptr=STATPTR(stat);
nptr=STATPTR(newstat);
if ( len == 1 ) {
/* single new word: binary-search its insertion point among the old entries */
StatEntry *StopLow = STATPTR(stat);
/* the string area begins right after the last entry */
StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
while (StopLow < StopHigh) {
sptr=StopLow + (StopHigh - StopLow) / 2;
if ( compareStatWord(sptr,*ptr,stat,txt) < 0 )
StopLow = sptr + 1;
else
StopHigh = sptr;
}
nptr =STATPTR(newstat) + (StopLow-STATPTR(stat));
/* old entries before the insertion point */
memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) );
nptr->nentry=POSDATALEN(txt,*ptr);
if ( nptr->nentry==0 )
nptr->nentry=1;
nptr->ndoc=1;
nptr->len=(*ptr)->len;
memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
nptr->pos = curptr - STATSTRPTR(newstat);
/* old entries from the insertion point onward */
memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) );
} else {
/* several new words: merge the old entries and the new words */
while( sptr-STATPTR(stat) < stat->size && ptr-entry<len) {
if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) {
memcpy(nptr, sptr, sizeof(StatEntry));
sptr++;
} else {
nptr->nentry=POSDATALEN(txt,*ptr);
if ( nptr->nentry==0 )
nptr->nentry=1;
nptr->ndoc=1;
nptr->len=(*ptr)->len;
memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
nptr->pos = curptr - STATSTRPTR(newstat);
curptr += nptr->len;
ptr++;
}
nptr++;
}
/* remaining old entries (zero bytes when the old list is exhausted) */
memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) );
/* remaining new words; NOTE(review): only reached with new words left when the old list ran out first, so nptr is then at the tail -- confirm */
while(ptr-entry<len) {
nptr->nentry=POSDATALEN(txt,*ptr);
if ( nptr->nentry==0 )
nptr->nentry=1;
nptr->ndoc=1;
nptr->len=(*ptr)->len;
memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
nptr->pos = curptr - STATSTRPTR(newstat);
curptr += nptr->len;
ptr++; nptr++;
}
}
return newstat;
}
PG_FUNCTION_INFO_V1(ts_accum);
Datum ts_accum(PG_FUNCTION_ARGS);
/*
 * ts_accum(tsstat, tsvector)
 *	Fold one tsvector into the statistics accumulator.
 *
 * Words already present in the accumulator get ndoc incremented and nentry
 * increased by their position count (1 when the word has no positions);
 * these updates are made in place.  Words not yet present are collected
 * and added by formstat(), which returns a new accumulator.
 *
 * Returns the input accumulator when no new word was found, otherwise the
 * newly built one (the old one is not freed here).
 *
 * NOTE(review): argument 1 is detoasted before PG_ARGISNULL(1) is tested.
 */
Datum
ts_accum(PG_FUNCTION_ARGS) {
tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0);
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
WordEntry **newentry=NULL;
uint32 len=0, cur=0;
StatEntry *sptr;
WordEntry *wptr;
if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */
stat=palloc(STATHDRSIZE);
stat->len=STATHDRSIZE;
stat->size=0;
}
/* simple check of correctness */
if ( txt==NULL || PG_ARGISNULL(1) || txt->size==0 ) {
PG_FREE_IF_COPY(txt,1);
PG_RETURN_POINTER(stat);
}
sptr=STATPTR(stat);
wptr=ARRPTR(txt);
/* small accumulator relative to the document: walk both lists in parallel */
if ( stat->size < 100*txt->size ) { /* merge */
while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) {
int cmp = compareStatWord(sptr,wptr,stat,txt);
if ( cmp<0 ) {
sptr++;
} else if ( cmp==0 ) {
int n=POSDATALEN(txt,wptr);
if (n==0) n=1;
sptr->ndoc++;
sptr->nentry +=n ;
sptr++; wptr++;
} else {
/* word sorts before the current entry: not in the accumulator yet */
if ( cur==len )
newentry=SEI_realloc(newentry, &len);
newentry[cur]=wptr;
wptr++; cur++;
}
}
/* accumulator exhausted: all remaining words are new */
while( wptr-ARRPTR(txt) < txt->size ) {
if ( cur==len )
newentry=SEI_realloc(newentry, &len);
newentry[cur]=wptr;
wptr++; cur++;
}
/* large accumulator: binary-search each word of the document */
} else { /* search */
while( wptr-ARRPTR(txt) < txt->size ) {
StatEntry *StopLow = STATPTR(stat);
StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat);
int cmp;
while (StopLow < StopHigh) {
sptr=StopLow + (StopHigh - StopLow) / 2;
cmp = compareStatWord(sptr,wptr,stat,txt);
if (cmp==0) {
int n=POSDATALEN(txt,wptr);
if (n==0) n=1;
sptr->ndoc++;
sptr->nentry +=n ;
/* leaves StopLow < StopHigh, which marks the word as found below */
break;
} else if ( cmp < 0 )
StopLow = sptr + 1;
else
StopHigh = sptr;
}
if ( StopLow >= StopHigh ) { /* not found */
if ( cur==len )
newentry=SEI_realloc(newentry, &len);
newentry[cur]=wptr;
cur++;
}
wptr++;
}
}
if ( cur==0 ) { /* no new words */
PG_FREE_IF_COPY(txt,1);
PG_RETURN_POINTER(stat);
}
/* newentry points into txt, so txt must stay alive until formstat is done */
newstat = formstat(stat, txt, newentry, cur);
pfree(newentry);
PG_FREE_IF_COPY(txt,1);
/* pfree(stat); */
PG_RETURN_POINTER(newstat);
}
typedef struct {
uint32 cur;
tsvector *stat;
} StatStorage;
/*
 * First-call setup shared by the set-returning statistics functions:
 * inside the multi-call memory context, store a private copy of `stat`
 * together with a cursor starting at 0 in funcctx->user_fctx, and prepare
 * the slot and attribute metadata for the "statinfo" row type.
 */
static void
ts_setup_firstcall(FuncCallContext *funcctx, tsstat *stat) {
	MemoryContext callerctx;
	StatStorage *state;
	TupleDesc rowdesc;

	/* everything below must survive across calls */
	callerctx = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

	state = palloc( sizeof(StatStorage) );
	state->cur = 0;
	state->stat = palloc( stat->len );
	memcpy(state->stat, stat, stat->len);
	funcctx->user_fctx = (void*)state;

	rowdesc = RelationNameGetTupleDesc("statinfo");
	funcctx->slot = TupleDescGetSlot(rowdesc);
	funcctx->attinmeta = TupleDescGetAttInMetadata(rowdesc);

	MemoryContextSwitchTo(callerctx);
}
/*
 * Produce the next (word, ndoc, nentry) row from the state stored in
 * funcctx->user_fctx.
 *
 * Returns the row as a Datum, or (Datum)0 once every entry has been
 * returned; in that case the stored statistics and the state are freed.
 */
static Datum
ts_process_call(FuncCallContext *funcctx) {
	StatStorage *state = (StatStorage*)funcctx->user_fctx;
	StatEntry *cur;
	HeapTuple row;
	Datum rowdatum;
	char *fields[3];
	char ndocbuf[16];
	char nentrybuf[16];

	if ( !( state->cur < state->stat->size ) ) {
		/* exhausted: release the private copy */
		pfree(state->stat);
		pfree(state);
		return (Datum)0;
	}

	cur = STATPTR(state->stat) + state->cur;

	/* the tuple is built from C strings, so the counters are printed first */
	fields[1]=ndocbuf;
	sprintf(ndocbuf,"%d",cur->ndoc);
	fields[2]=nentrybuf;
	sprintf(nentrybuf,"%d",cur->nentry);

	/* the stored word has no terminator; make a terminated copy */
	fields[0]=palloc( cur->len+1 );
	memcpy( fields[0], STATSTRPTR(state->stat)+cur->pos, cur->len);
	(fields[0])[cur->len]='\0';

	row = BuildTupleFromCStrings(funcctx->attinmeta, fields);
	rowdatum = TupleGetDatum(funcctx->slot, row);

	pfree(fields[0]);
	state->cur++;
	return rowdatum;
}
PG_FUNCTION_INFO_V1(ts_accum_finish);
Datum ts_accum_finish(PG_FUNCTION_ARGS);
/*
 * ts_accum_finish(tsstat)
 *	Set-returning function that turns an accumulator into rows, one per
 *	call, using ts_setup_firstcall() / ts_process_call().
 */
Datum
ts_accum_finish(PG_FUNCTION_ARGS) {
	FuncCallContext *ctx;
	Datum row;

	if (SRF_IS_FIRSTCALL()) {
		ctx = SRF_FIRSTCALL_INIT();
		ts_setup_firstcall(ctx, (tsstat*)PG_GETARG_POINTER(0) );
	}

	ctx = SRF_PERCALL_SETUP();
	row = ts_process_call(ctx);

	/* (Datum)0 is ts_process_call's "no more rows" marker */
	if ( row != (Datum)0 )
		SRF_RETURN_NEXT(ctx, row);
	SRF_RETURN_DONE(ctx);
}
/* cached oid of the tsvector type; InvalidOid until get_ti_Oid() has run */
static Oid tiOid=InvalidOid;
/*
 * Look up the oid of the type named 'tsvector' in pg_type and cache it in
 * tiOid.  Must be called with an open SPI connection.
 *
 * Raises ERROR when the query fails, returns no row, or yields InvalidOid.
 */
static void
get_ti_Oid(void) {
	int ret;
	bool isnull;

	if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 )
		elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret);

	/*
	 * A successful query that finds nothing reports zero rows, not a
	 * negative count; reject that case before touching vals[0].
	 */
	if ( SPI_processed < 1 )
		elog(ERROR, "There is no tsvector type");

	tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
	if ( tiOid==InvalidOid )
		elog(ERROR, "tsvector type has InvalidOid");
}
/*
 * ts_stat_sql
 *	Run the SQL text `txt` through an SPI cursor and accumulate word
 *	statistics over its single tsvector result column.
 *
 * Must be called with an open SPI connection.  Rows are fetched 100 at a
 * time; NULL values are skipped; every other value is folded in with
 * ts_accum().
 *
 * Raises ERROR when the query cannot be prepared or opened, when it does
 * not return exactly one column, or when that column is not tsvector.
 *
 * Returns the accumulated tsstat (an empty one when there are no rows).
 */
static tsstat*
ts_stat_sql(text *txt) {
char *query=text2char(txt);
int i;
tsstat *newstat,*stat;
bool isnull;
Portal portal;
void *plan;
/* resolve the tsvector type oid once per backend */
if ( tiOid==InvalidOid )
get_ti_Oid();
if ( (plan = SPI_prepare(query,0,NULL))==NULL )
elog(ERROR, "SPI_prepare('%s') returns NULL",query);
if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL )
elog(ERROR, "SPI_cursor_open('%s') returns NULL",query);
/* first batch; its tuple descriptor is used to validate the result shape */
SPI_cursor_fetch(portal, true, 100);
if ( SPI_tuptable->tupdesc->natts != 1 )
elog(ERROR, "Number of fields doesn't equal to 1");
if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid )
elog(ERROR, "Column isn't of tsvector type");
/* start from an empty accumulator */
stat=palloc(STATHDRSIZE);
stat->len=STATHDRSIZE;
stat->size=0;
while(SPI_processed>0) {
for(i=0;i<SPI_processed;i++) {
Datum data=SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
if ( !isnull ) {
newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2(
ts_accum,
PointerGetDatum(stat),
data
));
/* ts_accum returns its input when nothing new was added; free only a replaced accumulator */
if ( stat!=newstat && stat )
pfree(stat);
stat=newstat;
}
}
SPI_freetuptable(SPI_tuptable);
SPI_cursor_fetch(portal, true, 100);
}
/* release the table of the final, empty fetch */
SPI_freetuptable(SPI_tuptable);
SPI_cursor_close(portal);
SPI_freeplan(plan);
pfree(query);
return stat;
}
PG_FUNCTION_INFO_V1(ts_stat);
Datum ts_stat(PG_FUNCTION_ARGS);
/*
 * stat(text)
 *	Set-returning function: on the first call the query given as text is
 *	executed and its tsvector column is accumulated (ts_stat_sql); each
 *	call then returns one row produced by ts_process_call().
 */
Datum
ts_stat(PG_FUNCTION_ARGS) {
FuncCallContext *funcctx;
Datum result;
if (SRF_IS_FIRSTCALL()) {
tsstat *stat;
text *txt=PG_GETARG_TEXT_P(0);
funcctx = SRF_FIRSTCALL_INIT();
SPI_connect();
stat = ts_stat_sql(txt);
PG_FREE_IF_COPY(txt,0);
/* copy the result into the multi-call context before SPI_finish() is called */
ts_setup_firstcall(funcctx, stat );
SPI_finish();
}
funcctx = SRF_PERCALL_SETUP();
/* (Datum)0 is ts_process_call's "no more rows" marker */
if ( (result=ts_process_call(funcctx)) != (Datum)0 )
SRF_RETURN_NEXT(funcctx, result);
SRF_RETURN_DONE(funcctx);
}
/*
 * Layout of the word statistics accumulator (tsstat):
 *
 *	header (len, size) | `size` StatEntry records | string data
 *
 * The macros below navigate that layout.  Their arguments are fully
 * parenthesized, and the header fields are read through tsstat (the type
 * declared here) instead of through a tsvector cast.
 */
#ifndef __TXTIDX_STAT_H__
#define __TXTIDX_STAT_H__
#include "postgres.h"
#include "access/gist.h"
#include "access/itup.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/builtins.h"
#include "storage/bufpage.h"

/* Statistics for one word */
typedef struct {
	uint32 len;		/* length of the word in bytes */
	uint32 pos;		/* offset of the word inside the string area */
	uint32 ndoc;	/* number of tsvectors containing the word */
	uint32 nentry;	/* total number of occurrences counted */
} StatEntry;

/* Variable-length accumulator; data holds the entries followed by the strings */
typedef struct {
	int4 len;		/* total size in bytes, header included */
	int4 size;		/* number of StatEntry records */
	char data[1];
} tsstat;

/* size of the fixed header (len + size) */
#define STATHDRSIZE (sizeof(int4)*2)
/* total bytes needed for x entries plus lenstr bytes of string data */
#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) )
/* first StatEntry of accumulator x */
#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) )
/* start of the string area of x (right after the last entry) */
#define STATSTRPTR(x) ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
/* number of bytes in the string area of x */
#define STATSTRSIZE(x) ( ((tsstat*)(x))->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsstat*)(x))->size ) )
#endif
-- Adjust this setting to control where the objects get CREATEd.
SET search_path = public;
-- The whole script runs in one transaction (closed by END at the bottom).
BEGIN;
--dict conf
-- One row per dictionary.  dict_init and dict_lexize hold oids of the
-- functions implementing it (filled from pg_proc by the inserts below);
-- dict_initoption is the text stored for the init function.
CREATE TABLE pg_ts_dict (
dict_name text not null primary key,
dict_init oid,
dict_initoption text,
dict_lexize oid not null,
dict_comment text
) with oids;
--dict interface
-- lexize: three overloads selecting the dictionary by oid, by name,
-- or using the current dictionary.
CREATE FUNCTION lexize(oid, text)
returns _text
as 'MODULE_PATHNAME'
language 'C'
with (isstrict);
CREATE FUNCTION lexize(text, text)
returns _text
as 'MODULE_PATHNAME', 'lexize_byname'
language 'C'
with (isstrict);
CREATE FUNCTION lexize(text)
returns _text
as 'MODULE_PATHNAME', 'lexize_bycurrent'
language 'C'
with (isstrict);
-- set_curdict: choose the current dictionary by id or by name.
CREATE FUNCTION set_curdict(int)
returns void
as 'MODULE_PATHNAME'
language 'C'
with (isstrict);
CREATE FUNCTION set_curdict(text)
returns void
as 'MODULE_PATHNAME', 'set_curdict_byname'
language 'C'
with (isstrict);
--built-in dictionaries
-- Each dictionary is an init function taking text plus a lexize function
-- taking (internal, internal, int4), registered in pg_ts_dict by looking
-- up the function oids in pg_proc by name.
-- 'simple' dictionary
CREATE FUNCTION dex_init(text)
returns internal
as 'MODULE_PATHNAME'
language 'C';
CREATE FUNCTION dex_lexize(internal,internal,int4)
returns internal
as 'MODULE_PATHNAME'
language 'C'
with (isstrict);
insert into pg_ts_dict select
'simple',
(select oid from pg_proc where proname='dex_init'),
null,
(select oid from pg_proc where proname='dex_lexize'),
'Simple example of dictionary.'
;
-- English Snowball stemmer; DATA_PATH is substituted by the Makefile's sed rule
CREATE FUNCTION snb_en_init(text)
returns internal
as 'MODULE_PATHNAME'
language 'C';
CREATE FUNCTION snb_lexize(internal,internal,int4)
returns internal
as 'MODULE_PATHNAME'
language 'C'
with (isstrict);
insert into pg_ts_dict select
'en_stem',
(select oid from pg_proc where proname='snb_en_init'),
'DATA_PATH/english.stop',
(select oid from pg_proc where proname='snb_lexize'),
'English Stemmer. Snowball.'
;
-- Russian Snowball stemmer; shares snb_lexize with the English one
CREATE FUNCTION snb_ru_init(text)
returns internal
as 'MODULE_PATHNAME'
language 'C';
insert into pg_ts_dict select
'ru_stem',
(select oid from pg_proc where proname='snb_ru_init'),
'DATA_PATH/russian.stop',
(select oid from pg_proc where proname='snb_lexize'),
'Russian Stemmer. Snowball.'
;
-- ISpell template: registered with a null init option (see the example near the end)
CREATE FUNCTION spell_init(text)
returns internal
as 'MODULE_PATHNAME'
language 'C';
CREATE FUNCTION spell_lexize(internal,internal,int4)
returns internal
as 'MODULE_PATHNAME'
language 'C'
with (isstrict);
insert into pg_ts_dict select
'ispell_template',
(select oid from pg_proc where proname='spell_init'),
null,
(select oid from pg_proc where proname='spell_lexize'),
'ISpell interface. Must have .dict and .aff files'
;
-- Synonym dictionary: registered with a null init option (see the example near the end)
CREATE FUNCTION syn_init(text)
returns internal
as 'MODULE_PATHNAME'
language 'C';
CREATE FUNCTION syn_lexize(internal,internal,int4)
returns internal
as 'MODULE_PATHNAME'
language 'C'
with (isstrict);
insert into pg_ts_dict select
'synonym',
(select oid from pg_proc where proname='syn_init'),
null,
(select oid from pg_proc where proname='syn_lexize'),
'Example of synonym dictionary'
;
--parser conf
-- One row per parser; every prs_* column holds the oid of the function
-- implementing that step (filled from pg_proc by the insert further down).
CREATE TABLE pg_ts_parser (
prs_name text not null primary key,
prs_start oid not null,
prs_nexttoken oid not null,
prs_end oid not null,
prs_headline oid not null,
prs_lextype oid not null,
prs_comment text
) with oids;
--sql-level interface
-- Row type returned by token_type(): token id, alias and description.
CREATE TYPE tokentype
as (tokid int4, alias text, descr text);
-- token_type: three overloads selecting the parser by id, by name,
-- or using the current parser.
CREATE FUNCTION token_type(int4)
returns setof tokentype
as 'MODULE_PATHNAME'
language 'C'
with (isstrict);
CREATE FUNCTION token_type(text)
returns setof tokentype
as 'MODULE_PATHNAME', 'token_type_byname'
language 'C'
with (isstrict);
CREATE FUNCTION token_type()
returns setof tokentype
as 'MODULE_PATHNAME', 'token_type_current'
language 'C'
with (isstrict);
-- set_curprs: choose the current parser by id or by name.
CREATE FUNCTION set_curprs(int)
returns void
as 'MODULE_PATHNAME'
language 'C'
with (isstrict);
CREATE FUNCTION set_curprs(text)
returns void
as 'MODULE_PATHNAME', 'set_curprs_byname'
language 'C'
with (isstrict);
-- Row type returned by parse(): token id and token text.
CREATE TYPE tokenout
as (tokid int4, token text);
-- parse: three overloads selecting the parser by oid, by name,
-- or using the current parser.
CREATE FUNCTION parse(oid,text)
returns setof tokenout
as 'MODULE_PATHNAME'
language 'C'
with (isstrict);
CREATE FUNCTION parse(text,text)
returns setof tokenout
as 'MODULE_PATHNAME', 'parse_byname'
language 'C'
with (isstrict);
CREATE FUNCTION parse(text)
returns setof tokenout
as 'MODULE_PATHNAME', 'parse_current'
language 'C'
with (isstrict);
--default parser
-- The five functions below implement the parser steps stored in
-- pg_ts_parser: start, next token, end, token-type listing and headline.
CREATE FUNCTION prsd_start(internal,int4)
returns internal
as 'MODULE_PATHNAME'
language 'C';
CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
returns int4
as 'MODULE_PATHNAME'
language 'C';
CREATE FUNCTION prsd_end(internal)
returns void
as 'MODULE_PATHNAME'
language 'C';
CREATE FUNCTION prsd_lextype(internal)
returns internal
as 'MODULE_PATHNAME'
language 'C';
CREATE FUNCTION prsd_headline(internal,internal,internal)
returns internal
as 'MODULE_PATHNAME'
language 'C';
-- Register the parser; values follow the column order of pg_ts_parser
-- (start, nexttoken, end, headline, lextype).
insert into pg_ts_parser select
'default',
(select oid from pg_proc where proname='prsd_start'),
(select oid from pg_proc where proname='prsd_getlexeme'),
(select oid from pg_proc where proname='prsd_end'),
(select oid from pg_proc where proname='prsd_headline'),
(select oid from pg_proc where proname='prsd_lextype'),
'Parser from OpenFTS v0.34'
;
--tsearch config
-- One row per configuration: its name, the parser it uses and the locale
-- it is selected for (the C code looks a configuration up by locale).
CREATE TABLE pg_ts_cfg (
ts_name text not null primary key,
prs_name text not null,
locale text
) with oids;
-- Per configuration and token alias: the dictionaries to try.
CREATE TABLE pg_ts_cfgmap (
ts_name text not null,
tok_alias text not null,
dict_name text[],
primary key (ts_name,tok_alias)
) with oids;
-- set_curcfg: choose the current configuration by id or by name.
CREATE FUNCTION set_curcfg(int)
returns void
as 'MODULE_PATHNAME'
language 'C'
with (isstrict);
CREATE FUNCTION set_curcfg(text)
returns void
as 'MODULE_PATHNAME', 'set_curcfg_byname'
language 'C'
with (isstrict);
-- show_curcfg: oid of the current configuration.
CREATE FUNCTION show_curcfg()
returns oid
as 'MODULE_PATHNAME'
language 'C'
with (isstrict);
-- Predefined configurations; 'simple' is inserted without a locale.
insert into pg_ts_cfg values ('default', 'default','C');
insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
insert into pg_ts_cfg values ('simple', 'default');
copy pg_ts_cfgmap from stdin;
default lword {en_stem}
default nlword {simple}
default word {simple}
default email {simple}
default url {simple}
default host {simple}
default sfloat {simple}
default version {simple}
default part_hword {simple}
default nlpart_hword {simple}
default lpart_hword {en_stem}
default hword {simple}
default lhword {en_stem}
default nlhword {simple}
default uri {simple}
default file {simple}
default float {simple}
default int {simple}
default uint {simple}
default_russian lword {en_stem}
default_russian nlword {ru_stem}
default_russian word {ru_stem}
default_russian email {simple}
default_russian url {simple}
default_russian host {simple}
default_russian sfloat {simple}
default_russian version {simple}
default_russian part_hword {simple}
default_russian nlpart_hword {ru_stem}
default_russian lpart_hword {en_stem}
default_russian hword {ru_stem}
default_russian lhword {en_stem}
default_russian nlhword {ru_stem}
default_russian uri {simple}
default_russian file {simple}
default_russian float {simple}
default_russian int {simple}
default_russian uint {simple}
simple lword {simple}
simple nlword {simple}
simple word {simple}
simple email {simple}
simple url {simple}
simple host {simple}
simple sfloat {simple}
simple version {simple}
simple part_hword {simple}
simple nlpart_hword {simple}
simple lpart_hword {simple}
simple hword {simple}
simple lhword {simple}
simple nlhword {simple}
simple uri {simple}
simple file {simple}
simple float {simple}
simple int {simple}
simple uint {simple}
\.
--tsvector type
-- Input/output functions must exist before the type that references them.
CREATE FUNCTION tsvector_in(cstring)
RETURNS tsvector
AS 'MODULE_PATHNAME'
LANGUAGE 'C' with (isstrict);
CREATE FUNCTION tsvector_out(tsvector)
RETURNS cstring
AS 'MODULE_PATHNAME'
LANGUAGE 'C' with (isstrict);
-- Variable-length type (INTERNALLENGTH = -1) with extended storage.
CREATE TYPE tsvector (
INTERNALLENGTH = -1,
INPUT = tsvector_in,
OUTPUT = tsvector_out,
STORAGE = extended
);
CREATE FUNCTION length(tsvector)
RETURNS int4
AS 'MODULE_PATHNAME', 'tsvector_length'
LANGUAGE 'C' with (isstrict,iscachable);
-- to_tsvector: three overloads selecting the configuration by oid,
-- by name, or using the current configuration.
CREATE FUNCTION to_tsvector(oid, text)
RETURNS tsvector
AS 'MODULE_PATHNAME'
LANGUAGE 'C' with (isstrict,iscachable);
CREATE FUNCTION to_tsvector(text, text)
RETURNS tsvector
AS 'MODULE_PATHNAME', 'to_tsvector_name'
LANGUAGE 'C' with (isstrict,iscachable);
CREATE FUNCTION to_tsvector(text)
RETURNS tsvector
AS 'MODULE_PATHNAME', 'to_tsvector_current'
LANGUAGE 'C' with (isstrict,iscachable);
CREATE FUNCTION strip(tsvector)
RETURNS tsvector
AS 'MODULE_PATHNAME'
LANGUAGE 'C' with (isstrict,iscachable);
CREATE FUNCTION setweight(tsvector,"char")
RETURNS tsvector
AS 'MODULE_PATHNAME'
LANGUAGE 'C' with (isstrict,iscachable);
CREATE FUNCTION concat(tsvector,tsvector)
RETURNS tsvector
AS 'MODULE_PATHNAME'
LANGUAGE 'C' with (isstrict,iscachable);
-- tsvector || tsvector is implemented by concat().
CREATE OPERATOR || (
LEFTARG = tsvector,
RIGHTARG = tsvector,
PROCEDURE = concat
);
--query type
-- Input/output functions must exist before the type that references them.
CREATE FUNCTION tsquery_in(cstring)
RETURNS tsquery
AS 'MODULE_PATHNAME'
LANGUAGE 'C' with (isstrict);
CREATE FUNCTION tsquery_out(tsquery)
RETURNS cstring
AS 'MODULE_PATHNAME'
LANGUAGE 'C' with (isstrict);
-- Variable-length type (INTERNALLENGTH = -1).
CREATE TYPE tsquery (
INTERNALLENGTH = -1,
INPUT = tsquery_in,
OUTPUT = tsquery_out
);
CREATE FUNCTION querytree(tsquery)
RETURNS text
AS 'MODULE_PATHNAME', 'tsquerytree'
LANGUAGE 'C' with (isstrict);
-- to_tsquery: three overloads selecting the configuration by oid,
-- by name, or using the current configuration.
CREATE FUNCTION to_tsquery(oid, text)
RETURNS tsquery
AS 'MODULE_PATHNAME'
LANGUAGE 'c' with (isstrict,iscachable);
CREATE FUNCTION to_tsquery(text, text)
RETURNS tsquery
AS 'MODULE_PATHNAME','to_tsquery_name'
LANGUAGE 'c' with (isstrict,iscachable);
CREATE FUNCTION to_tsquery(text)
RETURNS tsquery
AS 'MODULE_PATHNAME','to_tsquery_current'
LANGUAGE 'c' with (isstrict,iscachable);
--operations
-- Match functions behind the @@ operator, one per argument order.
CREATE FUNCTION exectsq(tsvector, tsquery)
RETURNS bool
AS 'MODULE_PATHNAME'
LANGUAGE 'C' with (isstrict, iscachable);
COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
CREATE FUNCTION rexectsq(tsquery, tsvector)
RETURNS bool
AS 'MODULE_PATHNAME'
LANGUAGE 'C' with (isstrict, iscachable);
COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
-- tsvector @@ tsquery and tsquery @@ tsvector; each names @@ as its commutator.
CREATE OPERATOR @@ (
LEFTARG = tsvector,
RIGHTARG = tsquery,
PROCEDURE = exectsq,
COMMUTATOR = '@@',
RESTRICT = contsel,
JOIN = contjoinsel
);
CREATE OPERATOR @@ (
LEFTARG = tsquery,
RIGHTARG = tsvector,
PROCEDURE = rexectsq,
COMMUTATOR = '@@',
RESTRICT = contsel,
JOIN = contjoinsel
);
--Trigger
CREATE FUNCTION tsearch2()
RETURNS trigger
AS 'MODULE_PATHNAME'
LANGUAGE 'C';
--Relevance (ranking)
-- rank: with or without an explicit float4[] first argument and with or
-- without a trailing int4; the variants without the array share 'rank_def'.
CREATE FUNCTION rank(float4[], tsvector, tsquery)
RETURNS float4
AS 'MODULE_PATHNAME'
LANGUAGE 'C' WITH (isstrict, iscachable);
CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
RETURNS float4
AS 'MODULE_PATHNAME'
LANGUAGE 'C' WITH (isstrict, iscachable);
CREATE FUNCTION rank(tsvector, tsquery)
RETURNS float4
AS 'MODULE_PATHNAME', 'rank_def'
LANGUAGE 'C' WITH (isstrict, iscachable);
CREATE FUNCTION rank(tsvector, tsquery, int4)
RETURNS float4
AS 'MODULE_PATHNAME', 'rank_def'
LANGUAGE 'C' WITH (isstrict, iscachable);
-- rank_cd: same scheme with a leading int4; the variants without it share 'rank_cd_def'.
CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
RETURNS float4
AS 'MODULE_PATHNAME'
LANGUAGE 'C' WITH (isstrict, iscachable);
CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
RETURNS float4
AS 'MODULE_PATHNAME'
LANGUAGE 'C' WITH (isstrict, iscachable);
CREATE FUNCTION rank_cd(tsvector, tsquery)
RETURNS float4
AS 'MODULE_PATHNAME', 'rank_cd_def'
LANGUAGE 'C' WITH (isstrict, iscachable);
CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
RETURNS float4
AS 'MODULE_PATHNAME', 'rank_cd_def'
LANGUAGE 'C' WITH (isstrict, iscachable);
-- headline: configuration given by oid, by name, or taken as the current
-- one; each form exists with and without a trailing text argument and both
-- map to the same C symbol.
CREATE FUNCTION headline(oid, text, tsquery, text)
RETURNS text
AS 'MODULE_PATHNAME', 'headline'
LANGUAGE 'C' WITH (isstrict, iscachable);
CREATE FUNCTION headline(oid, text, tsquery)
RETURNS text
AS 'MODULE_PATHNAME', 'headline'
LANGUAGE 'C' WITH (isstrict, iscachable);
CREATE FUNCTION headline(text, text, tsquery, text)
RETURNS text
AS 'MODULE_PATHNAME', 'headline_byname'
LANGUAGE 'C' WITH (isstrict, iscachable);
CREATE FUNCTION headline(text, text, tsquery)
RETURNS text
AS 'MODULE_PATHNAME', 'headline_byname'
LANGUAGE 'C' WITH (isstrict, iscachable);
CREATE FUNCTION headline(text, tsquery, text)
RETURNS text
AS 'MODULE_PATHNAME', 'headline_current'
LANGUAGE 'C' WITH (isstrict, iscachable);
CREATE FUNCTION headline(text, tsquery)
RETURNS text
AS 'MODULE_PATHNAME', 'headline_current'
LANGUAGE 'C' WITH (isstrict, iscachable);
--GiST
--GiST key type
-- gtsvector is the type stored in the index (see STORAGE in the operator class below).
CREATE FUNCTION gtsvector_in(cstring)
RETURNS gtsvector
AS 'MODULE_PATHNAME'
LANGUAGE 'C' with (isstrict);
CREATE FUNCTION gtsvector_out(gtsvector)
RETURNS cstring
AS 'MODULE_PATHNAME'
LANGUAGE 'C' with (isstrict);
CREATE TYPE gtsvector (
INTERNALLENGTH = -1,
INPUT = gtsvector_in,
OUTPUT = gtsvector_out
);
-- support FUNCTIONs
-- The seven functions below are bound to support numbers 1-7 of the operator class.
CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
RETURNS bool
AS 'MODULE_PATHNAME'
LANGUAGE 'C';
CREATE FUNCTION gtsvector_compress(internal)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE 'C';
CREATE FUNCTION gtsvector_decompress(internal)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE 'C';
CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE 'C' with (isstrict);
CREATE FUNCTION gtsvector_picksplit(internal, internal)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE 'C';
CREATE FUNCTION gtsvector_union(bytea, internal)
RETURNS _int4
AS 'MODULE_PATHNAME'
LANGUAGE 'C';
CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE 'C';
-- CREATE the OPERATOR class
-- Default GiST operator class for tsvector; the @@ operator is marked RECHECK.
CREATE OPERATOR CLASS gist_tsvector_ops
DEFAULT FOR TYPE tsvector USING gist
AS
OPERATOR 1 @@ (tsvector, tsquery) RECHECK ,
FUNCTION 1 gtsvector_consistent (gtsvector, internal, int4),
FUNCTION 2 gtsvector_union (bytea, internal),
FUNCTION 3 gtsvector_compress (internal),
FUNCTION 4 gtsvector_decompress (internal),
FUNCTION 5 gtsvector_penalty (internal, internal, internal),
FUNCTION 6 gtsvector_picksplit (internal, internal),
FUNCTION 7 gtsvector_same (gtsvector, gtsvector, internal),
STORAGE gtsvector;
--stat info
-- Row type returned by stat(): the word, the number of documents that
-- contain it and its total number of occurrences.
CREATE TYPE statinfo
as (word text, ndoc int4, nentry int4);
-- The aggregate-based interface below is disabled; only stat(text) is created.
--CREATE FUNCTION tsstat_in(cstring)
--RETURNS tsstat
--AS 'MODULE_PATHNAME'
--LANGUAGE 'C' with (isstrict);
--
--CREATE FUNCTION tsstat_out(tsstat)
--RETURNS cstring
--AS 'MODULE_PATHNAME'
--LANGUAGE 'C' with (isstrict);
--
--CREATE TYPE tsstat (
-- INTERNALLENGTH = -1,
-- INPUT = tsstat_in,
-- OUTPUT = tsstat_out,
-- STORAGE = plain
--);
--
--CREATE FUNCTION ts_accum(tsstat,tsvector)
--RETURNS tsstat
--AS 'MODULE_PATHNAME'
--LANGUAGE 'C' with (isstrict);
--
--CREATE FUNCTION ts_accum_finish(tsstat)
-- returns setof statinfo
-- as 'MODULE_PATHNAME'
-- language 'C'
-- with (isstrict);
--
--CREATE AGGREGATE stat (
-- BASETYPE=tsvector,
-- SFUNC=ts_accum,
-- STYPE=tsstat,
-- FINALFUNC = ts_accum_finish,
-- initcond = ''
--);
-- stat(text): the argument is the text of a query returning a single
-- tsvector column; the result is one statinfo row per distinct word.
CREATE FUNCTION stat(text)
returns setof statinfo
as 'MODULE_PATHNAME', 'ts_stat'
language 'C'
with (isstrict);
--reset - just for debugging
CREATE FUNCTION reset_tsearch()
returns void
as 'MODULE_PATHNAME'
language 'C'
with (isstrict);
--get cover (debug for rank_cd)
CREATE FUNCTION get_covers(tsvector,tsquery)
returns text
as 'MODULE_PATHNAME'
language 'C'
with (isstrict);
--example of ISpell dictionary (pg_ts_dict rows are identified by dict_name)
--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
--example of synonym dict
--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_name='synonym';
END;
/*
 * In/Out definitions for tsvector type
 * Internal structure:
 * string of values, array of the position of each lexeme in the string and its length
 * Teodor Sigaev <teodor@sigaev.ru>
 */
#include "postgres.h"
#include "access/gist.h"
#include "access/itup.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/builtins.h"
#include "storage/bufpage.h"
#include "executor/spi.h"
#include "commands/trigger.h"
#include "nodes/pg_list.h"
#include "catalog/namespace.h"
#include "utils/pg_locale.h"
#include <ctype.h> /* tolower */
#include "tsvector.h"
#include "query.h"
#include "ts_cfg.h"
#include "common.h"
/*
 * Version-1 call convention records and prototypes for the SQL-callable
 * functions of this file.
 */
PG_FUNCTION_INFO_V1(tsvector_in);
Datum tsvector_in(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(tsvector_out);
Datum tsvector_out(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(to_tsvector);
Datum to_tsvector(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(to_tsvector_current);
Datum to_tsvector_current(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(to_tsvector_name);
Datum to_tsvector_name(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(tsearch2);
Datum tsearch2(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(tsvector_length);
Datum tsvector_length(PG_FUNCTION_ARGS);
/*
* in/out text index type
*/
static int
comparePos(const void *a, const void *b) {
if ( ((WordEntryPos *) a)->pos == ((WordEntryPos *) b)->pos )
return 1;
return ( ((WordEntryPos *) a)->pos > ((WordEntryPos *) b)->pos ) ? 1 : -1;
}
/*
 * Sort the position array a[0..l-1] and collapse entries sharing the same
 * position, keeping the largest weight seen for each position.
 *
 * Compaction stops early once MAXNUMPOS entries have been kept or the
 * capped position value (MAXENTRYPOS-1) has been stored.
 *
 * Returns the number of entries left at the front of the array.
 */
static int
uniquePos(WordEntryPos *a, int4 l)
{
	int4		kept = 0;		/* index of the last entry kept so far */
	int4		i;

	if (l == 1)
		return l;

	qsort((void *) a, l, sizeof(WordEntryPos), comparePos);

	for (i = 1; i < l; i++)
	{
		if (a[i].pos == a[kept].pos)
		{
			/* duplicate position: remember the heavier weight only */
			if (a[i].weight > a[kept].weight)
				a[kept].weight = a[i].weight;
			continue;
		}

		kept++;
		a[kept].pos = a[i].pos;
		a[kept].weight = a[i].weight;

		if (kept >= MAXNUMPOS - 1 || a[kept].pos == MAXENTRYPOS - 1)
			break;
	}

	return kept + 1;
}
static char *BufferStr;
static int
compareentry(const void *a, const void *b)
{
if ( ((WordEntryIN *) a)->entry.len == ((WordEntryIN *) b)->entry.len)
{
return strncmp(
&BufferStr[((WordEntryIN *) a)->entry.pos],
&BufferStr[((WordEntryIN *) b)->entry.pos],
((WordEntryIN *) a)->entry.len);
}
return ( ((WordEntryIN *) a)->entry.len > ((WordEntryIN *) b)->entry.len ) ? 1 : -1;
}
static int
uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
{
WordEntryIN *ptr,
*res;
res = a;
if (l == 1) {
if ( a->entry.haspos ) {
*(uint16*)(a->pos) = uniquePos( &(a->pos[1]), *(uint16*)(a->pos));
*outbuflen = SHORTALIGN(res->entry.len) + (*(uint16*)(a->pos) +1 )*sizeof(WordEntryPos);
}
return l;
}
ptr = a + 1;
BufferStr = buf;
qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
while (ptr - a < l)
{
if (!(ptr->entry.len == res->entry.len &&
strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
{
if ( res->entry.haspos ) {
*(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
*outbuflen += *(uint16*)(res->pos) * sizeof(WordEntryPos);
}
*outbuflen += SHORTALIGN(res->entry.len);
res++;
memcpy(res,ptr,sizeof(WordEntryIN));
} else if ( ptr->entry.haspos ){
if ( res->entry.haspos ) {
int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos);
res->pos=(WordEntryPos*)repalloc( res->pos, len*sizeof(WordEntryPos));
memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]),
&(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos));
*(uint16*)(res->pos) += *(uint16*)(ptr->pos);
pfree( ptr->pos );
} else {
res->entry.haspos=1;
res->pos = ptr->pos;
}
}
ptr++;
}
if ( res->entry.haspos ) {
*(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos));
*outbuflen += *(uint16*)(res->pos) * sizeof(WordEntryPos);
}
*outbuflen += SHORTALIGN(res->entry.len);
return res + 1 - a;
}
/*
 * States of the tsvector text parser in gettoken_tsvector():
 *	WAITWORD	 - skipping blanks, waiting for a lexeme to start
 *	WAITENDWORD  - inside an unquoted lexeme
 *	WAITNEXTCHAR - a backslash was seen, the next character is taken literally
 *	WAITENDCMPLX - inside a single-quoted lexeme
 *	WAITPOSINFO  - closing quote seen, checking for ':' that starts positions
 *	INPOSINFO	 - expecting the first digit of a position number
 *	WAITPOSDELIM - after a position: further digits, weight letter, ',' or end
 */
#define WAITWORD 1
#define WAITENDWORD 2
#define WAITNEXTCHAR 3
#define WAITENDCMPLX 4
#define WAITPOSINFO 5
#define INPOSINFO 6
#define WAITPOSDELIM 7
/*
 * Make room in state->word for one more byte: when the buffer is (nearly)
 * full, double state->len, repalloc the buffer and re-point state->curpos
 * into the new allocation.  Expects a variable named "state" in scope.
 */
#define RESIZEPRSBUF \
do { \
if ( state->curpos - state->word + 1 >= state->len ) \
{ \
int4 clen = state->curpos - state->word; \
state->len *= 2; \
state->word = (char*)repalloc( (void*)state->word, state->len ); \
state->curpos = state->word + clen; \
} \
} while (0)
/*
 * Extract the next lexeme (and, unless state->oprisdelim is set, its
 * position list) from state->prsbuf.
 *
 * On success the NUL-terminated lexeme is in state->word, state->curpos
 * points at its terminator, and state->prsbuf is left at the first
 * unconsumed character.  If positions were parsed, state->alen is non-zero
 * and state->pos is a palloc'd array whose slot 0, read as uint16, holds the
 * number of positions stored in slots 1..n.
 *
 * Returns 1 when a lexeme was read and 0 at end of input; malformed input is
 * reported with elog(ERROR).
 *
 * Characters handed to the <ctype.h> classifiers are cast to unsigned char:
 * passing a negative plain char (any 8-bit byte on signed-char platforms)
 * is undefined behaviour.
 */
int4
gettoken_tsvector(TI_IN_STATE * state)
{
	int4		oldstate = 0;	/* state to resume after an escaped char */

	state->curpos = state->word;
	state->state = WAITWORD;
	state->alen = 0;

	while (1)
	{
		if (state->state == WAITWORD)
		{
			if (*(state->prsbuf) == '\0')
				return 0;
			else if (*(state->prsbuf) == '\'')
				state->state = WAITENDCMPLX;
			else if (*(state->prsbuf) == '\\')
			{
				state->state = WAITNEXTCHAR;
				oldstate = WAITENDWORD;
			}
			else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
				elog(ERROR, "Syntax error");
			else if (*(state->prsbuf) != ' ')
			{
				*(state->curpos) = *(state->prsbuf);
				state->curpos++;
				state->state = WAITENDWORD;
			}
		}
		else if (state->state == WAITNEXTCHAR)
		{
			if (*(state->prsbuf) == '\0')
				elog(ERROR, "There is no escaped character");
			else
			{
				RESIZEPRSBUF;
				*(state->curpos) = *(state->prsbuf);
				state->curpos++;
				state->state = oldstate;
			}
		}
		else if (state->state == WAITENDWORD)
		{
			if (*(state->prsbuf) == '\\')
			{
				state->state = WAITNEXTCHAR;
				oldstate = WAITENDWORD;
			}
			else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
					 (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
			{
				/* end of an unquoted lexeme; the delimiter is not consumed */
				RESIZEPRSBUF;
				if (state->curpos == state->word)
					elog(ERROR, "Syntax error");
				*(state->curpos) = '\0';
				return 1;
			}
			else if (*(state->prsbuf) == ':')
			{
				if (state->curpos == state->word)
					elog(ERROR, "Syntax error");
				*(state->curpos) = '\0';
				if (state->oprisdelim)
					return 1;
				else
					state->state = INPOSINFO;
			}
			else
			{
				RESIZEPRSBUF;
				*(state->curpos) = *(state->prsbuf);
				state->curpos++;
			}
		}
		else if (state->state == WAITENDCMPLX)
		{
			if (*(state->prsbuf) == '\'')
			{
				RESIZEPRSBUF;
				*(state->curpos) = '\0';
				if (state->curpos == state->word)
					elog(ERROR, "Syntax error");
				if (state->oprisdelim)
				{
					/* consume the closing quote before returning */
					state->prsbuf++;
					return 1;
				}
				else
					state->state = WAITPOSINFO;
			}
			else if (*(state->prsbuf) == '\\')
			{
				state->state = WAITNEXTCHAR;
				oldstate = WAITENDCMPLX;
			}
			else if (*(state->prsbuf) == '\0')
				elog(ERROR, "Syntax error");
			else
			{
				RESIZEPRSBUF;
				*(state->curpos) = *(state->prsbuf);
				state->curpos++;
			}
		}
		else if (state->state == WAITPOSINFO)
		{
			if (*(state->prsbuf) == ':')
				state->state = INPOSINFO;
			else
				return 1;
		}
		else if (state->state == INPOSINFO)
		{
			if (isdigit((unsigned char) *(state->prsbuf)))
			{
				if (state->alen == 0)
				{
					state->alen = 4;
					state->pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * state->alen);
					*(uint16 *) (state->pos) = 0;
				}
				else if (*(uint16 *) (state->pos) + 1 >= state->alen)
				{
					state->alen *= 2;
					state->pos = (WordEntryPos *) repalloc(state->pos, sizeof(WordEntryPos) * state->alen);
				}
				(*(uint16 *) (state->pos))++;
				/* atoi() reads the whole number; WAITPOSDELIM skips its digits */
				state->pos[*(uint16 *) (state->pos)].pos = LIMITPOS(atoi(state->prsbuf));
				if (state->pos[*(uint16 *) (state->pos)].pos == 0)
					elog(ERROR, "Wrong position info");
				state->pos[*(uint16 *) (state->pos)].weight = 0;
				state->state = WAITPOSDELIM;
			}
			else
				elog(ERROR, "Syntax error");
		}
		else if (state->state == WAITPOSDELIM)
		{
			if (*(state->prsbuf) == ',')
			{
				state->state = INPOSINFO;
			}
			else if (tolower((unsigned char) *(state->prsbuf)) == 'a' || *(state->prsbuf) == '*')
			{
				if (state->pos[*(uint16 *) (state->pos)].weight)
					elog(ERROR, "Syntax error");
				state->pos[*(uint16 *) (state->pos)].weight = 3;
			}
			else if (tolower((unsigned char) *(state->prsbuf)) == 'b')
			{
				if (state->pos[*(uint16 *) (state->pos)].weight)
					elog(ERROR, "Syntax error");
				state->pos[*(uint16 *) (state->pos)].weight = 2;
			}
			else if (tolower((unsigned char) *(state->prsbuf)) == 'c')
			{
				if (state->pos[*(uint16 *) (state->pos)].weight)
					elog(ERROR, "Syntax error");
				state->pos[*(uint16 *) (state->pos)].weight = 1;
			}
			else if (tolower((unsigned char) *(state->prsbuf)) == 'd')
			{
				if (state->pos[*(uint16 *) (state->pos)].weight)
					elog(ERROR, "Syntax error");
				state->pos[*(uint16 *) (state->pos)].weight = 0;
			}
			else if (isspace((unsigned char) *(state->prsbuf)) || *(state->prsbuf) == '\0')
			{
				return 1;
			}
			else if (!isdigit((unsigned char) *(state->prsbuf)))
				elog(ERROR, "Syntax error");
		}
		else
			elog(ERROR, "Inner bug :(");

		state->prsbuf++;
	}

	return 0;					/* not reached; keeps compilers quiet */
}
/*
 * Input function of the tsvector type: parse the textual representation
 * (cstring argument 0) into the packed on-disk form.
 *
 * Lexemes are first collected into a scratch text buffer and an array of
 * WordEntryIN, then sorted/deduplicated by uniqueentry() and finally copied
 * into the result, each lexeme being followed by its position array.
 *
 * Raises an error for lexemes of MAXSTRLEN bytes or more and for values
 * whose lexeme offsets do not fit into the 20-bit WordEntry.pos field.
 */
Datum
tsvector_in(PG_FUNCTION_ARGS)
{
	char	   *buf = PG_GETARG_CSTRING(0);
	TI_IN_STATE state;
	WordEntryIN *arr;
	WordEntry  *inarr;
	int4		len = 0,
				totallen = 64;
	tsvector   *in;
	char	   *tmpbuf,
			   *cur;
	int4		i,
				buflen = 256;

	state.prsbuf = buf;
	state.len = 32;
	state.word = (char *) palloc(state.len);
	state.oprisdelim = false;

	arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
	cur = tmpbuf = (char *) palloc(buflen);

	while (gettoken_tsvector(&state))
	{
		if (len >= totallen)
		{
			totallen *= 2;
			arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
		}
		while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
		{
			int4		dist = cur - tmpbuf;

			buflen *= 2;
			tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
			cur = tmpbuf + dist;
		}
		if (state.curpos - state.word >= MAXSTRLEN)
			elog(ERROR, "Word is too long");
		arr[len].entry.len = state.curpos - state.word;
		/* pos is a 20-bit field: MAXSTRPOS itself is already out of range */
		if (cur - tmpbuf >= MAXSTRPOS)
			elog(ERROR, "Too long value");
		arr[len].entry.pos = cur - tmpbuf;
		memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
		cur += arr[len].entry.len;
		if (state.alen)
		{
			arr[len].entry.haspos = 1;
			arr[len].pos = state.pos;
		}
		else
			arr[len].entry.haspos = 0;
		len++;
	}
	pfree(state.word);

	if (len > 0)
		len = uniqueentry(arr, len, tmpbuf, &buflen);
	else
		buflen = 0;				/* empty value carries no data bytes */

	totallen = CALCDATASIZE(len, buflen);
	in = (tsvector *) palloc(totallen);
	memset(in, 0, totallen);
	in->len = totallen;
	in->size = len;
	cur = STRPTR(in);
	inarr = ARRPTR(in);
	for (i = 0; i < len; i++)
	{
		/*
		 * Final offsets also account for the position arrays, so they can
		 * exceed the range checked above on the scratch buffer.
		 */
		if (cur - STRPTR(in) >= MAXSTRPOS)
			elog(ERROR, "Too long value");
		memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
		arr[i].entry.pos = cur - STRPTR(in);
		cur += SHORTALIGN(arr[i].entry.len);
		if (arr[i].entry.haspos)
		{
			/* counter slot plus the positions themselves */
			memcpy(cur, arr[i].pos, (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos));
			cur += (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos);
			pfree(arr[i].pos);
		}
		memcpy(&(inarr[i]), &(arr[i].entry), sizeof(WordEntry));
	}
	pfree(tmpbuf);
	pfree(arr);
	PG_RETURN_POINTER(in);
}
/*
 * Return the number of lexeme entries stored in a tsvector (argument 0).
 */
Datum
tsvector_length(PG_FUNCTION_ARGS)
{
	tsvector   *vec = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
	int4		nentries;

	/* read the count before the detoasted copy may be released */
	nentries = vec->size;
	PG_FREE_IF_COPY(vec, 0);

	PG_RETURN_INT32(nentries);
}
/*
 * Output function of the tsvector type: build the textual representation
 *	 'lexeme':pos[weight],pos[weight] 'lexeme' ...
 * of argument 0 in a palloc'd NUL-terminated string.
 *
 * Both the single quote and the backslash are escaped with a backslash.
 * gettoken_tsvector() treats a backslash inside a quoted lexeme as an escape
 * character, so emitting it unescaped would make the output read back as a
 * different lexeme.
 *
 * Buffer sizing (worst case): two quotes and a separating blank per entry,
 * two bytes per lexeme byte (every byte escaped), and seven bytes per
 * position (five digits, weight letter, comma; the ':' takes the place of
 * the comma the last position does not need), plus the terminator.
 */
Datum
tsvector_out(PG_FUNCTION_ARGS)
{
	tsvector   *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
	char	   *outbuf;
	int4		i,
				j,
				lenbuf = 0,
				pp;
	WordEntry  *ptr = ARRPTR(out);
	char	   *curin,
			   *curout;

	lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ;
	for (i = 0; i < out->size; i++)
	{
		lenbuf += ptr[i].len * 2 /* for escape */ ;
		if (ptr[i].haspos)
			lenbuf += 7 * POSDATALEN(out, &(ptr[i]));
	}

	curout = outbuf = (char *) palloc(lenbuf);
	for (i = 0; i < out->size; i++)
	{
		curin = STRPTR(out) + ptr->pos;
		if (i != 0)
			*curout++ = ' ';
		*curout++ = '\'';
		j = ptr->len;
		while (j--)
		{
			/* room for the escape byte is already part of lenbuf */
			if (*curin == '\'' || *curin == '\\')
				*curout++ = '\\';
			*curout++ = *curin++;
		}
		*curout++ = '\'';
		if ((pp = POSDATALEN(out, ptr)) != 0)
		{
			WordEntryPos *wptr;

			*curout++ = ':';
			wptr = POSDATAPTR(out, ptr);
			while (pp)
			{
				curout += sprintf(curout, "%d", wptr->pos);
				switch (wptr->weight)
				{
					case 3:
						*curout++ = 'A';
						break;
					case 2:
						*curout++ = 'B';
						break;
					case 1:
						*curout++ = 'C';
						break;
					case 0:		/* default weight is not printed */
					default:
						break;
				}
				if (pp > 1)
					*curout++ = ',';
				pp--;
				wptr++;
			}
		}
		ptr++;
	}
	*curout = '\0';
	PG_FREE_IF_COPY(out, 0);
	PG_RETURN_POINTER(outbuf);
}
static int
compareWORD(const void *a, const void *b)
{
if (((WORD *) a)->len == ((WORD *) b)->len) {
int res = strncmp(
((WORD *) a)->word,
((WORD *) b)->word,
((WORD *) b)->len);
if ( res==0 )
return ( ((WORD *) a)->pos.pos > ((WORD *) b)->pos.pos ) ? 1 : -1;
return res;
}
return (((WORD *) a)->len > ((WORD *) b)->len) ? 1 : -1;
}
/*
 * Sort the parsed words and merge duplicates of the same lexeme.
 *
 * On return every kept WORD owns a palloc'd uint16 array pos.apos, where
 * apos[0] is the number of positions and apos[1..] are the positions
 * (capped by LIMITPOS).  The lexeme text of each merged duplicate is freed.
 * A lexeme stops collecting positions once it holds MAXNUMPOS-1 of them or
 * its last position is the capped value MAXENTRYPOS-1.
 *
 * Returns the number of distinct words, compacted at the front of a[].
 */
static int
uniqueWORD(WORD * a, int4 l)
{
	WORD	   *cur,
			   *last;
	int			firstpos;

	/* a single word needs no sorting */
	if (l != 1)
		qsort((void *) a, l, sizeof(WORD), compareWORD);

	/* read the scalar position before pos.apos is assigned */
	firstpos = LIMITPOS(a->pos.pos);
	a->alen = 2;
	a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
	a->pos.apos[0] = 1;
	a->pos.apos[1] = firstpos;

	last = a;
	for (cur = a + 1; cur - a < l; cur++)
	{
		if (cur->len == last->len &&
			strncmp(cur->word, last->word, last->len) == 0)
		{
			/* duplicate lexeme: drop its text, maybe record its position */
			pfree(cur->word);

			if (last->pos.apos[0] >= MAXNUMPOS - 1 ||
				last->pos.apos[last->pos.apos[0]] == MAXENTRYPOS - 1)
				continue;

			if (last->pos.apos[0] + 1 >= last->alen)
			{
				last->alen *= 2;
				last->pos.apos = (uint16 *) repalloc(last->pos.apos, sizeof(uint16) * last->alen);
			}
			last->pos.apos[last->pos.apos[0] + 1] = LIMITPOS(cur->pos.pos);
			last->pos.apos[0]++;
		}
		else
		{
			/* new lexeme: move it into the next output slot */
			last++;
			last->len = cur->len;
			last->word = cur->word;
			firstpos = LIMITPOS(cur->pos.pos);
			last->alen = 2;
			last->pos.apos = (uint16 *) palloc(sizeof(uint16) * last->alen);
			last->pos.apos[0] = 1;
			last->pos.apos[1] = firstpos;
		}
	}

	return last + 1 - a;
}
/*
 * Build a tsvector from the words collected in prs.
 *
 * The words are sorted and deduplicated with uniqueWORD(), then packed into
 * a freshly palloc'd, zero-filled tsvector: each lexeme is followed by a
 * uint16 position count and its positions (all with weight 0).
 * Frees every word, every position array and prs->words itself.
 *
 * Raises an error when a lexeme offset does not fit into the 20-bit
 * WordEntry.pos field.
 */
static tsvector *
makevalue(PRSTEXT * prs)
{
	int4		i,
				j,
				lenstr = 0,
				totallen;
	tsvector   *in;
	WordEntry  *ptr;
	char	   *str,
			   *cur;

	prs->curwords = uniqueWORD(prs->words, prs->curwords);
	for (i = 0; i < prs->curwords; i++)
	{
		lenstr += SHORTALIGN(prs->words[i].len);
		if (prs->words[i].alen)
			lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
	}

	totallen = CALCDATASIZE(prs->curwords, lenstr);
	in = (tsvector *) palloc(totallen);
	memset(in, 0, totallen);
	in->len = totallen;
	in->size = prs->curwords;

	ptr = ARRPTR(in);
	cur = str = STRPTR(in);
	for (i = 0; i < prs->curwords; i++)
	{
		ptr->len = prs->words[i].len;
		/* pos is a 20-bit field: MAXSTRPOS itself is already out of range */
		if (cur - str >= MAXSTRPOS)
			elog(ERROR, "Value is too big");
		ptr->pos = cur - str;
		memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
		pfree(prs->words[i].word);
		cur += SHORTALIGN(prs->words[i].len);
		if (prs->words[i].alen)
		{
			WordEntryPos *wptr;

			ptr->haspos = 1;
			*(uint16 *) cur = prs->words[i].pos.apos[0];
			wptr = POSDATAPTR(in, ptr);
			for (j = 0; j < *(uint16 *) cur; j++)
			{
				wptr[j].weight = 0;
				wptr[j].pos = prs->words[i].pos.apos[j + 1];
			}
			cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
			pfree(prs->words[i].pos.apos);
		}
		else
			ptr->haspos = 0;
		ptr++;
	}
	pfree(prs->words);
	return in;
}
/*
 * to_tsvector(cfg_id, text): parse the text (argument 1) with the
 * configuration looked up from argument 0 and return the resulting tsvector.
 * A text yielding no words produces an empty tsvector.
 */
Datum
to_tsvector(PG_FUNCTION_ARGS)
{
	text	   *in = PG_GETARG_TEXT_P(1);
	PRSTEXT		prs;
	tsvector   *empty;
	TSCfgInfo  *cfg = findcfg(PG_GETARG_INT32(0));

	prs.lenwords = 32;
	prs.curwords = 0;
	prs.pos = 0;
	prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);

	parsetext_v2(cfg, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ);
	PG_FREE_IF_COPY(in, 1);

	/* makevalue() takes over (and frees) the collected words */
	if (prs.curwords)
		PG_RETURN_POINTER(makevalue(&prs));

	pfree(prs.words);
	empty = palloc(CALCDATASIZE(0, 0));
	empty->len = CALCDATASIZE(0, 0);
	empty->size = 0;
	PG_RETURN_POINTER(empty);
}
Datum
to_tsvector_name(PG_FUNCTION_ARGS) {
text *cfg=PG_GETARG_TEXT_P(0);
Datum res = DirectFunctionCall3(
to_tsvector,
Int32GetDatum( name2id_cfg( cfg ) ),
PG_GETARG_DATUM(1),
(Datum)0
);
PG_FREE_IF_COPY(cfg,0);
PG_RETURN_DATUM(res);
}
/*
 * to_tsvector(text): delegate to to_tsvector() using the configuration id
 * returned by get_currcfg().
 */
Datum
to_tsvector_current(PG_FUNCTION_ARGS)
{
	Datum		cfgid = Int32GetDatum(get_currcfg());
	Datum		vector;

	vector = DirectFunctionCall3(to_tsvector,
								 cfgid,
								 PG_GETARG_DATUM(0),
								 (Datum) 0);

	PG_RETURN_DATUM(vector);
}
static Oid
findFunc(char *fname) {
FuncCandidateList clist,ptr;
Oid funcid = InvalidOid;
List *names=makeList1(makeString(fname));
ptr = clist = FuncnameGetCandidates(names, 1);
freeList(names);
if ( !ptr )
return funcid;
while(ptr) {
if ( ptr->args[0] == TEXTOID && funcid == InvalidOid )
funcid=ptr->oid;
clist=ptr->next;
pfree(ptr);
ptr=clist;
}
return funcid;
}
/*
* Trigger function tsearch2(tsvector_field, text_field1, ...)
*
* Must be fired as a row-level BEFORE INSERT or UPDATE trigger; any other
* invocation is rejected with an error.
*
* Trigger argument 0 names the tsvector column to fill.  Each further
* argument is either the name of a text/varchar/char column whose value is
* parsed with the current configuration, or - when no column of that name
* exists - the name of a function looked up with findFunc().  Once such a
* function has been found it is applied to the value of every column
* processed after it (funcoid is not reset inside the loop).
*
* Returns the modified tuple with the tsvector column replaced.
*/
Datum
tsearch2(PG_FUNCTION_ARGS)
{
TriggerData *trigdata;
Trigger *trigger;
Relation rel;
HeapTuple rettuple = NULL;
TSCfgInfo *cfg=findcfg(get_currcfg());
int numidxattr,
i;
PRSTEXT prs;
Datum datum = (Datum) 0;
Oid funcoid = InvalidOid;
/* validate the calling context */
if (!CALLED_AS_TRIGGER(fcinfo))
elog(ERROR, "TSearch: Not fired by trigger manager");
trigdata = (TriggerData *) fcinfo->context;
if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
elog(ERROR, "TSearch: Can't process STATEMENT events");
if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
elog(ERROR, "TSearch: Must be fired BEFORE event");
/* pick the tuple that is about to be stored */
if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
rettuple = trigdata->tg_trigtuple;
else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
rettuple = trigdata->tg_newtuple;
else
elog(ERROR, "TSearch: Unknown event");
trigger = trigdata->tg_trigger;
rel = trigdata->tg_relation;
if (trigger->tgnargs < 2)
elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
/* attribute number of the tsvector column to be filled */
numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
if (numidxattr == SPI_ERROR_NOATTRIBUTE)
elog(ERROR, "TSearch: Can not find tsvector_field");
prs.lenwords = 32;
prs.curwords = 0;
prs.pos = 0;
prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
/* find all words in indexable column */
for (i = 1; i < trigger->tgnargs; i++)
{
int numattr;
Oid oidtype;
Datum txt_toasted;
bool isnull;
text *txt;
numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
if (numattr == SPI_ERROR_NOATTRIBUTE)
{
/* not a column: treat the argument as a function name */
funcoid=findFunc(trigger->tgargs[i]);
if ( funcoid==InvalidOid )
elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]);
continue;
}
oidtype = SPI_gettypeid(rel->rd_att, numattr);
/* We assume char() and varchar() are binary-equivalent to text */
if (!(oidtype == TEXTOID ||
oidtype == VARCHAROID ||
oidtype == BPCHAROID))
{
elog(WARNING, "TSearch: '%s' is not of character type",
trigger->tgargs[i]);
continue;
}
txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
if (isnull)
continue;
if ( funcoid!=InvalidOid ) {
/* run the column value through the user-supplied function first */
text *txttmp = (text *) DatumGetPointer( OidFunctionCall1(
funcoid,
PointerGetDatum(txt_toasted)
));
txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
/* no detoasted copy was made: make the pfree() test below skip txt */
if ( txt == txttmp )
txt_toasted = PointerGetDatum(txt);
} else
txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
/* free txt only when it is a separate detoasted copy */
if (txt != (text*)DatumGetPointer(txt_toasted) )
pfree(txt);
}
/* make tsvector value */
if (prs.curwords)
{
/* makevalue() consumes and frees prs.words */
datum = PointerGetDatum(makevalue(&prs));
rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
&datum, NULL);
pfree(DatumGetPointer(datum));
}
else
{
/* nothing was parsed: store an empty tsvector */
tsvector *out = palloc(CALCDATASIZE(0,0));
out->len = CALCDATASIZE(0,0);
out->size = 0;
datum = PointerGetDatum(out);
pfree(prs.words);
rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
&datum, NULL);
}
if (rettuple == NULL)
elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
return PointerGetDatum(rettuple);
}
#ifndef __TXTIDX_H__
#define __TXTIDX_H__

/*
#define TXTIDX_DEBUG
*/

#include "postgres.h"

#include "access/gist.h"
#include "access/itup.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/builtins.h"
#include "storage/bufpage.h"

/*
 * Directory entry for one lexeme of a tsvector.
 *	haspos - set when a position array follows the lexeme text
 *	len    - length of the lexeme in bytes
 *	pos    - offset of the lexeme text from STRPTR() of the tsvector
 */
typedef struct
{
	uint32
				haspos:1,
				len:11,			/* MAX 2Kb */
				pos:20;			/* MAX 1Mb */
}	WordEntry;

/* First values that no longer fit into WordEntry.len / WordEntry.pos */
#define MAXSTRLEN ( 1<<11 )
#define MAXSTRPOS ( 1<<20 )

/* One occurrence of a lexeme: its position and a 2-bit weight */
typedef struct
{
	uint16
				weight:2,
				pos:14;
}	WordEntryPos;

/* First value that no longer fits into WordEntryPos.pos */
#define MAXENTRYPOS (1<<14)
/* Limit on the number of positions kept per lexeme */
#define MAXNUMPOS	256
/* Clamp a position to the largest storable value */
#define LIMITPOS(x) ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )

/*
 * A tsvector value:
 *	len  - total size of the value in bytes
 *	size - number of WordEntry items
 *	data - the WordEntry array, followed by the lexeme text and position data
 */
typedef struct
{
	int4		len;
	int4		size;
	char		data[1];
}	tsvector;

/*
 * Accessors.  Every macro argument is parenthesized so that expression
 * arguments (e.g. "a + b" or "p - q") expand with the intended precedence.
 */
#define DATAHDRSIZE (sizeof(int4)*2)
/* total size of a tsvector with x entries and lenstr bytes of data */
#define CALCDATASIZE(x, lenstr) ( (x) * sizeof(WordEntry) + DATAHDRSIZE + (lenstr) )
/* start of the WordEntry array */
#define ARRPTR(x)	( (WordEntry*) ( (char*)(x) + DATAHDRSIZE ) )
/* start of the data area following the WordEntry array */
#define STRPTR(x)	( (char*)(x) + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
/* size of the data area in bytes */
#define STRSIZE(x)	( ((tsvector*)(x))->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)(x))->size ) )
/* address of the uint16 position counter stored after lexeme e */
#define _POSDATAPTR(x,e)	(STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len))
/* number of positions of lexeme e, 0 when it has none */
#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 )
/* first position of lexeme e (just past the counter) */
#define POSDATAPTR(x,e) ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) )

/*
 * Entry as built by the input parser: the directory entry plus a separately
 * allocated position array whose slot 0, read as uint16, holds the count.
 */
typedef struct
{
	WordEntry	entry;
	WordEntryPos *pos;
}	WordEntryIN;

/* State of the tsvector text parser, see gettoken_tsvector() */
typedef struct
{
	char	   *prsbuf;			/* next input character */
	char	   *word;			/* buffer receiving the current lexeme */
	char	   *curpos;			/* write position inside word */
	int4		len;			/* allocated size of word */
	int4		state;			/* current parser state */
	int4		alen;			/* allocated slots in pos, 0 if none */
	WordEntryPos *pos;			/* positions parsed for the current lexeme */
	bool		oprisdelim;		/* treat query operators as delimiters */
}	TI_IN_STATE;

int4		gettoken_tsvector(TI_IN_STATE * state);

#endif
/*
* Operations for tsvector type
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include "postgres.h"
#include "access/gist.h"
#include "access/itup.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/builtins.h"
#include "storage/bufpage.h"
#include "executor/spi.h"
#include "commands/trigger.h"
#include "nodes/pg_list.h"
#include "catalog/namespace.h"
#include "utils/pg_locale.h"
#include <ctype.h> /* tolower */
#include "tsvector.h"
#include "query.h"
#include "ts_cfg.h"
#include "common.h"
PG_FUNCTION_INFO_V1(strip);
Datum strip(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(setweight);
Datum setweight(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(concat);
Datum concat(PG_FUNCTION_ARGS);
Datum
strip(PG_FUNCTION_ARGS)
{
tsvector *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
tsvector *out;
int i,len=0;
WordEntry *arrin=ARRPTR(in), *arrout;
char *cur;
for(i=0;i<in->size;i++)
len += SHORTALIGN( arrin[i].len );
len = CALCDATASIZE(in->size, len);
out=(tsvector*)palloc(len);
memset(out,0,len);
out->len=len;
out->size=in->size;
arrout=ARRPTR(out);
cur=STRPTR(out);
for(i=0;i<in->size;i++) {
memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len);
arrout[i].haspos = 0;
arrout[i].len = arrin[i].len;
arrout[i].pos = cur - STRPTR(out);
cur += SHORTALIGN( arrout[i].len );
}
PG_FREE_IF_COPY(in, 0);
PG_RETURN_POINTER(out);
}
Datum
setweight(PG_FUNCTION_ARGS)
{
tsvector *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
char cw = PG_GETARG_CHAR(1);
tsvector *out;
int i,j;
WordEntry *entry;
WordEntryPos *p;
int w=0;
switch(tolower(cw)) {
case 'a': w=3; break;
case 'b': w=2; break;
case 'c': w=1; break;
case 'd': w=0; break;
default: elog(ERROR,"Unknown weight");
}
out=(tsvector*)palloc(in->len);
memcpy(out,in,in->len);
entry=ARRPTR(out);
i=out->size;
while(i--) {
if ( (j=POSDATALEN(out,entry)) != 0 ) {
p=POSDATAPTR(out,entry);
while(j--) {
p->weight=w;
p++;
}
}
entry++;
}
PG_FREE_IF_COPY(in, 0);
PG_RETURN_POINTER(out);
}
/*
 * Compare two lexemes living in (possibly different) tsvector data areas:
 * shorter lexemes sort first, equal-length lexemes are ordered by bytes.
 * ptra/ptrb are the data-area base pointers the entry offsets refer to.
 */
static int
compareEntry(char *ptra, WordEntry * a, char *ptrb, WordEntry * b)
{
	if (a->len != b->len)
		return (a->len > b->len) ? 1 : -1;

	return strncmp(ptra + a->pos, ptrb + b->pos, a->len);
}
/*
 * Append the positions of srcptr (an entry of src), each shifted by maxpos
 * and clamped with LIMITPOS, to the position array of destptr (an entry of
 * dest).  destptr->len and destptr->pos must already be set, because they
 * determine where the array lives.
 *
 * Copying stops when the destination holds MAXNUMPOS positions or its last
 * position is the clamped value MAXENTRYPOS-1.  destptr->haspos is set when
 * at least one position was added.
 *
 * Returns the number of positions added.
 */
static int4
add_pos(tsvector * src, WordEntry * srcptr, tsvector * dest, WordEntry * destptr, int4 maxpos)
{
	uint16	   *clen = (uint16 *) _POSDATAPTR(dest, destptr);
	uint16		slen = POSDATALEN(src, srcptr),
				startlen;
	WordEntryPos *spos = POSDATAPTR(src, srcptr),
			   *dpos = POSDATAPTR(dest, destptr);
	int			i;

	/* no positions stored yet: start counting from zero */
	if (!destptr->haspos)
		*clen = 0;
	startlen = *clen;

	for (i = 0; i < slen; i++)
	{
		if (*clen >= MAXNUMPOS)
			break;
		if (*clen != 0 && dpos[*clen - 1].pos == MAXENTRYPOS - 1)
			break;

		dpos[*clen].weight = spos[i].weight;
		dpos[*clen].pos = LIMITPOS(spos[i].pos + maxpos);
		(*clen)++;
	}

	if (*clen != startlen)
		destptr->haspos = 1;
	return *clen - startlen;
}
/*
* concat(tsvector, tsvector): merge two tsvectors into one.
*
* The entries of both inputs are merged in compareEntry() order. Positions
* coming from the second argument are shifted by the largest position found
* in the first argument (see add_pos()), so they follow the first argument's
* positions. A lexeme present in both inputs yields a single entry holding
* the positions of both.
*
* The result is allocated with the combined size of both inputs and trimmed
* to the size actually used at the end.
*/
Datum
concat(PG_FUNCTION_ARGS) {
tsvector *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
tsvector *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
tsvector *out;
WordEntry *ptr;
WordEntry *ptr1,*ptr2;
WordEntryPos *p;
int maxpos=0,i,j,i1,i2;
char *cur;
char *data,*data1,*data2;
/* find the largest position used in in1 */
ptr=ARRPTR(in1);
i=in1->size;
while(i--) {
if ( (j=POSDATALEN(in1,ptr)) != 0 ) {
p=POSDATAPTR(in1,ptr);
while(j--) {
if ( p->pos > maxpos )
maxpos = p->pos;
p++;
}
}
ptr++;
}
ptr1=ARRPTR(in1); ptr2=ARRPTR(in2);
data1=STRPTR(in1); data2=STRPTR(in2);
i1=in1->size; i2=in2->size;
out=(tsvector*)palloc( in1->len + in2->len );
memset(out,0,in1->len + in2->len);
out->len = in1->len + in2->len;
/* provisional entry count; it fixes where STRPTR(out) points during the merge */
out->size = in1->size + in2->size;
data=cur=STRPTR(out);
ptr=ARRPTR(out);
/* merge while both inputs still have entries */
while( i1 && i2 ) {
int cmp=compareEntry(data1,ptr1,data2,ptr2);
if ( cmp < 0 ) { /* in1 first */
ptr->haspos = ptr1->haspos;
ptr->len = ptr1->len;
memcpy( cur, data1 + ptr1->pos, ptr1->len );
ptr->pos = cur - data;
cur+=SHORTALIGN(ptr1->len);
if ( ptr->haspos ) {
/* in1 positions are copied verbatim, counter included */
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
}
ptr++; ptr1++; i1--;
} else if ( cmp>0 ) { /* in2 first */
ptr->haspos = ptr2->haspos;
ptr->len = ptr2->len;
memcpy( cur, data2 + ptr2->pos, ptr2->len );
ptr->pos = cur - data;
cur+=SHORTALIGN(ptr2->len);
if ( ptr->haspos ) {
/* in2 positions are shifted by maxpos; none may survive */
int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
if ( addlen == 0 )
ptr->haspos=0;
else
cur += addlen*sizeof(WordEntryPos) + sizeof(uint16);
}
ptr++; ptr2++; i2--;
} else {
/* same lexeme in both inputs: one entry, positions of both */
ptr->haspos = ptr1->haspos | ptr2->haspos;
ptr->len = ptr1->len;
memcpy( cur, data1 + ptr1->pos, ptr1->len );
ptr->pos = cur - data;
cur+=SHORTALIGN(ptr1->len);
if ( ptr->haspos ) {
if ( ptr1->haspos ) {
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
if ( ptr2->haspos )
cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos);
} else if ( ptr2->haspos ) {
int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
if ( addlen == 0 )
ptr->haspos=0;
else
cur += addlen*sizeof(WordEntryPos) + sizeof(uint16);
}
}
ptr++; ptr1++; ptr2++; i1--; i2--;
}
}
/* copy whatever is left of in1 */
while(i1) {
ptr->haspos = ptr1->haspos;
ptr->len = ptr1->len;
memcpy( cur, data1 + ptr1->pos, ptr1->len );
ptr->pos = cur - data;
cur+=SHORTALIGN(ptr1->len);
if ( ptr->haspos ) {
memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16));
cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16);
}
ptr++; ptr1++; i1--;
}
/* copy whatever is left of in2, shifting its positions */
while(i2) {
ptr->haspos = ptr2->haspos;
ptr->len = ptr2->len;
memcpy( cur, data2 + ptr2->pos, ptr2->len );
ptr->pos = cur - data;
cur+=SHORTALIGN(ptr2->len);
if ( ptr->haspos ) {
int addlen = add_pos(in2, ptr2, out, ptr, maxpos );
if ( addlen == 0 )
ptr->haspos=0;
else
cur += addlen*sizeof(WordEntryPos) + sizeof(uint16);
}
ptr++; ptr2++; i2--;
}
/* set the real entry count and size; with fewer entries the data area moves down */
out->size=ptr-ARRPTR(out);
out->len = CALCDATASIZE( out->size, cur-data );
if ( data != STRPTR(out) )
memmove( STRPTR(out), data, cur-data );
PG_FREE_IF_COPY(in1, 0);
PG_FREE_IF_COPY(in2, 1);
PG_RETURN_POINTER(out);
}
BEGIN;
-- WARNING: this script is destructive.
-- It drops all indexes, triggers and columns that use the types defined
-- in tsearch2.sql.
DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE;
DROP OPERATOR || (tsvector, tsvector);
DROP OPERATOR @@ (tsvector, tsquery);
DROP OPERATOR @@ (tsquery, tsvector);
DROP AGGREGATE stat(tsvector);
DROP TABLE pg_ts_dict;
DROP TABLE pg_ts_parser;
DROP TABLE pg_ts_cfg;
DROP TABLE pg_ts_cfgmap;
DROP TYPE tokentype CASCADE;
DROP TYPE tokenout CASCADE;
DROP TYPE tsvector CASCADE;
DROP TYPE tsquery CASCADE;
DROP TYPE gtsvector CASCADE;
DROP TYPE tsstat CASCADE;
DROP TYPE statinfo CASCADE;
DROP FUNCTION lexize(oid, text) ;
DROP FUNCTION lexize(text, text);
DROP FUNCTION lexize(text);
DROP FUNCTION set_curdict(int);
DROP FUNCTION set_curdict(text);
DROP FUNCTION dex_init(text);
DROP FUNCTION dex_lexize(internal,internal,int4);
DROP FUNCTION snb_en_init(text);
DROP FUNCTION snb_lexize(internal,internal,int4);
DROP FUNCTION snb_ru_init(text);
DROP FUNCTION spell_init(text);
DROP FUNCTION spell_lexize(internal,internal,int4);
DROP FUNCTION syn_init(text);
DROP FUNCTION syn_lexize(internal,internal,int4);
DROP FUNCTION set_curprs(int);
DROP FUNCTION set_curprs(text);
DROP FUNCTION prsd_start(internal,int4);
DROP FUNCTION prsd_getlexeme(internal,internal,internal);
DROP FUNCTION prsd_end(internal);
DROP FUNCTION prsd_lextype(internal);
DROP FUNCTION prsd_headline(internal,internal,internal);
DROP FUNCTION set_curcfg(int);
DROP FUNCTION set_curcfg(text);
DROP FUNCTION show_curcfg();
DROP FUNCTION gtsvector_compress(internal);
DROP FUNCTION gtsvector_decompress(internal);
DROP FUNCTION gtsvector_penalty(internal,internal,internal);
DROP FUNCTION gtsvector_picksplit(internal, internal);
DROP FUNCTION gtsvector_union(bytea, internal);
DROP FUNCTION reset_tsearch();
DROP FUNCTION tsearch2() CASCADE;
END;
#include "deflex.h"
/*
 * Human-readable descriptions of the token types.
 * Presumably indexed by the token type numbers defined in deflex.h
 * (LATWORD = 1 ... HTMLENTITY = 23) - the order matches; entry 0 is an
 * empty placeholder.  Keep in step with tok_alias[] and deflex.h.
 */
const char *lex_descr[]={
"",
"Latin word",
"Non-latin word",
"Word",
"Email",
"URL",
"Host",
"Scientific notation",
"VERSION",
"Part of hyphenated word",
"Non-latin part of hyphenated word",
"Latin part of hyphenated word",
"Space symbols",
"HTML Tag",
"HTTP head",
"Hyphenated word",
"Latin hyphenated word",
"Non-latin hyphenated word",
"URI",
"File or path name",
"Decimal notation",
"Signed integer",
"Unsigned integer",
"HTML Entity"
};
/*
 * Short alias names of the token types.
 * Presumably indexed by the token type numbers defined in deflex.h
 * (LATWORD = 1 ... HTMLENTITY = 23) - the order matches; entry 0 is an
 * empty placeholder.  Keep in step with lex_descr[] and deflex.h.
 */
const char *tok_alias[]={
"",
"lword",
"nlword",
"word",
"email",
"url",
"host",
"sfloat",
"version",
"part_hword",
"nlpart_hword",
"lpart_hword",
"blank",
"tag",
"http",
"hword",
"lhword",
"nlhword",
"uri",
"file",
"float",
"int",
"uint",
"entity"
};
#ifndef __DEFLEX_H__
#define __DEFLEX_H__
/*
 * Token types returned by the scanner (tsearch2_yylex).
 * Remember: LASTNUM must equal the highest token type number below.
 */
#define LASTNUM 23
#define LATWORD 1
#define CYRWORD 2
#define UWORD 3
#define EMAIL 4
#define FURL 5
#define HOST 6
#define SCIENTIFIC 7
#define VERSIONNUMBER 8
#define PARTHYPHENWORD 9
#define CYRPARTHYPHENWORD 10
#define LATPARTHYPHENWORD 11
#define SPACE 12
#define TAG 13
#define HTTP 14
#define HYPHENWORD 15
#define LATHYPHENWORD 16
#define CYRHYPHENWORD 17
#define URI 18
#define FILEPATH 19
#define DECIMAL 20
#define SIGNEDINT 21
#define UNSIGNEDINT 22
#define HTMLENTITY 23
/* per-token-type description and alias tables */
extern const char *lex_descr[];
extern const char *tok_alias[];
#endif
#ifndef __PARSER_H__
#define __PARSER_H__
/*
 * Interface to the flex-generated word scanner.
 *
 * NOTE(review): token and tokenlen are declared here without "extern", so
 * every file including this header gets a tentative definition - confirm
 * that this links with the compilers in use.
 * NOTE(review): FILE is used below but <stdio.h> is not included here;
 * includers presumably pull it in first - confirm.
 */
/* text of the most recently scanned token */
char *token;
/* length of that token in bytes */
int tokenlen;
/* scan the next token; the scanner rules return a token type from deflex.h */
int tsearch2_yylex(void);
/* start scanning a string of the given length */
void start_parse_str(char *, int);
/* start scanning a file handle; the int limits the bytes read (0 = no limit) */
void start_parse_fh(FILE *, int);
/* release scanner resources */
void end_parse(void);
#endif
%{
#include "postgres.h"
#include "deflex.h"
#include "parser.h"
#include "common.h"
/* Avoid exit() on fatal scanner errors: route the scanner's fprintf calls to ts_error() */
#define fprintf(file, fmt, msg) ts_error(ERROR, fmt, msg)
/* make the generated scanner use the postgres allocation functions */
#define free pfree
#define malloc palloc
#define realloc repalloc
#ifdef strdup
#undef strdup
#endif
#define strdup pstrdup
char *token = NULL; /* pointer to the current token */
char *s = NULL; /* private copy used to return a WHOLE hyphenated word or URL */
YY_BUFFER_STATE buf = NULL; /* scanner buffer; needed when parsing from a string */
int lrlimit = -1; /* remaining bytes allowed to be read from a filehandle ( -1 - unlimited read ) */
int bytestoread = 0; /* size of the next read from a filehandle */
/* redefine the input macro so that reads from a filehandle honour lrlimit */
#define YY_INPUT(buf,result,max_size) \
if ( yy_current_buffer->yy_is_interactive ) { \
int c = '*', n; \
for ( n = 0; n < max_size && \
(c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \
buf[n] = (char) c; \
if ( c == '\n' ) \
buf[n++] = (char) c; \
if ( c == EOF && ferror( tsearch2_yyin ) ) \
YY_FATAL_ERROR( "input in flex scanner failed" ); \
result = n; \
} else { \
if ( lrlimit == 0 ) \
result=YY_NULL; \
else { \
if ( lrlimit>0 ) { \
bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \
lrlimit -= bytestoread; \
} else \
bytestoread = max_size; \
if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \
&& ferror( tsearch2_yyin ) ) \
YY_FATAL_ERROR( "input in flex scanner failed" ); \
} \
}
%}
%option 8bit
%option never-interactive
%option nounput
%option noyywrap
/* scanner state used while splitting a hyphenated word into its parts */
%x DELIM
/* scanner states used while parsing a URL */
%x URL
%x SERVER
/* scanner states used while skipping HTML tags, quoted attribute values, comments and scripts */
%x INTAG
%x QINTAG
%x INCOMMENT
%x INSCRIPT
/* character classes; bytes \200-\377 are treated as non-latin letters (originally cyrillic koi8) */
CYRALNUM [0-9\200-\377]
CYRALPHA [\200-\377]
ALPHA [a-zA-Z\200-\377]
ALNUM [0-9a-zA-Z\200-\377]
HOSTNAME ([-_[:alnum:]]+\.)+[[:alpha:]]+
URI [-_[:alnum:]/%,\.;=&?#]+
%%
"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; }
<INSCRIPT>"</"[Ss][Cc][Rr][Ii][Pp][Tt]">" {
/* end of a script block: the whole block is reported as one SPACE token */
/* NOTE(review): the text is rewritten to " " but tokenlen keeps the length of the match, unlike the INTAG rule below which uses 1 - confirm intended */
BEGIN INITIAL;
*tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0';
token = tsearch2_yytext;
tokenlen = tsearch2_yyleng;
return SPACE;
}
"<!--" { BEGIN INCOMMENT; }
<INCOMMENT>"-->" {
/* end of an HTML comment: reported as one SPACE token */
/* NOTE(review): same tokenlen question as for the end-of-script rule above */
BEGIN INITIAL;
*tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0';
token = tsearch2_yytext;
tokenlen = tsearch2_yyleng;
return SPACE;
}
"<"[\![:alpha:]] { BEGIN INTAG; }
"</"[[:alpha:]] { BEGIN INTAG; }
<INTAG>"\"" { BEGIN QINTAG; }
<QINTAG>"\\\"" ;
<QINTAG>"\"" { BEGIN INTAG; }
<INTAG>">" {
/* end of a tag: reported as a one-character TAG token containing a blank */
BEGIN INITIAL;
token = tsearch2_yytext;
*tsearch2_yytext=' ';
token = tsearch2_yytext;
tokenlen = 1;
return TAG;
}
<QINTAG,INTAG,INCOMMENT,INSCRIPT>.|\n ;
\&(quot|amp|nbsp|lt|gt)\; {
token = tsearch2_yytext;
tokenlen = tsearch2_yyleng;
return HTMLENTITY;
}
\&\#[0-9][0-9]?[0-9]?\; {
token = tsearch2_yytext;
tokenlen = tsearch2_yyleng;
return HTMLENTITY;
}
[-_\.[:alnum:]]+@{HOSTNAME} /* Emails */ {
token = tsearch2_yytext;
tokenlen = tsearch2_yyleng;
return EMAIL;
}
[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+ /* float */ {
token = tsearch2_yytext;
tokenlen = tsearch2_yyleng;
return SCIENTIFIC;
}
[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
token = tsearch2_yytext;
tokenlen = tsearch2_yyleng;
return VERSIONNUMBER;
}
[+-]?[0-9]+\.[0-9]+ {
token = tsearch2_yytext;
tokenlen = tsearch2_yyleng;
return DECIMAL;
}
[+-][0-9]+ {
token = tsearch2_yytext;
tokenlen = tsearch2_yyleng;
return SIGNEDINT;
}
<DELIM,INITIAL>[0-9]+ {
token = tsearch2_yytext;
tokenlen = tsearch2_yyleng;
return UNSIGNEDINT;
}
http"://" {
BEGIN URL;
token = tsearch2_yytext;
tokenlen = tsearch2_yyleng;
return HTTP;
}
ftp"://" {
BEGIN URL;
token = tsearch2_yytext;
tokenlen = tsearch2_yyleng;
return HTTP;
}
<URL,INITIAL>{HOSTNAME}[/:]{URI} {
/* return a private copy of the whole URL, then push the match back (yyless(0)) so it is rescanned in SERVER state */
BEGIN SERVER;
if (s) { free(s); s=NULL; }
s = strdup( tsearch2_yytext );
tokenlen = tsearch2_yyleng;
yyless( 0 );
token = s;
return FURL;
}
<SERVER,URL,INITIAL>{HOSTNAME} {
token = tsearch2_yytext;
tokenlen = tsearch2_yyleng;
return HOST;
}
<SERVER>[/:]{URI} {
token = tsearch2_yytext;
tokenlen = tsearch2_yyleng;
return URI;
}
[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
token = tsearch2_yytext;
tokenlen = tsearch2_yyleng;
return FILEPATH;
}
({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */ {
/* return a private copy of the whole word, then push the match back so its parts are rescanned in DELIM state */
BEGIN DELIM;
if (s) { free(s); s=NULL; }
s = strdup( tsearch2_yytext );
tokenlen = tsearch2_yyleng;
yyless( 0 );
token = s;
return CYRHYPHENWORD;
}
([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */ {
/* same scheme as above for hyphenated words made of [:alpha:] characters */
BEGIN DELIM;
if (s) { free(s); s=NULL; }
s = strdup( tsearch2_yytext );
tokenlen = tsearch2_yyleng;
yyless( 0 );
token = s;
return LATHYPHENWORD;
}
({ALNUM}+-)+{ALNUM}+ /* composite-word */ {
/* same scheme as above for mixed alphanumeric hyphenated words */
BEGIN DELIM;
if (s) { free(s); s=NULL; }
s = strdup( tsearch2_yytext );
tokenlen = tsearch2_yyleng;
yyless( 0 );
token = s;
return HYPHENWORD;
}
<DELIM>[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
token = tsearch2_yytext;
tokenlen = tsearch2_yyleng;
return VERSIONNUMBER;
}
<DELIM>\+?[0-9]+\.[0-9]+ {
token = tsearch2_yytext;
tokenlen = tsearch2_yyleng;
return DECIMAL;
}
<DELIM>{CYRALPHA}+ /* one word in composite-word */ {
token = tsearch2_yytext;
tokenlen = tsearch2_yyleng;
return CYRPARTHYPHENWORD;
}
<DELIM>[[:alpha:]]+ /* one word in composite-word */ {
token = tsearch2_yytext;
tokenlen = tsearch2_yyleng;
return LATPARTHYPHENWORD;
}
<DELIM>{ALNUM}+ /* one word in composite-word */ {
token = tsearch2_yytext;
tokenlen = tsearch2_yyleng;
return PARTHYPHENWORD;
}
<DELIM>- {
/* the hyphen between the parts is reported as a SPACE token */
token = tsearch2_yytext;
tokenlen = tsearch2_yyleng;
return SPACE;
}
<DELIM,SERVER,URL>.|\n /* return in basic state */ {
/* anything else ends the special state; the character is pushed back and rescanned in INITIAL */
BEGIN INITIAL;
yyless( 0 );
}
{CYRALPHA}+ /* normal word */ {
token = tsearch2_yytext;
tokenlen = tsearch2_yyleng;
return CYRWORD;
}
[[:alpha:]]+ /* normal word */ {
token = tsearch2_yytext;
tokenlen = tsearch2_yyleng;
return LATWORD;
}
{ALNUM}+ /* normal word */ {
token = tsearch2_yytext;
tokenlen = tsearch2_yyleng;
return UWORD;
}
[ \r\n\t]+ {
token = tsearch2_yytext;
tokenlen = tsearch2_yyleng;
return SPACE;
}
. {
/* any other single character is reported as a SPACE token */
token = tsearch2_yytext;
tokenlen = tsearch2_yyleng;
return SPACE;
}
%%
/*
 * Clean up after parsing: free the saved copy of a composite token, if any,
 * and delete the scanner buffer.
 */
void end_parse() {
	if (s != NULL) {
		free(s);
		s = NULL;
	}

	tsearch2_yy_delete_buffer( buf );
	buf = NULL;
}
/* start parse from string */
void start_parse_str(char* str, int limit) {
if (buf) end_parse();
buf = tsearch2_yy_scan_bytes( str, limit );
tsearch2_yy_switch_to_buffer( buf );
BEGIN INITIAL;
}
/*
 * Begin scanning from an open stdio stream.
 * lrlimit receives `limit`, or -1 when `limit` is zero.  A scan that is
 * still open is closed first, then a YY_BUF_SIZE buffer is created on `fh`
 * and the scanner starts in the INITIAL start condition.
 */
void start_parse_fh( FILE* fh, int limit ) {
	if (buf != NULL)
		end_parse();

	if (limit != 0)
		lrlimit = limit;
	else
		lrlimit = -1;

	buf = tsearch2_yy_create_buffer(fh, YY_BUF_SIZE);
	tsearch2_yy_switch_to_buffer(buf);

	BEGIN INITIAL;
}
/*
* interface functions to parser
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "postgres.h"
#include "fmgr.h"
#include "utils/array.h"
#include "catalog/pg_type.h"
#include "executor/spi.h"
#include "funcapi.h"
#include "wparser.h"
#include "ts_cfg.h"
#include "snmap.h"
#include "common.h"
/*********top interface**********/
/* saved SPI plan for the pg_ts_parser lookup done by init_prs(); NULL until it has been prepared once */
static void *plan_getparser=NULL;
/* OID of the parser used by the *_current SQL functions; InvalidOid until set_curprs() or a first use picks one */
static Oid current_parser_id=InvalidOid;
/*
 * Fill *prs with the description of the parser whose pg_ts_parser row has
 * OID `id`.
 *
 * *prs is zeroed first.  The row is fetched through SPI with a plan that is
 * prepared once and kept in plan_getparser.  The start, nexttoken, end and
 * headline functions are resolved into FmgrInfo structs living in
 * TopMemoryContext, prs_lextype is stored as a bare OID, and prs_id is set
 * last, so an entry whose initialisation failed keeps prs_id == 0.
 *
 * Errors are reported through ts_error(ERROR, ...): failure to prepare or
 * run the query, or no row for `id`.
 */
void
init_prs(Oid id, WParserInfo *prs) {
	Oid			arg[1] = { OIDOID };
	bool		isnull;
	Datum		pars[1] = { ObjectIdGetDatum(id) };
	int			stat;

	memset(prs, 0, sizeof(WParserInfo));

	SPI_connect();
	if ( !plan_getparser ) {
		plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) );
		if ( !plan_getparser )
			ts_error(ERROR, "SPI_prepare() failed");
	}

	/* the " " nulls string marks the single parameter as not null */
	stat = SPI_execp(plan_getparser, pars, " ", 1);
	if ( stat < 0 )
		ts_error (ERROR, "SPI_execp return %d", stat);

	if ( SPI_processed > 0 ) {
		HeapTuple	row = SPI_tuptable->vals[0];
		TupleDesc	rowdesc = SPI_tuptable->tupdesc;
		Oid			oid = InvalidOid;

		oid = DatumGetObjectId( SPI_getbinval(row, rowdesc, 1, &isnull) );
		fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
		oid = DatumGetObjectId( SPI_getbinval(row, rowdesc, 2, &isnull) );
		fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
		oid = DatumGetObjectId( SPI_getbinval(row, rowdesc, 3, &isnull) );
		fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
		prs->lextype = DatumGetObjectId( SPI_getbinval(row, rowdesc, 4, &isnull) );
		oid = DatumGetObjectId( SPI_getbinval(row, rowdesc, 5, &isnull) );
		fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
		prs->prs_id = id;
	} else
		/* Oid is unsigned: print it with %u so large OIDs are not shown as negative numbers */
		ts_error(ERROR, "No parser with id %u", id);

	SPI_finish();
}
/*
 * Per-backend cache of parser descriptions, filled on demand by findprs()
 * and emptied by reset_prs().
 */
typedef struct {
WParserInfo *last_prs; /* entry returned by the most recent lookup, or NULL */
int len; /* number of entries of list[] in use */
int reallen; /* number of entries list[] has room for */
WParserInfo *list; /* malloc'ed array, kept sorted by prs_id for bsearch() */
SNMap name2id_map; /* parser name -> OID cache used by name2id_prs() */
} PrsList;
/* the cache itself, initially empty */
static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}};
/*
 * Throw away every cached parser description: release the name->OID map
 * and the entry array, then return PList to its all-zero initial state.
 */
void
reset_prs(void) {
	freeSNMap(&PList.name2id_map);

	if (PList.list != NULL)
		free(PList.list);

	memset(&PList, 0, sizeof(PList));
}
static int
compareprs(const void *a, const void *b) {
return ((WParserInfo*)a)->prs_id - ((WParserInfo*)b)->prs_id;
}
/*
 * Return the cached description of parser `id`, loading it on first use.
 *
 * Lookup order: the entry returned last time, then a binary search of the
 * sorted cache, and finally a fresh load through init_prs() into a new slot
 * (growing the array when it is full).  After a load the array is re-sorted
 * and the entry is looked up again, because sorting may have moved it.
 *
 * PList.last_prs is kept NULL while init_prs() runs.  init_prs() can raise
 * an error and leave the new slot zeroed (prs_id == 0); if last_prs already
 * pointed at it, a later findprs(0) would hand out that unusable entry.
 *
 * Raises "No memory" through ts_error() when the array cannot be grown.
 */
WParserInfo *
findprs(Oid id) {
	WParserInfo *slot;

	/* last used prs */
	if ( PList.last_prs && PList.last_prs->prs_id==id )
		return PList.last_prs;

	/* already used prs */
	if ( PList.len != 0 ) {
		WParserInfo key;

		key.prs_id = id;
		PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
		if ( PList.last_prs != NULL )
			return PList.last_prs;
	}

	/* nothing valid is remembered from here until the new entry is complete */
	PList.last_prs = NULL;

	/* last chance: load the parser, making room for it first if needed */
	if ( PList.len==PList.reallen ) {
		WParserInfo *tmp;
		int			reallen = ( PList.reallen ) ? 2*PList.reallen : 16;

		tmp = (WParserInfo*)realloc(PList.list, sizeof(WParserInfo)*reallen);
		if ( !tmp )
			ts_error(ERROR,"No memory");
		PList.reallen = reallen;
		PList.list = tmp;
	}

	slot = &(PList.list[PList.len]);
	init_prs(id, slot);		/* may not return; the slot is not counted until it does */
	PList.len++;

	qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
	return findprs(id);		/* qsort changed order!! */
}
/* saved SPI plan for the name lookup below; NULL until prepared */
static void *plan_name2id=NULL;

/*
 * Translate a parser name into the OID of its pg_ts_parser row.
 *
 * The name->OID map in PList is consulted first; on a miss the catalog is
 * queried through SPI and the answer is added to the map.  Reports an error
 * through ts_error() when the query cannot be prepared or run, or when no
 * parser has that name.
 */
Oid
name2id_prs(text *name) {
	Oid			argtypes[1] = { TEXTOID };
	bool		isnull;
	Datum		argvals[1] = { PointerGetDatum(name) };
	int			rc;
	Oid			found = findSNMap_t( &(PList.name2id_map), name );

	if ( found )
		return found;

	SPI_connect();

	if ( plan_name2id == NULL ) {
		plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, argtypes ) );
		if ( plan_name2id == NULL )
			ts_error(ERROR, "SPI_prepare() failed");
	}

	rc = SPI_execp(plan_name2id, argvals, " ", 1);
	if ( rc < 0 )
		ts_error (ERROR, "SPI_execp return %d", rc);

	if ( SPI_processed > 0 )
		found = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) );
	else
		ts_error(ERROR, "No parser '%s'", text2char(name));

	SPI_finish();

	/* remember the answer for the next call */
	addSNMap_t( &(PList.name2id_map), name, found );
	return found;
}
/******sql-level interface******/
/* iteration state of the token_type*() set-returning functions */
typedef struct {
	int			cur;	/* index of the next list[] entry to return */
	LexDescr   *list;	/* array produced by the parser's lextype function */
} TypeStorage;

/*
 * First-call work shared by the token_type*() functions: look up parser
 * `prsid`, ask its lextype function for the list of token types and prepare
 * the "tokentype" row machinery, all inside the multi-call memory context.
 */
static void
setup_firstcall(FuncCallContext *funcctx, Oid prsid) {
	WParserInfo *parser = findprs(prsid);
	MemoryContext callerctx;
	TypeStorage *state;
	TupleDesc	rowdesc;

	callerctx = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

	state = (TypeStorage*) palloc( sizeof(TypeStorage) );
	state->cur = 0;
	state->list = (LexDescr*) DatumGetPointer(
		OidFunctionCall1( parser->lextype, PointerGetDatum(parser->prs) )
	);
	funcctx->user_fctx = (void*) state;

	rowdesc = RelationNameGetTupleDesc("tokentype");
	funcctx->slot = TupleDescGetSlot(rowdesc);
	funcctx->attinmeta = TupleDescGetAttInMetadata(rowdesc);

	MemoryContextSwitchTo(callerctx);
}
/*
 * Per-call work shared by the token_type*() functions.
 *
 * Builds a (lexid, alias, descr) row from the current list entry, frees that
 * entry's two strings and advances.  When the list is missing or the entry
 * with lexid 0 is reached, the iteration state is freed and (Datum) 0 is
 * returned to tell the caller that the set is exhausted.
 */
static Datum
process_call(FuncCallContext *funcctx) {
	TypeStorage *state = (TypeStorage*) funcctx->user_fctx;
	LexDescr   *entry;
	char	   *cols[3];
	char		idbuf[16];
	HeapTuple	row;
	Datum		out;

	if ( !state->list || !state->list[state->cur].lexid ) {
		/* end of the set: release everything */
		if ( state->list )
			pfree(state->list);
		pfree(state);
		return (Datum)0;
	}

	entry = &state->list[state->cur];

	sprintf(idbuf, "%d", entry->lexid);
	cols[0] = idbuf;
	cols[1] = entry->alias;
	cols[2] = entry->descr;

	row = BuildTupleFromCStrings(funcctx->attinmeta, cols);
	out = TupleGetDatum(funcctx->slot, row);

	pfree(cols[1]);
	pfree(cols[2]);
	state->cur++;

	return out;
}
PG_FUNCTION_INFO_V1(token_type);
Datum token_type(PG_FUNCTION_ARGS);

/*
 * SQL set-returning function: list the token types of the parser whose OID
 * is argument 0, one row per call.
 */
Datum
token_type(PG_FUNCTION_ARGS) {
	FuncCallContext *fctx;
	Datum		row;

	if (SRF_IS_FIRSTCALL()) {
		fctx = SRF_FIRSTCALL_INIT();
		setup_firstcall(fctx, PG_GETARG_OID(0));
	}

	fctx = SRF_PERCALL_SETUP();
	row = process_call(fctx);

	if ( row == (Datum)0 ) {
		SRF_RETURN_DONE(fctx);
	}
	SRF_RETURN_NEXT(fctx, row);
}
PG_FUNCTION_INFO_V1(token_type_byname);
Datum token_type_byname(PG_FUNCTION_ARGS);

/*
 * SQL set-returning function: list the token types of the parser named by
 * argument 0, one row per call.
 */
Datum
token_type_byname(PG_FUNCTION_ARGS) {
	FuncCallContext *fctx;
	Datum		row;

	if (SRF_IS_FIRSTCALL()) {
		text	   *prsname = PG_GETARG_TEXT_P(0);

		fctx = SRF_FIRSTCALL_INIT();
		setup_firstcall(fctx, name2id_prs(prsname));
		PG_FREE_IF_COPY(prsname, 0);
	}

	fctx = SRF_PERCALL_SETUP();
	row = process_call(fctx);

	if ( row == (Datum)0 ) {
		SRF_RETURN_DONE(fctx);
	}
	SRF_RETURN_NEXT(fctx, row);
}
PG_FUNCTION_INFO_V1(token_type_current);
Datum token_type_current(PG_FUNCTION_ARGS);

/*
 * SQL set-returning function: list the token types of the current parser.
 * When no current parser has been chosen yet, the one named "default" is
 * looked up and becomes current.
 */
Datum
token_type_current(PG_FUNCTION_ARGS) {
	FuncCallContext *fctx;
	Datum		row;

	if (SRF_IS_FIRSTCALL()) {
		fctx = SRF_FIRSTCALL_INIT();
		if ( current_parser_id == InvalidOid )
			current_parser_id = name2id_prs( char2text("default") );
		setup_firstcall(fctx, current_parser_id);
	}

	fctx = SRF_PERCALL_SETUP();
	row = process_call(fctx);

	if ( row == (Datum)0 ) {
		SRF_RETURN_DONE(fctx);
	}
	SRF_RETURN_NEXT(fctx, row);
}
PG_FUNCTION_INFO_V1(set_curprs);
Datum set_curprs(PG_FUNCTION_ARGS);
Datum
set_curprs(PG_FUNCTION_ARGS) {
findprs(PG_GETARG_OID(0));
current_parser_id=PG_GETARG_OID(0);
PG_RETURN_VOID();
}
PG_FUNCTION_INFO_V1(set_curprs_byname);
Datum set_curprs_byname(PG_FUNCTION_ARGS);
Datum
set_curprs_byname(PG_FUNCTION_ARGS) {
text *name=PG_GETARG_TEXT_P(0);
DirectFunctionCall1(
set_curprs,
ObjectIdGetDatum( name2id_prs(name) )
);
PG_FREE_IF_COPY(name, 0);
PG_RETURN_VOID();
}
/* one token produced by a parser run */
typedef struct {
	int			type;	/* token type id returned by the parser */
	char	   *lexem;	/* NUL-terminated copy of the token text */
} LexemEntry;

/* iteration state of the parse*() set-returning functions */
typedef struct {
	int			cur;	/* index of the next entry to return */
	int			len;	/* number of entries collected */
	LexemEntry *list;	/* the collected tokens */
} PrsStorage;

/*
 * First-call work shared by the parse*() functions: run parser `prsid` over
 * the whole of `txt`, collect every token it returns and prepare the
 * "tokenout" row machinery.  Everything is allocated in the multi-call
 * memory context so that it survives until the last row has been returned.
 */
static void
prs_setup_firstcall(FuncCallContext *funcctx, int prsid, text *txt) {
	WParserInfo *parser = findprs(prsid);
	MemoryContext callerctx;
	PrsStorage *store;
	TupleDesc	rowdesc;
	char	   *lexeme = NULL;
	int			lexlen = 0;
	int			lextype = 0;

	callerctx = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

	store = (PrsStorage*) palloc( sizeof(PrsStorage) );
	store->cur = 0;
	store->len = 16;
	store->list = (LexemEntry*) palloc( sizeof(LexemEntry) * store->len );

	/* start the parser on the text payload (varlena header excluded) */
	parser->prs = (void*) DatumGetPointer(
		FunctionCall2(
			&(parser->start_info),
			PointerGetDatum(VARDATA(txt)),
			Int32GetDatum(VARSIZE(txt) - VARHDRSZ)
		)
	);

	/* pull tokens until the parser reports type 0 */
	for (;;) {
		LexemEntry *slot;

		lextype = DatumGetInt32(FunctionCall3(
			&(parser->getlexeme_info),
			PointerGetDatum(parser->prs),
			PointerGetDatum(&lexeme),
			PointerGetDatum(&lexlen)));
		if ( lextype == 0 )
			break;

		/* double the array when it is full */
		if ( store->cur >= store->len ) {
			store->len = 2 * store->len;
			store->list = (LexemEntry*) repalloc(store->list, sizeof(LexemEntry) * store->len);
		}

		slot = &store->list[store->cur];
		slot->lexem = palloc(lexlen + 1);
		memcpy(slot->lexem, lexeme, lexlen);
		slot->lexem[lexlen] = '\0';
		slot->type = lextype;
		store->cur++;
	}

	FunctionCall1(
		&(parser->end_info),
		PointerGetDatum(parser->prs)
	);

	/* rewind: len becomes the number of tokens collected */
	store->len = store->cur;
	store->cur = 0;
	funcctx->user_fctx = (void*) store;

	rowdesc = RelationNameGetTupleDesc("tokenout");
	funcctx->slot = TupleDescGetSlot(rowdesc);
	funcctx->attinmeta = TupleDescGetAttInMetadata(rowdesc);

	MemoryContextSwitchTo(callerctx);
}
/*
 * Per-call work shared by the parse*() functions.
 *
 * Builds a (type, lexeme) row from the current token, frees the token text
 * and advances.  Once every token has been returned, the iteration state is
 * freed and (Datum) 0 is returned to tell the caller that the set is
 * exhausted.
 */
static Datum
prs_process_call(FuncCallContext *funcctx) {
	PrsStorage *store = (PrsStorage*) funcctx->user_fctx;
	LexemEntry *entry;
	char	   *cols[2];
	char		typebuf[16];
	HeapTuple	row;
	Datum		out;

	if ( store->cur >= store->len ) {
		/* end of the set: release everything */
		if ( store->list )
			pfree(store->list);
		pfree(store);
		return (Datum)0;
	}

	entry = &store->list[store->cur];

	sprintf(typebuf, "%d", entry->type);
	cols[0] = typebuf;
	cols[1] = entry->lexem;

	row = BuildTupleFromCStrings(funcctx->attinmeta, cols);
	out = TupleGetDatum(funcctx->slot, row);

	pfree(cols[1]);
	store->cur++;

	return out;
}
PG_FUNCTION_INFO_V1(parse);
Datum parse(PG_FUNCTION_ARGS);

/*
 * SQL set-returning function: split the text in argument 1 into tokens with
 * the parser whose OID is argument 0, one (type, lexeme) row per call.
 */
Datum
parse(PG_FUNCTION_ARGS) {
	FuncCallContext *fctx;
	Datum		row;

	if (SRF_IS_FIRSTCALL()) {
		text	   *doc = PG_GETARG_TEXT_P(1);

		fctx = SRF_FIRSTCALL_INIT();
		prs_setup_firstcall(fctx, PG_GETARG_OID(0), doc);
		PG_FREE_IF_COPY(doc, 1);
	}

	fctx = SRF_PERCALL_SETUP();
	row = prs_process_call(fctx);

	if ( row == (Datum)0 ) {
		SRF_RETURN_DONE(fctx);
	}
	SRF_RETURN_NEXT(fctx, row);
}
PG_FUNCTION_INFO_V1(parse_byname);
Datum parse_byname(PG_FUNCTION_ARGS);

/*
 * SQL set-returning function: split the text in argument 1 into tokens with
 * the parser named by argument 0, one (type, lexeme) row per call.
 */
Datum
parse_byname(PG_FUNCTION_ARGS) {
	FuncCallContext *fctx;
	Datum		row;

	if (SRF_IS_FIRSTCALL()) {
		text	   *prsname = PG_GETARG_TEXT_P(0);
		text	   *doc = PG_GETARG_TEXT_P(1);

		fctx = SRF_FIRSTCALL_INIT();
		prs_setup_firstcall(fctx, name2id_prs(prsname), doc);
		PG_FREE_IF_COPY(prsname, 0);
		PG_FREE_IF_COPY(doc, 1);
	}

	fctx = SRF_PERCALL_SETUP();
	row = prs_process_call(fctx);

	if ( row == (Datum)0 ) {
		SRF_RETURN_DONE(fctx);
	}
	SRF_RETURN_NEXT(fctx, row);
}
PG_FUNCTION_INFO_V1(parse_current);
Datum parse_current(PG_FUNCTION_ARGS);

/*
 * SQL set-returning function: split the text in argument 0 into tokens with
 * the current parser, one (type, lexeme) row per call.  When no current
 * parser has been chosen yet, the one named "default" becomes current.
 */
Datum
parse_current(PG_FUNCTION_ARGS) {
	FuncCallContext *fctx;
	Datum		row;

	if (SRF_IS_FIRSTCALL()) {
		text	   *doc = PG_GETARG_TEXT_P(0);

		fctx = SRF_FIRSTCALL_INIT();
		if ( current_parser_id == InvalidOid )
			current_parser_id = name2id_prs( char2text("default") );
		prs_setup_firstcall(fctx, current_parser_id, doc);
		PG_FREE_IF_COPY(doc, 0);
	}

	fctx = SRF_PERCALL_SETUP();
	row = prs_process_call(fctx);

	if ( row == (Datum)0 ) {
		SRF_RETURN_DONE(fctx);
	}
	SRF_RETURN_NEXT(fctx, row);
}
PG_FUNCTION_INFO_V1(headline);
Datum headline(PG_FUNCTION_ARGS);

/*
 * SQL function: build a headline for a document.
 *
 * Arguments: 0 = OID of the configuration, 1 = document text, 2 = query,
 * 3 = optional options text (absent or a NULL pointer means no options).
 *
 * The document is parsed with hlparsetext(), the headline function of the
 * configuration's parser marks the words to show, and genhl() renders the
 * result, which is returned as text.
 */
Datum
headline(PG_FUNCTION_ARGS) {
	TSCfgInfo  *cfg = findcfg(PG_GETARG_OID(0));
	text	   *doc = PG_GETARG_TEXT_P(1);
	QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
	text	   *options = NULL;
	HLPRSTEXT	hl;
	text	   *result;
	WParserInfo *parser;

	if ( PG_NARGS() > 3 && PG_GETARG_POINTER(3) )
		options = PG_GETARG_TEXT_P(3);

	parser = findprs(cfg->prs_id);

	memset(&hl, 0, sizeof(HLPRSTEXT));
	hl.lenwords = 32;
	hl.words = (HLWORD *) palloc(sizeof(HLWORD) * hl.lenwords);

	hlparsetext(cfg, &hl, query, VARDATA(doc), VARSIZE(doc) - VARHDRSZ);

	/* let the parser's headline function choose and mark the fragment */
	FunctionCall3(
		&(parser->headline_info),
		PointerGetDatum(&hl),
		PointerGetDatum(options),
		PointerGetDatum(query)
	);

	result = genhl(&hl);

	PG_FREE_IF_COPY(doc, 1);
	PG_FREE_IF_COPY(query, 2);
	if ( options != NULL ) {
		PG_FREE_IF_COPY(options, 3);
	}

	pfree(hl.words);
	pfree(hl.startsel);
	pfree(hl.stopsel);

	PG_RETURN_POINTER(result);
}
PG_FUNCTION_INFO_V1(headline_byname);
Datum headline_byname(PG_FUNCTION_ARGS);

/*
 * SQL function: like headline(), but argument 0 is the configuration name.
 * The name is resolved to an OID and the remaining arguments are passed
 * through; a missing fourth argument is passed as a NULL pointer.
 */
Datum
headline_byname(PG_FUNCTION_ARGS) {
	text	   *cfgname = PG_GETARG_TEXT_P(0);
	Datum		result;

	result = DirectFunctionCall4(
		headline,
		ObjectIdGetDatum(name2id_cfg(cfgname)),
		PG_GETARG_DATUM(1),
		PG_GETARG_DATUM(2),
		( PG_NARGS() > 3 ) ? PG_GETARG_DATUM(3) : PointerGetDatum(NULL)
	);

	PG_FREE_IF_COPY(cfgname, 0);
	PG_RETURN_DATUM(result);
}
PG_FUNCTION_INFO_V1(headline_current);
Datum headline_current(PG_FUNCTION_ARGS);

/*
 * SQL function: like headline(), using the configuration reported by
 * get_currcfg().  Arguments: 0 = document, 1 = query, 2 = optional options
 * (passed on as a NULL pointer when absent).
 */
Datum
headline_current(PG_FUNCTION_ARGS) {
	Datum		result;

	result = DirectFunctionCall4(
		headline,
		ObjectIdGetDatum(get_currcfg()),
		PG_GETARG_DATUM(0),
		PG_GETARG_DATUM(1),
		( PG_NARGS() > 2 ) ? PG_GETARG_DATUM(2) : PointerGetDatum(NULL)
	);

	PG_RETURN_DATUM(result);
}
#ifndef __WPARSER_H__
#define __WPARSER_H__
#include "postgres.h"
#include "fmgr.h"
/*
 * Description of one word parser, as loaded from a pg_ts_parser row by
 * init_prs().
 */
typedef struct {
Oid prs_id; /* OID of the pg_ts_parser row; set last, so it stays 0 if loading failed */
FmgrInfo start_info; /* resolved prs_start function */
FmgrInfo getlexeme_info; /* resolved prs_nexttoken function */
FmgrInfo end_info; /* resolved prs_end function */
FmgrInfo headline_info; /* resolved prs_headline function */
Oid lextype; /* OID of the prs_lextype function (called by OID, not resolved here) */
void *prs; /* value returned by the start function, handed back to the nexttoken and end functions */
} WParserInfo;
/* fill *prs from the pg_ts_parser row with the given OID */
void init_prs(Oid id, WParserInfo *prs);
/* return the cached description for the parser OID, loading it on first use */
WParserInfo* findprs(Oid id);
/* translate a parser name into its OID */
Oid name2id_prs(text *name);
/* discard all cached parser descriptions */
void reset_prs(void);
/*
 * One token type as reported by a parser's lextype function.  The function
 * returns an array of these, terminated by an entry whose lexid is 0.
 */
typedef struct {
int lexid; /* token type id; 0 marks the end of the array */
char *alias; /* short name of the token type */
char *descr; /* description of the token type */
} LexDescr;
#endif
/*
* default word parser
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include "postgres.h"
#include "utils/builtins.h"
#include "dict.h"
#include "wparser.h"
#include "common.h"
#include "ts_cfg.h"
#include "wordparser/parser.h"
#include "wordparser/deflex.h"
PG_FUNCTION_INFO_V1(prsd_lextype);
Datum prsd_lextype(PG_FUNCTION_ARGS);

/*
 * lextype function of the default parser: return a palloc'ed LexDescr array
 * describing token types 1..LASTNUM (alias and description copied from
 * tok_alias[] and lex_descr[]), followed by a terminator with lexid 0.
 */
Datum
prsd_lextype(PG_FUNCTION_ARGS) {
	LexDescr   *types = (LexDescr*) palloc(sizeof(LexDescr) * (LASTNUM + 1));
	int			n;

	/* slot n holds token type n+1 */
	for (n = 0; n < LASTNUM; n++) {
		types[n].lexid = n + 1;
		types[n].alias = pstrdup(tok_alias[n + 1]);
		types[n].descr = pstrdup(lex_descr[n + 1]);
	}

	types[LASTNUM].lexid = 0;

	PG_RETURN_POINTER(types);
}
PG_FUNCTION_INFO_V1(prsd_start);
Datum prsd_start(PG_FUNCTION_ARGS);

/*
 * start function of the default parser: begin scanning the buffer given as
 * argument 0 with the byte length given as argument 1.  The returned
 * "parser state" is always a NULL pointer.
 */
Datum
prsd_start(PG_FUNCTION_ARGS) {
	char	   *input = (char*) PG_GETARG_POINTER(0);

	start_parse_str(input, PG_GETARG_INT32(1));

	PG_RETURN_POINTER(NULL);
}
PG_FUNCTION_INFO_V1(prsd_getlexeme);
Datum prsd_getlexeme(PG_FUNCTION_ARGS);

/*
 * nexttoken function of the default parser: run the scanner once, store the
 * token pointer and length through arguments 1 and 2, and return the token
 * type.  Argument 0 (the parser state) is not used.
 */
Datum
prsd_getlexeme(PG_FUNCTION_ARGS) {
	char	  **tokout = (char**) PG_GETARG_POINTER(1);
	int		   *lenout = (int*) PG_GETARG_POINTER(2);
	int			toktype;

	/* the scanner must run before token/tokenlen are read */
	toktype = tsearch2_yylex();

	*tokout = token;
	*lenout = tokenlen;

	PG_RETURN_INT32(toktype);
}
PG_FUNCTION_INFO_V1(prsd_end);
Datum prsd_end(PG_FUNCTION_ARGS);

/*
 * end function of the default parser: release the scanner state.
 * Argument 0 (the parser state) is not used.
 */
Datum
prsd_end(PG_FUNCTION_ARGS) {
	end_parse();

	PG_RETURN_VOID();
}
/*
 * Token-type classes used while building a headline.  The numbers are
 * token type ids as returned by the scanner.
 * NOTE(review): the ids are hard-coded; presumably they match the token
 * constants of wordparser/deflex.h -- confirm before changing either side.
 */
#define LEAVETOKEN(x) ( (x)==12 )
#define COMPLEXTOKEN(x) ( (x)==5 || (x)==15 || (x)==16 || (x)==17 )
#define ENDPUNCTOKEN(x) ( (x)==12 )
#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 )
/* tokens that prsd_headline() flags with replace=1 */
#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 )
/* tokens that prsd_headline() does not count as words */
#define NONWORDTOKEN(x) ( (x)==12 || HLIDIGNORE(x) )
/* tokens that prsd_headline() avoids as the last token of the headline */
#define NOENDTOKEN(x) ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) )
/* window of headline words handed to TS_execute() through checkcondition_HL() */
typedef struct {
	HLWORD	   *words;	/* first word of the window */
	int			len;	/* number of words in the window */
} hlCheck;

/*
 * TS_execute() callback: report whether the query operand `val` is attached
 * to any word of the window described by `checkval` (an hlCheck).
 */
static bool
checkcondition_HL(void *checkval, ITEM *val) {
	hlCheck    *window = (hlCheck*) checkval;
	HLWORD	   *word = window->words;
	int			left;

	for (left = window->len; left > 0; left--, word++) {
		if ( word->item == val )
			return true;
	}

	return false;
}
/*
 * Find the next "cover": a span of words [*p, *q] at or after position *p
 * that satisfies the query.
 *
 * On entry *p is the position to start from.  For every VAL operand of the
 * query the first matching word at or after that position is located; the
 * right edge *q is the largest such index.  The left edge *p is then the
 * smallest index at which an operand is found when scanning backwards from
 * *q.  The span is accepted when TS_execute() succeeds on it; otherwise the
 * search is repeated from one word past the rejected left edge.
 *
 * Returns true with *p and *q set to the cover, or false when no cover
 * exists (in which case *p and *q hold no meaningful value).
 *
 * Two differences from a naive implementation are deliberate:
 *  - "no operand found" is signalled by *q == -1 rather than 0, so that a
 *    cover whose right edge is word 0 is not mistaken for "nothing found";
 *  - retries are done in a loop instead of by recursion, so a long document
 *    with many rejected spans cannot exhaust the stack.
 */
static bool
hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) {
	for (;;) {
		int			i, j;
		ITEM	   *item = GETQUERY(query);
		int			pos = *p;
		hlCheck		ch;

		*q = -1;
		*p = 0x7fffffff;

		/* right edge: furthest of the first occurrences of each operand */
		for (j = 0; j < query->size; j++, item++) {
			if ( item->type != VAL )
				continue;
			for (i = pos; i < prs->curwords; i++) {
				if ( prs->words[i].item == item ) {
					if ( i > *q )
						*q = i;
					break;
				}
			}
		}

		if ( *q < 0 )
			return false;	/* no operand occurs at or after pos */

		/* left edge: nearest occurrence of each operand, scanning back from *q */
		item = GETQUERY(query);
		for (j = 0; j < query->size; j++, item++) {
			if ( item->type != VAL )
				continue;
			for (i = *q; i >= pos; i--) {
				if ( prs->words[i].item == item ) {
					if ( i < *p )
						*p = i;
					break;
				}
			}
		}

		if ( *p > *q )
			return false;

		ch.words = &(prs->words[*p]);
		ch.len = *q - *p + 1;
		if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) )
			return true;

		/* span rejected: try again starting one word further right */
		(*p)++;
	}
}
PG_FUNCTION_INFO_V1(prsd_headline);
Datum prsd_headline(PG_FUNCTION_ARGS);
/*
 * headline function of the default parser.
 *
 * Arguments: 0 = HLPRSTEXT holding the parsed document, 1 = options text
 * (may be a NULL pointer), 2 = query.
 *
 * Chooses the range of words [bestb, beste] to show, flags the words inside
 * it (in / selected / skip / replace) and sets the start and stop selection
 * strings in *prs.  Returns the same HLPRSTEXT pointer.
 */
Datum
prsd_headline(PG_FUNCTION_ARGS) {
HLPRSTEXT *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0);
text *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */
QUERYTYPE *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */
/* from opt + start and end tag */
/* defaults, used when the option is not given */
int min_words=15;
int max_words=35;
int shortword=3;
int p=0,q=0;
/* best range found so far and its score; -1 means nothing chosen yet */
int bestb=-1,beste=-1;
int bestlen=-1;
int pose=0, poslen, curlen;
int i;
/*config*/
prs->startsel=NULL;
prs->stopsel=NULL;
if ( opt ) {
Map *map,*mptr;
parse_cfgdict(opt,&map);
mptr=map;
/* walk the key/value array up to the entry with a NULL key; option names are matched case-insensitively */
while(mptr && mptr->key) {
if ( strcasecmp(mptr->key,"MaxWords")==0 )
max_words=pg_atoi(mptr->value,4,1);
else if ( strcasecmp(mptr->key,"MinWords")==0 )
min_words=pg_atoi(mptr->value,4,1);
else if ( strcasecmp(mptr->key,"ShortWord")==0 )
shortword=pg_atoi(mptr->value,4,1);
else if ( strcasecmp(mptr->key,"StartSel")==0 )
prs->startsel=pstrdup(mptr->value);
else if ( strcasecmp(mptr->key,"StopSel")==0 )
prs->stopsel=pstrdup(mptr->value);
/* each entry's strings are released as soon as they have been consumed */
pfree(mptr->key);
pfree(mptr->value);
mptr++;
}
pfree(map);
/* sanity checks; note they only run when an options string was supplied */
if ( min_words >= max_words )
elog(ERROR,"Must be MinWords < MaxWords");
if ( min_words<=0 )
elog(ERROR,"Must be MinWords > 0");
if ( shortword<0 )
elog(ERROR,"Must be ShortWord >= 0");
}
/* examine every cover [p, q] of the query in turn, keeping the best candidate range */
while( hlCover(prs,query,&p,&q) ) {
/* find cover len in words */
/* curlen counts word tokens, poslen counts distinct (non-repeated) query hits */
curlen=0;
poslen=0;
for(i=p;i<=q && curlen < max_words ; i++) {
if ( !NONWORDTOKEN(prs->words[i].type) )
curlen++;
if ( prs->words[i].item && !prs->words[i].repeated )
poslen++;
pose=i;
}
/* beste is only read here when bestlen >= 0 has already been established by poslen<bestlen */
if ( poslen<bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) {
/* best already found, so try one more cover */
p++;
continue;
}
if ( curlen < max_words ) { /* find good end */
/* extend to the right until min_words is reached on a token that may end the headline */
for(i=i-1 ;i<prs->curwords && curlen<max_words; i++) {
if ( i!=q ) {
if ( !NONWORDTOKEN(prs->words[i].type) )
curlen++;
if ( prs->words[i].item && !prs->words[i].repeated )
poslen++;
}
pose=i;
if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword )
continue;
if ( curlen>=min_words )
break;
}
} else { /* shorter cover :((( */
/* the cover alone reached max_words: walk back towards min_words looking for an acceptable end */
for(;curlen>min_words;i--) {
if ( !NONWORDTOKEN(prs->words[i].type) )
curlen--;
if ( prs->words[i].item && !prs->words[i].repeated )
poslen--;
pose=i;
if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword )
continue;
break;
}
}
/*
 * Take this candidate when nothing was chosen yet, when it has more
 * query hits and an acceptable end, or when it has an acceptable end
 * and the current best does not.
 */
if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) ||
( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword) &&
(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) {
bestb=p; beste=pose;
bestlen=poslen;
}
p++;
}
/* no cover at all: fall back to the first min_words words of the document */
if ( bestlen<0 ) {
curlen=0;
poslen=0;
for(i=0;i<prs->curwords && curlen<min_words ; i++) {
if ( !NONWORDTOKEN(prs->words[i].type) )
curlen++;
pose=i;
}
bestb=0; beste=pose;
}
/* flag every word of the chosen range */
for(i=bestb;i<=beste;i++) {
if ( prs->words[i].item )
prs->words[i].selected=1;
if ( prs->words[i].repeated )
prs->words[i].skip=1;
if ( HLIDIGNORE(prs->words[i].type) )
prs->words[i].replace=1;
prs->words[i].in=1;
}
/* default selection markers when the options did not provide them */
if (!prs->startsel)
prs->startsel=pstrdup("<b>");
if (!prs->stopsel)
prs->stopsel=pstrdup("</b>");
prs->startsellen=strlen(prs->startsel);
prs->stopsellen=strlen(prs->stopsel);
PG_RETURN_POINTER(prs);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment