stopword.c 1.89 KB
Newer Older
Teodor Sigaev's avatar
Teodor Sigaev committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101
/* 
 * stopword library
 * Teodor Sigaev <teodor@sigaev.ru>
 */
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "postgres.h"
#include "common.h"
#include "dict.h"

#define STOPBUFLEN	4096

/*
 * Convert a NUL-terminated string to lower case in place.
 *
 * Each byte is passed through tolower() as an unsigned char value, so
 * bytes above 0x7F are never handed to tolower() as negative numbers.
 * The buffer given in 'str' is modified; the same pointer is returned
 * for convenience.
 */
char*
lowerstr(char *str) {
	unsigned char *cur;

	for ( cur=(unsigned char*)str; *cur!='\0'; cur++ )
		*cur = tolower(*cur);

	return str;
}

/*
 * Release everything owned by a stop list and reset it to all-zero.
 *
 * Frees each of the s->len word strings, then the pointer array itself,
 * and finally clears the whole StopList structure with memset (so every
 * field, not just stop/len, ends up zero).
 *
 * The pointer array is freed exactly once, after all words have been
 * released, and it is freed even when the list holds no words.  The loop
 * is bounded by s->len alone: the array is not required to carry a NULL
 * terminator, so slots beyond len are never read.
 */
void
freestoplist(StopList *s) {
	if ( s->stop ) {
		char **ptr=s->stop;

		while( s->len >0 ) {
			free(*ptr);	/* free(NULL) is a no-op, so empty slots are harmless */
			ptr++; s->len--;
		}
		free(s->stop);
	}
	memset(s,0,sizeof(StopList));
}

/*
 * Error-path helper for readstoplist: release a partially built word
 * array and leave the StopList zeroed.
 *
 * 'stop' is the local array being built; its first s->len slots hold
 * strdup'ed strings.  'stop' may be NULL when nothing was allocated yet
 * (s->len is 0 in that case).
 */
static void
dropstopwords(StopList *s, char **stop) {
	while ( s->len > 0 ) {
		s->len--;
		free(stop[s->len]);
	}
	free(stop);
	memset(s,0,sizeof(StopList));
}

/*
 * Load a stop-word list from a file, one word per line.
 *
 * in - text value holding the file name; if it is NULL or has an empty
 *      payload no file is read and the list ends up empty
 *      (s->stop == NULL, s->len == 0).
 * s  - list to fill.  s->len and s->stop are overwritten; s->wordop, when
 *      set, is applied to every word and its return value is what gets
 *      stored.
 *
 * Empty lines are skipped.  Words are allocated with strdup and the
 * pointer array with realloc, growing by doubling from 16 slots.
 *
 * Failure to open the file or to allocate memory is reported through
 * elog(ERROR, ...).  NOTE(review): this relies on elog(ERROR) not
 * returning to the caller - confirm against the PostgreSQL version in
 * use.  On allocation failure the words read so far are released and the
 * StopList is zeroed before the error is raised.
 *
 * Lines longer than STOPBUFLEN-1 bytes are delivered by fgets in several
 * pieces and therefore end up as several entries.
 */
void
readstoplist(text *in, StopList *s) {
	char **stop=NULL;
	s->len=0;
	if ( in && VARSIZE(in) - VARHDRSZ > 0 ) {
		char *filename=text2char(in);
		FILE	*hin=NULL;
		char 	buf[STOPBUFLEN];
		int reallen=0;

		if ( (hin=fopen(filename,"r")) == NULL )
			elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno));
		while( fgets(buf,STOPBUFLEN,hin) ) {
			size_t linelen=strlen(buf);

			/*
			 * fgets keeps the newline when there is one; strip only that.
			 * The last line of a file may lack it, and must not lose its
			 * final character.
			 */
			if ( linelen>0 && buf[linelen-1]=='\n' )
				buf[--linelen] = '\0';
			if ( linelen==0 ) continue;

			if ( s->len>= reallen ) {
				char **tmp;
				reallen=(reallen) ? reallen*2 : 16;
				tmp=(char**)realloc((void*)stop, sizeof(char*)*reallen);
				if (!tmp) {
					/* realloc failed: the old array in 'stop' is still valid */
					dropstopwords(s, stop);
					fclose(hin);
					elog(ERROR,"Not enough memory");
				}
				stop=tmp;
			}

			stop[s->len]=strdup(buf);
			if ( !stop[s->len] ) {
				/* slot s->len is NULL and not counted; free the earlier ones */
				dropstopwords(s, stop);
				fclose(hin);
				elog(ERROR,"Not enough memory");
			}
			if ( s->wordop )
				stop[s->len]=(s->wordop)(stop[s->len]);

			(s->len)++;
		}
		fclose(hin);
		pfree(filename);
	}
	s->stop=stop;
}

/*
 * qsort/bsearch comparator for arrays of C strings.
 *
 * 'a' and 'b' each point at an array element, i.e. at a char pointer;
 * the strings they refer to are compared with strcmp and its result is
 * returned unchanged.
 */
static int
comparestr(const void *a, const void *b) {
	const char *lhs = *(const char *const *)a;
	const char *rhs = *(const char *const *)b;

	return strcmp(lhs, rhs);
}

/*
 * Sort the words of a stop list in place, ordered by comparestr
 * (strcmp order).  A list without an array or without entries is left
 * untouched.
 */
void
sortstoplist(StopList *s) {
	if ( !s->stop || s->len<=0 )
		return;

	qsort(s->stop, s->len, sizeof(char*), comparestr);
}

/*
 * Report whether 'key' is present in the stop list.
 *
 * When s->wordop is set it is applied to 'key' first and its return
 * value becomes the search key; this happens even if the list is empty.
 * The lookup uses bsearch with comparestr, so the array must already be
 * ordered accordingly (sortstoplist produces that order).
 *
 * Returns true when a matching word is found, false otherwise, including
 * when the list has no array or no entries.
 */
bool
searchstoplist(StopList *s, char *key) {
	if ( s->wordop )
		key = s->wordop(key);

	if ( !s->stop || s->len<=0 )
		return false;

	if ( bsearch(&key, s->stop, s->len, sizeof(char*), comparestr) )
		return true;

	return false;
}