Commit 18d99bc2 authored by Peter Eisentraut

Update soundex to new fmgr interface and fix algorithm

parent baa3a09b
# #
# $Header: /cvsroot/pgsql/contrib/soundex/Attic/Makefile,v 1.7 2000/07/09 13:13:33 petere Exp $ # $Header: /cvsroot/pgsql/contrib/soundex/Attic/Makefile,v 1.8 2000/10/04 19:25:34 petere Exp $
# #
subdir = contrib/soundex subdir = contrib/soundex
top_builddir = ../.. top_builddir = ../..
include ../../src/Makefile.global include $(top_builddir)/src/Makefile.global
NAME := soundex NAME := soundex
SONAME := $(NAME)$(DLSUFFIX) SONAME := $(NAME)$(DLSUFFIX)
...@@ -14,7 +14,7 @@ CFLAGS += -I. $(CFLAGS_SL) ...@@ -14,7 +14,7 @@ CFLAGS += -I. $(CFLAGS_SL)
all: $(SONAME) $(NAME).sql all: $(SONAME) $(NAME).sql
$(NAME).sql: $(NAME).sql.in $(NAME).sql: $(NAME).sql.in
sed -e 's:MODULE_PATHNAME:$(datadir)/contrib/$(SONAME):g' < $< > $@ sed 's,@MODULE_FILENAME@,$(libdir)/contrib/$(SONAME),g' $< >$@
install: all installdirs install: all installdirs
$(INSTALL_SHLIB) $(SONAME) $(libdir)/contrib $(INSTALL_SHLIB) $(SONAME) $(libdir)/contrib
...@@ -28,7 +28,7 @@ uninstall: ...@@ -28,7 +28,7 @@ uninstall:
rm -f $(libdir)/contrib/$(SONAME) $(datadir)/contrib/$(NAME).sql $(docdir)/contrib/README.$(NAME) rm -f $(libdir)/contrib/$(SONAME) $(datadir)/contrib/$(NAME).sql $(docdir)/contrib/README.$(NAME)
clean distclean maintainer-clean: clean distclean maintainer-clean:
rm -f $(SONAME) $(NAME).sql rm -f $(SONAME) $(NAME).o $(NAME).sql
depend dep: depend dep:
$(CC) -MM -MG $(CFLAGS) *.c > depend $(CC) -MM -MG $(CFLAGS) *.c > depend
......
This directory contains a module that implements the "Soundex" code as
a PostgreSQL user-defined function. The Soundex system is a method of
matching similar-sounding names (or any words) to the same code. It
was initially used by the United States Census in 1880, 1900, and
1910, but it has little use beyond English names (or the English
pronunciation of names), and it is not a linguistic tool.
To install it, first configure the main source tree, then run
"make; make install" in this directory. Finally, load the function
definition with psql:
psql -f PREFIX/share/contrib/soundex.sql
The following are some usage examples:
SELECT text_soundex('hello world!'); SELECT text_soundex('hello world!');
...@@ -50,4 +64,3 @@ WHERE text_sx_eq(nm,'john')\g ...@@ -50,4 +64,3 @@ WHERE text_sx_eq(nm,'john')\g
SELECT * SELECT *
from s from s
where s.nm #= 'john'; where s.nm #= 'john';
/*****************************************************************************/ /* $Header: /cvsroot/pgsql/contrib/soundex/Attic/soundex.c,v 1.7 2000/10/04 19:25:34 petere Exp $ */
/* soundex.c */ #include "postgres.h"
/*****************************************************************************/ #include "fmgr.h"
#include "utils/builtins.h"
#include <ctype.h> #include <ctype.h>
#include <string.h> #include <string.h>
#include <stdio.h> #include <stdio.h>
#include "postgres.h" /* for char16, etc. */
#include "utils/palloc.h" /* for palloc */ Datum
text_soundex(PG_FUNCTION_ARGS);
/* prototypes for soundex functions */ static void
text *text_soundex(text *t); soundex(const char *instr, char *outstr);
char *soundex(char *instr, char *outstr);
text * #define SOUNDEX_LEN 4
text_soundex(text *t)
{
text *new_t;
char outstr[6 + 1]; /* max length of soundex is 6 */
char *instr;
/* make a null-terminated string */ #define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
instr = palloc(VARSIZE(t) + 1); #define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str)))
memcpy(instr, VARDATA(t), VARSIZE(t) - VARHDRSZ);
instr[VARSIZE(t) - VARHDRSZ] = (char) 0;
/* load soundex into outstr */
soundex(instr, outstr);
/* Now the outstr contains the soundex of instr */ #ifndef SOUNDEX_TEST
/* copy outstr to new_t */ /*
new_t = (text *) palloc(strlen(outstr) + VARHDRSZ); * SQL function: text_soundex(text) returns text
memset(new_t, 0, strlen(outstr) + 1); */
VARSIZE(new_t) = strlen(outstr) + VARHDRSZ; Datum
memcpy((void *) VARDATA(new_t), text_soundex(PG_FUNCTION_ARGS)
(void *) outstr, {
strlen(outstr)); char outstr[SOUNDEX_LEN + 1];
char *arg;
arg = _textout(PG_GETARG_TEXT_P(0));
/* free instr */ soundex(arg, outstr);
pfree(instr);
return (new_t); PG_RETURN_TEXT_P(_textin(outstr));
} }
#endif /* not SOUNDEX_TEST */
/* ABCDEFGHIJKLMNOPQRSTUVWXYZ */
static const char *soundex_table = "01230120022455012623010202";
#define soundex_code(letter) soundex_table[toupper(letter) - 'A']
char * static void
soundex(char *instr, char *outstr) soundex(const char *instr, char *outstr)
{ {
/* ABCDEFGHIJKLMNOPQRSTUVWXYZ */ int count;
char *table = "01230120022455012623010202";
int count = 0;
AssertArg(instr);
AssertArg(outstr);
outstr[SOUNDEX_LEN] = '\0';
/* Skip leading non-alphabetic characters */
while (!isalpha(instr[0]) && instr[0]) while (!isalpha(instr[0]) && instr[0])
++instr; ++instr;
/* No string left */
if (!instr[0]) if (!instr[0])
{ /* Hey! Where'd the string go? */
outstr[0] = (char) 0;
return outstr;
}
if (toupper(instr[0]) == 'P' && toupper(instr[1]) == 'H')
{ {
instr[0] = 'F'; outstr[0] = (char) 0;
instr[1] = 'A'; return;
} }
/* Take the first letter as is */
*outstr++ = (char) toupper(*instr++); *outstr++ = (char) toupper(*instr++);
while (*instr && count < 5) count = 1;
while (*instr && count < SOUNDEX_LEN)
{ {
if (isalpha(*instr) && *instr != *(instr - 1)) if (isalpha(*instr) && soundex_code(*instr) != soundex_code(*(instr - 1)))
{ {
*outstr = table[toupper(instr[0]) - 'A']; *outstr = soundex_code(instr[0]);
if (*outstr != '0') if (*outstr != '0')
{ {
++outstr; ++outstr;
...@@ -83,6 +83,33 @@ soundex(char *instr, char *outstr) ...@@ -83,6 +83,33 @@ soundex(char *instr, char *outstr)
++instr; ++instr;
} }
*outstr = '\0'; /* Fill with 0's */
return (outstr); while (count < SOUNDEX_LEN)
{
*outstr = '0';
++outstr;
++count;
}
}
#ifdef SOUNDEX_TEST
/*
 * Standalone test driver, compiled only when SOUNDEX_TEST is defined.
 *
 * Passes the first command-line argument to soundex() and prints the
 * input together with the resulting code on stdout.  Returns 1 after
 * writing a usage message to stderr when no argument is given, and 0
 * otherwise.
 */
int
main (int argc, char *argv[])
{
if (argc < 2)
{
/* No input string supplied: show how to invoke the program and fail */
fprintf(stderr, "usage: %s string\n", argv[0]);
return 1;
}
else
{
/* Room for SOUNDEX_LEN code characters plus the terminating NUL */
char output[SOUNDEX_LEN + 1];
soundex(argv[1], output);
printf("soundex(%s) = %s\n", argv[1], output);
return 0;
}
/* NOTE(review): the second brace below looks like residue from the other diff column -- confirm */
} }
#endif /* SOUNDEX_TEST */
CREATE FUNCTION text_soundex(text) RETURNS text CREATE FUNCTION text_soundex(text) RETURNS text
AS 'MODULE_PATHNAME' LANGUAGE 'c'; AS '@MODULE_FILENAME@', 'text_soundex' LANGUAGE 'newC';
CREATE FUNCTION soundex(text) RETURNS text
AS '@MODULE_FILENAME@', 'text_soundex' LANGUAGE 'newC';
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment