Commit 8ace5114 authored by Bruce Momjian's avatar Bruce Momjian

This directory contains a module that implements the "Metaphone" code as
a PostgreSQL user-defined function.  The Metaphone system is a method of
matching similar sounding names (or any words) to the same code.

Metaphone was invented by Lawrence Philips as an improvement to the popular
name-hashing routine, Soundex.

This metaphone code is from Michael Kuhn, and is detailed at
   http://aspell.sourceforge.net/metaphone/metaphone-kuhn.txt

Joel Burton
parent 2c6373fa
# $Header: /cvsroot/pgsql/contrib/Makefile,v 1.18 2001/03/14 00:57:43 tgl Exp $
# $Header: /cvsroot/pgsql/contrib/Makefile,v 1.19 2001/05/09 23:00:44 momjian Exp $
subdir = contrib
top_builddir = ..
......@@ -15,6 +15,7 @@ WANTED_DIRS = \
lo \
mSQL-interface \
mac \
metaphone \
miscutil \
noupdate \
oid2name \
......
......@@ -72,6 +72,10 @@ mac -
Support functions for MAC address types
by Lawrence E. Rosenman <ler@lerctr.org>
metaphone -
Improved Soundex function
by Joel Burton <jburton@scw.org>
miscutil -
PostgreSQL assert checking and various utility functions
by Massimo Dal Zotto <dz@cs.unitn.it>
......
#
# $Header: /cvsroot/pgsql/contrib/metaphone/Attic/Makefile,v 1.1 2001/05/09 23:00:44 momjian Exp $
#
# Build, install and clean rules for the contrib "metaphone" module.
# Compiler, installer and directory variables ($(CC), $(INSTALL_*), $(libdir),
# $(DLSUFFIX), ...) are expected to come from src/Makefile.global.
subdir = contrib/metaphone
top_builddir = ../..
include $(top_builddir)/src/Makefile.global

NAME := metaphone
SONAME := $(NAME)$(DLSUFFIX)

override CPPFLAGS += -I$(srcdir)
override CFLAGS += $(CFLAGS_SL)

# Command-style targets never correspond to files.  "depend" is left out on
# purpose: it names the real file that is written below and then included.
.PHONY: all install installdirs uninstall clean distclean maintainer-clean dep

all: $(SONAME) $(NAME).sql

# Fill in the installed location of the shared object in the SQL template.
$(NAME).sql: $(NAME).sql.in
	sed 's,@MODULE_FILENAME@,$(libdir)/contrib/$(SONAME),g' $< >$@

install: all installdirs
	$(INSTALL_SHLIB) $(SONAME) $(libdir)/contrib
	$(INSTALL_DATA) $(NAME).sql $(datadir)/contrib
	$(INSTALL_DATA) README.$(NAME) $(docdir)/contrib

installdirs:
	$(mkinstalldirs) $(libdir)/contrib $(datadir)/contrib $(docdir)/contrib

uninstall:
	rm -f $(libdir)/contrib/$(SONAME) $(datadir)/contrib/$(NAME).sql $(docdir)/contrib/README.$(NAME)

clean distclean maintainer-clean:
	rm -f $(SONAME) $(NAME).o $(NAME).sql

# Write header dependencies for every C file here into the file "depend".
depend dep:
	$(CC) -MM -MG $(CFLAGS) *.c > depend

# Pull in the generated dependencies only when the file already exists.
ifeq (depend,$(wildcard depend))
include depend
endif
This directory contains a module that implements the "Metaphone" code as
a PostgreSQL user-defined function. The Metaphone system is a method of
matching similar sounding names (or any words) to the same code.
Metaphone was invented by Lawrence Philips as an improvement to the popular
name-hashing routine, Soundex.
This metaphone code is from Michael Kuhn, and is detailed at
http://aspell.sourceforge.net/metaphone/metaphone-kuhn.txt
Code for this (including this help file!) was liberally borrowed from
the soundex() module for PostgreSQL.
There are two functions:
text_metaphone(text) : returns hash of a name
text_metaphone(text,int) : returns hash (maximum length of int) of name
---
To install it, first configure the main source tree, then run make;
make install in this directory. Finally, load the function definition
with psql:
psql -f PREFIX/share/contrib/metaphone.sql
The following are some usage examples:
SELECT text_metaphone('hello world!');
SELECT text_metaphone('hello world!', 4);
CREATE TABLE s (nm text)\g
insert into s values ('john')\g
insert into s values ('joan')\g
insert into s values ('wobbly')\g
select * from s
where text_metaphone(nm) = text_metaphone('john')\g
select nm from s a, s b
where text_metaphone(a.nm) = text_metaphone(b.nm)
and a.oid <> b.oid\g
CREATE FUNCTION text_mp_eq(text, text) RETURNS bool AS
'select text_metaphone($1) = text_metaphone($2)'
LANGUAGE 'sql'\g
CREATE FUNCTION text_mp_lt(text,text) RETURNS bool AS
'select text_metaphone($1) < text_metaphone($2)'
LANGUAGE 'sql'\g
CREATE FUNCTION text_mp_gt(text,text) RETURNS bool AS
'select text_metaphone($1) > text_metaphone($2)'
LANGUAGE 'sql';
CREATE FUNCTION text_mp_le(text,text) RETURNS bool AS
'select text_metaphone($1) <= text_metaphone($2)'
LANGUAGE 'sql';
CREATE FUNCTION text_mp_ge(text,text) RETURNS bool AS
'select text_metaphone($1) >= text_metaphone($2)'
LANGUAGE 'sql';
CREATE FUNCTION text_mp_ne(text,text) RETURNS bool AS
'select text_metaphone($1) <> text_metaphone($2)'
LANGUAGE 'sql';
DROP OPERATOR #= (text,text)\g
CREATE OPERATOR #= (leftarg=text, rightarg=text, procedure=text_mp_eq,
commutator= #=)\g
SELECT *
FROM s
WHERE text_mp_eq(nm,'pillsbury')\g
SELECT *
from s
where s.nm #= 'pillsbury';
#include "postgres.h"
#include "fmgr.h"
#include "utils/builtins.h"
#include <stdio.h>
#include <string.h>
#include <ctype.h>
/* SQL-callable entry points (defined near the end of this file). */
Datum text_metaphone(PG_FUNCTION_ARGS);
Datum text_metaphone_length(PG_FUNCTION_ARGS);

/* Core encoder: appends the code for `name` to `metaph`, limited by `metalen`. */
void phonetic(char *name, char *metaph, int metalen);

/* Matches the length limit that text_metaphone() hands to phonetic(). */
#define METAPHONE_LEN 50

/*
 * METAPHONE_TEST selects between the command-line main() (#ifdef) and the
 * SQL entry points (#ifndef) further down; it is switched off here.
 */
#undef METAPHONE_TEST

/* Convert between C strings and text values through the built-in I/O functions. */
#define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
#define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str)))

/* The "not found" value that strchr() results are compared against. */
#define NULLCHAR (char *) 0

/* Letter classes consulted by phonetic(). */
char *VOWELS = "AEIOU";
char *FRONTV = "EIY";		/* letters in front of these get special handling */
char *VARSON = "CSPTG";		/* sound changes when followed by an "h" */
char *DOUBLE = ".";			/* doubled letters listed here are not collapsed */

/*
 * Leading pairs whose first letter is dropped: excpPAIR[i] followed by
 * nextLTR[i], i.e. "ae-", "gn-", "kn-", "pn-", "wr-".
 */
char *excpPAIR = "AGKPW";
char *nextLTR = "ENNNR";

/* Scratch pointers for the leading-pair lookup. */
char *chrptr;
char *chrptr1;
/*
 * phonetic - append the Metaphone code of `name` to `metaph`.
 *
 * name     NUL-terminated input.  Only characters accepted by isalpha() are
 *          used, upper-cased; at most sizeof(wname) - 1 of them are read and
 *          any further letters are ignored.
 * metaph   NUL-terminated output string.  Codes are appended to whatever it
 *          already holds, so callers pass an empty string for a fresh code.
 *          It must have room for `metalen` characters plus the NUL.
 * metalen  Upper bound on strlen(metaph) after the call.  Nothing is
 *          appended when it is zero or negative.
 */
void phonetic(char *name, char *metaph, int metalen)
{
	int ii, jj, silent, hard, Lng, lastChr;
	char curLtr, prevLtr, nextLtr, nextLtr2, nextLtr3;
	int vowelAfter, vowelBefore, frontvAfter;
	char wname[60];
	char *ename = wname;
	char *excp;		/* local scratch pointer, keeps the function reentrant */

	/*
	 * A negative limit would turn into a huge unsigned value in the
	 * comparison against strlen() below and disable the bound entirely.
	 */
	if (metalen <= 0) return;

	/* Keep letters only, upper-cased, and never write past the end of wname. */
	jj = 0;
	for (ii = 0; name[ii] != '\0' && jj < (int) sizeof(wname) - 1; ii++) {
		/* the <ctype.h> functions need a value in unsigned char range */
		unsigned char ch = (unsigned char) name[ii];

		if (isalpha(ch)) {
			ename[jj] = (char) toupper(ch);
			jj++;
		}
	}
	ename[jj] = '\0';
	if (strlen(ename) == 0) return;

	/* if ae, gn, kn, pn, wr then drop the first letter */
	if ((excp = strchr(excpPAIR, ename[0])) != NULLCHAR) {
		/* source and destination overlap, so strcpy() is not allowed here */
		if (nextLTR[excp - excpPAIR] == ename[1])
			memmove(ename, &ename[1], strlen(&ename[1]) + 1);
	}

	/* change x to s */
	if (ename[0] == 'X') ename[0] = 'S';

	/* get rid of the "h" in "wh" */
	if (strncmp(ename, "WH", 2) == 0)
		memmove(&ename[1], &ename[2], strlen(&ename[2]) + 1);

	Lng = strlen(ename);
	lastChr = Lng - 1;	/* index of the last character */

	/* Remove an S from the end of the string */
	if (ename[lastChr] == 'S') {
		ename[lastChr] = '\0';
		Lng = strlen(ename);
		lastChr = Lng - 1;
	}

	for (ii = 0; ((strlen(metaph) < (size_t) metalen) && (ii < Lng)); ii++) {
		curLtr = ename[ii];

		vowelBefore = FALSE; prevLtr = ' ';
		if (ii > 0) {
			prevLtr = ename[ii-1];
			if (strchr(VOWELS, prevLtr) != NULLCHAR) vowelBefore = TRUE;
		}

		/* if first letter is a vowel KEEP it */
		if (ii == 0 && (strchr(VOWELS, curLtr) != NULLCHAR)) {
			strncat(metaph, &curLtr, 1);
			continue;
		}

		vowelAfter = FALSE; frontvAfter = FALSE; nextLtr = ' ';
		if (ii < lastChr) {
			nextLtr = ename[ii+1];
			if (strchr(VOWELS, nextLtr) != NULLCHAR) vowelAfter = TRUE;
			if (strchr(FRONTV, nextLtr) != NULLCHAR) frontvAfter = TRUE;
		}

		/* skip double letters except ones in list */
		if (curLtr == nextLtr && (strchr(DOUBLE, nextLtr) == NULLCHAR)) continue;

		/* look-ahead letters; a blank stands for "past the end" */
		nextLtr2 = ' ';
		if (ii < (lastChr-1)) nextLtr2 = ename[ii+2];
		nextLtr3 = ' ';
		if (ii < (lastChr-2)) nextLtr3 = ename[ii+3];

		switch (curLtr) {

		case 'B':
			silent = FALSE;
			if (ii == lastChr && prevLtr == 'M') silent = TRUE;
			if (!silent) strncat(metaph, &curLtr, 1);
			break;

		/* silent -sci-,-sce-,-scy-; sci-, etc OK */
		case 'C':
			if (!(ii > 1 && prevLtr == 'S' && frontvAfter)) {
				if (ii > 0 && nextLtr == 'I' && nextLtr2 == 'A')
					strncat(metaph, "X", 1);
				else if (frontvAfter)
					strncat(metaph, "S", 1);
				else if (ii > 1 && prevLtr == 'S' && nextLtr == 'H')
					strncat(metaph, "K", 1);
				else if (nextLtr == 'H') {
					if (ii == 0 && (strchr(VOWELS, nextLtr2) == NULLCHAR))
						strncat(metaph, "K", 1);
					else
						strncat(metaph, "X", 1);
				}
				else if (prevLtr == 'C')
					strncat(metaph, "C", 1);
				else
					strncat(metaph, "K", 1);
			}
			break;

		case 'D':
			if (nextLtr == 'G' && (strchr(FRONTV, nextLtr2) != NULLCHAR))
				strncat(metaph, "J", 1);
			else
				strncat(metaph, "T", 1);
			break;

		case 'G':
			silent = FALSE;
			/* SILENT -gh- except for -gh and no vowel after h */
			if ((ii < (lastChr-1) && nextLtr == 'H')
				&& (strchr(VOWELS, nextLtr2) == NULLCHAR))
				silent = TRUE;

			if ((ii == (lastChr-3))
				&& nextLtr == 'N' && nextLtr2 == 'E' && nextLtr3 == 'D')
				silent = TRUE;
			else if ((ii == (lastChr-1)) && nextLtr == 'N')
				silent = TRUE;

			if (prevLtr == 'D' && frontvAfter) silent = TRUE;

			if (prevLtr == 'G')
				hard = TRUE;
			else
				hard = FALSE;

			if (!silent) {
				if (frontvAfter && (!hard))
					strncat(metaph, "J", 1);
				else
					strncat(metaph, "K", 1);
			}
			break;

		case 'H':
			silent = FALSE;
			if (strchr(VARSON, prevLtr) != NULLCHAR) silent = TRUE;
			if (vowelBefore && !vowelAfter) silent = TRUE;
			if (!silent) strncat(metaph, &curLtr, 1);
			break;

		case 'F':
		case 'J':
		case 'L':
		case 'M':
		case 'N':
		case 'R':
			strncat(metaph, &curLtr, 1);
			break;

		case 'K':
			if (prevLtr != 'C') strncat(metaph, &curLtr, 1);
			break;

		case 'P':
			if (nextLtr == 'H')
				strncat(metaph, "F", 1);
			else
				strncat(metaph, "P", 1);
			break;

		case 'Q':
			strncat(metaph, "K", 1);
			break;

		case 'S':
			if (ii > 1 && nextLtr == 'I'
				&& (nextLtr2 == 'O' || nextLtr2 == 'A'))
				strncat(metaph, "X", 1);
			else if (nextLtr == 'H')
				strncat(metaph, "X", 1);
			else
				strncat(metaph, "S", 1);
			break;

		case 'T':
			if (ii > 1 && nextLtr == 'I'
				&& (nextLtr2 == 'O' || nextLtr2 == 'A'))
				strncat(metaph, "X", 1);
			else if (nextLtr == 'H') {	/* The=0, Tho=T, Withrow=0 */
				if (ii > 0 || (strchr(VOWELS, nextLtr2) != NULLCHAR))
					strncat(metaph, "0", 1);
				else
					strncat(metaph, "T", 1);
			}
			else if (!(ii < (lastChr-2) && nextLtr == 'C' && nextLtr2 == 'H'))
				strncat(metaph, "T", 1);
			break;

		case 'V':
			strncat(metaph, "F", 1);
			break;

		case 'W':
		case 'Y':
			if (ii < lastChr && vowelAfter) strncat(metaph, &curLtr, 1);
			break;

		case 'X':
			/*
			 * "KS" is the only two-character code.  The loop condition
			 * guarantees at least one free slot; pass the remaining room
			 * to strncat() so the result never exceeds metalen.
			 */
			strncat(metaph, "KS", (size_t) metalen - strlen(metaph));
			break;

		case 'Z':
			strncat(metaph, "S", 1);
			break;
		}
	}

	/*
	 * A trailing 'S' is deliberately NOT stripped from the finished code:
	 * removing the final S from the input word above already handles
	 * plurals, and S sounds embedded in the code are meant to be kept.
	 * The former code for that was:
	 *     Lng = strlen(metaph);
	 *     lastChr = Lng - 1;
	 *     if (metaph[lastChr] == 'S' && Lng >= 3) metaph[lastChr] = '\0';
	 */
	return;
}
#ifdef METAPHONE_TEST
/*
 * Command-line driver, compiled only when METAPHONE_TEST is defined:
 * prints the code for the first argument, or a usage line when it is missing.
 */
int
main(int argc, char *argv[])
{
	char output[51] = "";

	/* Guard clause: without a word to encode there is nothing to do. */
	if (argc < 2)
	{
		fprintf(stderr, "usage: %s string\n", argv[0]);
		return 1;
	}

	phonetic(argv[1], output, 50);
	printf("metaphone(%s) = %s\n", argv[1], output);
	return 0;
}
#endif /* METAPHONE_TEST */
#ifndef METAPHONE_TEST
/*
* SQL function: text_metaphone(text) returns text
*/
PG_FUNCTION_INFO_V1(text_metaphone);
Datum
text_metaphone(PG_FUNCTION_ARGS)
{
char outstr[51]="";
char *arg;
arg = _textout(PG_GETARG_TEXT_P(0));
phonetic(arg, outstr, 50);
PG_RETURN_TEXT_P(_textin(outstr));
}
/*
char outstr[51]="";
char *arg;
int32 metalen;
arg = _textout(PG_GETARG_TEXT_P(0));
metalen = PG_GETARG_INT32(1);
phonetic(arg, outstr, metalen);
*/
PG_FUNCTION_INFO_V1(text_metaphone_length);
Datum
text_metaphone_length(PG_FUNCTION_ARGS)
{
char outstr[51]="";
char *arg;
int32 metalen;
arg = _textout(PG_GETARG_TEXT_P(0));
metalen = PG_GETARG_INT32(1);
phonetic(arg, outstr, metalen);
PG_RETURN_TEXT_P(_textin(outstr));
}
#endif /* not METAPHONE_TEST */
-- Metaphone code of a name.  The SQL name matches the C symbol; the earlier
-- name text_soundex belongs to the soundex module.
-- Declared strict because the C code reads its arguments without a NULL check.
CREATE FUNCTION text_metaphone(text) RETURNS text
  AS '@MODULE_FILENAME@', 'text_metaphone' LANGUAGE 'C' WITH (isstrict);

-- Same, with the maximum length of the returned code as second argument.
CREATE FUNCTION text_metaphone(text, int4) RETURNS text
  AS '@MODULE_FILENAME@', 'text_metaphone_length' LANGUAGE 'C' WITH (isstrict);
......@@ -56,7 +56,10 @@ LDFLAGS=--shared -Wl,-E -Wl,-soname,$@
.PHONY: clean
all: plpython.so
all:
@echo "Disabled until merged into our Makefile system, bjm 2001-05-09"
disabled: plpython.so
plpython.o: plpython.c plpython.h
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $<
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment