Commit 1ac9f0e9 authored by Neil Conway's avatar Neil Conway

The attached patch implements the soundex difference function, which
compares two strings' soundex values for similarity, from Kris Jurka.

Also mark the text_soundex() function as STRICT, to avoid crashing
on NULL input.
parent fd5437c7
...@@ -33,6 +33,10 @@ ...@@ -33,6 +33,10 @@
* Folded existing soundex contrib into this one. Renamed text_soundex() (C function) * Folded existing soundex contrib into this one. Renamed text_soundex() (C function)
* to soundex() for consistency. * to soundex() for consistency.
* *
* difference()
* ------------
* Return the difference between two strings' soundex values. Kris Jurka
*
* Permission to use, copy, modify, and distribute this software and its * Permission to use, copy, modify, and distribute this software and its
* documentation for any purpose, without fee, and without a written agreement * documentation for any purpose, without fee, and without a written agreement
* is hereby granted, provided that the above copyright notice and this * is hereby granted, provided that the above copyright notice and this
......
...@@ -7,15 +7,25 @@ United States Census in 1880, 1900, and 1910, but it has little use ...@@ -7,15 +7,25 @@ United States Census in 1880, 1900, and 1910, but it has little use
beyond English names (or the English pronunciation of names), and beyond English names (or the English pronunciation of names), and
it is not a linguistic tool. it is not a linguistic tool.
The difference function compares the soundex values of two strings and
reports how close the match is on a scale from zero to four, with zero
being no match and four being an exact match. It takes the two strings
themselves as arguments and computes their soundex values internally.
The following are some usage examples: The following are some usage examples:
SELECT soundex('hello world!'); SELECT soundex('hello world!');
SELECT soundex('Anne'), soundex('Ann'), difference('Anne', 'Ann');
SELECT soundex('Anne'), soundex('Andrew'), difference('Anne', 'Andrew');
SELECT soundex('Anne'), soundex('Margaret'), difference('Anne', 'Margaret');
CREATE TABLE s (nm text)\g CREATE TABLE s (nm text)\g
insert into s values ('john')\g insert into s values ('john')\g
insert into s values ('joan')\g insert into s values ('joan')\g
insert into s values ('wobbly')\g insert into s values ('wobbly')\g
insert into s values ('jack')\g
select * from s select * from s
where soundex(nm) = soundex('john')\g where soundex(nm) = soundex('john')\g
...@@ -58,5 +68,10 @@ FROM s ...@@ -58,5 +68,10 @@ FROM s
WHERE text_sx_eq(nm,'john')\g WHERE text_sx_eq(nm,'john')\g
SELECT * SELECT *
from s FROM s
where s.nm #= 'john'; WHERE s.nm #= 'john';
SELECT *
FROM s
WHERE difference(s.nm, 'john') > 2;
...@@ -755,3 +755,23 @@ _soundex(const char *instr, char *outstr) ...@@ -755,3 +755,23 @@ _soundex(const char *instr, char *outstr)
++count; ++count;
} }
} }
PG_FUNCTION_INFO_V1(difference);

/*
 * difference(text, text) returns int4
 *
 * Computes the soundex code of each argument with _soundex() and returns
 * the number of positions, from 0 to SOUNDEX_LEN, at which the two codes
 * hold the same character.
 *
 * NULL arguments are not checked here; the SQL-level declaration of this
 * function is expected to be STRICT.
 */
Datum
difference(PG_FUNCTION_ARGS)
{
	/*
	 * Zero-fill both buffers before handing them to _soundex().  The loop
	 * below always reads SOUNDEX_LEN bytes from each buffer, so any byte
	 * that _soundex() leaves unwritten must still hold a defined value.
	 * NOTE(review): _soundex() is not fully visible from here; presumably
	 * it can return early for input with no usable characters -- confirm.
	 */
	char		sndx1[SOUNDEX_LEN + 1] = {0};
	char		sndx2[SOUNDEX_LEN + 1] = {0};
	int			i;
	int			result = 0;

	_soundex(_textout(PG_GETARG_TEXT_P(0)), sndx1);
	_soundex(_textout(PG_GETARG_TEXT_P(1)), sndx2);

	/* Count the positions where the two codes agree. */
	for (i = 0; i < SOUNDEX_LEN; i++)
	{
		if (sndx1[i] == sndx2[i])
			result++;
	}

	PG_RETURN_INT32(result);
}
...@@ -60,6 +60,7 @@ ...@@ -60,6 +60,7 @@
extern Datum levenshtein(PG_FUNCTION_ARGS); extern Datum levenshtein(PG_FUNCTION_ARGS);
extern Datum metaphone(PG_FUNCTION_ARGS); extern Datum metaphone(PG_FUNCTION_ARGS);
extern Datum soundex(PG_FUNCTION_ARGS); extern Datum soundex(PG_FUNCTION_ARGS);
extern Datum difference(PG_FUNCTION_ARGS);
/* /*
* Soundex * Soundex
......
-- Adjust this setting to control where the objects get created. -- Adjust this setting to control where the objects get created.
SET search_path = public; SET search_path = public;
CREATE FUNCTION levenshtein (text,text) CREATE FUNCTION levenshtein (text,text) RETURNS int
RETURNS int
AS 'MODULE_PATHNAME','levenshtein' AS 'MODULE_PATHNAME','levenshtein'
LANGUAGE 'C' WITH (iscachable, isstrict); LANGUAGE C IMMUTABLE STRICT;
CREATE FUNCTION metaphone (text,int) CREATE FUNCTION metaphone (text,int) RETURNS text
RETURNS text
AS 'MODULE_PATHNAME','metaphone' AS 'MODULE_PATHNAME','metaphone'
LANGUAGE 'C' WITH (iscachable, isstrict); LANGUAGE C IMMUTABLE STRICT;
CREATE FUNCTION soundex(text) RETURNS text CREATE FUNCTION soundex(text) RETURNS text
AS 'MODULE_PATHNAME', 'soundex' AS 'MODULE_PATHNAME', 'soundex'
LANGUAGE 'C' WITH (iscachable, isstrict); LANGUAGE C IMMUTABLE STRICT;
CREATE FUNCTION text_soundex(text) RETURNS text CREATE FUNCTION text_soundex(text) RETURNS text
AS 'MODULE_PATHNAME', 'soundex' AS 'MODULE_PATHNAME', 'soundex'
LANGUAGE 'C'; LANGUAGE C IMMUTABLE STRICT;
-- difference(text, text): compares the soundex codes of the two arguments
-- and returns the number of matching positions.  Implemented by the C
-- function 'difference' in this module.  Declared STRICT so that a NULL
-- argument yields NULL instead of reaching the C code.
CREATE FUNCTION difference (text, text)
RETURNS int
AS 'MODULE_PATHNAME', 'difference'
LANGUAGE C IMMUTABLE STRICT;
CREATE FUNCTION dmetaphone (text) RETURNS text CREATE FUNCTION dmetaphone (text) RETURNS text
LANGUAGE C IMMUTABLE STRICT AS 'MODULE_PATHNAME', 'dmetaphone'
AS 'MODULE_PATHNAME', 'dmetaphone'; LANGUAGE C IMMUTABLE STRICT;
CREATE FUNCTION dmetaphone_alt (text) RETURNS text CREATE FUNCTION dmetaphone_alt (text) RETURNS text
LANGUAGE C IMMUTABLE STRICT AS 'MODULE_PATHNAME', 'dmetaphone_alt'
AS 'MODULE_PATHNAME', 'dmetaphone_alt'; LANGUAGE C IMMUTABLE STRICT;
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment