Commit 9728eda7 authored by Tom Lane

Fix contrib/pg_trgm's similarity() function for trigram-free strings.

Cases such as similarity('', '') produced a NaN result due to computing
0/0.  Per discussion, make it return zero instead.

This appears to be the basic cause of bug #7867 from Michele Baravalle,
although it remains unclear why her installation doesn't think Cyrillic
letters are letters.

Back-patch to all active branches.
parent cd89965a
...@@ -53,6 +53,12 @@ select similarity('wow',' WOW '); ...@@ -53,6 +53,12 @@ select similarity('wow',' WOW ');
1 1
(1 row) (1 row)
select similarity('---', '####---');
similarity
------------
0
(1 row)
CREATE TABLE test_trgm(t text); CREATE TABLE test_trgm(t text);
\copy test_trgm from 'data/trgm.data \copy test_trgm from 'data/trgm.data
select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t; select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t;
......
...@@ -11,6 +11,8 @@ select show_trgm('a b C0*%^'); ...@@ -11,6 +11,8 @@ select show_trgm('a b C0*%^');
select similarity('wow','WOWa '); select similarity('wow','WOWa ');
select similarity('wow',' WOW '); select similarity('wow',' WOW ');
select similarity('---', '####---');
CREATE TABLE test_trgm(t text); CREATE TABLE test_trgm(t text);
\copy test_trgm from 'data/trgm.data \copy test_trgm from 'data/trgm.data
......
...@@ -553,6 +553,10 @@ cnt_sml(TRGM *trg1, TRGM *trg2) ...@@ -553,6 +553,10 @@ cnt_sml(TRGM *trg1, TRGM *trg2)
len1 = ARRNELEM(trg1); len1 = ARRNELEM(trg1);
len2 = ARRNELEM(trg2); len2 = ARRNELEM(trg2);
/* explicit test is needed to avoid 0/0 division when both lengths are 0 */
if (len1 <= 0 || len2 <= 0)
return (float4) 0.0;
while (ptr1 - GETARR(trg1) < len1 && ptr2 - GETARR(trg2) < len2) while (ptr1 - GETARR(trg1) < len1 && ptr2 - GETARR(trg2) < len2)
{ {
int res = CMPTRGM(ptr1, ptr2); int res = CMPTRGM(ptr1, ptr2);
...@@ -570,9 +574,9 @@ cnt_sml(TRGM *trg1, TRGM *trg2) ...@@ -570,9 +574,9 @@ cnt_sml(TRGM *trg1, TRGM *trg2)
} }
#ifdef DIVUNION #ifdef DIVUNION
return ((((float4) count) / ((float4) (len1 + len2 - count)))); return ((float4) count) / ((float4) (len1 + len2 - count));
#else #else
return (((float) count) / ((float) ((len1 > len2) ? len1 : len2))); return ((float4) count) / ((float4) ((len1 > len2) ? len1 : len2));
#endif #endif
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment