In levenshtein_internal(), describe algorithm a bit more clearly.

12679b8b · Robert Haas · 54c88dee · 12679b8b
Commit 12679b8b authored Sep 24, 2010 by Robert Haas
Hide whitespace changes
Inline Side-by-side

Showing with 17 additions and 7 deletions

contrib/fuzzystrmatch/fuzzystrmatch.c contrib/fuzzystrmatch/fuzzystrmatch.c +17 -7

No files found.
--- a/contrib/fuzzystrmatch/fuzzystrmatch.c
+++ b/contrib/fuzzystrmatch/fuzzystrmatch.c
@@ -277,15 +277,25 @@ levenshtein_internal(text *s, text *t,
 	++n;
 	/*
-	 * Instead of building an (m+1)x(n+1) array, we'll use two different
+	 * One way to compute Levenshtein distance is to incrementally construct
-	 * arrays of size m+1 for storing accumulated values. At each step one
+	 * an (m+1)x(n+1) matrix where cell (i, j) represents the minimum cost
-	 * represents the "previous" row and one is the "current" row of the
+	 * of operations required to transform the first i characters of s into
-	 * notional large array.
+	 * the first j characters of t.  The last column of the final row is the
+	 * answer.
+	 *
+	 * We use that algorithm here with some modification.  In lieu of holding
+	 * the entire array in memory at once, we'll just use two arrays of size
+	 * m+1 for storing accumulated values.  At each step one array represents
+	 * the "previous" row and one is the "current" row of the notional large
+	 * array.
 	 */
 	prev = (int *) palloc(2 * m * sizeof(int));
 	curr = prev + m;
-	/* Initialize the "previous" row to 0..cols */
+	/*
+	 * To transform the first i characters of s into the first 0 characters
+	 * of t, we must perform i deletions.
+	 */
 	for (i = 0; i < m; i++)
 		prev[i] = i * del_c;
@@ -297,8 +307,8 @@ levenshtein_internal(text *s, text *t,
 		int			y_char_len = n != t_bytes + 1 ? pg_mblen(y) : 1;
 		/*
-		 * First cell must increment sequentially, as we're on the j'th row of
+		 * To transform the first 0 characters of s into the first j
-		 * the (m+1)x(n+1) array.
+		 * characters of t, we must perform j insertions.
 		 */
 		curr[0] = j * ins_c;