Add new selectivity estimation functions for pattern-matching operators (LIKE and regexp matches).

These are not yet referenced in pg_operator, so by default the system will continue to use eqsel/neqsel. Also, tweak convert_to_scalar() logic so that common prefixes of strings are stripped off, allowing better accuracy when all strings in a table share a common prefix.

Add new selectivity estimation functions for pattern-matching operators (LIKE and regexp matches).
These are not yet referenced in pg_operator, so by default the system will continue to use eqsel/neqsel. Also, tweak convert_to_scalar() logic so that common prefixes of strings are stripped off, allowing better accuracy when all strings in a table share a common prefix.
82849df6 · Tom Lane · 8c3b52e7 · 82849df6 · 82849df6 · 82849df6
Commit 82849df6 authored Apr 16, 2000 by Tom Lane
5 changed files
--- a/doc/src/sgml/xoper.sgml
+++ b/doc/src/sgml/xoper.sgml
 <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/xoper.sgml,v 1.9 2000/03/31 03:27:41 thomas Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/xoper.sgml,v 1.10 2000/04/16 04:41:01 tgl Exp $
 -->
 <Chapter Id="xoper">
@@ -254,9 +254,9 @@ SELECT (a + b) AS c FROM test_complex;
   <para>
    You can frequently get away with using either eqsel or neqsel for
    operators that have very high or very low selectivity, even if they
-    aren't really equality or inequality.  For example, the regular expression
+    aren't really equality or inequality.  For example, the
-    matching operators (~, ~*, etc) use eqsel on the assumption that they'll
+    approximate-equality geometric operators use eqsel on the assumption that
-    usually only match a small fraction of the entries in a table.
+    they'll usually only match a small fraction of the entries in a table.
   </para>
   <para>

--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -9,22 +9,20 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.82 2000/04/12 17:15:19 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.83 2000/04/16 04:41:01 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
-#include <ctype.h>
-#include <math.h>
 #include "postgres.h"
+#include <math.h>
 #include "access/heapam.h"
 #include "access/nbtree.h"
 #include "catalog/catname.h"
 #include "catalog/pg_amop.h"
 #include "catalog/pg_operator.h"
 #include "executor/executor.h"
-#include "mb/pg_wchar.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "optimizer/clauses.h"
@@ -46,11 +44,6 @@
 #define is_indexable_operator(clause,opclass,relam,indexkey_on_left) \
 	(indexable_operator(clause,opclass,relam,indexkey_on_left) != InvalidOid)
-typedef enum
-{
-	Prefix_None, Prefix_Partial, Prefix_Exact
-} Prefix_Status;
 static void match_index_orclauses(RelOptInfo *rel, IndexOptInfo *index,
 					  List *restrictinfo_list);
 static List *match_index_orclause(RelOptInfo *rel, IndexOptInfo *index,
@@ -92,17 +85,11 @@ static bool function_index_operand(Expr *funcOpnd, RelOptInfo *rel,
 					   IndexOptInfo *index);
 static bool match_special_index_operator(Expr *clause, Oid opclass, Oid relam,
 							 bool indexkey_on_left);
-static Prefix_Status like_fixed_prefix(char *patt, char **prefix);
-static Prefix_Status regex_fixed_prefix(char *patt, bool case_insensitive,
-				   char **prefix);
 static List *prefix_quals(Var *leftop, Oid expr_op,
-			 char *prefix, Prefix_Status pstatus);
+			 char *prefix, Pattern_Prefix_Status pstatus);
-static char *make_greater_string(const char *str, Oid datatype);
 static Oid	find_operator(const char *opname, Oid datatype);
 static Datum string_to_datum(const char *str, Oid datatype);
 static Const *string_to_const(const char *str, Oid datatype);
-static bool string_lessthan(const char *str1, const char *str2,
-				Oid datatype);
 /*
@@ -1644,6 +1631,7 @@ match_special_index_operator(Expr *clause, Oid opclass, Oid relam,
 	Datum		constvalue;
 	char	   *patt;
 	char	   *prefix;
+	char	   *rest;
 	/*
 	 * Currently, all known special operators require the indexkey on the
@@ -1672,7 +1660,8 @@ match_special_index_operator(Expr *clause, Oid opclass, Oid relam,
 		case OID_NAME_LIKE_OP:
 			/* the right-hand const is type text for all of these */
 			patt = textout((text *) DatumGetPointer(constvalue));
-			isIndexable = like_fixed_prefix(patt, &prefix) != Prefix_None;
+			isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Like,
+											   &prefix, &rest) != Pattern_Prefix_None;
 			if (prefix)
 				pfree(prefix);
 			pfree(patt);
@@ -1684,7 +1673,8 @@ match_special_index_operator(Expr *clause, Oid opclass, Oid relam,
 		case OID_NAME_REGEXEQ_OP:
 			/* the right-hand const is type text for all of these */
 			patt = textout((text *) DatumGetPointer(constvalue));
-			isIndexable = regex_fixed_prefix(patt, false, &prefix) != Prefix_None;
+			isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Regex,
+											   &prefix, &rest) != Pattern_Prefix_None;
 			if (prefix)
 				pfree(prefix);
 			pfree(patt);
@@ -1696,7 +1686,8 @@ match_special_index_operator(Expr *clause, Oid opclass, Oid relam,
 		case OID_NAME_ICREGEXEQ_OP:
 			/* the right-hand const is type text for all of these */
 			patt = textout((text *) DatumGetPointer(constvalue));
-			isIndexable = regex_fixed_prefix(patt, true, &prefix) != Prefix_None;
+			isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC,
+											   &prefix, &rest) != Pattern_Prefix_None;
 			if (prefix)
 				pfree(prefix);
 			pfree(patt);
@@ -1776,7 +1767,8 @@ expand_indexqual_conditions(List *indexquals)
 		Datum		constvalue;
 		char	   *patt;
 		char	   *prefix;
-		Prefix_Status pstatus;
+		char	   *rest;
+		Pattern_Prefix_Status pstatus;
 		switch (expr_op)
 		{
@@ -1794,7 +1786,8 @@ expand_indexqual_conditions(List *indexquals)
 				/* the right-hand const is type text for all of these */
 				constvalue = ((Const *) rightop)->constvalue;
 				patt = textout((text *) DatumGetPointer(constvalue));
-				pstatus = like_fixed_prefix(patt, &prefix);
+				pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like,
+											   &prefix, &rest);
 				resultquals = nconc(resultquals,
 									prefix_quals(leftop, expr_op,
 												 prefix, pstatus));
@@ -1810,7 +1803,8 @@ expand_indexqual_conditions(List *indexquals)
 				/* the right-hand const is type text for all of these */
 				constvalue = ((Const *) rightop)->constvalue;
 				patt = textout((text *) DatumGetPointer(constvalue));
-				pstatus = regex_fixed_prefix(patt, false, &prefix);
+				pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex,
+											   &prefix, &rest);
 				resultquals = nconc(resultquals,
 									prefix_quals(leftop, expr_op,
 												 prefix, pstatus));
@@ -1826,7 +1820,8 @@ expand_indexqual_conditions(List *indexquals)
 				/* the right-hand const is type text for all of these */
 				constvalue = ((Const *) rightop)->constvalue;
 				patt = textout((text *) DatumGetPointer(constvalue));
-				pstatus = regex_fixed_prefix(patt, true, &prefix);
+				pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC,
+											   &prefix, &rest);
 				resultquals = nconc(resultquals,
 									prefix_quals(leftop, expr_op,
 												 prefix, pstatus));
@@ -1844,130 +1839,6 @@ expand_indexqual_conditions(List *indexquals)
 	return resultquals;
 }
-/*
- * Extract the fixed prefix, if any, for a LIKE pattern.
- * *prefix is set to a palloc'd prefix string,
- * or to NULL if no fixed prefix exists for the pattern.
- * The return value distinguishes no fixed prefix, a partial prefix,
- * or an exact-match-only pattern.
- */
-static Prefix_Status
-like_fixed_prefix(char *patt, char **prefix)
-{
-	char	   *match;
-	int			pos,
-				match_pos;
-	*prefix = match = palloc(strlen(patt) + 1);
-	match_pos = 0;
-	for (pos = 0; patt[pos]; pos++)
-	{
-		/* % and _ are wildcard characters in LIKE */
-		if (patt[pos] == '%' ||
-			patt[pos] == '_')
-			break;
-		/* Backslash quotes the next character */
-		if (patt[pos] == '\\')
-		{
-			pos++;
-			if (patt[pos] == '\0')
-				break;
-		}
-		/*
-		 * NOTE: this code used to think that %% meant a literal %, but
-		 * textlike() itself does not think that, and the SQL92 spec
-		 * doesn't say any such thing either.
-		 */
-		match[match_pos++] = patt[pos];
-	}
-	match[match_pos] = '\0';
-	/* in LIKE, an empty pattern is an exact match! */
-	if (patt[pos] == '\0')
-		return Prefix_Exact;	/* reached end of pattern, so exact */
-	if (match_pos > 0)
-		return Prefix_Partial;
-	return Prefix_None;
-}
-/*
- * Extract the fixed prefix, if any, for a regex pattern.
- * *prefix is set to a palloc'd prefix string,
- * or to NULL if no fixed prefix exists for the pattern.
- * The return value distinguishes no fixed prefix, a partial prefix,
- * or an exact-match-only pattern.
- */
-static Prefix_Status
-regex_fixed_prefix(char *patt, bool case_insensitive,
-				   char **prefix)
-{
-	char	   *match;
-	int			pos,
-				match_pos;
-	*prefix = NULL;
-	/* Pattern must be anchored left */
-	if (patt[0] != '^')
-		return Prefix_None;
-	/* Cannot optimize if unquoted | { } is present in pattern */
-	for (pos = 1; patt[pos]; pos++)
-	{
-		if (patt[pos] == '|' ||
-			patt[pos] == '{' ||
-			patt[pos] == '}')
-			return Prefix_None;
-		if (patt[pos] == '\\')
-		{
-			pos++;
-			if (patt[pos] == '\0')
-				break;
-		}
-	}
-	/* OK, allocate space for pattern */
-	*prefix = match = palloc(strlen(patt) + 1);
-	match_pos = 0;
-	/* note start at pos 1 to skip leading ^ */
-	for (pos = 1; patt[pos]; pos++)
-	{
-		if (patt[pos] == '.' ||
-			patt[pos] == '?' ||
-			patt[pos] == '*' ||
-			patt[pos] == '[' ||
-			patt[pos] == '$' ||
-		/*
-		 * XXX I suspect isalpha() is not an adequately locale-sensitive
-		 * test for characters that can vary under case folding?
-		 */
-			(case_insensitive && isalpha(patt[pos])))
-			break;
-		if (patt[pos] == '\\')
-		{
-			pos++;
-			if (patt[pos] == '\0')
-				break;
-		}
-		match[match_pos++] = patt[pos];
-	}
-	match[match_pos] = '\0';
-	if (patt[pos] == '$' && patt[pos + 1] == '\0')
-		return Prefix_Exact;	/* pattern specifies exact match */
-	if (match_pos > 0)
-		return Prefix_Partial;
-	return Prefix_None;
-}
 /*
 * Given a fixed prefix that all the "leftop" values must have,
 * generate suitable indexqual condition(s).  expr_op is the original
@@ -1976,7 +1847,7 @@ regex_fixed_prefix(char *patt, bool case_insensitive,
 */
 static List *
 prefix_quals(Var *leftop, Oid expr_op,
-			 char *prefix, Prefix_Status pstatus)
+			 char *prefix, Pattern_Prefix_Status pstatus)
 {
 	List	   *result;
 	Oid			datatype;
@@ -1986,7 +1857,7 @@ prefix_quals(Var *leftop, Oid expr_op,
 	Expr	   *expr;
 	char	   *greaterstr;
-	Assert(pstatus != Prefix_None);
+	Assert(pstatus != Pattern_Prefix_None);
 	switch (expr_op)
 	{
@@ -2022,7 +1893,7 @@ prefix_quals(Var *leftop, Oid expr_op,
 	/*
 	 * If we found an exact-match pattern, generate an "=" indexqual.
 	 */
-	if (pstatus == Prefix_Exact)
+	if (pstatus == Pattern_Prefix_Exact)
 	{
 		oproid = find_operator("=", datatype);
 		if (oproid == InvalidOid)
@@ -2067,68 +1938,6 @@ prefix_quals(Var *leftop, Oid expr_op,
 	return result;
 }
-/*
- * Try to generate a string greater than the given string or any string it is
- * a prefix of.  If successful, return a palloc'd string; else return NULL.
- *
- * To work correctly in non-ASCII locales with weird collation orders,
- * we cannot simply increment "foo" to "fop" --- we have to check whether
- * we actually produced a string greater than the given one.  If not,
- * increment the righthand byte again and repeat.  If we max out the righthand
- * byte, truncate off the last character and start incrementing the next.
- * For example, if "z" were the last character in the sort order, then we
- * could produce "foo" as a string greater than "fonz".
- *
- * This could be rather slow in the worst case, but in most cases we won't
- * have to try more than one or two strings before succeeding.
- *
- * XXX in a sufficiently weird locale, this might produce incorrect results?
- * For example, in German I believe "ss" is treated specially --- if we are
- * given "foos" and return "foot", will this actually be greater than "fooss"?
- */
-static char *
-make_greater_string(const char *str, Oid datatype)
-{
-	char	   *workstr;
-	int			len;
-	/*
-	 * Make a modifiable copy, which will be our return value if
-	 * successful
-	 */
-	workstr = pstrdup((char *) str);
-	while ((len = strlen(workstr)) > 0)
-	{
-		unsigned char *lastchar = (unsigned char *) (workstr + len - 1);
-		/*
-		 * Try to generate a larger string by incrementing the last byte.
-		 */
-		while (*lastchar < (unsigned char) 255)
-		{
-			(*lastchar)++;
-			if (string_lessthan(str, workstr, datatype))
-				return workstr; /* Success! */
-		}
-		/*
-		 * Truncate off the last character, which might be more than 1
-		 * byte in MULTIBYTE case.
-		 */
-#ifdef MULTIBYTE
-		len = pg_mbcliplen((const unsigned char *) workstr, len, len - 1);
-		workstr[len] = '\0';
-#else
-		*lastchar = '\0';
-#endif
-	}
-	/* Failed... */
-	pfree(workstr);
-	return NULL;
-}
 /*
 * Handy subroutines for match_special_index_operator() and friends.
 */
@@ -2179,45 +1988,3 @@ string_to_const(const char *str, Oid datatype)
 	return makeConst(datatype, ((datatype == NAMEOID) ? NAMEDATALEN : -1),
 					 conval, false, false, false, false);
 }
-/*
- * Test whether two strings are "<" according to the rules of the given
- * datatype.  We do this the hard way, ie, actually calling the type's
- * "<" operator function, to ensure we get the right result...
- */
-static bool
-string_lessthan(const char *str1, const char *str2, Oid datatype)
-{
-	Datum		datum1 = string_to_datum(str1, datatype);
-	Datum		datum2 = string_to_datum(str2, datatype);
-	bool		result;
-	switch (datatype)
-	{
-		case TEXTOID:
-			result = text_lt((text *) datum1, (text *) datum2);
-			break;
-		case BPCHAROID:
-			result = bpcharlt((char *) datum1, (char *) datum2);
-			break;
-		case VARCHAROID:
-			result = varcharlt((char *) datum1, (char *) datum2);
-			break;
-		case NAMEOID:
-			result = namelt((NameData *) datum1, (NameData *) datum2);
-			break;
-		default:
-			elog(ERROR, "string_lessthan: unexpected datatype %u", datatype);
-			result = false;
-			break;
-	}
-	pfree(DatumGetPointer(datum1));
-	pfree(DatumGetPointer(datum2));
-	return result;
-}
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,13 +15,14 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.64 2000/04/12 17:15:51 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.65 2000/04/16 04:41:02 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
 #include "postgres.h"
+#include <ctype.h>
 #include <math.h>
 #include "access/heapam.h"
@@ -30,6 +31,7 @@
 #include "catalog/pg_proc.h"
 #include "catalog/pg_statistic.h"
 #include "catalog/pg_type.h"
+#include "mb/pg_wchar.h"
 #include "optimizer/cost.h"
 #include "parser/parse_func.h"
 #include "parser/parse_oper.h"
@@ -50,8 +52,23 @@
 /* default selectivity estimate for inequalities such as "A < b" */
 #define DEFAULT_INEQ_SEL  (1.0 / 3.0)
-static bool convert_string_to_scalar(char *str, int strlength,
+/* default selectivity estimate for pattern-match operators such as LIKE */
-						 double *scaleval);
+#define DEFAULT_MATCH_SEL	0.01
+static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
+							  Datum lobound, Datum hibound, Oid boundstypid,
+							  double *scaledlobound, double *scaledhibound);
+static double convert_numeric_to_scalar(Datum value, Oid typid);
+static void convert_string_to_scalar(unsigned char *value,
+									 double *scaledvalue,
+									 unsigned char *lobound,
+									 double *scaledlobound,
+									 unsigned char *hibound,
+									 double *scaledhibound);
+static double convert_one_string_to_scalar(unsigned char *value,
+										   int rangelo, int rangehi);
+static unsigned char * convert_string_datum(Datum value, Oid typid);
+static double convert_timevalue_to_scalar(Datum value, Oid typid);
 static void getattproperties(Oid relid, AttrNumber attnum,
 				 Oid *typid,
 				 int *typlen,
@@ -64,6 +81,15 @@ static bool getattstatistics(Oid relid, AttrNumber attnum,
 				 Datum *commonval,
 				 Datum *loval,
 				 Datum *hival);
+static Selectivity prefix_selectivity(char *prefix,
+									  Oid relid,
+									  AttrNumber attno,
+									  Oid datatype);
+static Selectivity pattern_selectivity(char *patt, Pattern_Type ptype);
+static bool string_lessthan(const char *str1, const char *str2,
+				Oid datatype);
+static Oid	find_operator(const char *opname, Oid datatype);
+static Datum string_to_datum(const char *str, Oid datatype);
 /*
@@ -71,9 +97,10 @@ static bool getattstatistics(Oid relid, AttrNumber attnum,
 *
 * Note: this routine is also used to estimate selectivity for some
 * operators that are not "=" but have comparable selectivity behavior,
- * such as "~~" (text LIKE).  Even for "=" we must keep in mind that
+ * such as "~=" (geometric approximate-match).  Even for "=", we must
- * the left and right datatypes may differ, so the type of the given
+ * keep in mind that the left and right datatypes may differ, so the type
- * constant "value" may be different from the type of the attribute.
+ * of the given constant "value" may be different from the type of the
+ * attribute.
 */
 float64
 eqsel(Oid opid,
@@ -255,7 +282,8 @@ scalarltsel(Oid opid,
 	{
 		HeapTuple	oprtuple;
 		Oid			ltype,
-					rtype;
+					rtype,
+					contype;
 		Oid			typid;
 		int			typlen;
 		bool		typbyval;
@@ -277,23 +305,7 @@ scalarltsel(Oid opid,
 			elog(ERROR, "scalarltsel: no tuple for operator %u", opid);
 		ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft;
 		rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright;
+		contype = (flag & SEL_RIGHT) ? rtype : ltype;
-		/* Convert the constant to a uniform comparison scale. */
-		if (!convert_to_scalar(value,
-							   ((flag & SEL_RIGHT) ? rtype : ltype),
-							   &val))
-		{
-			/*
-			 * Ideally we'd produce an error here, on the grounds that the
-			 * given operator shouldn't have scalarltsel registered as its
-			 * selectivity func unless we can deal with its operand types.
-			 * But currently, all manner of stuff is invoking scalarltsel,
-			 * so give a default estimate until that can be fixed.
-			 */
-			*result = DEFAULT_INEQ_SEL;
-			return result;
-		}
 		/* Now get info and stats about the attribute */
 		getattproperties(relid, attno,
@@ -308,17 +320,24 @@ scalarltsel(Oid opid,
 			return result;
 		}
-		/* Convert the attribute's loval/hival to common scale. */
+		/* Convert the values to a uniform comparison scale. */
-		if (!convert_to_scalar(loval, typid, &low) ||
+		if (!convert_to_scalar(value, contype, &val,
-			!convert_to_scalar(hival, typid, &high))
+							   loval, hival, typid,
+							   &low, &high))
 		{
-			/* See above comments... */
+			/*
+			 * Ideally we'd produce an error here, on the grounds that the
+			 * given operator shouldn't have scalarltsel registered as its
+			 * selectivity func unless we can deal with its operand types.
+			 * But currently, all manner of stuff is invoking scalarltsel,
+			 * so give a default estimate until that can be fixed.
+			 */
 			if (!typbyval)
 			{
 				pfree(DatumGetPointer(hival));
 				pfree(DatumGetPointer(loval));
 			}
 			*result = DEFAULT_INEQ_SEL;
 			return result;
 		}
@@ -391,6 +410,183 @@ scalargtsel(Oid opid,
 	return result;
 }
+/*
+ * patternsel			- Generic code for pattern-match selectivity.
+ */
+static float64
+patternsel(Oid opid,
+		   Pattern_Type ptype,
+		   Oid relid,
+		   AttrNumber attno,
+		   Datum value,
+		   int32 flag)
+{
+	float64		result;
+	result = (float64) palloc(sizeof(float64data));
+	/* Must have a constant for the pattern, or cannot learn anything */
+	if ((flag & (SEL_CONSTANT | SEL_RIGHT)) != (SEL_CONSTANT | SEL_RIGHT))
+		*result = DEFAULT_MATCH_SEL;
+	else
+	{
+		HeapTuple	oprtuple;
+		Oid			ltype,
+					rtype;
+		char	   *patt;
+		Pattern_Prefix_Status pstatus;
+		char	   *prefix;
+		char	   *rest;
+		/*
+		 * Get left and right datatypes of the operator so we know what
+		 * type the attribute is.
+		 */
+		oprtuple = get_operator_tuple(opid);
+		if (!HeapTupleIsValid(oprtuple))
+			elog(ERROR, "patternsel: no tuple for operator %u", opid);
+		ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft;
+		rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright;
+		/* the right-hand const is type text for all supported operators */
+		Assert(rtype == TEXTOID);
+		patt = textout((text *) DatumGetPointer(value));
+		/* divide pattern into fixed prefix and remainder */
+		pstatus = pattern_fixed_prefix(patt, ptype, &prefix, &rest);
+		if (pstatus == Pattern_Prefix_Exact)
+		{
+			/* Pattern specifies an exact match, so pretend operator is '=' */
+			Oid		eqopr = find_operator("=", ltype);
+			Datum	eqcon;
+			if (eqopr == InvalidOid)
+				elog(ERROR, "patternsel: no = operator for type %u", ltype);
+			eqcon = string_to_datum(prefix, ltype);
+			result = eqsel(eqopr, relid, attno, eqcon, SEL_CONSTANT|SEL_RIGHT);
+			pfree(DatumGetPointer(eqcon));
+		}
+		else
+		{
+			/*
+			 * Not exact-match pattern.  We estimate selectivity of the
+			 * fixed prefix and remainder of pattern separately, then
+			 * combine the two.
+			 */
+			Selectivity prefixsel;
+			Selectivity restsel;
+			Selectivity selec;
+			if (pstatus == Pattern_Prefix_Partial)
+				prefixsel = prefix_selectivity(prefix, relid, attno, ltype);
+			else
+				prefixsel = 1.0;
+			restsel = pattern_selectivity(rest, ptype);
+			selec = prefixsel * restsel;
+			/* result should be in range, but make sure... */
+			if (selec < 0.0)
+				selec = 0.0;
+			else if (selec > 1.0)
+				selec = 1.0;
+			*result = (float64data) selec;
+		}
+		if (prefix)
+			pfree(prefix);
+		pfree(patt);
+	}
+	return result;
+}
+/*
+ *		regexeqsel		- Selectivity of regular-expression pattern match.
+ */
+float64
+regexeqsel(Oid opid,
+		   Oid relid,
+		   AttrNumber attno,
+		   Datum value,
+		   int32 flag)
+{
+	return patternsel(opid, Pattern_Type_Regex, relid, attno, value, flag);
+}
+/*
+ *		icregexeqsel	- Selectivity of case-insensitive regex match.
+ */
+float64
+icregexeqsel(Oid opid,
+			 Oid relid,
+			 AttrNumber attno,
+			 Datum value,
+			 int32 flag)
+{
+	return patternsel(opid, Pattern_Type_Regex_IC, relid, attno, value, flag);
+}
+/*
+ *		likesel			- Selectivity of LIKE pattern match.
+ */
+float64
+likesel(Oid opid,
+		Oid relid,
+		AttrNumber attno,
+		Datum value,
+		int32 flag)
+{
+	return patternsel(opid, Pattern_Type_Like, relid, attno, value, flag);
+}
+/*
+ *		regexnesel		- Selectivity of regular-expression pattern non-match.
+ */
+float64
+regexnesel(Oid opid,
+		   Oid relid,
+		   AttrNumber attno,
+		   Datum value,
+		   int32 flag)
+{
+	float64		result;
+	result = patternsel(opid, Pattern_Type_Regex, relid, attno, value, flag);
+	*result = 1.0 - *result;
+	return result;
+}
+/*
+ *		icregexnesel	- Selectivity of case-insensitive regex non-match.
+ */
+float64
+icregexnesel(Oid opid,
+			 Oid relid,
+			 AttrNumber attno,
+			 Datum value,
+			 int32 flag)
+{
+	float64		result;
+	result = patternsel(opid, Pattern_Type_Regex_IC, relid, attno, value, flag);
+	*result = 1.0 - *result;
+	return result;
+}
+/*
+ *		nlikesel		- Selectivity of LIKE pattern non-match.
+ */
+float64
+nlikesel(Oid opid,
+		 Oid relid,
+		 AttrNumber attno,
+		 Datum value,
+		 int32 flag)
+{
+	float64		result;
+	result = patternsel(opid, Pattern_Type_Like, relid, attno, value, flag);
+	*result = 1.0 - *result;
+	return result;
+}
 /*
 *		eqjoinsel		- Join selectivity of "="
 */
@@ -491,9 +687,112 @@ scalargtjoinsel(Oid opid,
 	return result;
 }
+/*
+ *		regexeqjoinsel	- Join selectivity of regular-expression pattern match.
+ */
+float64
+regexeqjoinsel(Oid opid,
+			   Oid relid1,
+			   AttrNumber attno1,
+			   Oid relid2,
+			   AttrNumber attno2)
+{
+	float64		result;
+	result = (float64) palloc(sizeof(float64data));
+	*result = DEFAULT_MATCH_SEL;
+	return result;
+}
+/*
+ *		icregexeqjoinsel	- Join selectivity of case-insensitive regex match.
+ */
+float64
+icregexeqjoinsel(Oid opid,
+				 Oid relid1,
+				 AttrNumber attno1,
+				 Oid relid2,
+				 AttrNumber attno2)
+{
+	float64		result;
+	result = (float64) palloc(sizeof(float64data));
+	*result = DEFAULT_MATCH_SEL;
+	return result;
+}
+/*
+ *		likejoinsel			- Join selectivity of LIKE pattern match.
+ */
+float64
+likejoinsel(Oid opid,
+			Oid relid1,
+			AttrNumber attno1,
+			Oid relid2,
+			AttrNumber attno2)
+{
+	float64		result;
+	result = (float64) palloc(sizeof(float64data));
+	*result = DEFAULT_MATCH_SEL;
+	return result;
+}
+/*
+ *		regexnejoinsel	- Join selectivity of regex non-match.
+ */
+float64
+regexnejoinsel(Oid opid,
+			   Oid relid1,
+			   AttrNumber attno1,
+			   Oid relid2,
+			   AttrNumber attno2)
+{
+	float64		result;
+	result = regexeqjoinsel(opid, relid1, attno1, relid2, attno2);
+	*result = 1.0 - *result;
+	return result;
+}
+/*
+ *		icregexnejoinsel	- Join selectivity of case-insensitive regex non-match.
+ */
+float64
+icregexnejoinsel(Oid opid,
+				 Oid relid1,
+				 AttrNumber attno1,
+				 Oid relid2,
+				 AttrNumber attno2)
+{
+	float64		result;
+	result = icregexeqjoinsel(opid, relid1, attno1, relid2, attno2);
+	*result = 1.0 - *result;
+	return result;
+}
+/*
+ *		nlikejoinsel		- Join selectivity of LIKE pattern non-match.
+ */
+float64
+nlikejoinsel(Oid opid,
+			 Oid relid1,
+			 AttrNumber attno1,
+			 Oid relid2,
+			 AttrNumber attno2)
+{
+	float64		result;
+	result = likejoinsel(opid, relid1, attno1, relid2, attno2);
+	*result = 1.0 - *result;
+	return result;
+}
 /*
 * convert_to_scalar
- *	  Convert a non-NULL value of the indicated type to the comparison
+ *	  Convert non-NULL values of the indicated types to the comparison
 *	  scale needed by scalarltsel()/scalargtsel().
 *	  Returns "true" if successful.
 *
@@ -501,7 +800,8 @@ scalargtjoinsel(Oid opid,
 * "double" values.
 *
 * String datatypes are converted by convert_string_to_scalar(),
- * which is explained below.
+ * which is explained below.  The reason why this routine deals with
+ * three values at a time, not just one, is that we need it for strings.
+ * (convert_string_to_scalar() derives its byte range from the two bounds
+ * and strips the prefix shared by all three strings.)
 *
 * The several datatypes representing absolute times are all converted
 * to Timestamp, which is actually a double, and then we just use that
@@ -511,237 +811,349 @@ scalargtjoinsel(Oid opid,
 * The several datatypes representing relative times (intervals) are all
 * converted to measurements expressed in seconds.
+ *
+ * "value" has type valuetypid; "lobound" and "hibound" both have type
+ * boundstypid.  The scaled results are stored into *scaledvalue,
+ * *scaledlobound and *scaledhibound.  The conversion family is chosen
+ * from valuetypid alone; false is returned, with no outputs written,
+ * when valuetypid is not one of the types handled below.
 */
-bool
+static bool
-convert_to_scalar(Datum value, Oid typid,
+convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
-				  double *scaleval)
+				  Datum lobound, Datum hibound, Oid boundstypid,
+				  double *scaledlobound, double *scaledhibound)
 {
-	switch (typid)
+	switch (valuetypid)
 	{
-			/*
+		/*
-			 * Built-in numeric types
+		 * Built-in numeric types
-			 */
+		 */
-			case BOOLOID:
+		case BOOLOID:
-			*scaleval = (double) DatumGetUInt8(value);
-			return true;
 		case INT2OID:
-			*scaleval = (double) DatumGetInt16(value);
-			return true;
 		case INT4OID:
-			*scaleval = (double) DatumGetInt32(value);
-			return true;
 		case INT8OID:
-			*scaleval = (double) (*i8tod((int64 *) DatumGetPointer(value)));
-			return true;
 		case FLOAT4OID:
-			*scaleval = (double) (*DatumGetFloat32(value));
-			return true;
 		case FLOAT8OID:
-			*scaleval = (double) (*DatumGetFloat64(value));
-			return true;
 		case NUMERICOID:
-			*scaleval = (double) (*numeric_float8((Numeric) DatumGetPointer(value)));
-			return true;
 		case OIDOID:
 		case REGPROCOID:
-			/* we can treat OIDs as integers... */
+			*scaledvalue = convert_numeric_to_scalar(value, valuetypid);
-			*scaleval = (double) DatumGetObjectId(value);
+			*scaledlobound = convert_numeric_to_scalar(lobound, boundstypid);
+			*scaledhibound = convert_numeric_to_scalar(hibound, boundstypid);
 			return true;
-			/*
+		/*
-			 * Built-in string types
+		 * Built-in string types
-			 */
+		 */
 		case CHAROID:
-			{
-				char		ch = DatumGetChar(value);
-				return convert_string_to_scalar(&ch, 1, scaleval);
-			}
 		case BPCHAROID:
 		case VARCHAROID:
 		case TEXTOID:
-			{
-				char	   *str = (char *) VARDATA(DatumGetPointer(value));
-				int			strlength = VARSIZE(DatumGetPointer(value)) - VARHDRSZ;
-				return convert_string_to_scalar(str, strlength, scaleval);
-			}
 		case NAMEOID:
-			{
+		{
-				NameData   *nm = (NameData *) DatumGetPointer(value);
+			unsigned char *valstr = convert_string_datum(value, valuetypid);
+			unsigned char *lostr = convert_string_datum(lobound, boundstypid);
-				return convert_string_to_scalar(NameStr(*nm), strlen(NameStr(*nm)),
+			unsigned char *histr = convert_string_datum(hibound, boundstypid);
-												scaleval);
-			}
+			convert_string_to_scalar(valstr, scaledvalue,
+									 lostr, scaledlobound,
+									 histr, scaledhibound);
+			pfree(valstr);
+			pfree(lostr);
+			pfree(histr);
+			return true;
+		}
-			/*
+		/*
-			 * Built-in absolute-time types
+		 * Built-in time types
-			 */
+		 */
 		case TIMESTAMPOID:
-			*scaleval = *((Timestamp *) DatumGetPointer(value));
-			return true;
 		case ABSTIMEOID:
-			*scaleval = *abstime_timestamp(value);
-			return true;
 		case DATEOID:
-			*scaleval = *date_timestamp(value);
-			return true;
-			/*
-			 * Built-in relative-time types
-			 */
 		case INTERVALOID:
-			{
-				Interval   *interval = (Interval *) DatumGetPointer(value);
-				/*
-				 * Convert the month part of Interval to days using
-				 * assumed average month length of 365.25/12.0 days.  Not
-				 * too accurate, but plenty good enough for our purposes.
-				 */
-				*scaleval = interval->time +
-					interval->month * (365.25 / 12.0 * 24.0 * 60.0 * 60.0);
-				return true;
-			}
 		case RELTIMEOID:
-			*scaleval = (RelativeTime) DatumGetInt32(value);
-			return true;
 		case TINTERVALOID:
-			{
-				TimeInterval interval = (TimeInterval) DatumGetPointer(value);
-				if (interval->status != 0)
-				{
-					*scaleval = interval->data[1] - interval->data[0];
-					return true;
-				}
-				break;
-			}
 		case TIMEOID:
-			*scaleval = *((TimeADT *) DatumGetPointer(value));
+			*scaledvalue = convert_timevalue_to_scalar(value, valuetypid);
+			*scaledlobound = convert_timevalue_to_scalar(lobound, boundstypid);
+			*scaledhibound = convert_timevalue_to_scalar(hibound, boundstypid);
 			return true;
-		default:
-			{
-				/*
-				 * See whether there is a registered type-conversion
-				 * function, namely a procedure named "float8" with the
-				 * right signature. If so, assume we can convert the value
-				 * to the numeric scale.
-				 *
-				 * NOTE: there are no such procedures in the standard
-				 * distribution, except with argument types that we
-				 * already dealt with above. This code is just here as an
-				 * escape for user-defined types.
-				 */
-				Oid			oid_array[FUNC_MAX_ARGS];
-				HeapTuple	ftup;
-				MemSet(oid_array, 0, FUNC_MAX_ARGS * sizeof(Oid));
-				oid_array[0] = typid;
-				ftup = SearchSysCacheTuple(PROCNAME,
-										   PointerGetDatum("float8"),
-										   Int32GetDatum(1),
-										   PointerGetDatum(oid_array),
-										   0);
-				if (HeapTupleIsValid(ftup) &&
-				((Form_pg_proc) GETSTRUCT(ftup))->prorettype == FLOAT8OID)
-				{
-					RegProcedure convertproc = (RegProcedure) ftup->t_data->t_oid;
-					Datum		converted = (Datum) fmgr(convertproc, value);
-					*scaleval = (double) (*DatumGetFloat64(converted));
-					return true;
-				}
-				break;
-			}
 	}
 	/* Don't know how to convert */
 	return false;
 }
+/*
+ * Scale a datum of one of the built-in numeric types to a double.
+ *
+ * Helper for convert_to_scalar(): "typid" names the type of "value".
+ * A type that is not listed below is reported with elog(ERROR).
+ */
+static double
+convert_numeric_to_scalar(Datum value, Oid typid)
+{
+	double		scaled = 0;
+
+	switch (typid)
+	{
+		case BOOLOID:
+			scaled = (double) DatumGetUInt8(value);
+			break;
+		case INT2OID:
+			scaled = (double) DatumGetInt16(value);
+			break;
+		case INT4OID:
+			scaled = (double) DatumGetInt32(value);
+			break;
+		case INT8OID:
+			scaled = (double) (*i8tod((int64 *) DatumGetPointer(value)));
+			break;
+		case FLOAT4OID:
+			scaled = (double) (*DatumGetFloat32(value));
+			break;
+		case FLOAT8OID:
+			scaled = (double) (*DatumGetFloat64(value));
+			break;
+		case NUMERICOID:
+			scaled = (double) (*numeric_float8((Numeric) DatumGetPointer(value)));
+			break;
+		case OIDOID:
+		case REGPROCOID:
+			/* OIDs can simply be treated as integers */
+			scaled = (double) DatumGetObjectId(value);
+			break;
+		default:
+			/*
+			 * Only reachable if scalarltsel/scalargtsel is attached to an
+			 * operator with one numeric and one non-numeric operand.
+			 */
+			elog(ERROR, "convert_numeric_to_scalar: unsupported type %u", typid);
+			break;
+	}
+	return scaled;
+}
 /*
 * Do convert_to_scalar()'s work for any character-string data type.
 *
- * String datatypes are converted to a scale that ranges from 0 to 1, where
+ * String datatypes are converted to a scale that ranges from 0 to 1,
- * we visualize the bytes of the string as fractional base-256 digits.
+ * where we visualize the bytes of the string as fractional digits.
- * It's sufficient to consider the first few bytes, since double has only
- * limited precision (and we can't expect huge accuracy in our selectivity
- * predictions anyway!)
 *
- * If USE_LOCALE is defined, we must pass the string through strxfrm()
+ * We do not want the base to be 256, however, since that tends to
- * before doing the computation, so as to generate correct locale-specific
+ * generate inflated selectivity estimates; few databases will have
- * results.
+ * occurrences of all 256 possible byte values at each position.
+ * Instead, use the smallest and largest byte values seen in the bounds
+ * as the estimated range for each byte, after some fudging to deal with
+ * the fact that we probably aren't going to see the full range that way.
+ *
+ * An additional refinement is that we discard any common prefix of the
+ * three strings before computing the scaled values.  This allows us to
+ * "zoom in" when we encounter a narrow data range.  An example is a phone
+ * number database where all the values begin with the same area code.
+ *
+ * value, lobound and hibound are null-terminated byte strings; their
+ * scaled equivalents are stored into *scaledvalue, *scaledlobound and
+ * *scaledhibound.  Only the two bounds contribute to the byte range;
+ * "value" is not scanned when the range is computed.
 */
-static bool
+static void
-convert_string_to_scalar(char *str, int strlength,
+convert_string_to_scalar(unsigned char *value,
-						 double *scaleval)
+						 double *scaledvalue,
+						 unsigned char *lobound,
+						 double *scaledlobound,
+						 unsigned char *hibound,
+						 double *scaledhibound)
 {
+	int			rangelo,
+				rangehi;
 	unsigned char *sptr;
-	int			slen;
+	rangelo = rangehi = hibound[0];
+	for (sptr = lobound; *sptr; sptr++)
+	{
+		if (rangelo > *sptr)
+			rangelo = *sptr;
+		if (rangehi < *sptr)
+			rangehi = *sptr;
+	}
+	for (sptr = hibound; *sptr; sptr++)
+	{
+		if (rangelo > *sptr)
+			rangelo = *sptr;
+		if (rangehi < *sptr)
+			rangehi = *sptr;
+	}
+	/* If range includes any upper-case ASCII chars, make it include all */
+	if (rangelo <= 'Z' && rangehi >= 'A')
+	{
+		if (rangelo > 'A')
+			rangelo = 'A';
+		if (rangehi < 'Z')
+			rangehi = 'Z';
+	}
+	/* Ditto lower-case */
+	if (rangelo <= 'z' && rangehi >= 'a')
+	{
+		if (rangelo > 'a')
+			rangelo = 'a';
+		if (rangehi < 'z')
+			rangehi = 'z';
+	}
+	/* Ditto digits */
+	if (rangelo <= '9' && rangehi >= '0')
+	{
+		if (rangelo > '0')
+			rangelo = '0';
+		if (rangehi < '9')
+			rangehi = '9';
+	}
+	/* If range includes less than 10 chars, assume we have not got enough
+	 * data, and make it include regular ASCII set.
+	 */
+	if (rangehi - rangelo < 9)
+	{
+		rangelo = ' ';
+		rangehi = 127;
+	}
+	/*
+	 * Now strip any common prefix of the three strings.
+	 * The scan stops at the end of lobound or at the first position where
+	 * the three strings do not all hold the same byte.
+	 */
+	while (*lobound)
+	{
+		if (*lobound != *hibound || *lobound != *value)
+			break;
+		lobound++, hibound++, value++;
+	}
+	/*
+	 * Now we can do the conversions.
+	 */
+	*scaledvalue = convert_one_string_to_scalar(value, rangelo, rangehi);
+	*scaledlobound = convert_one_string_to_scalar(lobound, rangelo, rangehi);
+	*scaledhibound = convert_one_string_to_scalar(hibound, rangelo, rangehi);
+}
+/*
+ * Turn one null-terminated byte string into a fraction, reading its
+ * leading bytes as digits in base (rangehi - rangelo + 1).
+ *
+ * A byte below rangelo counts as digit -1 and a byte above rangehi as
+ * digit "base".  The empty string maps to 0.
+ */
+static double
+convert_one_string_to_scalar(unsigned char *value, int rangelo, int rangehi)
+{
+	int			nchars = strlen((char *) value);
+	double		radix = rangehi - rangelo + 1;
+	double		scale = radix;
+	double		fraction = 0.0;
+	int			i;
+
+	if (nchars <= 0)
+		return 0.0;				/* empty string has scalar value 0 */
+
+	/* Since base is at least 10, need not consider more than about 20 chars */
+	if (nchars > 20)
+		nchars = 20;
+
+	/* Accumulate one fractional digit per leading byte */
+	for (i = 0; i < nchars; i++)
+	{
+		int		digit = value[i];
+
+		if (digit < rangelo)
+			digit = rangelo - 1;
+		else if (digit > rangehi)
+			digit = rangehi + 1;
+		fraction += ((double) (digit - rangelo)) / scale;
+		scale *= radix;
+	}
+	return fraction;
+}
+/*
+ * Convert a string-type Datum into a palloc'd, null-terminated string.
+ *
+ * If USE_LOCALE is defined, we must pass the string through strxfrm()
+ * before continuing, so as to generate correct locale-specific results.
+ *
+ * typid must be CHAROID, BPCHAROID, VARCHAROID, TEXTOID or NAMEOID; any
+ * other type is reported with elog(ERROR).  Under USE_LOCALE the untransformed
+ * copy is pfree'd here and the strxfrm() output is what gets returned.
+ */
+static unsigned char *
+convert_string_datum(Datum value, Oid typid)
+{
+	char	   *val;
 #ifdef USE_LOCALE
-	char	   *rawstr;
 	char	   *xfrmstr;
 	size_t		xfrmsize;
 	size_t		xfrmlen;
 #endif
-	double		num,
-				denom;
-	if (strlength <= 0)
+	switch (typid)
 	{
-		*scaleval = 0;			/* empty string has scalar value 0 */
+		case CHAROID:
-		return true;
+			val = (char *) palloc(2);
+			val[0] = DatumGetChar(value);
+			val[1] = '\0';
+			break;
+		case BPCHAROID:
+		case VARCHAROID:
+		case TEXTOID:
+		{
+			char	   *str = (char *) VARDATA(DatumGetPointer(value));
+			int			strlength = VARSIZE(DatumGetPointer(value)) - VARHDRSZ;
+			val = (char *) palloc(strlength+1);
+			memcpy(val, str, strlength);
+			val[strlength] = '\0';
+			break;
+		}
+		case NAMEOID:
+		{
+			NameData   *nm = (NameData *) DatumGetPointer(value);
+			val = pstrdup(NameStr(*nm));
+			break;
+		}
+		default:
+			/* Can't get here unless someone tries to use scalarltsel
+			 * on an operator with one string and one non-string operand.
+			 */
+			elog(ERROR, "convert_string_datum: unsupported type %u", typid);
+			return NULL;
 	}
 #ifdef USE_LOCALE
-	/* Need a null-terminated string to pass to strxfrm() */
+	/* Guess that transformed string is not much bigger than original */
-	rawstr = (char *) palloc(strlength + 1);
+	xfrmsize = strlen(val) + 32;		/* arbitrary pad value here... */
-	memcpy(rawstr, str, strlength);
-	rawstr[strlength] = '\0';
-	/* Guess that transformed string is not much bigger */
-	xfrmsize = strlength + 32;	/* arbitrary pad value here... */
 	xfrmstr = (char *) palloc(xfrmsize);
-	xfrmlen = strxfrm(xfrmstr, rawstr, xfrmsize);
+	xfrmlen = strxfrm(xfrmstr, val, xfrmsize);
 	if (xfrmlen >= xfrmsize)
 	{
 		/* Oops, didn't make it */
 		pfree(xfrmstr);
 		xfrmstr = (char *) palloc(xfrmlen + 1);
-		xfrmlen = strxfrm(xfrmstr, rawstr, xfrmlen + 1);
+		xfrmlen = strxfrm(xfrmstr, val, xfrmlen + 1);
 	}
-	pfree(rawstr);
+	pfree(val);
+	val = xfrmstr;
-	sptr = (unsigned char *) xfrmstr;
-	slen = xfrmlen;
-#else
-	sptr = (unsigned char *) str;
-	slen = strlength;
 #endif
-	/* No need to consider more than about 8 bytes (sizeof double) */
+	return (unsigned char *) val;
-	if (slen > 8)
+}
-		slen = 8;
-	/* Convert initial characters to fraction */
+/*
-	num = 0.0;
+ * Do convert_to_scalar()'s work for any timevalue data type.
+ *
+ * "typid" names the type of "value".  A tinterval whose status is 0
+ * scales to 0; a type not listed below is reported with elog(ERROR).
-	denom = 256.0;
+ */
-	while (slen-- > 0)
+static double
+convert_timevalue_to_scalar(Datum value, Oid typid)
+{
+	switch (typid)
 	{
-		num += ((double) (*sptr++)) / denom;
+		case TIMESTAMPOID:
-		denom *= 256.0;
+			return *((Timestamp *) DatumGetPointer(value));
-	}
+		case ABSTIMEOID:
+			return *abstime_timestamp(value);
+		case DATEOID:
+			return *date_timestamp(value);
+		case INTERVALOID:
+		{
+			Interval   *interval = (Interval *) DatumGetPointer(value);
-#ifdef USE_LOCALE
+			/*
-	pfree(xfrmstr);
+			 * Convert the month part of Interval to days using
-#endif
+			 * assumed average month length of 365.25/12.0 days.  Not
+			 * too accurate, but plenty good enough for our purposes.
+			 */
+			return interval->time +
+				interval->month * (365.25 / 12.0 * 24.0 * 60.0 * 60.0);
+		}
+		case RELTIMEOID:
+			return (RelativeTime) DatumGetInt32(value);
+		case TINTERVALOID:
+		{
+			TimeInterval interval = (TimeInterval) DatumGetPointer(value);
-	*scaleval = num;
+			if (interval->status != 0)
-	return true;
+				return interval->data[1] - interval->data[0];
+			return 0;			/* for lack of a better idea */
+		}
+		case TIMEOID:
+			return *((TimeADT *) DatumGetPointer(value));
+	}
+	/* Can't get here unless someone tries to use scalarltsel/scalargtsel
+	 * on an operator with one timevalue and one non-timevalue operand.
+	 */
+	elog(ERROR, "convert_timevalue_to_scalar: unsupported type %u", typid);
+	return 0;
 }
@@ -914,6 +1326,623 @@ getattstatistics(Oid relid,
 	return true;
 }
+/*-------------------------------------------------------------------------
+ *
+ * Pattern analysis functions
+ *
+ * These routines support analysis of LIKE and regular-expression patterns
+ * by the planner/optimizer.  It's important that they agree with the
+ * regular-expression code in backend/regex/ and the LIKE code in
+ * backend/utils/adt/like.c.
+ *
+ * Note that the prefix-analysis functions are called from
+ * backend/optimizer/path/indxpath.c as well as from routines in this file.
+ *
+ *-------------------------------------------------------------------------
+ */
+/*
+ * Extract the fixed prefix, if any, for a LIKE pattern.
+ *
+ * On return, *prefix holds a palloc'd copy of the literal text that must
+ * start every matching string, or NULL when Pattern_Prefix_None is
+ * returned, and *rest points at the first pattern byte that was not
+ * absorbed into the prefix.  The result tells the caller whether there is
+ * no fixed prefix, a partial prefix, or a pattern that can only match one
+ * exact string (which includes the empty pattern).
+ */
+static Pattern_Prefix_Status
+like_fixed_prefix(char *patt, char **prefix, char **rest)
+{
+	char	   *src = patt;
+	char	   *buf = palloc(strlen(patt) + 1);
+	int			buflen = 0;
+
+	*prefix = buf;
+
+	/* % and _ are the LIKE wildcards; the fixed text ends at the first one */
+	while (*src != '\0' && *src != '%' && *src != '_')
+	{
+		/* A backslash quotes the next byte; a trailing backslash is dropped */
+		if (*src == '\\')
+		{
+			src++;
+			if (*src == '\0')
+				break;
+		}
+
+		/*
+		 * NOTE: %% is not treated as a literal % here, because textlike()
+		 * does not treat it that way and SQL92 does not say so either.
+		 */
+		buf[buflen++] = *src;
+		src++;
+	}
+	buf[buflen] = '\0';
+	*rest = src;
+
+	/* Running off the end means no wildcard was seen: exact match only */
+	if (*src == '\0')
+		return Pattern_Prefix_Exact;
+	if (buflen > 0)
+		return Pattern_Prefix_Partial;
+
+	/* Pattern starts with a wildcard, so there is nothing usable */
+	pfree(buf);
+	*prefix = NULL;
+	return Pattern_Prefix_None;
+}
+/*
+ * Extract the fixed prefix, if any, for a regular-expression pattern.
+ *
+ * Only a left-anchored pattern ("^...") with no '|' at parenthesis depth
+ * zero can have a fixed prefix; otherwise *prefix is set to NULL, *rest
+ * to patt, and Pattern_Prefix_None is returned.
+ *
+ * Otherwise *prefix is set to a palloc'd string (or to NULL if the result
+ * is Pattern_Prefix_None) and *rest points at the part of patt that was
+ * not absorbed into the prefix.  Pattern_Prefix_Exact is returned only
+ * when the scan stops at a '$' that is the last byte of the pattern.
+ *
+ * When case_insensitive is true, any alphabetic character ends the
+ * prefix, since it could match in either case.
+ */
+static Pattern_Prefix_Status
+regex_fixed_prefix(char *patt, bool case_insensitive,
+				   char **prefix, char **rest)
+{
+	char	   *match;
+	int			pos,
+				match_pos,
+				paren_depth;
+
+	/* Pattern must be anchored left */
+	if (patt[0] != '^')
+	{
+		*prefix = NULL;
+		*rest = patt;
+		return Pattern_Prefix_None;
+	}
+
+	/* If unquoted | is present at paren level 0 in pattern, then there
+	 * are multiple alternatives for the start of the string.
+	 */
+	paren_depth = 0;
+	for (pos = 1; patt[pos]; pos++)
+	{
+		if (patt[pos] == '|' && paren_depth == 0)
+		{
+			*prefix = NULL;
+			*rest = patt;
+			return Pattern_Prefix_None;
+		}
+		else if (patt[pos] == '(')
+			paren_depth++;
+		else if (patt[pos] == ')' && paren_depth > 0)
+			paren_depth--;
+		else if (patt[pos] == '\\')
+		{
+			/* backslash quotes the next character */
+			pos++;
+			if (patt[pos] == '\0')
+				break;
+		}
+	}
+
+	/* OK, allocate space for pattern */
+	*prefix = match = palloc(strlen(patt) + 1);
+	match_pos = 0;
+
+	/* note start at pos 1 to skip leading ^ */
+	for (pos = 1; patt[pos]; pos++)
+	{
+		/*
+		 * Check for characters that indicate multiple possible matches here.
+		 * XXX I suspect isalpha() is not an adequately locale-sensitive
+		 * test for characters that can vary under case folding?
+		 *
+		 * The argument is cast to unsigned char because passing a negative
+		 * plain-char value (a byte >= 0x80 where char is signed) to
+		 * isalpha() is undefined behavior.
+		 */
+		if (patt[pos] == '.' ||
+			patt[pos] == '(' ||
+			patt[pos] == '[' ||
+			patt[pos] == '$' ||
+			(case_insensitive && isalpha((unsigned char) patt[pos])))
+			break;
+
+		/*
+		 * Check for quantifiers.  Except for +, this means the preceding
+		 * character is optional, so we must remove it from the prefix too!
+		 * Either way, back up so that *rest starts at the quantified
+		 * character rather than at the quantifier itself.
+		 */
+		if (patt[pos] == '*' ||
+			patt[pos] == '?' ||
+			patt[pos] == '{')
+		{
+			if (match_pos > 0)
+				match_pos--;
+			pos--;
+			break;
+		}
+		if (patt[pos] == '+')
+		{
+			pos--;
+			break;
+		}
+		if (patt[pos] == '\\')
+		{
+			/* backslash quotes the next character */
+			pos++;
+			if (patt[pos] == '\0')
+				break;
+		}
+		match[match_pos++] = patt[pos];
+	}
+	match[match_pos] = '\0';
+	*rest = &patt[pos];
+
+	if (patt[pos] == '$' && patt[pos + 1] == '\0')
+	{
+		*rest = &patt[pos + 1];
+		return Pattern_Prefix_Exact;	/* pattern specifies exact match */
+	}
+	if (match_pos > 0)
+		return Pattern_Prefix_Partial;
+
+	/* Nothing literal could be extracted */
+	pfree(match);
+	*prefix = NULL;
+	return Pattern_Prefix_None;
+}
+/*
+ * Extract the fixed prefix of a pattern by dispatching on the pattern
+ * type to like_fixed_prefix() or regex_fixed_prefix(); the prefix and
+ * rest outputs are filled in by whichever routine is called.
+ */
+Pattern_Prefix_Status
+pattern_fixed_prefix(char *patt, Pattern_Type ptype,
+					 char **prefix, char **rest)
+{
+	switch (ptype)
+	{
+		case Pattern_Type_Like:
+			return like_fixed_prefix(patt, prefix, rest);
+		case Pattern_Type_Regex:
+			return regex_fixed_prefix(patt, false, prefix, rest);
+		case Pattern_Type_Regex_IC:
+			return regex_fixed_prefix(patt, true, prefix, rest);
+		default:
+			elog(ERROR, "pattern_fixed_prefix: bogus ptype");
+			break;
+	}
+	return Pattern_Prefix_None;	/* keep compiler quiet */
+}
+/*
+ * Estimate the selectivity of a fixed prefix for a pattern match.
+ *
+ * A fixed prefix "foo" is estimated as the selectivity of the expression
+ * "var >= 'foo' AND var < 'fop'" (see also indxpath.c).  If no string
+ * greater than the prefix can be made, only the ">=" half is used.
+ */
+static Selectivity
+prefix_selectivity(char *prefix,
+				   Oid relid,
+				   AttrNumber attno,
+				   Oid datatype)
+{
+	Selectivity	lowersel;
+	Selectivity	uppersel;
+	Selectivity	rangesel;
+	Oid			geopr;
+	Oid			ltopr;
+	Datum		boundcon;
+	char	   *upperstr;
+
+	/* Lower side of the range: var >= prefix */
+	geopr = find_operator(">=", datatype);
+	if (geopr == InvalidOid)
+		elog(ERROR, "prefix_selectivity: no >= operator for type %u",
+			 datatype);
+	boundcon = string_to_datum(prefix, datatype);
+	/* Assume scalargtsel is appropriate for all supported types */
+	lowersel = * scalargtsel(geopr, relid, attno,
+							 boundcon, SEL_CONSTANT|SEL_RIGHT);
+	pfree(DatumGetPointer(boundcon));
+
+	/* Without an upper bound string, the lower side is all we have */
+	upperstr = make_greater_string(prefix, datatype);
+	if (!upperstr)
+		return lowersel;
+
+	/* Upper side of the range: var < upperstr */
+	ltopr = find_operator("<", datatype);
+	if (ltopr == InvalidOid)
+		elog(ERROR, "prefix_selectivity: no < operator for type %u",
+			 datatype);
+	boundcon = string_to_datum(upperstr, datatype);
+	/* Assume scalarltsel is appropriate for all supported types */
+	uppersel = * scalarltsel(ltopr, relid, attno,
+							 boundcon, SEL_CONSTANT|SEL_RIGHT);
+	pfree(DatumGetPointer(boundcon));
+	pfree(upperstr);
+
+	/*
+	 * Combine the two sides the same way a range query is combined
+	 * (see clauselist_selectivity()).
+	 */
+	rangesel = uppersel + lowersel - 1.0;
+
+	/*
+	 * A zero or slightly negative result most likely means a very tight
+	 * range hit by roundoff error, so substitute a small positive value.
+	 * A strongly negative result suggests default estimates were used on
+	 * one or both sides; use a small, but not real small, default then.
+	 */
+	if (rangesel <= 0.0)
+		rangesel = (rangesel < -0.01) ? 0.01 : 1.0e-10;
+
+	return rangesel;
+}
+/*
+ * Estimate the selectivity of a pattern of the specified type.
+ * Note that any fixed prefix of the pattern will have been removed already.
+ *
+ * For now, we use a very simplistic approach: fixed characters reduce the
+ * selectivity a good deal, character ranges reduce it a little,
+ * wildcards (such as % for LIKE or .* for regex) increase it.
+ */
+#define FIXED_CHAR_SEL	0.04	/* about 1/25 */
+#define CHAR_RANGE_SEL	0.25
+#define ANY_CHAR_SEL	0.9		/* not 1, since it won't match end-of-string */
+#define FULL_WILDCARD_SEL 5.0
+#define PARTIAL_WILDCARD_SEL 2.0
+/*
+ * Estimate the selectivity of a LIKE pattern (fixed prefix already
+ * removed) by multiplying together a factor for each pattern element.
+ */
+static Selectivity
+like_selectivity(char *patt)
+{
+	Selectivity		est = 1.0;
+	char		   *p = patt;
+
+	/* A leading % is already accounted for in the starting estimate */
+	if (*p == '%')
+		p++;
+
+	for (; *p != '\0'; p++)
+	{
+		/* % and _ are wildcard characters in LIKE */
+		if (*p == '%')
+		{
+			est *= FULL_WILDCARD_SEL;
+			continue;
+		}
+		if (*p == '_')
+		{
+			est *= ANY_CHAR_SEL;
+			continue;
+		}
+
+		/* Backslash quotes the next character; stop if there is none */
+		if (*p == '\\' && *++p == '\0')
+			break;
+
+		est *= FIXED_CHAR_SEL;
+	}
+
+	/* Several wildcards can push the product above 1 */
+	return (est > 1.0) ? 1.0 : est;
+}
+/*
+ * Estimate the selectivity of the first pattlen bytes of regex patt.
+ *
+ * Only items at parenthesis depth zero multiply into the estimate; a
+ * parenthesized group contributes through a recursive call made when its
+ * outermost closing ')' is reached.  An unquoted '|' at depth zero adds
+ * the estimate for the remainder of the pattern and ends the scan.  The
+ * result is clamped to at most 1.0.  case_insensitive is only passed
+ * through to the recursive calls.
+ */
+static Selectivity
+regex_selectivity_sub(char *patt, int pattlen, bool case_insensitive)
+{
+	Selectivity		sel = 1.0;
+	int				paren_depth = 0;
+	int				paren_pos = 0; /* dummy init to keep compiler quiet */
+	int				pos;
+	for (pos = 0; pos < pattlen; pos++)
+	{
+		if (patt[pos] == '(')
+		{
+			if (paren_depth == 0)
+				paren_pos = pos; /* remember start of parenthesized item */
+			paren_depth++;
+		}
+		else if (patt[pos] == ')' && paren_depth > 0)
+		{
+			paren_depth--;
+			if (paren_depth == 0)
+				sel *= regex_selectivity_sub(patt + (paren_pos + 1),
+											 pos - (paren_pos + 1),
+											 case_insensitive);
+		}
+		else if (patt[pos] == '|' && paren_depth == 0)
+		{
+			/*
+			 * If unquoted | is present at paren level 0 in pattern,
+			 * we have multiple alternatives; sum their probabilities.
+			 */
+			sel += regex_selectivity_sub(patt + (pos + 1),
+										 pattlen - (pos + 1),
+										 case_insensitive);
+			break;				/* rest of pattern is now processed */
+		}
+		else if (patt[pos] == '[')
+		{
+			bool	negclass = false;
+			if (patt[++pos] == '^')
+			{
+				negclass = true;
+				pos++;
+			}
+			if (patt[pos] == ']') /* ']' at start of class is not special */
+				pos++;
+			while (pos < pattlen && patt[pos] != ']')
+				pos++;
+			if (paren_depth == 0)
+				sel *= (negclass ? (1.0-CHAR_RANGE_SEL) : CHAR_RANGE_SEL);
+		}
+		else if (patt[pos] == '.')
+		{
+			if (paren_depth == 0)
+				sel *= ANY_CHAR_SEL;
+		}
+		else if (patt[pos] == '*' ||
+				 patt[pos] == '?' ||
+				 patt[pos] == '+')
+		{
+			/* Ought to be smarter about quantifiers... */
+			if (paren_depth == 0)
+				sel *= PARTIAL_WILDCARD_SEL;
+		}
+		else if (patt[pos] == '{')
+		{
+			while (pos < pattlen && patt[pos] != '}')
+				pos++;
+			if (paren_depth == 0)
+				sel *= PARTIAL_WILDCARD_SEL;
+		}
+		else if (patt[pos] == '\\')
+		{
+			/* backslash quotes the next character */
+			pos++;
+			if (pos >= pattlen)
+				break;
+			if (paren_depth == 0)
+				sel *= FIXED_CHAR_SEL;
+		}
+		else
+		{
+			if (paren_depth == 0)
+				sel *= FIXED_CHAR_SEL;
+		}
+	}
+	/* Could get sel > 1 if multiple wildcards */
+	if (sel > 1.0)
+		sel = 1.0;
+	return sel;
+}
+/*
+ * Estimate the selectivity of a regex pattern (fixed prefix already
+ * removed).  A pattern that does not end with an unquoted $ is treated
+ * as if it had a trailing full wildcard.
+ */
+static Selectivity
+regex_selectivity(char *patt, bool case_insensitive)
+{
+	int				len = strlen(patt);
+	bool			ends_with_dollar = (len > 0 && patt[len - 1] == '$');
+	bool			dollar_is_quoted = (len > 1 && patt[len - 2] == '\\');
+	Selectivity		est;
+
+	/* Anchored at the end: estimate everything before the $ as-is */
+	if (ends_with_dollar && !dollar_is_quoted)
+		return regex_selectivity_sub(patt, len - 1, case_insensitive);
+
+	/* Not anchored at the end: apply the implied trailing wildcard */
+	est = regex_selectivity_sub(patt, len, case_insensitive);
+	est *= FULL_WILDCARD_SEL;
+	return (est > 1.0) ? 1.0 : est;
+}
+/*
+ * Estimate the selectivity of a pattern by dispatching on the pattern
+ * type to like_selectivity() or regex_selectivity().
+ */
+static Selectivity
+pattern_selectivity(char *patt, Pattern_Type ptype)
+{
+	switch (ptype)
+	{
+		case Pattern_Type_Like:
+			return like_selectivity(patt);
+		case Pattern_Type_Regex:
+			return regex_selectivity(patt, false);
+		case Pattern_Type_Regex_IC:
+			return regex_selectivity(patt, true);
+		default:
+			elog(ERROR, "pattern_selectivity: bogus ptype");
+			break;
+	}
+	return 1.0;					/* keep compiler quiet */
+}
+/*
+ * Try to generate a string greater than the given string or any string it is
+ * a prefix of.  If successful, return a palloc'd string; else return NULL.
+ *
+ * The input string is not modified; all work is done on a pstrdup'd copy.
+ * Each candidate is checked with string_lessthan(), which uses "datatype"
+ * to pick the comparison function.  An empty input string yields NULL.
+ *
+ * To work correctly in non-ASCII locales with weird collation orders,
+ * we cannot simply increment "foo" to "fop" --- we have to check whether
+ * we actually produced a string greater than the given one.  If not,
+ * increment the righthand byte again and repeat.  If we max out the righthand
+ * byte, truncate off the last character and start incrementing the next.
+ * For example, if "z" were the last character in the sort order, then we
+ * could produce "foo" as a string greater than "fonz".
+ *
+ * This could be rather slow in the worst case, but in most cases we won't
+ * have to try more than one or two strings before succeeding.
+ *
+ * XXX in a sufficiently weird locale, this might produce incorrect results?
+ * For example, in German I believe "ss" is treated specially --- if we are
+ * given "foos" and return "foot", will this actually be greater than "fooss"?
+ */
+char *
+make_greater_string(const char *str, Oid datatype)
+{
+	char	   *workstr;
+	int			len;
+	/*
+	 * Make a modifiable copy, which will be our return value if
+	 * successful
+	 */
+	workstr = pstrdup((char *) str);
+	while ((len = strlen(workstr)) > 0)
+	{
+		unsigned char *lastchar = (unsigned char *) (workstr + len - 1);
+		/*
+		 * Try to generate a larger string by incrementing the last byte.
+		 * The byte is bumped one step at a time up to 255, testing the
+		 * candidate against the original after every step.
+		 */
+		while (*lastchar < (unsigned char) 255)
+		{
+			(*lastchar)++;
+			if (string_lessthan(str, workstr, datatype))
+				return workstr; /* Success! */
+		}
+		/*
+		 * Truncate off the last character, which might be more than 1
+		 * byte in MULTIBYTE case.
+		 */
+#ifdef MULTIBYTE
+		len = pg_mbcliplen((const unsigned char *) workstr, len, len - 1);
+		workstr[len] = '\0';
+#else
+		*lastchar = '\0';
+#endif
+	}
+	/* Failed... */
+	pfree(workstr);
+	return NULL;
+}
+/*
+ * Report whether str1 sorts before str2 under the "<" rules of the given
+ * datatype.  Both strings are converted to datums and handed to the
+ * type's own "<" function, so that the answer matches what the operator
+ * itself would say.  Only text, bpchar, varchar and name are supported.
+ */
+static bool
+string_lessthan(const char *str1, const char *str2, Oid datatype)
+{
+	Datum		lhs = string_to_datum(str1, datatype);
+	Datum		rhs = string_to_datum(str2, datatype);
+	bool		isless = false;
+
+	if (datatype == TEXTOID)
+		isless = text_lt((text *) lhs, (text *) rhs);
+	else if (datatype == BPCHAROID)
+		isless = bpcharlt((char *) lhs, (char *) rhs);
+	else if (datatype == VARCHAROID)
+		isless = varcharlt((char *) lhs, (char *) rhs);
+	else if (datatype == NAMEOID)
+		isless = namelt((NameData *) lhs, (NameData *) rhs);
+	else
+		elog(ERROR, "string_lessthan: unexpected datatype %u", datatype);
+
+	/* The temporary datums are pass-by-reference, so release them */
+	pfree(DatumGetPointer(lhs));
+	pfree(DatumGetPointer(rhs));
+	return isless;
+}
+/*
+ * Look up the binary operator called opname that takes the given datatype
+ * on both sides.  Returns the operator's OID, or InvalidOid if the
+ * syscache has no such entry.
+ */
+static Oid
+find_operator(const char *opname, Oid datatype)
+{
+	HeapTuple	tup = SearchSysCacheTuple(OPERNAME,
+										  PointerGetDatum(opname),
+										  ObjectIdGetDatum(datatype),
+										  ObjectIdGetDatum(datatype),
+										  CharGetDatum('b'));
+
+	return HeapTupleIsValid(tup) ? tup->t_data->t_oid : InvalidOid;
+}
+/*
+ * Build a Datum of the requested type from a C string.
+ *
+ * Every supported type is pass-by-reference, so the caller should pfree
+ * the result once it is no longer needed.
+ */
+static Datum
+string_to_datum(const char *str, Oid datatype)
+{
+	char	   *cstr = (char *) str;
+
+	/*
+	 * Anything that is not a name goes through textin(); this cheats a
+	 * little by assuming textin() will do for bpchar and varchar
+	 * constants too.
+	 */
+	if (datatype != NAMEOID)
+		return PointerGetDatum(textin(cstr));
+
+	return PointerGetDatum(namein(cstr));
+}
 /*-------------------------------------------------------------------------
 *
 * Index cost estimation functions

--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $Id: pg_proc.h,v 1.132 2000/04/12 17:16:29 momjian Exp $
+ * $Id: pg_proc.h,v 1.133 2000/04/16 04:41:03 tgl Exp $
 *
 * NOTES
 *	  The script catalog/genbki.sh reads this file and generates .bki
@@ -2436,6 +2436,32 @@ DESCR("convert text to timestamp");
 DATA(insert OID = 1780 ( to_date			PGUID 11 f t f 2 f	1082 "25 25" 100 0 0 100  to_date - ));
 DESCR("convert text to date");
+/* Selectivity estimators for LIKE and related operators */
+DATA(insert OID = 1818 ( regexeqsel			PGUID 11 f t f 5 f 701 "26 26 21 0 23" 100 0 0 100  regexeqsel - ));
+DESCR("restriction selectivity of regex match");
+DATA(insert OID = 1819 ( likesel			PGUID 11 f t f 5 f 701 "26 26 21 0 23" 100 0 0 100  likesel - ));
+DESCR("restriction selectivity of LIKE");
+DATA(insert OID = 1820 ( icregexeqsel		PGUID 11 f t f 5 f 701 "26 26 21 0 23" 100 0 0 100  icregexeqsel - ));
+DESCR("restriction selectivity of case-insensitive regex match");
+DATA(insert OID = 1821 ( regexnesel			PGUID 11 f t f 5 f 701 "26 26 21 0 23" 100 0 0 100  regexnesel - ));
+DESCR("restriction selectivity of regex non-match");
+DATA(insert OID = 1822 ( nlikesel			PGUID 11 f t f 5 f 701 "26 26 21 0 23" 100 0 0 100  nlikesel - ));
+DESCR("restriction selectivity of NOT LIKE");
+DATA(insert OID = 1823 ( icregexnesel		PGUID 11 f t f 5 f 701 "26 26 21 0 23" 100 0 0 100  icregexnesel - ));
+DESCR("restriction selectivity of case-insensitive regex non-match");
+DATA(insert OID = 1824 ( regexeqjoinsel		PGUID 11 f t f 5 f 701 "26 26 21 26 21" 100 0 0 100	regexeqjoinsel - ));
+DESCR("join selectivity of regex match");
+DATA(insert OID = 1825 ( likejoinsel		PGUID 11 f t f 5 f 701 "26 26 21 26 21" 100 0 0 100	likejoinsel - ));
+DESCR("join selectivity of LIKE");
+DATA(insert OID = 1826 ( icregexeqjoinsel	PGUID 11 f t f 5 f 701 "26 26 21 26 21" 100 0 0 100	icregexeqjoinsel - ));
+DESCR("join selectivity of case-insensitive regex match");
+DATA(insert OID = 1827 ( regexnejoinsel		PGUID 11 f t f 5 f 701 "26 26 21 26 21" 100 0 0 100	regexnejoinsel - ));
+DESCR("join selectivity of regex non-match");
+DATA(insert OID = 1828 ( nlikejoinsel		PGUID 11 f t f 5 f 701 "26 26 21 26 21" 100 0 0 100	nlikejoinsel - ));
+DESCR("join selectivity of NOT LIKE");
+DATA(insert OID = 1829 ( icregexnejoinsel	PGUID 11 f t f 5 f 701 "26 26 21 26 21" 100 0 0 100	icregexnejoinsel - ));
+DESCR("join selectivity of case-insensitive regex non-match");
 /*
 * prototypes for functions pg_proc.c

--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $Id: builtins.h,v 1.110 2000/04/12 17:16:54 momjian Exp $
+ * $Id: builtins.h,v 1.111 2000/04/16 04:41:03 tgl Exp $
 *
 * NOTES
 *	  This should normally only be included by fmgr.h.
@@ -371,15 +371,47 @@ extern char *deparse_expression(Node *expr, List *rangetables,
 				   bool forceprefix);
 /* selfuncs.c */
-extern float64 eqsel(Oid opid, Oid relid, AttrNumber attno, Datum value, int32 flag);
+extern float64 eqsel(Oid opid, Oid relid, AttrNumber attno,
-extern float64 neqsel(Oid opid, Oid relid, AttrNumber attno, Datum value, int32 flag);
+					 Datum value, int32 flag);
-extern float64 scalarltsel(Oid opid, Oid relid, AttrNumber attno, Datum value, int32 flag);
+extern float64 neqsel(Oid opid, Oid relid, AttrNumber attno,
-extern float64 scalargtsel(Oid opid, Oid relid, AttrNumber attno, Datum value, int32 flag);
+					  Datum value, int32 flag);
-extern float64 eqjoinsel(Oid opid, Oid relid1, AttrNumber attno1, Oid relid2, AttrNumber attno2);
+extern float64 scalarltsel(Oid opid, Oid relid, AttrNumber attno,
-extern float64 neqjoinsel(Oid opid, Oid relid1, AttrNumber attno1, Oid relid2, AttrNumber attno2);
+						   Datum value, int32 flag);
-extern float64 scalarltjoinsel(Oid opid, Oid relid1, AttrNumber attno1, Oid relid2, AttrNumber attno2);
+extern float64 scalargtsel(Oid opid, Oid relid, AttrNumber attno,
-extern float64 scalargtjoinsel(Oid opid, Oid relid1, AttrNumber attno1, Oid relid2, AttrNumber attno2);
+						   Datum value, int32 flag);
-extern bool convert_to_scalar(Datum value, Oid typid, double *scaleval);
+extern float64 regexeqsel(Oid opid, Oid relid, AttrNumber attno,
+						  Datum value, int32 flag);	/* restriction selectivity of regex match */
+extern float64 likesel(Oid opid, Oid relid, AttrNumber attno,
+					   Datum value, int32 flag);	/* restriction selectivity of LIKE */
+extern float64 icregexeqsel(Oid opid, Oid relid, AttrNumber attno,
+							Datum value, int32 flag);	/* restriction selectivity of case-insensitive regex match */
+extern float64 regexnesel(Oid opid, Oid relid, AttrNumber attno,
+						  Datum value, int32 flag);	/* restriction selectivity of regex non-match */
+extern float64 nlikesel(Oid opid, Oid relid, AttrNumber attno,
+						Datum value, int32 flag);	/* restriction selectivity of NOT LIKE */
+extern float64 icregexnesel(Oid opid, Oid relid, AttrNumber attno,
+							Datum value, int32 flag);	/* restriction selectivity of case-insensitive regex non-match */
+extern float64 eqjoinsel(Oid opid, Oid relid1, AttrNumber attno1,
+						 Oid relid2, AttrNumber attno2);
+extern float64 neqjoinsel(Oid opid, Oid relid1, AttrNumber attno1,
+						  Oid relid2, AttrNumber attno2);
+extern float64 scalarltjoinsel(Oid opid, Oid relid1, AttrNumber attno1,
+							   Oid relid2, AttrNumber attno2);
+extern float64 scalargtjoinsel(Oid opid, Oid relid1, AttrNumber attno1,
+							   Oid relid2, AttrNumber attno2);
+extern float64 regexeqjoinsel(Oid opid, Oid relid1, AttrNumber attno1,
+							  Oid relid2, AttrNumber attno2);	/* join selectivity of regex match */
+extern float64 likejoinsel(Oid opid, Oid relid1, AttrNumber attno1,
+						   Oid relid2, AttrNumber attno2);	/* join selectivity of LIKE */
+extern float64 icregexeqjoinsel(Oid opid, Oid relid1, AttrNumber attno1,
+								Oid relid2, AttrNumber attno2);	/* join selectivity of case-insensitive regex match */
+extern float64 regexnejoinsel(Oid opid, Oid relid1, AttrNumber attno1,
+							  Oid relid2, AttrNumber attno2);	/* join selectivity of regex non-match */
+extern float64 nlikejoinsel(Oid opid, Oid relid1, AttrNumber attno1,
+							Oid relid2, AttrNumber attno2);	/* join selectivity of NOT LIKE */
+extern float64 icregexnejoinsel(Oid opid, Oid relid1, AttrNumber attno1,
+								Oid relid2, AttrNumber attno2);	/* join selectivity of case-insensitive regex non-match */
 extern void btcostestimate(Query *root, RelOptInfo *rel,
 			   IndexOptInfo *index, List *indexQuals,
@@ -402,6 +434,22 @@ extern void gistcostestimate(Query *root, RelOptInfo *rel,
 				 Cost *indexTotalCost,
 				 Selectivity *indexSelectivity);
+typedef enum	/* pattern syntax selector; passed as 'ptype' to pattern_fixed_prefix() */
+{
+	Pattern_Type_Like, Pattern_Type_Regex, Pattern_Type_Regex_IC	/* _IC: presumably case-insensitive regex -- TODO confirm */
+} Pattern_Type;
+typedef enum	/* result code returned by pattern_fixed_prefix() */
+{
+	Pattern_Prefix_None, Pattern_Prefix_Partial, Pattern_Prefix_Exact	/* NOTE(review): presumably no fixed prefix / fixed prefix plus more pattern / whole pattern is fixed -- confirm against the definition */
+} Pattern_Prefix_Status;
+extern Pattern_Prefix_Status pattern_fixed_prefix(char *patt,	/* pattern string to examine */
+												  Pattern_Type ptype,	/* which pattern syntax 'patt' uses */
+												  char **prefix,	/* presumably output: the fixed prefix found -- TODO confirm */
+												  char **rest);	/* presumably output: remainder of the pattern -- TODO confirm */
+extern char *make_greater_string(const char *str, Oid datatype);	/* NOTE(review): by its name, presumably yields a string sorting after 'str' for 'datatype' -- confirm against the definition */
 /* tid.c */
 extern ItemPointer tidin(const char *str);
 extern char *tidout(ItemPointer itemPtr);