Commit 11d5c820 authored by Tom Lane

Improve implementation of btrim/ltrim/rtrim: provide a special case for
single-byte encodings, and a direct C implementation of the single-argument
forms (where spaces are always what gets trimmed).  This is in preparation
for using rtrim1() as the bpchar-to-text cast operator, but is a useful
performance improvement even if we decide not to do that.
parent 78d21560
...@@ -2,26 +2,30 @@ ...@@ -2,26 +2,30 @@
* oracle_compat.c * oracle_compat.c
* Oracle compatible functions. * Oracle compatible functions.
* *
* Copyright (c) 1996-2001, PostgreSQL Global Development Group * Copyright (c) 1996-2003, PostgreSQL Global Development Group
* *
* Author: Edmund Mergl <E.Mergl@bawue.de> * Author: Edmund Mergl <E.Mergl@bawue.de>
* Multibyte enhancement: Tatsuo Ishii <ishii@postgresql.org> * Multibyte enhancement: Tatsuo Ishii <ishii@postgresql.org>
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/adt/oracle_compat.c,v 1.43 2002/09/04 20:31:28 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/utils/adt/oracle_compat.c,v 1.44 2003/05/23 22:33:20 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
#include "postgres.h" #include "postgres.h"
#include <ctype.h> #include <ctype.h>
#include "utils/builtins.h" #include "utils/builtins.h"
#include "mb/pg_wchar.h" #include "mb/pg_wchar.h"
static text *dotrim(const char *string, int stringlen,
const char *set, int setlen,
bool doltrim, bool dortrim);
/******************************************************************** /********************************************************************
* *
* lower * lower
...@@ -349,86 +353,192 @@ btrim(PG_FUNCTION_ARGS) ...@@ -349,86 +353,192 @@ btrim(PG_FUNCTION_ARGS)
text *string = PG_GETARG_TEXT_P(0); text *string = PG_GETARG_TEXT_P(0);
text *set = PG_GETARG_TEXT_P(1); text *set = PG_GETARG_TEXT_P(1);
text *ret; text *ret;
char *ptr,
*end,
*ptr2,
*end2;
int m;
char **mp; ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
int mplen; VARDATA(set), VARSIZE(set) - VARHDRSZ,
char *p; true, true);
int mblen;
int len;
if ((m = VARSIZE(string) - VARHDRSZ) <= 0 || PG_RETURN_TEXT_P(ret);
(VARSIZE(set) - VARHDRSZ) <= 0) }
PG_RETURN_TEXT_P(string);
ptr = VARDATA(string); /********************************************************************
*
* btrim1 --- btrim with set fixed as ' '
*
********************************************************************/
Datum
btrim1(PG_FUNCTION_ARGS)
{
text *string = PG_GETARG_TEXT_P(0);
text *ret;
len = m; ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
mp = (char **) palloc(len * sizeof(char *)); " ", 1,
p = ptr; true, true);
mplen = 0;
PG_RETURN_TEXT_P(ret);
}
/*
* Common implementation for btrim, ltrim, rtrim
*/
static text *
dotrim(const char *string, int stringlen,
const char *set, int setlen,
bool doltrim, bool dortrim)
{
text *result;
int i;
/* Nothing to do if either string or set is empty */
if (stringlen > 0 && setlen > 0)
{
if (pg_database_encoding_max_length() > 1)
{
/*
* In the multibyte-encoding case, build arrays of pointers to
* character starts, so that we can avoid inefficient checks in
* the inner loops.
*/
const char **stringchars;
const char **setchars;
int *stringmblen;
int *setmblen;
int stringnchars;
int setnchars;
int resultndx;
int resultnchars;
const char *p;
int len;
int mblen;
const char *str_pos;
int str_len;
/* build the mb pointer array */ stringchars = (const char **) palloc(stringlen * sizeof(char *));
stringmblen = (int *) palloc(stringlen * sizeof(int));
stringnchars = 0;
p = string;
len = stringlen;
while (len > 0) while (len > 0)
{ {
mp[mplen++] = p; stringchars[stringnchars] = p;
mblen = pg_mblen(p); stringmblen[stringnchars] = mblen = pg_mblen(p);
stringnchars++;
p += mblen; p += mblen;
len -= mblen; len -= mblen;
} }
mplen--;
end2 = VARDATA(set) + VARSIZE(set) - VARHDRSZ - 1;
while (m > 0) setchars = (const char **) palloc(setlen * sizeof(char *));
setmblen = (int *) palloc(setlen * sizeof(int));
setnchars = 0;
p = set;
len = setlen;
while (len > 0)
{ {
int str_len = pg_mblen(ptr); setchars[setnchars] = p;
setmblen[setnchars] = mblen = pg_mblen(p);
setnchars++;
p += mblen;
len -= mblen;
}
ptr2 = VARDATA(set); resultndx = 0; /* index in stringchars[] */
while (ptr2 <= end2) resultnchars = stringnchars;
{
int set_len = pg_mblen(ptr2);
if (str_len == set_len && if (doltrim)
memcmp(ptr, ptr2, str_len) == 0) {
while (resultnchars > 0)
{
str_pos = stringchars[resultndx];
str_len = stringmblen[resultndx];
for (i = 0; i < setnchars; i++)
{
if (str_len == setmblen[i] &&
memcmp(str_pos, setchars[i], str_len) == 0)
break; break;
ptr2 += set_len;
} }
if (ptr2 > end2) if (i >= setnchars)
break; break; /* no match here */
ptr += str_len; string += str_len;
m -= str_len; stringlen -= str_len;
resultndx++;
resultnchars--;
}
} }
while (m > 0) if (dortrim)
{ {
int str_len; while (resultnchars > 0)
{
str_pos = stringchars[resultndx + resultnchars - 1];
str_len = stringmblen[resultndx + resultnchars - 1];
for (i = 0; i < setnchars; i++)
{
if (str_len == setmblen[i] &&
memcmp(str_pos, setchars[i], str_len) == 0)
break;
}
if (i >= setnchars)
break; /* no match here */
stringlen -= str_len;
resultnchars--;
}
}
end = mp[mplen--]; pfree(stringchars);
str_len = pg_mblen(end); pfree(stringmblen);
ptr2 = VARDATA(set); pfree(setchars);
while (ptr2 <= end2) pfree(setmblen);
}
else
{
/*
* In the single-byte-encoding case, we don't need such overhead.
*/
if (doltrim)
{
while (stringlen > 0)
{ {
int set_len = pg_mblen(ptr2); char str_ch = *string;
if (str_len == set_len && for (i = 0; i < setlen; i++)
memcmp(end, ptr2, str_len) == 0) {
if (str_ch == set[i])
break; break;
ptr2 += set_len;
} }
if (ptr2 > end2) if (i >= setlen)
break; /* no match here */
string++;
stringlen--;
}
}
if (dortrim)
{
while (stringlen > 0)
{
char str_ch = string[stringlen - 1];
for (i = 0; i < setlen; i++)
{
if (str_ch == set[i])
break; break;
m -= str_len;
} }
pfree(mp); if (i >= setlen)
ret = (text *) palloc(VARHDRSZ + m); break; /* no match here */
VARATT_SIZEP(ret) = VARHDRSZ + m; stringlen--;
memcpy(VARDATA(ret), ptr, m); }
}
}
}
PG_RETURN_TEXT_P(ret); /* Return selected portion of string */
result = (text *) palloc(VARHDRSZ + stringlen);
VARATT_SIZEP(result) = VARHDRSZ + stringlen;
memcpy(VARDATA(result), string, stringlen);
return result;
} }
/******************************************************************** /********************************************************************
...@@ -525,45 +635,33 @@ ltrim(PG_FUNCTION_ARGS) ...@@ -525,45 +635,33 @@ ltrim(PG_FUNCTION_ARGS)
text *string = PG_GETARG_TEXT_P(0); text *string = PG_GETARG_TEXT_P(0);
text *set = PG_GETARG_TEXT_P(1); text *set = PG_GETARG_TEXT_P(1);
text *ret; text *ret;
char *ptr,
*ptr2,
*end2;
int m;
if ((m = VARSIZE(string) - VARHDRSZ) <= 0 || ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
(VARSIZE(set) - VARHDRSZ) <= 0) VARDATA(set), VARSIZE(set) - VARHDRSZ,
PG_RETURN_TEXT_P(string); true, false);
ptr = VARDATA(string); PG_RETURN_TEXT_P(ret);
end2 = VARDATA(set) + VARSIZE(set) - VARHDRSZ - 1; }
while (m > 0) /********************************************************************
{ *
int str_len = pg_mblen(ptr); * ltrim1 --- ltrim with set fixed as ' '
*
********************************************************************/
ptr2 = VARDATA(set); Datum
while (ptr2 <= end2) ltrim1(PG_FUNCTION_ARGS)
{ {
int set_len = pg_mblen(ptr2); text *string = PG_GETARG_TEXT_P(0);
text *ret;
if (str_len == set_len && ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
memcmp(ptr, ptr2, str_len) == 0) " ", 1,
break; true, false);
ptr2 += set_len;
}
if (ptr2 > end2)
break;
ptr += str_len;
m -= str_len;
}
ret = (text *) palloc(VARHDRSZ + m);
VARATT_SIZEP(ret) = VARHDRSZ + m;
memcpy(VARDATA(ret), ptr, m);
PG_RETURN_TEXT_P(ret); PG_RETURN_TEXT_P(ret);
} }
/******************************************************************** /********************************************************************
* *
* rtrim * rtrim
...@@ -586,64 +684,28 @@ rtrim(PG_FUNCTION_ARGS) ...@@ -586,64 +684,28 @@ rtrim(PG_FUNCTION_ARGS)
text *set = PG_GETARG_TEXT_P(1); text *set = PG_GETARG_TEXT_P(1);
text *ret; text *ret;
char *ptr, ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
*end, VARDATA(set), VARSIZE(set) - VARHDRSZ,
*ptr2, false, true);
*end2;
int m;
char **mp;
int mplen;
char *p;
int mblen;
int len;
if ((m = VARSIZE(string) - VARHDRSZ) <= 0 ||
(VARSIZE(set) - VARHDRSZ) <= 0)
PG_RETURN_TEXT_P(string);
ptr = VARDATA(string);
len = m; PG_RETURN_TEXT_P(ret);
mp = (char **) palloc(len * sizeof(char *)); }
p = ptr;
mplen = 0;
/* build the mb pointer array */
while (len > 0)
{
mp[mplen++] = p;
mblen = pg_mblen(p);
p += mblen;
len -= mblen;
}
mplen--;
end2 = VARDATA(set) + VARSIZE(set) - VARHDRSZ - 1;
while (m > 0) /********************************************************************
{ *
int str_len; * rtrim1 --- rtrim with set fixed as ' '
*
********************************************************************/
end = mp[mplen--]; Datum
str_len = pg_mblen(end); rtrim1(PG_FUNCTION_ARGS)
ptr2 = VARDATA(set); {
while (ptr2 <= end2) text *string = PG_GETARG_TEXT_P(0);
{ text *ret;
int set_len = pg_mblen(ptr2);
if (str_len == set_len && ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
memcmp(end, ptr2, str_len) == 0) " ", 1,
break; false, true);
ptr2 += set_len;
}
if (ptr2 > end2)
break;
m -= str_len;
}
pfree(mp);
ret = (text *) palloc(VARHDRSZ + m);
VARATT_SIZEP(ret) = VARHDRSZ + m;
memcpy(VARDATA(ret), ptr, m);
PG_RETURN_TEXT_P(ret); PG_RETURN_TEXT_P(ret);
} }
......
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: catversion.h,v 1.194 2003/05/15 15:50:19 petere Exp $ * $Id: catversion.h,v 1.195 2003/05/23 22:33:22 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -53,6 +53,6 @@ ...@@ -53,6 +53,6 @@
*/ */
/* yyyymmddN */ /* yyyymmddN */
#define CATALOG_VERSION_NO 200305151 #define CATALOG_VERSION_NO 200305231
#endif #endif
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: pg_proc.h,v 1.300 2003/05/15 15:50:19 petere Exp $ * $Id: pg_proc.h,v 1.301 2003/05/23 22:33:22 tgl Exp $
* *
* NOTES * NOTES
* The script catalog/genbki.sh reads this file and generates .bki * The script catalog/genbki.sh reads this file and generates .bki
...@@ -2134,9 +2134,9 @@ DESCR("left-pad string to length"); ...@@ -2134,9 +2134,9 @@ DESCR("left-pad string to length");
DATA(insert OID = 874 ( rpad PGNSP PGUID 12 f f t f i 3 25 "25 23 25" rpad - _null_ )); DATA(insert OID = 874 ( rpad PGNSP PGUID 12 f f t f i 3 25 "25 23 25" rpad - _null_ ));
DESCR("right-pad string to length"); DESCR("right-pad string to length");
DATA(insert OID = 875 ( ltrim PGNSP PGUID 12 f f t f i 2 25 "25 25" ltrim - _null_ )); DATA(insert OID = 875 ( ltrim PGNSP PGUID 12 f f t f i 2 25 "25 25" ltrim - _null_ ));
DESCR("left-pad string to length"); DESCR("trim selected characters from left end of string");
DATA(insert OID = 876 ( rtrim PGNSP PGUID 12 f f t f i 2 25 "25 25" rtrim - _null_ )); DATA(insert OID = 876 ( rtrim PGNSP PGUID 12 f f t f i 2 25 "25 25" rtrim - _null_ ));
DESCR("right-pad string to length"); DESCR("trim selected characters from right end of string");
DATA(insert OID = 877 ( substr PGNSP PGUID 12 f f t f i 3 25 "25 23 23" text_substr - _null_ )); DATA(insert OID = 877 ( substr PGNSP PGUID 12 f f t f i 3 25 "25 23 23" text_substr - _null_ ));
DESCR("return portion of string"); DESCR("return portion of string");
DATA(insert OID = 878 ( translate PGNSP PGUID 12 f f t f i 3 25 "25 25 25" translate - _null_ )); DATA(insert OID = 878 ( translate PGNSP PGUID 12 f f t f i 3 25 "25 25 25" translate - _null_ ));
...@@ -2145,16 +2145,16 @@ DATA(insert OID = 879 ( lpad PGNSP PGUID 14 f f t f i 2 25 "25 23" "select ...@@ -2145,16 +2145,16 @@ DATA(insert OID = 879 ( lpad PGNSP PGUID 14 f f t f i 2 25 "25 23" "select
DESCR("left-pad string to length"); DESCR("left-pad string to length");
DATA(insert OID = 880 ( rpad PGNSP PGUID 14 f f t f i 2 25 "25 23" "select rpad($1, $2, \' \')" - _null_ )); DATA(insert OID = 880 ( rpad PGNSP PGUID 14 f f t f i 2 25 "25 23" "select rpad($1, $2, \' \')" - _null_ ));
DESCR("right-pad string to length"); DESCR("right-pad string to length");
DATA(insert OID = 881 ( ltrim PGNSP PGUID 14 f f t f i 1 25 "25" "select ltrim($1, \' \')" - _null_ )); DATA(insert OID = 881 ( ltrim PGNSP PGUID 12 f f t f i 1 25 "25" ltrim1 - _null_ ));
DESCR("remove initial characters from string"); DESCR("trim spaces from left end of string");
DATA(insert OID = 882 ( rtrim PGNSP PGUID 14 f f t f i 1 25 "25" "select rtrim($1, \' \')" - _null_ )); DATA(insert OID = 882 ( rtrim PGNSP PGUID 12 f f t f i 1 25 "25" rtrim1 - _null_ ));
DESCR("remove trailing characters from string"); DESCR("trim spaces from right end of string");
DATA(insert OID = 883 ( substr PGNSP PGUID 12 f f t f i 2 25 "25 23" text_substr_no_len - _null_ )); DATA(insert OID = 883 ( substr PGNSP PGUID 12 f f t f i 2 25 "25 23" text_substr_no_len - _null_ ));
DESCR("return portion of string"); DESCR("return portion of string");
DATA(insert OID = 884 ( btrim PGNSP PGUID 12 f f t f i 2 25 "25 25" btrim - _null_ )); DATA(insert OID = 884 ( btrim PGNSP PGUID 12 f f t f i 2 25 "25 25" btrim - _null_ ));
DESCR("trim both ends of string"); DESCR("trim selected characters from both ends of string");
DATA(insert OID = 885 ( btrim PGNSP PGUID 14 f f t f i 1 25 "25" "select btrim($1, \' \')" - _null_ )); DATA(insert OID = 885 ( btrim PGNSP PGUID 12 f f t f i 1 25 "25" btrim1 - _null_ ));
DESCR("trim both ends of string"); DESCR("trim spaces from both ends of string");
DATA(insert OID = 936 ( substring PGNSP PGUID 12 f f t f i 3 25 "25 23 23" text_substr - _null_ )); DATA(insert OID = 936 ( substring PGNSP PGUID 12 f f t f i 3 25 "25 23 23" text_substr - _null_ ));
DESCR("return portion of string"); DESCR("return portion of string");
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: builtins.h,v 1.217 2003/05/15 15:50:20 petere Exp $ * $Id: builtins.h,v 1.218 2003/05/23 22:33:23 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -608,9 +608,12 @@ extern Datum initcap(PG_FUNCTION_ARGS); ...@@ -608,9 +608,12 @@ extern Datum initcap(PG_FUNCTION_ARGS);
extern Datum lpad(PG_FUNCTION_ARGS); extern Datum lpad(PG_FUNCTION_ARGS);
extern Datum rpad(PG_FUNCTION_ARGS); extern Datum rpad(PG_FUNCTION_ARGS);
extern Datum btrim(PG_FUNCTION_ARGS); extern Datum btrim(PG_FUNCTION_ARGS);
extern Datum btrim1(PG_FUNCTION_ARGS);
extern Datum byteatrim(PG_FUNCTION_ARGS); extern Datum byteatrim(PG_FUNCTION_ARGS);
extern Datum ltrim(PG_FUNCTION_ARGS); extern Datum ltrim(PG_FUNCTION_ARGS);
extern Datum ltrim1(PG_FUNCTION_ARGS);
extern Datum rtrim(PG_FUNCTION_ARGS); extern Datum rtrim(PG_FUNCTION_ARGS);
extern Datum rtrim1(PG_FUNCTION_ARGS);
extern Datum translate(PG_FUNCTION_ARGS); extern Datum translate(PG_FUNCTION_ARGS);
extern Datum chr(PG_FUNCTION_ARGS); extern Datum chr(PG_FUNCTION_ARGS);
extern Datum repeat(PG_FUNCTION_ARGS); extern Datum repeat(PG_FUNCTION_ARGS);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment