Commit 8d8bcda2 authored by Bruce Momjian's avatar Bruce Momjian

Hello!

   Attached to the mail is locale-patch.tar.gz. In the archive
   there are:

file README.locale
   short description

directory src/test/locale
   test suite; currently only koi8-r tests, but the suite can be
   easily extended

file locale.patch
   the very patch; to apply: patch < locale.patch; should be applied
   to postgres-6.3.2 (at least I created it with 6.3.2 without any
additional
   patches)

   Files touched by the patch:  src/include/utils/builtins.h
src/backend/utils/adt/char.c src/backend/utils/adt/varchar.c
src/backend/utils/adt/varlena.c

Oleg
parent 67a63691
1998 May 25
I extended locale support. Now ORDER BY (if PostgreSQL is configured with
--enable-locale) uses strcoll() for all text fields: char(n), varchar(n),
text. (I am not sure about ORDER BY char2/char4/etc.)
I included a test suite in .../src/test/locale. I didn't include it in
the regression tests because not many people require locale support. Read
.../src/test/locale/README for details on the test suite.
Many thanks to Oleg Bartunov (oleg@sai.msu.su) and Thomas G. Lockhart
(lockhart@alumni.caltech.edu) for hints, tips, help and discussion.
Oleg.
----
Oleg Broytmann http://members.tripod.com/~phd2/ phd2@earthling.net
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/adt/varchar.c,v 1.33 1998/06/15 19:29:38 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/utils/adt/varchar.c,v 1.34 1998/06/16 06:41:50 momjian Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -475,7 +475,7 @@ bpcharlt(char *arg1, char *arg2)
len1 = bcTruelen(arg1);
len2 = bcTruelen(arg2);
cmp = strncmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
if (cmp == 0)
return (len1 < len2);
else
......@@ -494,7 +494,7 @@ bpcharle(char *arg1, char *arg2)
len1 = bcTruelen(arg1);
len2 = bcTruelen(arg2);
cmp = strncmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
if (0 == cmp)
return (bool) (len1 <= len2 ? 1 : 0);
else
......@@ -513,7 +513,7 @@ bpchargt(char *arg1, char *arg2)
len1 = bcTruelen(arg1);
len2 = bcTruelen(arg2);
cmp = strncmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
if (cmp == 0)
return (len1 > len2);
else
......@@ -532,7 +532,7 @@ bpcharge(char *arg1, char *arg2)
len1 = bcTruelen(arg1);
len2 = bcTruelen(arg2);
cmp = strncmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
if (0 == cmp)
return (bool) (len1 >= len2 ? 1 : 0);
else
......@@ -549,7 +549,7 @@ bpcharcmp(char *arg1, char *arg2)
len1 = bcTruelen(arg1);
len2 = bcTruelen(arg2);
cmp = strncmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
if ((0 == cmp) && (len1 != len2))
return (int32) (len1 < len2 ? -1 : 1);
else
......@@ -641,7 +641,7 @@ varcharlt(char *arg1, char *arg2)
len1 = VARSIZE(arg1) - VARHDRSZ;
len2 = VARSIZE(arg2) - VARHDRSZ;
cmp = strncmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
if (cmp == 0)
return (len1 < len2);
else
......@@ -660,7 +660,7 @@ varcharle(char *arg1, char *arg2)
len1 = VARSIZE(arg1) - VARHDRSZ;
len2 = VARSIZE(arg2) - VARHDRSZ;
cmp = strncmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
if (0 == cmp)
return (bool) (len1 <= len2 ? 1 : 0);
else
......@@ -679,7 +679,7 @@ varchargt(char *arg1, char *arg2)
len1 = VARSIZE(arg1) - VARHDRSZ;
len2 = VARSIZE(arg2) - VARHDRSZ;
cmp = strncmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
if (cmp == 0)
return (len1 > len2);
else
......@@ -698,7 +698,7 @@ varcharge(char *arg1, char *arg2)
len1 = VARSIZE(arg1) - VARHDRSZ;
len2 = VARSIZE(arg2) - VARHDRSZ;
cmp = strncmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
if (0 == cmp)
return (bool) (len1 >= len2 ? 1 : 0);
else
......@@ -715,7 +715,7 @@ varcharcmp(char *arg1, char *arg2)
len1 = VARSIZE(arg1) - VARHDRSZ;
len2 = VARSIZE(arg2) - VARHDRSZ;
cmp = (strncmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2)));
cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
if ((0 == cmp) && (len1 != len2))
return (int32) (len1 < len2 ? -1 : 1);
else
......
......@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.36 1998/06/15 19:29:38 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.37 1998/06/16 06:41:51 momjian Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -468,116 +468,87 @@ textne(text *arg1, text *arg2)
return ((bool) !texteq(arg1, arg2));
}
/* text_lt()
* Comparison function for text strings.
/* varstr_cmp()
* Comparison function for text strings with given lengths.
* Includes locale support, but must copy strings to temporary memory
* to allow null-termination for inputs to strcoll().
* XXX HACK code for textlen() indicates that there can be embedded nulls
* but it appears that most routines (incl. this one) assume not! - tgl 97/04/07
* Returns -1, 0 or 1
*/
bool
text_lt(text *arg1, text *arg2)
int
varstr_cmp(unsigned char *arg1, int len1, unsigned char *arg2, int len2)
{
bool result;
#ifdef USE_LOCALE
int cval;
#endif
int len;
unsigned char *a1p,
*a2p;
if (arg1 == NULL || arg2 == NULL)
return ((bool) FALSE);
len = (((VARSIZE(arg1) <= VARSIZE(arg2)) ? VARSIZE(arg1) : VARSIZE(arg2)) - VARHDRSZ);
int result;
unsigned char *a1p, *a2p;
#ifdef USE_LOCALE
a1p = (unsigned char *) palloc(len + 1);
a2p = (unsigned char *) palloc(len + 1);
a1p = (unsigned char *) palloc(len1 + 1);
a2p = (unsigned char *) palloc(len2 + 1);
memcpy(a1p, VARDATA(arg1), len);
*(a1p + len) = '\0';
memcpy(a2p, VARDATA(arg2), len);
*(a2p + len) = '\0';
memcpy(a1p, arg1, len1);
*(a1p + len1) = '\0';
memcpy(a2p, arg2, len2);
*(a2p + len2) = '\0';
cval = strcoll(a1p, a2p);
result = ((cval < 0) || ((cval == 0) && (VARSIZE(arg1) < VARSIZE(arg2))));
result = strcoll(a1p, a2p);
pfree(a1p);
pfree(a2p);
#else
a1p = (unsigned char *) VARDATA(arg1);
a2p = (unsigned char *) VARDATA(arg2);
while (len != 0 && *a1p == *a2p)
{
a1p++;
a2p++;
len--;
};
a1p = arg1;
a2p = arg2;
result = (len ? (*a1p < *a2p) : (VARSIZE(arg1) < VARSIZE(arg2)));
result = strncmp(a1p, a2p, Min(len1, len2));
if ((result == 0) && (len1 != len2))
result = (len1 < len2) ? -1 : 1;
#endif
return (result);
} /* text_lt() */
} /* varstr_cmp() */
/* text_le()
/* text_cmp()
* Comparison function for text strings.
* Includes locale support, but must copy strings to temporary memory
* to allow null-termination for inputs to strcoll().
* XXX HACK code for textlen() indicates that there can be embedded nulls
* but it appears that most routines (incl. this one) assume not! - tgl 97/04/07
* Returns -1, 0 or 1
*/
bool
text_le(text *arg1, text *arg2)
int
text_cmp(text *arg1, text *arg2)
{
bool result;
#ifdef USE_LOCALE
int cval;
#endif
int len;
unsigned char *a1p,
*a2p;
unsigned char *a1p, *a2p;
int len1, len2;
if (arg1 == NULL || arg2 == NULL)
return ((bool) 0);
len = (((VARSIZE(arg1) <= VARSIZE(arg2)) ? VARSIZE(arg1) : VARSIZE(arg2)) - VARHDRSZ);
#ifdef USE_LOCALE
a1p = (unsigned char *) palloc(len + 1);
a2p = (unsigned char *) palloc(len + 1);
memcpy(a1p, VARDATA(arg1), len);
*(a1p + len) = '\0';
memcpy(a2p, VARDATA(arg2), len);
*(a2p + len) = '\0';
cval = strcoll(a1p, a2p);
result = ((cval < 0) || ((cval == 0) && (VARSIZE(arg1) <= VARSIZE(arg2))));
return ((bool) FALSE);
pfree(a1p);
pfree(a2p);
#else
a1p = (unsigned char *) VARDATA(arg1);
a2p = (unsigned char *) VARDATA(arg2);
while (len != 0 && *a1p == *a2p)
{
a1p++;
a2p++;
len--;
};
len1 = VARSIZE(arg1) - VARHDRSZ;
len2 = VARSIZE(arg2) - VARHDRSZ;
result = (len ? (*a1p <= *a2p) : (VARSIZE(arg1) <= VARSIZE(arg2)));
#endif
return varstr_cmp(a1p, len1, a2p, len2);
} /* text_cmp() */
return (result);
/* text_lt()
 * "Less than" operator for text strings: true when text_cmp() orders
 * arg1 strictly before arg2.
 */
bool
text_lt(text *arg1, text *arg2)
{
	int			cmp = text_cmp(arg1, arg2);

	return (bool) (cmp < 0);
}	/* text_lt() */
/* text_le()
 * "Less than or equal" operator for text strings: true when text_cmp()
 * orders arg1 before arg2 or reports them equal.
 *
 * text_cmp() answers 0 ("equal") when either argument is NULL, so a bare
 * "<= 0" test would report true for NULL input.  Check for NULL here so
 * that the result is false in that case, the same answer text_lt() gives.
 */
bool
text_le(text *arg1, text *arg2)
{
	if (arg1 == NULL || arg2 == NULL)
		return ((bool) FALSE);

	return (bool) (text_cmp(arg1, arg2) <= 0);
}	/* text_le() */
bool
......
......@@ -6,7 +6,7 @@
*
* Copyright (c) 1994, Regents of the University of California
*
* $Id: builtins.h,v 1.43 1998/05/29 13:37:29 thomas Exp $
* $Id: builtins.h,v 1.44 1998/06/16 06:41:51 momjian Exp $
*
* NOTES
* This should normally only be included by fmgr.h.
......@@ -428,6 +428,8 @@ extern char *textout(text *vlena);
extern text *textcat(text *arg1, text *arg2);
extern bool texteq(text *arg1, text *arg2);
extern bool textne(text *arg1, text *arg2);
extern int varstr_cmp(unsigned char *arg1, int len1, unsigned char *arg2, int len2);
extern int text_cmp(text *arg1, text *arg2);
extern bool text_lt(text *arg1, text *arg2);
extern bool text_le(text *arg1, text *arg2);
extern bool text_gt(text *arg1, text *arg2);
......
#
# Makefile for the locale test programs
#
# "make all" builds the helper programs listed in PROGS.
# "make test-<dir>" builds them and then runs "make test" inside <dir>,
# which should be one of the per-locale directories listed in DIRS.
#

SRCDIR= ../..
include ../../Makefile.global

#
# And where libpq goes, so goes the authentication stuff...
#
ifdef KRBVERS
LDFLAGS+= $(KRBLIBS)
CFLAGS+= $(KRBFLAGS)
endif

PROGS = test-pgsql-locale test-ctype
DIRS = koi8-r

# These targets never create a file of the same name.
.PHONY: all clean

all: $(PROGS)

$(PROGS): % : %.c
	$(CC) $(CFLAGS) -o $@ $@.c $(LDFLAGS)

# Use "make -C" for the subdirectories: with a plain "cd $$d; make clean",
# a missing directory would make the loop run "make clean" in this
# directory again instead of failing.
clean:
	rm -f $(PROGS) *.out
	for d in $(DIRS); do \
		$(MAKE) -C $$d clean || exit 1; \
	done

# test-<dir>: strip the "test-" prefix to get the directory to test in.
test-%: all
	@cd `echo $@ | sed s/^test-//` && $(MAKE) test
1998 May 25
This directory contains a set of tests for locales. I provided one C
program, test-ctype.c, to test CTYPE support in libc and the installed
locale data. Then there is sort-test.pl, which tests collation. (There is
also sort-test.py, but it is commented out in the scripts; uncomment it if
you have a Python interpreter installed.)
To run a test for some locale (koi8-r, e.g.) run
make all test-$locale
(for example)
make all test-koi8-r
Currently, only tests for the koi8-r locale (Russian Cyrillic for UN*X) are
provided, in the koi8-r directory. The script `runall' calls test-pgsql-locale
to test whether locale support has been compiled into PostgreSQL, test-ctype
to test libc and the locale data, sort-test.pl (uncomment sort-test.py if you
have a Python interpreter installed), and then runs tests on PostgreSQL with
test-koi8*.sql.in.
To add locale tests one needs to create a directory $locale and create a
Makefile (and other files) similar to koi8-r/*. Actually, the simplest (I
think) method is just to copy the koi8-r directory and edit/replace files.
Oleg.
----
Oleg Broytmann http://members.tripod.com/~phd2/ phd2@earthling.net
#
# Makefile for a per-locale test directory
#
# There is nothing to build here; "make test" runs the runall script and
# "make clean" removes the *.out files.
#

# None of these targets produces a file of the same name; declaring them
# phony keeps them working even if such a file shows up in this directory.
.PHONY: all test clean

all:
	-@echo "make: Nothing to be done for \`all'."

test:
	@./runall

clean:
	rm -f *.out
This diff is collapsed.
abbrev|name_en |name_ru
------+--------------------+------------------------------------------------------------
ID |Idaho |
IA |Iowa |
AL |Alabama |
AK |Alaska |
AZ |Arizona |
AR |Arkansas |
WY |Wyoming |
WA |Washington |
VT |Vermont |
VA |Virginia |
WI |Wisconsin |
DE |Delaware |
GA |Georgia |
WV |West Virginia |
IL |Illinois |
IN |Indiana |
CA |California |
KA |Kansas |
KY |Kentucky |
CO |Colorado |
CT |Connecticut |
LA |Louisiana |
MA |Massachusetts |
MN |Minnesota |
MS |Mississippi |
MO |Missouri |
MI |Michigan |
MT |Montana |
ME |Maine |
MD |Maryland |
NE |Nebraska |
NV |Nevada |
NH |New Hampshire |-
NJ |New Jersey |-
NY |New York |-
NM |New Mexico |-
OH |Ohio |
OK |Oklahoma |
DC |Washington DC | ( )
OR |Oregon |
PA |Pennsylvania |
RI |Rhode Island |-
ND |North Dakota |
NC |North Carolina |
TN |Tennessee |
TX |Texas |
FL |Florida |
SD |South Dakota |
SC |South Carolina |
UT |Utah |
(50 rows)
EOF
abbrev|name_en |name_ru
------+--------------------+----------------------------------------------
OH |Ohio |
OK |Oklahoma |
DC |Washington DC | ( )
OR |Oregon |
(4 rows)
EOF
abbrev|name_en |name_ru
------+--------------------+----------------------------------------------
ID |Idaho |
IA |Iowa |
AL |Alabama |
AK |Alaska |
AZ |Arizona |
AR |Arkansas |
WY |Wyoming |
WA |Washington |
VT |Vermont |
VA |Virginia |
WI |Wisconsin |
DE |Delaware |
GA |Georgia |
WV |West Virginia |
IL |Illinois |
IN |Indiana |
CA |California |
KA |Kansas |
KY |Kentucky |
CO |Colorado |
CT |Connecticut |
LA |Louisiana |
MA |Massachusetts |
MN |Minnesota |
MS |Mississippi |
MO |Missouri |
MI |Michigan |
MT |Montana |
ME |Maine |
MD |Maryland |
NE |Nebraska |
NV |Nevada |
NH |New Hampshire |-
NJ |New Jersey |-
NY |New York |-
NM |New Mexico |-
OH |Ohio |
OK |Oklahoma |
DC |Washington DC | ( )
OR |Oregon |
PA |Pennsylvania |
RI |Rhode Island |-
ND |North Dakota |
NC |North Carolina |
TN |Tennessee |
TX |Texas |
FL |Florida |
SD |South Dakota |
SC |South Carolina |
UT |Utah |
(50 rows)
EOF
abbrev|name_en |name_ru
------+--------------------+----------------------------------------------
ID |Idaho |
IA |Iowa |
AL |Alabama |
AK |Alaska |
AZ |Arizona |
AR |Arkansas |
WY |Wyoming |
WA |Washington |
VT |Vermont |
VA |Virginia |
WI |Wisconsin |
DE |Delaware |
GA |Georgia |
WV |West Virginia |
IL |Illinois |
IN |Indiana |
CA |California |
KA |Kansas |
KY |Kentucky |
CO |Colorado |
CT |Connecticut |
LA |Louisiana |
MA |Massachusetts |
MN |Minnesota |
MS |Mississippi |
MO |Missouri |
MI |Michigan |
MT |Montana |
ME |Maine |
MD |Maryland |
NE |Nebraska |
NV |Nevada |
NH |New Hampshire |-
NJ |New Jersey |-
NY |New York |-
NM |New Mexico |-
OH |Ohio |
OK |Oklahoma |
DC |Washington DC | ( )
OR |Oregon |
PA |Pennsylvania |
RI |Rhode Island |-
ND |North Dakota |
NC |North Carolina |
TN |Tennessee |
TX |Texas |
FL |Florida |
SD |South Dakota |
SC |South Carolina |
UT |Utah |
(50 rows)
EOF
#! /bin/sh

# First part of the locale test driver: check that PostgreSQL was built
# with locale support, then check the C library's character classification
# and Perl's collation under the ru_RU.KOI8-R locale.  Differences from the
# expected output are shown by diff but do not stop the run.

# The helper programs are built one directory up.
PATH=..:$PATH

echo "Testing PostgreSQL compilation..."
if ! test-pgsql-locale; then
	exit 1
fi

# LC_ALL, when set, takes precedence over the individual LC_* variables.
# Clear it so that the settings below are the ones actually used.
unset LC_ALL
LC_CTYPE=ru_RU.KOI8-R
LC_COLLATE=$LC_CTYPE
export LC_CTYPE LC_COLLATE

echo "Testing LC_CTYPE..."
if ! test-ctype > koi8-ctype.out; then
	exit 1
fi
diff expected/koi8-ctype.out koi8-ctype.out

echo "Testing LC_COLLATE..."
perl ../sort-test.pl test-koi8-sort.in > test-koi8-sort.out
diff expected/test-koi8-sort.out test-koi8-sort.out

### If you have Python - uncomment the following two lines
#python ../sort-test.py test-koi8-sort.in > test-koi8-sort.out
#diff expected/test-koi8-sort.out test-koi8-sort.out
# abort [message...]
#   Print the message when a non-empty first argument is given, then leave
#   the script with exit status 1.
abort() {
	if [ "$1" ]; then
		echo "$*"
	fi
	exit 1
}
# Database part of the run: for each string column type, recreate the
# scratch database "testlocale", create the usastates table with name_ru
# declared as that type, feed test-koi8.sql.in to psql and compare the
# output with the expected file.
for f in char varchar text; do
	# char and varchar get an explicit length of 60; text is used as is.
	case "$f" in
		*char*)
			ftype="$f(60)"
			;;
		*)
			ftype="$f"
			;;
	esac

	echo "Testing PgSQL: sort on $ftype type..."

	# The database may not exist yet, so a destroydb failure is ignored here.
	destroydb testlocale >/dev/null 2>&1
	createdb testlocale || abort "createdb failed"

	psql -d testlocale -c "CREATE TABLE usastates (abbrev char2, name_en char(20), name_ru $ftype);" >/dev/null 2>&1 || abort "createtable failed"
	psql testlocale < test-koi8.sql.in > test-koi8-$f.sql.out 2>/dev/null || abort "test query failed"
	diff expected/test-koi8-$f.sql.out test-koi8-$f.sql.out
done

# The regexp test reuses the database left behind by the last loop pass.
echo "Testing PgSQL: select on regexp..."
psql testlocale < test-koi8-select.sql.in > test-koi8-select.sql.out 2>/dev/null || abort "select query failed"
diff expected/test-koi8-select.sql.out test-koi8-select.sql.out

destroydb testlocale || abort "destroydb failed"
echo "Finished."
SELECT * FROM usastates WHERE name_ru ~* '^.*' ORDER BY name_ru;
Vesta
vesta
Bording
hoarding
COPY usastates FROM stdin USING DELIMITERS '|';
AK|Alaska |
WA|Washington |
OR|Oregon |
CA|California |
NV|Nevada |
ID|Idaho |
UT|Utah |
AZ|Arizona |
MT|Montana |
WY|Wyoming |
CO|Colorado |
NM|New Mexico |-
ND|North Dakota |
SD|South Dakota |
NE|Nebraska |
KA|Kansas |
OK|Oklahoma |
TX|Texas |
MN|Minnesota |
IA|Iowa |
MO|Missouri |
AR|Arkansas |
LA|Louisiana |
WI|Wisconsin |
IL|Illinois |
IN|Indiana |
MS|Mississippi |
AL|Alabama |
MI|Michigan |
OH|Ohio |
KY|Kentucky |
TN|Tennessee |
GA|Georgia |
FL|Florida |
PA|Pennsylvania |
WV|West Virginia |
VA|Virginia |
NC|North Carolina|
SC|South Carolina|
NY|New York |-
NJ|New Jersey |-
DE|Delaware |
MD|Maryland |
DC|Washington DC | ( )
VT|Vermont |
MA|Massachusetts |
CT|Connecticut |
ME|Maine |
NH|New Hampshire |-
RI|Rhode Island |-
\.
SELECT * FROM usastates ORDER BY name_ru;
#!/usr/local/bin/perl -w
# Read the file named on the command line, sort its lines using the
# collation order of the current locale ("use locale"), and print the
# sorted lines, one per line.
use locale;

die "Usage: $0 filename\n" unless @ARGV == 1;

open(INFILE, "<", $ARGV[0]) or die "Cannot open $ARGV[0]: $!\n";
# chomp strips only a line terminator; chop would also eat the last
# character of a final line that has no trailing newline.
chomp(my(@words) = <INFILE>);
close(INFILE);

my(@result) = sort @words;
print join("\n", @result), "\n";
#! /usr/local/bin/python
"""Print the lines of a file sorted by the current locale's collation order."""

import sys, string, locale
import functools


def sort_lines(lines):
    """Return the given lines, without their newline, in collation order.

    Only a trailing newline is removed, so a final line that lacks one
    keeps its last character.  Ordering is decided by locale.strcoll()
    under whatever locale is in effect when this is called.
    """
    words = []
    for line in lines:
        if line.endswith("\n"):
            line = line[:-1]
        words.append(line)
    words.sort(key=functools.cmp_to_key(locale.strcoll))
    return words


def main(argv):
    """Sort the file named by argv[1]; return the process exit status."""
    # Pick up the locale from the environment (LC_ALL, LC_COLLATE, LANG).
    locale.setlocale(locale.LC_ALL, "")

    if len(argv) != 2:
        sys.stderr.write("Usage: sort.py filename\n")
        return 1

    with open(argv[1], 'r') as infile:
        lines = infile.readlines()

    sys.stdout.write("\n".join(sort_lines(lines)) + "\n")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
/*
test-ctype.c
Written by Oleg BroytMann, phd2@earthling.net
with help from Oleg Bartunov, oleg@sai.msu.su
Copyright (C) 1998 PhiloSoft Design
This is copyrighted but free software. You can use it, modify and distribute
in original or modified form providing that the author's names and the above
copyright notice will remain.
Disclaimer, legal notice and absence of warranty.
This software provided "as is" without any kind of warranty. In no event
the author shall be liable for any damage, etc.
*/
#include <stdio.h>
#include <locale.h>
#include <ctype.h>
char * flag(int b);
void describe_char(int c);
#undef LONG_FLAG
/*
 * flag
 *	Map a truth value to the text shown in a table column: any nonzero b
 *	gives the "set" marker, zero gives the "clear" marker.  The markers are
 *	"yes"/"no" when LONG_FLAG is defined and "+"/" " otherwise.
 */
char * flag(int b)
{
#ifdef LONG_FLAG
	static char *const marks[] = {"no", "yes"};
#else
	static char *const marks[] = {" ", "+"};
#endif

	return marks[b != 0];
}
/*
 * describe_char
 *	Print one table row for character code c: the code, the character
 *	itself, one flag() column for each <ctype.h> classification test, and
 *	the lower- and upper-case forms.  Characters that are not printable
 *	are shown as a blank.
 *
 *	main() calls this with c in the range 0..255.
 */
void describe_char(int c)
{
	/*
	 * These must be unsigned char: the <ctype.h> functions are only
	 * defined for EOF and values representable as unsigned char.  With a
	 * plain (possibly signed) char, codes 128..255 turn negative, and 255
	 * becomes -1, i.e. EOF, so it would always be reported as non-printable.
	 */
	unsigned char cp = c, up = toupper(c), lo = tolower(c);

	if (!isprint(cp)) cp = ' ';
	if (!isprint(up)) up = ' ';
	if (!isprint(lo)) lo = ' ';

	printf("chr#%-4d%2c%6s%6s%6s%6s%6s%6s%6s%6s%6s%6s%6s%4c%4c\n", c, cp, flag(isalnum(c)), flag(isalpha(c)), flag(iscntrl(c)), flag(isdigit(c)), flag(islower(c)), flag(isgraph(c)), flag(isprint(c)), flag(ispunct(c)), flag(isspace(c)), flag(isupper(c)), flag(isxdigit(c)), lo, up);
}
/*
 * main
 *	Set the locale from the environment, then print a header line and one
 *	describe_char() row for every character code from 0 to 255.
 *
 *	Returns 0 on success, or 1 if setlocale() fails.  The status messages
 *	go to stderr; the table goes to stdout.
 */
int main(void) {
	int c;
	char * cur_locale;

	cur_locale = setlocale(LC_ALL, "");
	if (cur_locale) {
		fprintf(stderr, "Successfully set locale to %s\n", cur_locale);
	} else {
		fprintf(stderr, "Cannot setup locale. Either your libc does not provide\nlocale support, or your locale data is corrupt, or you have not set\nLANG or LC_CTYPE environment variable to proper value. Program aborted.\n");
		return 1;
	}

	printf("char# char alnum alpha cntrl digit lower graph print punct space upper xdigit lo up\n");
	for (c = 0; c <= 255; c++)
		describe_char(c);

	return 0;
}
#include <stdio.h>
#include "postgres.h"
/*
 * main
 *	Report whether USE_LOCALE was defined when this program was compiled
 *	(presumably via postgres.h -- confirm against the build configuration).
 *	Prints one line to stdout and returns 0 if it was defined, 1 if not.
 */
int main() {
	int status;

#ifdef USE_LOCALE
	printf("PostgreSQL compiled with locale support\n");
	status = 0;
#else
	printf("PostgreSQL compiled without locale support\n");
	status = 1;
#endif

	return status;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment