Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
Postgres FD Implementation
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Abuhujair Javed
Postgres FD Implementation
Commits
f2a01b0d
Commit
f2a01b0d
authored
Jan 15, 2007
by
Teodor Sigaev
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fix localization support for multibyte encoding and C locale.
Slightly reworked patch from Tatsuo Ishii
parent
7021d6f6
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
144 additions
and
52 deletions
+144
-52
contrib/tsearch2/ts_locale.c
contrib/tsearch2/ts_locale.c
+32
-15
contrib/tsearch2/ts_locale.h
contrib/tsearch2/ts_locale.h
+8
-7
contrib/tsearch2/wordparser/parser.c
contrib/tsearch2/wordparser/parser.c
+104
-30
No files found.
contrib/tsearch2/ts_locale.c
View file @
f2a01b0d
...
...
@@ -12,13 +12,13 @@
size_t
wchar2char
(
char
*
to
,
const
wchar_t
*
from
,
size_t
len
)
{
if
(
len
==
0
)
return
0
;
if
(
GetDatabaseEncoding
()
==
PG_UTF8
)
{
int
r
;
if
(
len
==
0
)
return
0
;
r
=
WideCharToMultiByte
(
CP_UTF8
,
0
,
from
,
-
1
,
to
,
len
,
NULL
,
NULL
);
...
...
@@ -34,17 +34,19 @@ wchar2char(char *to, const wchar_t *from, size_t len)
return
wcstombs
(
to
,
from
,
len
);
}
#endif
/* WIN32 */
size_t
char2wchar
(
wchar_t
*
to
,
const
char
*
from
,
size_t
len
)
{
if
(
len
==
0
)
return
0
;
#ifdef WIN32
if
(
GetDatabaseEncoding
()
==
PG_UTF8
)
{
int
r
;
if
(
len
==
0
)
return
0
;
r
=
MultiByteToWideChar
(
CP_UTF8
,
0
,
from
,
len
,
to
,
len
);
if
(
!
r
)
...
...
@@ -60,29 +62,44 @@ char2wchar(wchar_t *to, const char *from, size_t len)
return
r
;
}
else
#endif
/* WIN32 */
if
(
lc_ctype_is_c
()
)
{
/*
* pg_mb2wchar_with_len always adds trailing '\0', so
* 'to' should be allocated with sufficient space
*/
return
pg_mb2wchar_with_len
(
from
,
(
pg_wchar
*
)
to
,
len
);
}
return
mbstowcs
(
to
,
from
,
len
);
}
#endif
/* WIN32 */
int
_t_isalpha
(
const
char
*
ptr
)
{
wchar_t
character
;
wchar_t
character
[
2
];
if
(
lc_ctype_is_c
())
return
isalpha
(
TOUCHAR
(
ptr
));
char2wchar
(
&
character
,
ptr
,
1
);
char2wchar
(
character
,
ptr
,
1
);
return
iswalpha
((
wint_t
)
character
);
return
iswalpha
((
wint_t
)
*
character
);
}
int
_t_isprint
(
const
char
*
ptr
)
{
wchar_t
character
;
wchar_t
character
[
2
];
if
(
lc_ctype_is_c
())
return
isprint
(
TOUCHAR
(
ptr
));
char2wchar
(
&
character
,
ptr
,
1
);
char2wchar
(
character
,
ptr
,
1
);
return
iswprint
((
wint_t
)
character
);
return
iswprint
((
wint_t
)
*
character
);
}
#endif
/* TS_USE_WIDE */
...
...
@@ -126,7 +143,7 @@ lowerstr(char *str)
if
(
wlen
<
0
)
ereport
(
ERROR
,
(
errcode
(
ERRCODE_CHARACTER_NOT_IN_REPERTOIRE
),
errmsg
(
"trans
a
lation failed from server encoding to wchar_t"
)));
errmsg
(
"translation failed from server encoding to wchar_t"
)));
Assert
(
wlen
<=
len
);
wstr
[
wlen
]
=
0
;
...
...
@@ -152,7 +169,7 @@ lowerstr(char *str)
if
(
wlen
<
0
)
ereport
(
ERROR
,
(
errcode
(
ERRCODE_CHARACTER_NOT_IN_REPERTOIRE
),
errmsg
(
"trans
a
lation failed from wchar_t to server encoding %d"
,
errno
)));
errmsg
(
"translation failed from wchar_t to server encoding %d"
,
errno
)));
Assert
(
wlen
<=
len
);
out
[
wlen
]
=
'\0'
;
}
...
...
contrib/tsearch2/ts_locale.h
View file @
f2a01b0d
...
...
@@ -30,16 +30,17 @@
#define TOUCHAR(x) (*((unsigned char*)(x)))
#ifdef TS_USE_WIDE
size_t
char2wchar
(
wchar_t
*
to
,
const
char
*
from
,
size_t
len
);
#ifdef WIN32
size_t
wchar2char
(
char
*
to
,
const
wchar_t
*
from
,
size_t
len
);
size_t
char2wchar
(
wchar_t
*
to
,
const
char
*
from
,
size_t
len
);
#else
/* WIN32 */
/* correct mbstowcs */
#define char2wchar mbstowcs
/* correct wcstombs */
#define wchar2char wcstombs
#endif
/* WIN32 */
#define t_isdigit(x) ( pg_mblen(x)==1 && isdigit( TOUCHAR(x) ) )
...
...
@@ -55,10 +56,10 @@ extern int _t_isprint(const char *ptr);
*/
#define t_iseq(x,c) ( (pg_mblen(x)==1) ? ( TOUCHAR(x) == ((unsigned char)(c)) ) : false )
#define COPYCHAR(d,s) do { \
int lll = pg_mblen( s ); \
\
while( lll-- ) \
#define COPYCHAR(d,s) do {
\
int lll = pg_mblen( s );
\
\
while( lll-- )
\
TOUCHAR((d)+lll) = TOUCHAR((s)+lll); \
} while(0)
...
...
contrib/tsearch2/wordparser/parser.c
View file @
f2a01b0d
/* $PostgreSQL: pgsql/contrib/tsearch2/wordparser/parser.c,v 1.1
1 2006/10/04 00:29:47 momjian
Exp $ */
/* $PostgreSQL: pgsql/contrib/tsearch2/wordparser/parser.c,v 1.1
2 2007/01/15 15:16:28 teodor
Exp $ */
#include "postgres.h"
...
...
@@ -40,16 +40,13 @@ TParserInit(char *str, int len)
#ifdef TS_USE_WIDE
/*
* Use wide char code only when max encoding length > 1 and ctype != C.
* Some operating systems fail with multi-byte encodings and a C locale.
* Also, for a C locale there is no need to process as multibyte. From
* backend/utils/adt/oracle_compat.c Teodor
* Use wide char code only when max encoding length > 1.
*/
if
(
prs
->
charmaxlen
>
1
&&
!
lc_ctype_is_c
()
)
if
(
prs
->
charmaxlen
>
1
)
{
prs
->
usewide
=
true
;
prs
->
wstr
=
(
wchar_t
*
)
palloc
(
sizeof
(
wchar_t
)
*
prs
->
lenstr
);
prs
->
wstr
=
(
wchar_t
*
)
palloc
(
sizeof
(
wchar_t
)
*
(
prs
->
lenstr
+
1
)
);
prs
->
lenwstr
=
char2wchar
(
prs
->
wstr
,
prs
->
str
,
prs
->
lenstr
);
}
else
...
...
@@ -83,25 +80,99 @@ TParserClose(TParser * prs)
/*
 * defining support functions, equivalent to the is* macros, but
* working with any possible encodings and locales
 * working with any possible encodings and locales. Note that
 * with a multibyte encoding and the C locale, the isw* functions may fail
 * or give wrong results. Note 2: multibyte encodings with the C locale
 * are often used for Asian languages.
*/
#ifdef TS_USE_WIDE
#define p_iswhat(type) \
static int \
p_is##type(TParser *prs) { \
Assert( prs->state ); \
return ( ( prs->usewide ) ? isw##type( (wint_t)*( prs->wstr + prs->state->poschar ) ) : \
is##type( (unsigned char)*( prs->str + prs->state->posbyte ) ) ); \
} \
\
static int \
p_isnot##type(TParser *prs) { \
return !p_is##type(prs); \
#define p_iswhat(type) \
static int \
p_is##type(TParser *prs) { \
Assert( prs->state ); \
if ( prs->usewide ) \
{ \
if ( lc_ctype_is_c() ) \
return is##type( 0xff & *( prs->wstr + prs->state->poschar) ); \
\
return isw##type( *(wint_t*)( prs->wstr + prs->state->poschar ) ); \
} \
\
return is##type( *(unsigned char*)( prs->str + prs->state->posbyte ) ); \
} \
\
static int \
p_isnot##type(TParser *prs) { \
return !p_is##type(prs); \
}
/*
 * p_isalnum
 *		Report whether the character at the parser's current position is
 *		alphanumeric.
 *
 * prs: parser whose current state (prs->state) selects the character.
 *
 * When prs->usewide is set, the character is prs->wstr[poschar]:
 *	 - if lc_ctype_is_c() is true, any value above 0x7f counts as
 *	   alphanumeric and the remaining values are classified with isalnum();
 *	 - otherwise the character is classified with iswalnum().
 * When prs->usewide is not set, the byte prs->str[posbyte] is classified
 * with isalnum().
 *
 * Returns nonzero if the character is alphanumeric, zero otherwise.
 */
static int
p_isalnum(TParser *prs)
{
	Assert(prs->state);

	if (prs->usewide)
	{
		if (lc_ctype_is_c())
		{
			/*
			 * Fetch the element through its declared wchar_t type and let
			 * the value convert to unsigned int, exactly as p_isalpha does.
			 * Dereferencing through an unsigned int pointer instead would
			 * read the wrong number of bytes wherever wchar_t is narrower
			 * than unsigned int.
			 */
			unsigned int c = *(prs->wstr + prs->state->poschar);

			/*
			 * any non-ascii symbol with multibyte encoding
			 * with C-locale is an alpha character
			 */
			if (c > 0x7f)
				return 1;

			return isalnum(0xff & c);
		}

		return iswalnum((wint_t) *(prs->wstr + prs->state->poschar));
	}

	return isalnum(*(unsigned char *) (prs->str + prs->state->posbyte));
}
/*
 * p_isnotalnum
 *		Logical negation of p_isalnum(): returns 1 when p_isalnum() reports
 *		zero for the parser's current character, and 0 otherwise.
 */
static int
p_isnotalnum(TParser *prs)
{
	if (p_isalnum(prs))
		return 0;

	return 1;
}
/*
 * p_isalpha
 *		Report whether the character at the parser's current position is
 *		alphabetic.
 *
 * prs: parser whose current state (prs->state) selects the character.
 *
 * Without prs->usewide the byte prs->str[posbyte] is classified with
 * isalpha().  With prs->usewide the wide character prs->wstr[poschar] is
 * used: iswalpha() classifies it unless lc_ctype_is_c() is true, in which
 * case values above 0x7f are always reported as alphabetic and the rest go
 * through isalpha().
 *
 * Returns nonzero if the character is alphabetic, zero otherwise.
 */
static int
p_isalpha(TParser *prs)
{
	unsigned int code;

	Assert(prs->state);

	/* single-byte processing: classify the current byte directly */
	if (!prs->usewide)
		return isalpha(*(unsigned char *) (prs->str + prs->state->posbyte));

	/* wide processing outside the C locale: use the wide classifier */
	if (!lc_ctype_is_c())
		return iswalpha((wint_t) prs->wstr[prs->state->poschar]);

	code = prs->wstr[prs->state->poschar];

	/*
	 * any non-ascii symbol with multibyte encoding
	 * with C-locale is an alpha character
	 */
	return (code > 0x7f) ? 1 : isalpha(0xff & code);
}
/*
 * p_isnotalpha
 *		Logical negation of p_isalpha(): returns 1 when p_isalpha() reports
 *		zero for the parser's current character, and 0 otherwise.
 */
static int
p_isnotalpha(TParser *prs)
{
	if (p_isalpha(prs))
		return 0;

	return 1;
}
/* p_iseq should be used only for ascii symbols */
...
...
@@ -111,18 +182,19 @@ p_iseq(TParser * prs, char c)
Assert
(
prs
->
state
);
return
((
prs
->
state
->
charlen
==
1
&&
*
(
prs
->
str
+
prs
->
state
->
posbyte
)
==
c
))
?
1
:
0
;
}
#else
/* TS_USE_WIDE */
#define p_iswhat(type) \
static int \
p_is##type(TParser *prs) { \
Assert( prs->state ); \
return is##type( (unsigned char)*( prs->str + prs->state->posbyte ) );
\
} \
\
static int \
p_isnot##type(TParser *prs) { \
return !p_is##type(prs); \
#define p_iswhat(type)
\
static int
\
p_is##type(TParser *prs) {
\
Assert( prs->state );
\
return is##type( (unsigned char)*( prs->str + prs->state->posbyte ) ); \
}
\
\
static int
\
p_isnot##type(TParser *prs) {
\
return !p_is##type(prs);
\
}
...
...
@@ -132,10 +204,12 @@ p_iseq(TParser * prs, char c)
Assert
(
prs
->
state
);
return
(
*
(
prs
->
str
+
prs
->
state
->
posbyte
)
==
c
)
?
1
:
0
;
}
#endif
/* TS_USE_WIDE */
p_iswhat
(
alnum
)
p_iswhat
(
alpha
)
#endif
/* TS_USE_WIDE */
p_iswhat
(
digit
)
p_iswhat
(
lower
)
p_iswhat
(
print
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment