Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
Postgres FD Implementation
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Abuhujair Javed
Postgres FD Implementation
Commits
59559458
Commit
59559458
authored
Jun 15, 2005
by
Bruce Momjian
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Support 3 and 4-byte unicode characters.
John Hansen
parent
f4c4f1ce
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
76 additions
and
40 deletions
+76
-40
src/backend/utils/mb/conv.c
src/backend/utils/mb/conv.c
+9
-2
src/backend/utils/mb/wchar.c
src/backend/utils/mb/wchar.c
+64
-37
src/include/mb/pg_wchar.h
src/include/mb/pg_wchar.h
+3
-1
No files found.
src/backend/utils/mb/conv.c
View file @
59559458
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/mb/conv.c,v 1.5
2 2005/03/07 04:30:52
momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/conv.c,v 1.5
3 2005/06/15 00:15:08
momjian Exp $
*
*
*-------------------------------------------------------------------------
*-------------------------------------------------------------------------
*/
*/
...
@@ -361,12 +361,19 @@ UtfToLocal(unsigned char *utf, unsigned char *iso,
...
@@ -361,12 +361,19 @@ UtfToLocal(unsigned char *utf, unsigned char *iso,
iutf
=
*
utf
++
<<
8
;
iutf
=
*
utf
++
<<
8
;
iutf
|=
*
utf
++
;
iutf
|=
*
utf
++
;
}
}
else
else
if
(
l
==
3
)
{
{
iutf
=
*
utf
++
<<
16
;
iutf
=
*
utf
++
<<
16
;
iutf
|=
*
utf
++
<<
8
;
iutf
|=
*
utf
++
<<
8
;
iutf
|=
*
utf
++
;
iutf
|=
*
utf
++
;
}
}
else
if
(
l
==
4
)
{
iutf
=
*
utf
++
<<
24
;
iutf
|=
*
utf
++
<<
16
;
iutf
|=
*
utf
++
<<
8
;
iutf
|=
*
utf
++
;
}
p
=
bsearch
(
&
iutf
,
map
,
size
,
p
=
bsearch
(
&
iutf
,
map
,
size
,
sizeof
(
pg_utf_to_local
),
compare1
);
sizeof
(
pg_utf_to_local
),
compare1
);
if
(
p
==
NULL
)
if
(
p
==
NULL
)
...
...
src/backend/utils/mb/wchar.c
View file @
59559458
/*
/*
* conversion functions between pg_wchar and multibyte streams.
* conversion functions between pg_wchar and multibyte streams.
* Tatsuo Ishii
* Tatsuo Ishii
* $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.4
3 2005/03/14 18:31:20
momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.4
4 2005/06/15 00:15:08
momjian Exp $
*
*
* WIN1250 client encoding updated by Pavel Behal
* WIN1250 client encoding updated by Pavel Behal
*
*
...
@@ -406,8 +406,14 @@ pg_utf_mblen(const unsigned char *s)
...
@@ -406,8 +406,14 @@ pg_utf_mblen(const unsigned char *s)
len
=
1
;
len
=
1
;
else
if
((
*
s
&
0xe0
)
==
0xc0
)
else
if
((
*
s
&
0xe0
)
==
0xc0
)
len
=
2
;
len
=
2
;
else
if
((
*
s
&
0xe0
)
==
0xe0
)
else
if
((
*
s
&
0xf0
)
==
0xe0
)
len
=
3
;
len
=
3
;
else
if
((
*
s
&
0xf8
)
==
0xf0
)
len
=
4
;
else
if
((
*
s
&
0xfc
)
==
0xf8
)
len
=
5
;
else
if
((
*
s
&
0xfe
)
==
0xfc
)
len
=
6
;
return
(
len
);
return
(
len
);
}
}
...
@@ -721,7 +727,7 @@ pg_wchar_tbl pg_wchar_table[] = {
...
@@ -721,7 +727,7 @@ pg_wchar_tbl pg_wchar_table[] = {
{
pg_euckr2wchar_with_len
,
pg_euckr_mblen
,
pg_euckr_dsplen
,
3
},
/* 3; PG_EUC_KR */
{
pg_euckr2wchar_with_len
,
pg_euckr_mblen
,
pg_euckr_dsplen
,
3
},
/* 3; PG_EUC_KR */
{
pg_euctw2wchar_with_len
,
pg_euctw_mblen
,
pg_euctw_dsplen
,
3
},
/* 4; PG_EUC_TW */
{
pg_euctw2wchar_with_len
,
pg_euctw_mblen
,
pg_euctw_dsplen
,
3
},
/* 4; PG_EUC_TW */
{
pg_johab2wchar_with_len
,
pg_johab_mblen
,
pg_johab_dsplen
,
3
},
/* 5; PG_JOHAB */
{
pg_johab2wchar_with_len
,
pg_johab_mblen
,
pg_johab_dsplen
,
3
},
/* 5; PG_JOHAB */
{
pg_utf2wchar_with_len
,
pg_utf_mblen
,
pg_utf_dsplen
,
3
},
/* 6; PG_UTF8 */
{
pg_utf2wchar_with_len
,
pg_utf_mblen
,
pg_utf_dsplen
,
4
},
/* 6; PG_UTF8 */
{
pg_mule2wchar_with_len
,
pg_mule_mblen
,
pg_mule_dsplen
,
3
},
/* 7; PG_MULE_INTERNAL */
{
pg_mule2wchar_with_len
,
pg_mule_mblen
,
pg_mule_dsplen
,
3
},
/* 7; PG_MULE_INTERNAL */
{
pg_latin12wchar_with_len
,
pg_latin1_mblen
,
pg_latin1_dsplen
,
1
},
/* 8; PG_LATIN1 */
{
pg_latin12wchar_with_len
,
pg_latin1_mblen
,
pg_latin1_dsplen
,
1
},
/* 8; PG_LATIN1 */
{
pg_latin12wchar_with_len
,
pg_latin1_mblen
,
pg_latin1_dsplen
,
1
},
/* 9; PG_LATIN2 */
{
pg_latin12wchar_with_len
,
pg_latin1_mblen
,
pg_latin1_dsplen
,
1
},
/* 9; PG_LATIN2 */
...
@@ -800,6 +806,31 @@ pg_encoding_max_length(int encoding)
...
@@ -800,6 +806,31 @@ pg_encoding_max_length(int encoding)
#ifndef FRONTEND
#ifndef FRONTEND
/*
 * pg_utf8_islegal
 *
 * Check whether the "length" bytes starting at "source" form one legal
 * UTF-8 encoded character.
 *
 *	source: pointer to the lead byte of the candidate sequence
 *	length: number of bytes in the sequence; anything outside 1..4 is
 *			rejected
 *
 * Returns true when the sequence is legal, false otherwise.
 *
 * Rejected are: continuation bytes outside 0x80..0xBF, overlong forms
 * (lead bytes 0xC0/0xC1, 0xE0 followed by < 0xA0, 0xF0 followed by < 0x90),
 * the surrogate range (0xED followed by > 0x9F), and anything above
 * U+10FFFF (0xF4 followed by > 0x8F, or a lead byte > 0xF4).
 *
 * Note: this routine does not itself verify that "length" matches the
 * length implied by the lead byte; the caller is expected to pass the
 * value derived from the lead byte.
 */
bool
pg_utf8_islegal(const unsigned char *source, int length)
{
	unsigned char a;

	switch (length)
	{
		default:
			/* lengths <= 0 and >= 5 are never legal */
			return false;

		case 4:
			/* fourth byte must be a plain continuation byte */
			a = source[3];
			if (a < 0x80 || a > 0xBF)
				return false;
			/* FALL THRU */

		case 3:
			/* third byte must be a plain continuation byte */
			a = source[2];
			if (a < 0x80 || a > 0xBF)
				return false;
			/* FALL THRU */

		case 2:

			/*
			 * The allowed range of the second byte depends on the lead byte.
			 * Every branch checks both bounds, so a second byte below 0x80
			 * can no longer slip through for lead bytes 0xED and 0xF4.
			 */
			a = source[1];
			switch (*source)
			{
					/* no fall-through in this inner switch */
				case 0xE0:
					/* below 0xA0 would be an overlong 3-byte form */
					if (a < 0xA0 || a > 0xBF)
						return false;
					break;
				case 0xED:
					/* above 0x9F would encode a UTF-16 surrogate */
					if (a < 0x80 || a > 0x9F)
						return false;
					break;
				case 0xF0:
					/* below 0x90 would be an overlong 4-byte form */
					if (a < 0x90 || a > 0xBF)
						return false;
					break;
				case 0xF4:
					/* above 0x8F would exceed U+10FFFF */
					if (a < 0x80 || a > 0x8F)
						return false;
					break;
				default:
					if (a < 0x80 || a > 0xBF)
						return false;
					break;
			}
			/* FALL THRU */

		case 1:
			/* 0x80..0xBF are bare continuation bytes; 0xC0/0xC1 are overlong */
			if (*source >= 0x80 && *source < 0xC2)
				return false;
			break;
	}

	/* lead bytes above 0xF4 would encode values beyond U+10FFFF */
	if (*source > 0xF4)
		return false;

	return true;
}
/*
/*
* Verify mbstr to make sure that it has a valid character sequence.
* Verify mbstr to make sure that it has a valid character sequence.
* mbstr is not necessarily NULL terminated; length of mbstr is
* mbstr is not necessarily NULL terminated; length of mbstr is
...
@@ -823,51 +854,47 @@ pg_verifymbstr(const unsigned char *mbstr, int len, bool noError)
...
@@ -823,51 +854,47 @@ pg_verifymbstr(const unsigned char *mbstr, int len, bool noError)
while
(
len
>
0
&&
*
mbstr
)
while
(
len
>
0
&&
*
mbstr
)
{
{
/* special UTF8 check */
if
(
encoding
==
PG_UTF8
&&
(
*
mbstr
&
0xf8
)
==
0xf0
)
{
if
(
noError
)
return
false
;
ereport
(
ERROR
,
(
errcode
(
ERRCODE_CHARACTER_NOT_IN_REPERTOIRE
),
errmsg
(
"Unicode characters greater than or equal to 0x10000 are not supported"
)));
}
l
=
pg_mblen
(
mbstr
);
l
=
pg_mblen
(
mbstr
);
for
(
i
=
1
;
i
<
l
;
i
++
)
/* special UTF-8 check */
{
if
(
encoding
==
PG_UTF8
)
{
/*
if
(
!
pg_utf8_islegal
(
mbstr
,
l
))
{
* we expect that every multibyte char consists of bytes
if
(
noError
)
return
false
;
* having the 8th bit set
ereport
(
ERROR
,(
errcode
(
ERRCODE_CHARACTER_NOT_IN_REPERTOIRE
),
errmsg
(
"Invalid UNICODE byte sequence detected near byte %c"
,
*
mbstr
)));
*/
}
if
(
i
>=
len
||
(
mbstr
[
i
]
&
0x80
)
==
0
)
}
else
{
for
(
i
=
1
;
i
<
l
;
i
++
)
{
{
char
buf
[
8
*
2
+
1
];
/*
char
*
p
=
buf
;
* we expect that every multibyte char consists of bytes
int
j
,
* having the 8th bit set
*/
if
(
i
>=
len
||
(
mbstr
[
i
]
&
0x80
)
==
0
)
{
char
buf
[
8
*
2
+
1
];
char
*
p
=
buf
;
int
j
,
jlimit
;
jlimit
;
if
(
noError
)
if
(
noError
)
return
false
;
return
false
;
jlimit
=
Min
(
l
,
len
);
jlimit
=
Min
(
l
,
len
);
jlimit
=
Min
(
jlimit
,
8
);
/* prevent buffer overrun */
jlimit
=
Min
(
jlimit
,
8
);
/* prevent buffer overrun */
for
(
j
=
0
;
j
<
jlimit
;
j
++
)
for
(
j
=
0
;
j
<
jlimit
;
j
++
)
p
+=
sprintf
(
p
,
"%02x"
,
mbstr
[
j
]);
p
+=
sprintf
(
p
,
"%02x"
,
mbstr
[
j
]);
ereport
(
ERROR
,
ereport
(
ERROR
,
(
errcode
(
ERRCODE_CHARACTER_NOT_IN_REPERTOIRE
),
(
errcode
(
ERRCODE_CHARACTER_NOT_IN_REPERTOIRE
),
errmsg
(
"invalid byte sequence for encoding
\"
%s
\"
: 0x%s"
,
errmsg
(
"invalid byte sequence for encoding
\"
%s
\"
: 0x%s"
,
GetDatabaseEncodingName
(),
buf
)));
GetDatabaseEncodingName
(),
buf
)));
}
}
}
}
}
len
-=
l
;
len
-=
l
;
mbstr
+=
l
;
mbstr
+=
l
;
}
}
return
true
;
return
true
;
}
}
...
...
src/include/mb/pg_wchar.h
View file @
59559458
/* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.5
8 2005/03/14 18:31:24
momjian Exp $ */
/* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.5
9 2005/06/15 00:15:08
momjian Exp $ */
#ifndef PG_WCHAR_H
#ifndef PG_WCHAR_H
#define PG_WCHAR_H
#define PG_WCHAR_H
...
@@ -340,4 +340,6 @@ extern void mic2latin(unsigned char *mic, unsigned char *p, int len, int lc);
...
@@ -340,4 +340,6 @@ extern void mic2latin(unsigned char *mic, unsigned char *p, int len, int lc);
extern
void
latin2mic_with_table
(
unsigned
char
*
l
,
unsigned
char
*
p
,
int
len
,
int
lc
,
unsigned
char
*
tab
);
extern
void
latin2mic_with_table
(
unsigned
char
*
l
,
unsigned
char
*
p
,
int
len
,
int
lc
,
unsigned
char
*
tab
);
extern
void
mic2latin_with_table
(
unsigned
char
*
mic
,
unsigned
char
*
p
,
int
len
,
int
lc
,
unsigned
char
*
tab
);
extern
void
mic2latin_with_table
(
unsigned
char
*
mic
,
unsigned
char
*
p
,
int
len
,
int
lc
,
unsigned
char
*
tab
);
/*
 * Returns true when the "length" bytes starting at "source" form a single
 * legal UTF-8 character; lengths other than 1..4 are rejected.
 */
extern
bool
pg_utf8_islegal
(
const
unsigned
char
*
source
,
int
length
);
#endif
/* PG_WCHAR_H */
#endif
/* PG_WCHAR_H */
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment