mirror of https://github.com/acidanthera/audk.git
StdLib: Fix several problems where characters were not being correctly converted between wide and MBCS.
Add utility functions for determining character length of strings.

Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: daryl.mcdaniel@intel.com
Reviewed-by: erik.c.bjorge@intel.com
Reviewed-by: lee.g.rosenbaum@intel.com

StdLib/LibC/
  Locale/multibyte_Utf8.c
    Improve comments.
    Define implementation-specific MBCS utility functions, as declared in <stdlib.h>.
    Enhance functionality of EncodeUtf8() and improve error handling.
    Set correct conversion state in wcrtomb().
    Bug fixes in wcsrtombs().
    Make wctob() properly MBCS compliant.
  Main/Main.c
    Remove code obsoleted by new wcsrtombs() implementation.

git-svn-id: https://edk2.svn.sourceforge.net/svnroot/edk2/trunk/edk2@13785 6f19259b-4bc3-4df7-8a09-765794883524
This commit is contained in:
parent
e2a013fa40
commit
c42c9cac8c
|
@ -15,9 +15,9 @@
|
||||||
#include <wchar.h>
|
#include <wchar.h>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
|
|
||||||
typedef int ch_UCS4;
|
typedef int ch_UCS4;
|
||||||
|
|
||||||
static mbstate_t LocalConvState = {0};
|
static mbstate_t LocalConvState = {0};
|
||||||
|
|
||||||
/** Map a UTF-8 encoded prefix byte to a sequence length.
|
/** Map a UTF-8 encoded prefix byte to a sequence length.
|
||||||
Zero means illegal prefix, but valid surrogate if < 0xC0.
|
Zero means illegal prefix, but valid surrogate if < 0xC0.
|
||||||
|
@ -59,12 +59,12 @@ UINT8 utf8_code_length[256] = {
|
||||||
|
|
||||||
/** Process one byte of a multibyte character.
|
/** Process one byte of a multibyte character.
|
||||||
|
|
||||||
@param ch
|
@param[in] ch One byte of a multibyte character.
|
||||||
@param ps
|
@param[in,out] ps Pointer to a conversion state object.
|
||||||
|
|
||||||
@retval -2
|
@retval -2 ch is an incomplete but potentially valid character.
|
||||||
@retval -1
|
@retval -1 ch is not valid in this context.
|
||||||
@retval 1:4
|
@retval 1:4 The length, in bytes, of the character ch just completed.
|
||||||
**/
|
**/
|
||||||
static
|
static
|
||||||
int
|
int
|
||||||
|
@ -174,10 +174,10 @@ ProcessOneByte(unsigned char ch, mbstate_t *ps)
|
||||||
|
|
||||||
/** Convert one Multibyte sequence.
|
/** Convert one Multibyte sequence.
|
||||||
|
|
||||||
@param Dest
|
@param[out] Dest Pointer to output location, or NULL
|
||||||
@param Src
|
@param[in] Src Multibyte Source (UTF8)
|
||||||
@param Len
|
@param[in] Len Max Number of bytes to convert
|
||||||
@param pS
|
@param[in] pS Pointer to State struct, or NULL
|
||||||
|
|
||||||
@retval -2 Bytes processed comprise an incomplete, but potentially valid, character.
|
@retval -2 Bytes processed comprise an incomplete, but potentially valid, character.
|
||||||
@retval -1 An encoding error was encountered. ps->E indicates the number of bytes consumed.
|
@retval -1 An encoding error was encountered. ps->E indicates the number of bytes consumed.
|
||||||
|
@ -219,87 +219,212 @@ DecodeOneStateful(
|
||||||
return NumConv;
|
return NumConv;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Convert wide characters (UTF16) into multibyte characters (UTF8)
|
/* Determine the number of bytes needed to represent a Wide character
|
||||||
|
as a MBCS character.
|
||||||
|
|
||||||
|
A single wide character may convert into a one, two, three, or four byte
|
||||||
|
narrow (MBCS or UTF-8) character. The number of MBCS bytes can be determined
|
||||||
|
as follows.
|
||||||
|
|
||||||
|
If WCS char < 0x00000080 One Byte
|
||||||
|
Else if WCS char < 0x00000800 Two Bytes
|
||||||
|
Else Three Bytes
|
||||||
|
|
||||||
|
Since UEFI only supports the Unicode Base Multilingual Plane (BMP),
|
||||||
|
Four-byte characters are not supported.
|
||||||
|
|
||||||
|
@param[in] InCh Wide character to test.
|
||||||
|
|
||||||
|
@retval -1 Improperly formed character
|
||||||
|
@retval 0 InCh is 0x0000
|
||||||
|
@retval >0 Number of bytes needed for the MBCS character
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
EFIAPI
|
||||||
|
OneWcToMcLen(const wchar_t InCh)
|
||||||
|
{
|
||||||
|
ssize_t NumBytes;
|
||||||
|
|
||||||
|
if(InCh == 0) { // Is this a NUL, 0x0000 ?
|
||||||
|
NumBytes = 0;
|
||||||
|
}
|
||||||
|
else if(InCh < 0x0080) { // Is this a 1-byte character?
|
||||||
|
NumBytes = 1;
|
||||||
|
}
|
||||||
|
else if(InCh < 0x0800) { // Is this a 2-byte character?
|
||||||
|
NumBytes = 2;
|
||||||
|
}
|
||||||
|
else if((InCh >= 0xD800) && (InCh < 0xE000)) { // Is this a surrogate?
|
||||||
|
NumBytes = -1;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
NumBytes = 3; // Otherwise, it must be a 3-byte character.
|
||||||
|
}
|
||||||
|
return (int)NumBytes; // Return estimate of required bytes.
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Determine the number of bytes needed to represent a Wide character string
|
||||||
|
as a MBCS string of given maximum length. Will optionally return the number
|
||||||
|
of wide characters that would be consumed.
|
||||||
|
|
||||||
|
A single wide character may convert into a one, two, three, or four byte
|
||||||
|
narrow (MBCS or UTF-8) character. The number of MBCS bytes can be determined
|
||||||
|
as follows.
|
||||||
|
|
||||||
|
If WCS char < 0x00000080 One Byte
|
||||||
|
Else if WCS char < 0x00000800 Two Bytes
|
||||||
|
Else if WCS char < 0x00010000 Three Bytes
|
||||||
|
Else Four Bytes
|
||||||
|
|
||||||
|
Since UEFI only supports the Unicode Base Multilingual Plane (BMP),
|
||||||
|
Four-byte characters should not be encountered.
|
||||||
|
|
||||||
|
@param[in] Src Pointer to a wide character string.
|
||||||
|
@param[in] Limit Maximum number of bytes the converted string may occupy.
|
||||||
|
@param[out] NumChar Pointer to where to store the number of wide characters, or NULL.
|
||||||
|
|
||||||
|
@return The number of bytes required to convert Src to MBCS,
|
||||||
|
not including the terminating NUL. If NumChar is not NULL, the number
|
||||||
|
of characters represented by the return value will be written to
|
||||||
|
where it points.
|
||||||
|
*/
|
||||||
|
size_t
|
||||||
|
EFIAPI
|
||||||
|
EstimateWtoM(const wchar_t * Src, size_t Limit, size_t *NumChar)
|
||||||
|
{
|
||||||
|
ssize_t Estimate;
|
||||||
|
size_t CharCount;
|
||||||
|
ssize_t NumBytes;
|
||||||
|
wchar_t EChar;
|
||||||
|
|
||||||
|
Estimate = 0;
|
||||||
|
CharCount = 0;
|
||||||
|
EChar = *Src++; // Get the initial character and point to next
|
||||||
|
while(((NumBytes = OneWcToMcLen(EChar)) > 0) &&
|
||||||
|
((size_t)(Estimate + NumBytes) < Limit))
|
||||||
|
{ // Until one of the source characters is NUL
|
||||||
|
++CharCount; // Count this character.
|
||||||
|
Estimate += NumBytes; // Count the Bytes for this character
|
||||||
|
EChar = *Src++; // Get the next source character and point to the next.
|
||||||
|
}
|
||||||
|
if(NumChar != NULL) {
|
||||||
|
*NumChar = CharCount;
|
||||||
|
}
|
||||||
|
return (size_t)Estimate; // Return estimate of required bytes.
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Determine the number of characters in a MBCS string.
|
||||||
|
MBCS characters are one to four bytes long. By examining the first byte
|
||||||
|
of a MBCS character, one can determine the number of bytes comprising the
|
||||||
|
character.
|
||||||
|
|
||||||
|
0x00 - 0x7F One
|
||||||
|
0xC0 - 0xDF Two
|
||||||
|
0xE0 - 0xEF Three
|
||||||
|
0xF0 - 0xF7 Four
|
||||||
|
|
||||||
|
Since UEFI only supports the Unicode Base Multilingual Plane (BMP),
|
||||||
|
Four-byte characters should not be encountered.
|
||||||
|
|
||||||
|
@param[in] Src The string to examine
|
||||||
|
|
||||||
|
@return The number of characters represented by the MBCS string.
|
||||||
|
**/
|
||||||
|
size_t
|
||||||
|
EFIAPI
|
||||||
|
CountMbcsChars(const char *Src)
|
||||||
|
{
|
||||||
|
size_t Count;
|
||||||
|
char EChar;
|
||||||
|
|
||||||
|
Count = 0;
|
||||||
|
EChar = *Src++;
|
||||||
|
while(EChar != 0) {
|
||||||
|
if(EChar < 0x80) {
|
||||||
|
++Count;
|
||||||
|
}
|
||||||
|
else if(EChar < 0xE0) {
|
||||||
|
Count += 2;
|
||||||
|
++Src;
|
||||||
|
}
|
||||||
|
else if(EChar < 0xF0) {
|
||||||
|
Count += 3;
|
||||||
|
Src += 2;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Ill-formed character
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Count;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Convert a wide character (UTF16) into a multibyte character (UTF8)
|
||||||
|
|
||||||
|
Converts a wide character into a corresponding multibyte character that
|
||||||
|
begins in the conversion state described by the object pointed to by ps.
|
||||||
|
If dst is not a null pointer, the converted character is then stored into
|
||||||
|
the array pointed to by dst.
|
||||||
|
|
||||||
|
It is the caller's responsibility to ensure that Dest is large enough to
|
||||||
|
hold the resulting MBCS sequence.
|
||||||
|
|
||||||
@param s Pointer to the wide-character string to convert
|
@param s Pointer to the wide-character string to convert
|
||||||
@param size Number of wide characters in s. size <= wcslen(s);
|
@param Dest Pointer to the buffer in which to place the converted sequence, or NULL.
|
||||||
|
|
||||||
@return A newly allocated buffer containing the converted string is returned,
|
@retval -1 An error occurred. The error reason is in errno.
|
||||||
or NULL if an error occurred. Global variable errno contains more
|
@retval >=0 The number of bytes stored into Dest.
|
||||||
information if NULL is returned.
|
|
||||||
**/
|
**/
|
||||||
ssize_t
|
ssize_t
|
||||||
EncodeUtf8(char *Dest, wchar_t *s, ssize_t size)
|
EncodeUtf8(char *Dest, wchar_t ch)
|
||||||
{
|
{
|
||||||
char *p; /* next free byte in build buffer */
|
char *p; /* next free byte in build buffer */
|
||||||
char *v; /* next free byte in destination */
|
|
||||||
ssize_t nneeded; /* number of result bytes needed */
|
|
||||||
int i; /* index into s of next input byte */
|
|
||||||
int NumInBuff; // number of bytes in Buff
|
int NumInBuff; // number of bytes in Buff
|
||||||
char Buff[4]; // Buffer into which each character is built
|
char Buff[4]; // Buffer into which each character is built
|
||||||
|
|
||||||
assert(s != NULL);
|
|
||||||
assert(size >= 0);
|
|
||||||
|
|
||||||
v = Dest;
|
|
||||||
nneeded = 0;
|
|
||||||
if((size * MB_LEN_MAX) / MB_LEN_MAX != size) {
|
|
||||||
// size is too large and resulted in overflow when multiplied by MB_LEN_MAX
|
|
||||||
errno = EINVAL;
|
|
||||||
return (ssize_t)-1;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < size;) {
|
|
||||||
ch_UCS4 ch = s[i++];
|
|
||||||
p = Buff;
|
p = Buff;
|
||||||
|
|
||||||
if (ch < 0x80) {
|
NumInBuff = 0;
|
||||||
/* Encode ASCII -- One Byte */
|
if (ch < 0x80) {
|
||||||
*p++ = (char) ch;
|
/* Encode ASCII -- One Byte */
|
||||||
}
|
*p++ = (char) ch;
|
||||||
else if (ch < 0x0800) {
|
NumInBuff = 1;
|
||||||
/* Encode Latin-1 -- Two Byte */
|
}
|
||||||
*p++ = (char)(0xc0 | (ch >> 6));
|
else if (ch < 0x0800) {
|
||||||
*p++ = (char)(0x80 | (ch & 0x3f));
|
/* Encode Latin-1 -- Two Byte */
|
||||||
}
|
*p++ = (char)(0xc0 | (ch >> 6));
|
||||||
else {
|
*p++ = (char)(0x80 | (ch & 0x3f));
|
||||||
|
NumInBuff = 2;
|
||||||
|
}
|
||||||
|
else {
|
||||||
/* Encode UCS2 Unicode ordinals -- Three Byte */
|
/* Encode UCS2 Unicode ordinals -- Three Byte */
|
||||||
/* Special case: check for high surrogate -- Shouldn't happen in UEFI */
|
/* Special case: check for surrogate -- Shouldn't happen in UEFI */
|
||||||
if (0xD800 <= ch && ch <= 0xDBFF && i < size) {
|
if (0xD800 <= ch && ch < 0xE000) {
|
||||||
ch_UCS4 ch2 = s[i];
|
errno = EILSEQ;
|
||||||
/* Check for low surrogate and combine the two to
|
return -1;
|
||||||
form a UCS4 value */
|
|
||||||
if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
|
|
||||||
ch = ((ch - 0xD800) << 10 | (ch2 - 0xDC00)) + 0x10000;
|
|
||||||
i++;
|
|
||||||
/* Encode UCS4 Unicode ordinals -- Four Byte */
|
|
||||||
*p++ = (char)(0xf0 | (ch >> 18));
|
|
||||||
*p++ = (char)(0x80 | ((ch >> 12) & 0x3f));
|
|
||||||
*p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
|
|
||||||
*p++ = (char)(0x80 | (ch & 0x3f));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
/* Fall through: handles isolated high surrogates */
|
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
*p++ = (char)(0xe0 | (ch >> 12));
|
*p++ = (char)(0xe0 | (ch >> 12));
|
||||||
*p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
|
*p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
|
||||||
*p++ = (char)(0x80 | (ch & 0x3f));
|
*p++ = (char)(0x80 | (ch & 0x3f));
|
||||||
|
NumInBuff = 3;
|
||||||
}
|
}
|
||||||
/* At this point, Buff holds the converted character which is NumInBuff bytes long.
|
}
|
||||||
NumInBuff is the value 1, 2, 3, or 4
|
/* At this point, Buff holds the converted character which is NumInBuff bytes long.
|
||||||
*/
|
NumInBuff is the value 1, 2, 3, or 4
|
||||||
NumInBuff = (int)(p - Buff); // Number of bytes in Buff
|
*/
|
||||||
if(Dest != NULL) { // Save character if Dest is not NULL
|
if(Dest != NULL) { // Save character if Dest is not NULL
|
||||||
memcpy(v, Buff, NumInBuff);
|
memcpy(Dest, Buff, NumInBuff);
|
||||||
v += NumInBuff;
|
|
||||||
|
if(ch != 0) {
|
||||||
|
// Terminate the destination string.
|
||||||
|
Dest[NumInBuff] = '\0';
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
NumInBuff = 0;
|
||||||
}
|
}
|
||||||
nneeded += NumInBuff; // Keep track of the number of bytes put into Dest
|
|
||||||
}
|
}
|
||||||
if(Dest != NULL) {
|
return NumInBuff; // Tell the caller
|
||||||
// Terminate the destination string.
|
|
||||||
*v = '\0';
|
|
||||||
}
|
|
||||||
return nneeded; // Tell the caller
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ######################## Narrow to Wide Conversions #######################
|
// ######################## Narrow to Wide Conversions #######################
|
||||||
|
@ -307,6 +432,8 @@ EncodeUtf8(char *Dest, wchar_t *s, ssize_t size)
|
||||||
/** If ps is not a null pointer, the mbsinit function determines whether the
|
/** If ps is not a null pointer, the mbsinit function determines whether the
|
||||||
pointed-to mbstate_t object describes an initial conversion state.
|
pointed-to mbstate_t object describes an initial conversion state.
|
||||||
|
|
||||||
|
@param[in] ps Pointer to the conversion state object to test.
|
||||||
|
|
||||||
@return The mbsinit function returns nonzero if ps is a null pointer
|
@return The mbsinit function returns nonzero if ps is a null pointer
|
||||||
or if the pointed-to object describes an initial conversion
|
or if the pointed-to object describes an initial conversion
|
||||||
state; otherwise, it returns zero.
|
state; otherwise, it returns zero.
|
||||||
|
@ -329,8 +456,14 @@ mbsinit(const mbstate_t *ps)
|
||||||
where internal is the mbstate_t object for the mbrlen function, except that
|
where internal is the mbstate_t object for the mbrlen function, except that
|
||||||
the expression designated by ps is evaluated only once.
|
the expression designated by ps is evaluated only once.
|
||||||
|
|
||||||
@return The mbrlen function returns a value between zero and n,
|
@param[in] s Pointer to a multibyte character sequence.
|
||||||
inclusive, (size_t)(-2), or (size_t)(-1).
|
@param[in] n Maximum number of bytes to examine.
|
||||||
|
@param[in] pS Pointer to the conversion state object.
|
||||||
|
|
||||||
|
@retval 0 The next n or fewer characters complete a NUL.
|
||||||
|
@retval 1..n The number of bytes that complete the multibyte character.
|
||||||
|
@retval -2 The next n bytes contribute to an incomplete (but potentially valid) multibyte character.
|
||||||
|
@retval -1 An encoding error occurred.
|
||||||
|
|
||||||
Declared in: wchar.h
|
Declared in: wchar.h
|
||||||
**/
|
**/
|
||||||
|
@ -338,10 +471,10 @@ size_t
|
||||||
mbrlen(
|
mbrlen(
|
||||||
const char *s,
|
const char *s,
|
||||||
size_t n,
|
size_t n,
|
||||||
mbstate_t *ps
|
mbstate_t *pS
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
return mbrtowc(NULL, s, n, ps);
|
return mbrtowc(NULL, s, n, pS);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Determine the number of bytes comprising a multibyte character.
|
/** Determine the number of bytes comprising a multibyte character.
|
||||||
|
@ -392,6 +525,11 @@ corresponding wide character and then, if pwc is not a null pointer, stores that
|
||||||
the object pointed to by pwc. If the corresponding wide character is the null wide
|
the object pointed to by pwc. If the corresponding wide character is the null wide
|
||||||
character, the resulting state described is the initial conversion state.
|
character, the resulting state described is the initial conversion state.
|
||||||
|
|
||||||
|
@param[out] pwc Pointer to where the resulting wide character is to be stored.
|
||||||
|
@param[in] s Pointer to a multibyte character "string".
|
||||||
|
@param[in] n The maximum number of bytes to inspect.
|
||||||
|
@param[in] ps Pointer to a conversion state object.
|
||||||
|
|
||||||
@retval 0 if the next n or fewer bytes complete the multibyte
|
@retval 0 if the next n or fewer bytes complete the multibyte
|
||||||
character that corresponds to the null wide
|
character that corresponds to the null wide
|
||||||
character (which is the value stored).
|
character (which is the value stored).
|
||||||
|
@ -480,6 +618,11 @@ just past the last multibyte character converted (if any). If conversion stopped
|
||||||
reaching a terminating null character and if dst is not a null pointer, the resulting state
|
reaching a terminating null character and if dst is not a null pointer, the resulting state
|
||||||
described is the initial conversion state.
|
described is the initial conversion state.
|
||||||
|
|
||||||
|
@param[out] dst Pointer to where the resulting wide character sequence is stored.
|
||||||
|
@param[in] src Pointer to a pointer to the multibyte character sequence to convert.
|
||||||
|
@param[in] len Maximum number of wide characters to be stored into dst.
|
||||||
|
@param[in] ps Pointer to a conversion state object.
|
||||||
|
|
||||||
@return If the input conversion encounters a sequence of bytes that do
|
@return If the input conversion encounters a sequence of bytes that do
|
||||||
not form a valid multibyte character, an encoding error occurs:
|
not form a valid multibyte character, an encoding error occurs:
|
||||||
the mbsrtowcs function stores the value of the macro EILSEQ in
|
the mbsrtowcs function stores the value of the macro EILSEQ in
|
||||||
|
@ -564,21 +707,23 @@ mbsrtowcs(
|
||||||
**/
|
**/
|
||||||
size_t
|
size_t
|
||||||
mbstowcs(
|
mbstowcs(
|
||||||
wchar_t *pwcs,
|
wchar_t *Dest,
|
||||||
const char *s,
|
const char *Src,
|
||||||
size_t n
|
size_t Limit
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
|
|
||||||
/* pwcs may be NULL */
|
/* Dest may be NULL */
|
||||||
/* s may be NULL */
|
/* Src may be NULL */
|
||||||
|
|
||||||
return mbsrtowcs(pwcs, &s, n, NULL);
|
return mbsrtowcs(Dest, &Src, Limit, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** The btowc function determines whether C constitutes a valid single-byte
|
/** The btowc function determines whether C constitutes a valid single-byte
|
||||||
character in the initial shift state.
|
character in the initial shift state.
|
||||||
|
|
||||||
|
@param[in] C A narrow character to test or convert to wide.
|
||||||
|
|
||||||
@return The btowc function returns WEOF if c has the value EOF or if
|
@return The btowc function returns WEOF if c has the value EOF or if
|
||||||
(unsigned char)C does not constitute a valid single-byte
|
(unsigned char)C does not constitute a valid single-byte
|
||||||
character in the initial shift state. Otherwise, it returns the
|
character in the initial shift state. Otherwise, it returns the
|
||||||
|
@ -621,6 +766,12 @@ array whose first element is pointed to by S. At most MB_CUR_MAX bytes are store
|
||||||
wc is a null wide character, a null byte is stored, preceded by any shift sequence needed
|
wc is a null wide character, a null byte is stored, preceded by any shift sequence needed
|
||||||
to restore the initial shift state; the resulting state described is the initial conversion state.
|
to restore the initial shift state; the resulting state described is the initial conversion state.
|
||||||
|
|
||||||
|
@param[out] Dest Pointer to the location in which to store the resulting
|
||||||
|
multibyte character. Otherwise, NULL to reset the
|
||||||
|
conversion state.
|
||||||
|
@param[in] wchar The wide character to convert.
|
||||||
|
@param[in,out] pS Pointer to a conversion state object, or NULL.
|
||||||
|
|
||||||
@return The wcrtomb function returns the number of bytes stored in the
|
@return The wcrtomb function returns the number of bytes stored in the
|
||||||
array object (including any shift sequences). When wc is not a
|
array object (including any shift sequences). When wc is not a
|
||||||
valid wide character, an encoding error occurs: the function
|
valid wide character, an encoding error occurs: the function
|
||||||
|
@ -631,26 +782,31 @@ to restore the initial shift state; the resulting state described is the initial
|
||||||
**/
|
**/
|
||||||
size_t
|
size_t
|
||||||
wcrtomb(
|
wcrtomb(
|
||||||
char *s,
|
char *Dest,
|
||||||
wchar_t wchar,
|
wchar_t wchar,
|
||||||
mbstate_t *ps
|
mbstate_t *pS
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
size_t RetVal;
|
size_t RetVal;
|
||||||
|
|
||||||
/* s may be NULL */
|
/* Dest may be NULL */
|
||||||
if (s == NULL) {
|
if (Dest == NULL) {
|
||||||
RetVal = 1;
|
RetVal = 1;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (wchar == L'\0') {
|
if (wchar == L'\0') {
|
||||||
*s = '\0';
|
*Dest = '\0';
|
||||||
RetVal = 1;
|
RetVal = 1;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
RetVal = EncodeUtf8(s, &wchar, 1);
|
RetVal = EncodeUtf8(Dest, wchar);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if(pS == NULL) {
|
||||||
|
pS = &LocalConvState;
|
||||||
|
}
|
||||||
|
pS->A = 0; // Set ps to the initial conversion state
|
||||||
|
|
||||||
return RetVal;
|
return RetVal;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -698,27 +854,31 @@ wctomb(
|
||||||
}
|
}
|
||||||
|
|
||||||
/** The wcsrtombs function converts a sequence of wide characters from the array
|
/** The wcsrtombs function converts a sequence of wide characters from the array
|
||||||
indirectly pointed to by S into a sequence of corresponding multibyte
|
indirectly pointed to by Src into a sequence of corresponding multibyte
|
||||||
characters that begins in the conversion state described by the object
|
characters that begins in the conversion state described by the object
|
||||||
pointed to by ps.
|
pointed to by ps.
|
||||||
|
|
||||||
If S is not a null pointer, the converted characters
|
If Dest is not a null pointer, the converted characters are stored into the
|
||||||
are then stored into the array pointed to by S. Conversion continues
|
array pointed to by Dest. Conversion continues up to and including a
|
||||||
up to and including a terminating null wide character, which is also
|
terminating null wide character, which is also stored. Conversion stops
|
||||||
stored. Conversion stops earlier in two cases: when a wide character is
|
earlier in two cases: when a wide character is reached that does not
|
||||||
reached that does not correspond to a valid multibyte character, or
|
correspond to a valid multibyte character, or (if Dest is not a null
|
||||||
(if S is not a null pointer) when the next multibyte character would
|
pointer) when the next multibyte character would exceed the limit of Limit
|
||||||
exceed the limit of N total bytes to be stored into the array pointed
|
total bytes to be stored into the array pointed to by Dest. Each conversion
|
||||||
to by S. Each conversion takes place as if by a call to the wcrtomb
|
takes place as if by a call to the wcrtomb function.
|
||||||
function.)
|
|
||||||
|
|
||||||
If S is not a null pointer, the pointer object pointed to by pwcs is
|
If Dest is not a null pointer, the pointer object pointed to by Src is
|
||||||
assigned either a null pointer (if conversion stopped due to reaching
|
assigned either a null pointer (if conversion stopped due to reaching
|
||||||
a terminating null wide character) or the address just past the last wide
|
a terminating null wide character) or the address just past the last wide
|
||||||
character converted (if any). If conversion stopped due to reaching a
|
character converted (if any). If conversion stopped due to reaching a
|
||||||
terminating null wide character, the resulting state described is the
|
terminating null wide character, the resulting state described is the
|
||||||
initial conversion state.
|
initial conversion state.
|
||||||
|
|
||||||
|
@param[out] Dest Pointer to the array in which to store the converted multibyte characters, or NULL.
|
||||||
|
@param[in,out] Src Pointer to a pointer to the wide-character sequence to convert.
|
||||||
|
@param[in] Limit Max number of bytes to store in Dest.
|
||||||
|
@param[in,out] ps Pointer to a conversion state object (appears to be unused by this implementation).
|
||||||
|
|
||||||
@return If conversion stops because a wide character is reached that
|
@return If conversion stops because a wide character is reached that
|
||||||
does not correspond to a valid multibyte character, an
|
does not correspond to a valid multibyte character, an
|
||||||
encoding error occurs: the wcsrtombs function stores the
|
encoding error occurs: the wcsrtombs function stores the
|
||||||
|
@ -731,38 +891,50 @@ wctomb(
|
||||||
**/
|
**/
|
||||||
size_t
|
size_t
|
||||||
wcsrtombs(
|
wcsrtombs(
|
||||||
char *s,
|
char *Dest,
|
||||||
const wchar_t **pwcs,
|
const wchar_t **Src,
|
||||||
size_t n,
|
size_t Limit,
|
||||||
mbstate_t *ps
|
mbstate_t *ps
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
int count = 0;
|
size_t NumStored;
|
||||||
|
ssize_t MaxBytes;
|
||||||
|
int count;
|
||||||
|
wchar_t InCh;
|
||||||
|
|
||||||
/* s may be NULL */
|
NumStored = 0;
|
||||||
/* pwcs may be NULL */
|
MaxBytes = (ssize_t)Limit;
|
||||||
|
|
||||||
|
/* Dest may be NULL */
|
||||||
|
/* Src may be NULL */
|
||||||
/* ps appears to be unused */
|
/* ps appears to be unused */
|
||||||
|
|
||||||
if (pwcs == NULL || *pwcs == NULL)
|
if (Src == NULL || *Src == NULL)
|
||||||
return (0);
|
return (0);
|
||||||
|
|
||||||
if (s == NULL) {
|
if (Dest == NULL) {
|
||||||
while (*(*pwcs)++ != 0)
|
NumStored = EstimateWtoM(*Src, MaxBytes, NULL);
|
||||||
count++;
|
|
||||||
return(count);
|
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
if (n != 0) {
|
while (OneWcToMcLen(InCh = *(*Src)++) <= MaxBytes) {
|
||||||
do {
|
if(InCh == 0) {
|
||||||
if ((*s++ = (char) *(*pwcs)++) == 0) {
|
*Src = NULL;
|
||||||
*pwcs = NULL;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
count++;
|
count = (int)wcrtomb(Dest, InCh, NULL);
|
||||||
} while (--n != 0);
|
if(count >= 0) {
|
||||||
|
Dest += count;
|
||||||
|
MaxBytes -= count;
|
||||||
|
NumStored += count;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
NumStored = (size_t)(-1);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return count;
|
|
||||||
|
return NumStored;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Convert a wide-character string into a multibyte character string.
|
/** Convert a wide-character string into a multibyte character string.
|
||||||
|
@ -794,19 +966,23 @@ wcsrtombs(
|
||||||
**/
|
**/
|
||||||
size_t
|
size_t
|
||||||
wcstombs(
|
wcstombs(
|
||||||
char *s,
|
char *Dest,
|
||||||
const wchar_t *pwcs,
|
const wchar_t *Src,
|
||||||
size_t n
|
size_t Limit
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
/* s may be NULL */
|
/* Dest may be NULL */
|
||||||
return wcsrtombs(s, &pwcs, n, NULL);
|
return wcsrtombs(Dest, &Src, Limit, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** The wctob function determines whether C corresponds to a member of the extended
|
/** The wctob function determines whether C corresponds to a member of the extended
|
||||||
character set whose multibyte character representation is a single byte when in the initial
|
character set whose multibyte character representation is a single byte when in the initial
|
||||||
shift state.
|
shift state.
|
||||||
|
|
||||||
|
wctob needs to be consistent with wcrtomb.
|
||||||
|
If wcrtomb says that a character is representable in 1 byte,
|
||||||
|
then wctob needs to also represent the character as 1 byte.
|
||||||
|
|
||||||
@return The wctob function returns EOF if C does not correspond to a multibyte
|
@return The wctob function returns EOF if C does not correspond to a multibyte
|
||||||
character with length one in the initial shift state. Otherwise, it
|
character with length one in the initial shift state. Otherwise, it
|
||||||
returns the single-byte representation of that character as an
|
returns the single-byte representation of that character as an
|
||||||
|
@ -817,13 +993,14 @@ wcstombs(
|
||||||
int
|
int
|
||||||
wctob(wint_t c)
|
wctob(wint_t c)
|
||||||
{
|
{
|
||||||
/* wctob needs to be consistent with wcrtomb.
|
int RetVal;
|
||||||
if wcrtomb says that a character is representable in 1 byte,
|
|
||||||
which this implementation always says, then wctob needs to
|
RetVal = EOF;
|
||||||
also represent the character as 1 byte.
|
if(c == 0) {
|
||||||
*/
|
RetVal = 0;
|
||||||
if (c == WEOF) {
|
|
||||||
return EOF;
|
|
||||||
}
|
}
|
||||||
return (int)(c & 0xFF);
|
else if (OneWcToMcLen((const wchar_t)c) == 1) {
|
||||||
|
RetVal = (int)(c & 0xFF);
|
||||||
|
}
|
||||||
|
return RetVal;
|
||||||
}
|
}
|
||||||
|
|
|
@ -113,10 +113,9 @@ DEBUG_CODE_END();
|
||||||
for(count = 0; count < Argc; ++count) {
|
for(count = 0; count < Argc; ++count) {
|
||||||
nArgv[count] = string;
|
nArgv[count] = string;
|
||||||
AVsz = wcstombs(string, Argv[count], nArgvSize);
|
AVsz = wcstombs(string, Argv[count], nArgvSize);
|
||||||
string[AVsz] = 0; /* NULL terminate the argument */
|
|
||||||
DEBUG((DEBUG_INFO, "Cvt[%d] %d \"%s\" --> \"%a\"\n", (INT32)count, (INT32)AVsz, Argv[count], nArgv[count]));
|
DEBUG((DEBUG_INFO, "Cvt[%d] %d \"%s\" --> \"%a\"\n", (INT32)count, (INT32)AVsz, Argv[count], nArgv[count]));
|
||||||
string += AVsz + 1;
|
string += AVsz;
|
||||||
nArgvSize -= AVsz + 1;
|
nArgvSize -= AVsz;
|
||||||
if(nArgvSize < 0) {
|
if(nArgvSize < 0) {
|
||||||
Print(L"ABORTING: Internal Argv[%d] conversion error.\n", count);
|
Print(L"ABORTING: Internal Argv[%d] conversion error.\n", count);
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
|
|
Loading…
Reference in New Issue