mirror of
https://github.com/acidanthera/audk.git
synced 2025-08-14 22:28:08 +02:00
Missing masks lead to out-of-bounds shifts. Also, there is no need to construct the CHAR16 through a cast to a CHAR8 buffer; it is better to use native endianness by assigning the data directly to the Ucs2Char variable. Signed-off-by: Savva Mitrofanov <savvamtr@gmail.com> Reviewed-by: Marvin Häuser <mhaeuser@posteo.de>
399 lines
10 KiB
C
399 lines
10 KiB
C
/** @file
|
|
UCS2 to UTF8 manipulation library.
|
|
|
|
Copyright (c) 2018 - 2019, Intel Corporation. All rights reserved.<BR>
|
|
(C) Copyright 2020 Hewlett Packard Enterprise Development LP<BR>
|
|
|
|
SPDX-License-Identifier: BSD-2-Clause-Patent
|
|
|
|
**/
|
|
#include <Uefi.h>
|
|
#include <Library/BaseLib.h>
|
|
#include <Library/BaseMemoryLib.h>
|
|
#include <Library/BaseUcs2Utf8Lib.h>
|
|
#include <Library/DebugLib.h>
|
|
#include <Library/MemoryAllocationLib.h>
|
|
|
|
/**
|
|
Since each UCS2 character can be represented by 1-3 UTF8 encoded characters,
|
|
this function is used to retrieve the UTF8 encoding size for a UCS2 character.
|
|
|
|
@param[in] Utf8Buffer The buffer for UTF8 encoded data.
|
|
|
|
@retval Return the size of UTF8 encoding string or 0 if it is not for
|
|
UCS2 format.
|
|
|
|
**/
|
|
UINT8
|
|
GetUTF8SizeForUCS2 (
|
|
IN CHAR8 *Utf8Buffer
|
|
)
|
|
{
|
|
CHAR8 TempChar;
|
|
UINT8 Utf8Size;
|
|
|
|
ASSERT (Utf8Buffer != NULL);
|
|
|
|
TempChar = *Utf8Buffer;
|
|
if ((TempChar & 0xF0) == 0xF0) {
|
|
//
|
|
// This format is not for UCS2.
|
|
//
|
|
return 0;
|
|
}
|
|
|
|
Utf8Size = 1;
|
|
if ((TempChar & 0x80) == 0x80) {
|
|
if ((TempChar & 0xC0) == 0xC0) {
|
|
Utf8Size++;
|
|
if ((TempChar & 0xE0) == 0xE0) {
|
|
Utf8Size++;
|
|
}
|
|
}
|
|
}
|
|
|
|
return Utf8Size;
|
|
}
|
|
|
|
/**
|
|
Since each UCS2 character can be represented by the format: \uXXXX, this function
|
|
is used to retrieve the UCS2 character from a Unicode format.
|
|
Call MUST make sure there are at least 6 Bytes in the input UTF8 buffer.
|
|
|
|
@param[in] Utf8Buffer The buffer for UTF8 encoded data.
|
|
@param[out] Ucs2Char The converted UCS2 character.
|
|
|
|
@retval EFI_INVALID_PARAMETER Non-Ascii characters found in the hexadecimal
|
|
digits string, and can't be converted to a UCS2
|
|
character.
|
|
@retval EFI_SUCCESS The UCS2 character has been retrieved.
|
|
|
|
**/
|
|
EFI_STATUS
|
|
GetUCS2CharByFormat (
|
|
IN CHAR8 *Utf8Buffer,
|
|
OUT CHAR16 *Ucs2Char
|
|
)
|
|
{
|
|
UINT8 Num1;
|
|
UINT8 Num2;
|
|
UINT8 Index;
|
|
CHAR8 Ucs2CharFormat[UNICODE_FORMAT_CHAR_SIZE]; /// two Hexadecimal digits Ascii string, like "3F"
|
|
|
|
for (Index = 0; Index < 4; Index++) {
|
|
if ((*(Utf8Buffer + 2 + Index) & 0x80) != 0x00) {
|
|
return EFI_INVALID_PARAMETER;
|
|
}
|
|
}
|
|
|
|
ZeroMem (Ucs2CharFormat, UNICODE_FORMAT_CHAR_SIZE);
|
|
|
|
//
|
|
// Get the First Number, Offset is 2
|
|
//
|
|
CopyMem (Ucs2CharFormat, Utf8Buffer + 2, UNICODE_FORMAT_CHAR_LEN);
|
|
Num1 = (UINT8)AsciiStrHexToUintn (Ucs2CharFormat);
|
|
|
|
//
|
|
// Get the Second Number, Offset is 4
|
|
//
|
|
CopyMem (Ucs2CharFormat, Utf8Buffer + 4, UNICODE_FORMAT_CHAR_LEN);
|
|
Num2 = (UINT8)AsciiStrHexToUintn (Ucs2CharFormat);
|
|
|
|
//
|
|
// Ucs2Char is Little-Endian
|
|
//
|
|
*((CHAR8 *)Ucs2Char) = Num2;
|
|
*(((CHAR8 *)Ucs2Char) + 1) = Num1;
|
|
|
|
return EFI_SUCCESS;
|
|
}
|
|
|
|
/**
|
|
Convert a UCS2 character to UTF8 encoding string.
|
|
|
|
@param[in] Ucs2Char The provided UCS2 character.
|
|
@param[out] Utf8Buffer The converted UTF8 encoded data.
|
|
|
|
@retval Return the size of UTF8 encoding data for this UCS2 character.
|
|
|
|
**/
|
|
UINT8
|
|
UCS2CharToUTF8 (
|
|
IN CHAR16 Ucs2Char,
|
|
OUT CHAR8 *Utf8Buffer
|
|
)
|
|
{
|
|
UINT16 Ucs2Number;
|
|
|
|
ASSERT (Utf8Buffer != NULL);
|
|
|
|
Ucs2Number = (UINT16)Ucs2Char;
|
|
if (Ucs2Number <= 0x007F) {
|
|
//
|
|
// UTF8 format: 0xxxxxxx
|
|
//
|
|
*Utf8Buffer = Ucs2Char & 0x7F;
|
|
return 1;
|
|
} else if ((Ucs2Number >= 0x0080) && (Ucs2Number <= 0x07FF)) {
|
|
//
|
|
// UTF8 format: 110xxxxx 10xxxxxx
|
|
//
|
|
*(Utf8Buffer + 1) = (Ucs2Char & 0x3F) | 0x80;
|
|
*Utf8Buffer = ((Ucs2Char >> 6) & 0x1F) | 0xC0;
|
|
return 2;
|
|
} else {
|
|
/// Ucs2Number >= 0x0800 && Ucs2Number <= 0xFFFF
|
|
|
|
//
|
|
// UTF8 format: 1110xxxx 10xxxxxx 10xxxxxx
|
|
//
|
|
*(Utf8Buffer + 2) = (Ucs2Char & 0x3F) | 0x80;
|
|
*(Utf8Buffer + 1) = ((Ucs2Char >> 6) & 0x3F) | 0x80;
|
|
*Utf8Buffer = ((Ucs2Char >> 12) & 0x0F) | 0xE0;
|
|
return 3;
|
|
}
|
|
}
|
|
|
|
/**
|
|
Convert a UTF8 encoded data to a UCS2 character.
|
|
|
|
@param[in] Utf8Buffer The provided UTF8 encoded data.
|
|
@param[out] Ucs2Char The converted UCS2 character.
|
|
|
|
@retval EFI_INVALID_PARAMETER The UTF8 encoded string is not valid or
|
|
not for UCS2 character.
|
|
@retval EFI_SUCCESS The converted UCS2 character.
|
|
|
|
**/
|
|
EFI_STATUS
|
|
UTF8ToUCS2Char (
|
|
IN CHAR8 *Utf8Buffer,
|
|
OUT CHAR16 *Ucs2Char
|
|
)
|
|
{
|
|
UINT8 Utf8Size;
|
|
CHAR8 TempChar1;
|
|
CHAR8 TempChar2;
|
|
CHAR8 TempChar3;
|
|
|
|
ASSERT (Utf8Buffer != NULL && Ucs2Char != NULL);
|
|
*Ucs2Char = 0;
|
|
|
|
Utf8Size = GetUTF8SizeForUCS2 (Utf8Buffer);
|
|
switch (Utf8Size) {
|
|
case 1:
|
|
|
|
//
|
|
// UTF8 format: 0xxxxxxx
|
|
//
|
|
TempChar1 = *Utf8Buffer;
|
|
if ((TempChar1 & 0x80) != 0x00) {
|
|
return EFI_INVALID_PARAMETER;
|
|
}
|
|
|
|
*Ucs2Char = (CHAR16)TempChar1;
|
|
break;
|
|
|
|
case 2:
|
|
|
|
//
|
|
// UTF8 format: 110xxxxx 10xxxxxx
|
|
//
|
|
TempChar1 = *Utf8Buffer;
|
|
if ((TempChar1 & 0xE0) != 0xC0) {
|
|
return EFI_INVALID_PARAMETER;
|
|
}
|
|
|
|
TempChar2 = *(Utf8Buffer + 1);
|
|
if ((TempChar2 & 0xC0) != 0x80) {
|
|
return EFI_INVALID_PARAMETER;
|
|
}
|
|
|
|
*Ucs2Char = (TempChar1 & 0x1F) << 6 | (TempChar2 & 0x3F);
|
|
break;
|
|
|
|
case 3:
|
|
|
|
//
|
|
// UTF8 format: 1110xxxx 10xxxxxx 10xxxxxx
|
|
//
|
|
TempChar1 = *Utf8Buffer;
|
|
if ((TempChar1 & 0xF0) != 0xE0) {
|
|
return EFI_INVALID_PARAMETER;
|
|
}
|
|
|
|
TempChar2 = *(Utf8Buffer + 1);
|
|
if ((TempChar2 & 0xC0) != 0x80) {
|
|
return EFI_INVALID_PARAMETER;
|
|
}
|
|
|
|
TempChar3 = *(Utf8Buffer + 2);
|
|
if ((TempChar3 & 0xC0) != 0x80) {
|
|
return EFI_INVALID_PARAMETER;
|
|
}
|
|
|
|
*Ucs2Char = (TempChar1 & 0x0F) << 12 | (TempChar2 & 0x3F) << 6 | (TempChar3 & 0x3F);
|
|
break;
|
|
|
|
default:
|
|
|
|
return EFI_INVALID_PARAMETER;
|
|
}
|
|
|
|
return EFI_SUCCESS;
|
|
}
|
|
|
|
/**
|
|
Convert a UCS2 string to a UTF8 encoded string.
|
|
|
|
@param[in] Ucs2Str The provided UCS2 string.
|
|
@param[out] Utf8StrAddr The converted UTF8 string address. Caller
|
|
is responsible for Free this string.
|
|
|
|
@retval EFI_INVALID_PARAMETER One or more parameters are invalid.
|
|
@retval EFI_OUT_OF_RESOURCES System runs out of resources.
|
|
@retval EFI_SUCCESS The UTF8 encoded string has been converted.
|
|
|
|
**/
|
|
EFI_STATUS
|
|
UCS2StrToUTF8 (
|
|
IN CHAR16 *Ucs2Str,
|
|
OUT CHAR8 **Utf8StrAddr
|
|
)
|
|
{
|
|
UINTN Ucs2StrIndex;
|
|
UINTN Ucs2StrLength;
|
|
CHAR8 *Utf8Str;
|
|
UINTN Utf8StrLength;
|
|
UINTN Utf8StrIndex;
|
|
CHAR8 Utf8Buffer[UTF8_BUFFER_FOR_UCS2_MAX_SIZE];
|
|
UINT8 Utf8BufferSize;
|
|
|
|
if ((Ucs2Str == NULL) || (Utf8StrAddr == NULL)) {
|
|
return EFI_INVALID_PARAMETER;
|
|
}
|
|
|
|
Ucs2StrLength = StrLen (Ucs2Str);
|
|
Utf8StrLength = 0;
|
|
|
|
for (Ucs2StrIndex = 0; Ucs2StrIndex < Ucs2StrLength; Ucs2StrIndex++) {
|
|
ZeroMem (Utf8Buffer, sizeof (Utf8Buffer));
|
|
Utf8BufferSize = UCS2CharToUTF8 (Ucs2Str[Ucs2StrIndex], Utf8Buffer);
|
|
Utf8StrLength += Utf8BufferSize;
|
|
}
|
|
|
|
Utf8Str = AllocateZeroPool (Utf8StrLength + 1);
|
|
if (Utf8Str == NULL) {
|
|
return EFI_OUT_OF_RESOURCES;
|
|
}
|
|
|
|
Utf8StrIndex = 0;
|
|
for (Ucs2StrIndex = 0; Ucs2StrIndex < Ucs2StrLength; Ucs2StrIndex++) {
|
|
ZeroMem (Utf8Buffer, sizeof (Utf8Buffer));
|
|
Utf8BufferSize = UCS2CharToUTF8 (Ucs2Str[Ucs2StrIndex], Utf8Buffer);
|
|
|
|
CopyMem (Utf8Str + Utf8StrIndex, Utf8Buffer, Utf8BufferSize);
|
|
Utf8StrIndex += Utf8BufferSize;
|
|
}
|
|
|
|
Utf8Str[Utf8StrIndex] = '\0';
|
|
*Utf8StrAddr = Utf8Str;
|
|
|
|
return EFI_SUCCESS;
|
|
}
|
|
|
|
/**
|
|
Convert a UTF8 encoded string to a UCS2 string.
|
|
|
|
@param[in] Utf8Str The provided UTF8 encoded string.
|
|
@param[out] Ucs2StrAddr The converted UCS2 string address. Caller
|
|
is responsible for Free this string.
|
|
|
|
@retval EFI_INVALID_PARAMETER The UTF8 encoded string is not valid to
|
|
convert to UCS2 string.
|
|
One or more parameters are invalid.
|
|
@retval EFI_OUT_OF_RESOURCES System runs out of resources.
|
|
@retval EFI_SUCCESS The UCS2 string has been converted.
|
|
|
|
**/
|
|
EFI_STATUS
|
|
UTF8StrToUCS2 (
|
|
IN CHAR8 *Utf8Str,
|
|
OUT CHAR16 **Ucs2StrAddr
|
|
)
|
|
{
|
|
EFI_STATUS Status;
|
|
UINTN Utf8StrIndex;
|
|
UINTN Utf8StrLength;
|
|
UINTN Ucs2StrIndex;
|
|
UINT8 Utf8BufferSize;
|
|
CHAR16 *Ucs2StrTemp;
|
|
|
|
if ((Utf8Str == NULL) || (Ucs2StrAddr == NULL)) {
|
|
return EFI_INVALID_PARAMETER;
|
|
}
|
|
|
|
//
|
|
// It is not an Ascii string, calculate string length.
|
|
//
|
|
Utf8StrLength = 0;
|
|
while (*(Utf8Str + Utf8StrLength) != '\0') {
|
|
Utf8StrLength++;
|
|
}
|
|
|
|
//
|
|
// UCS2 string shall not be longer than the UTF8 string.
|
|
//
|
|
Ucs2StrTemp = AllocateZeroPool ((Utf8StrLength + 1) * sizeof (CHAR16));
|
|
if (Ucs2StrTemp == NULL) {
|
|
return EFI_OUT_OF_RESOURCES;
|
|
}
|
|
|
|
Utf8StrIndex = 0;
|
|
Ucs2StrIndex = 0;
|
|
while (Utf8Str[Utf8StrIndex] != '\0') {
|
|
if ((CompareMem (Utf8Str + Utf8StrIndex, "\\u", 2) == 0) &&
|
|
(Utf8StrLength - Utf8StrIndex >= UNICODE_FORMAT_LEN))
|
|
{
|
|
Status = GetUCS2CharByFormat (Utf8Str + Utf8StrIndex, Ucs2StrTemp + Ucs2StrIndex);
|
|
if (!EFI_ERROR (Status)) {
|
|
Utf8StrIndex += UNICODE_FORMAT_LEN;
|
|
Ucs2StrIndex++;
|
|
} else {
|
|
StrCpyS (Ucs2StrTemp + Ucs2StrIndex, 3, L"\\u");
|
|
|
|
Ucs2StrIndex += 2;
|
|
Utf8StrIndex += 2;
|
|
}
|
|
} else {
|
|
Utf8BufferSize = GetUTF8SizeForUCS2 (Utf8Str + Utf8StrIndex);
|
|
if ((Utf8BufferSize == 0) || (Utf8StrLength - Utf8StrIndex < Utf8BufferSize)) {
|
|
FreePool (Ucs2StrTemp);
|
|
return EFI_INVALID_PARAMETER;
|
|
}
|
|
|
|
Status = UTF8ToUCS2Char (Utf8Str + Utf8StrIndex, Ucs2StrTemp + Ucs2StrIndex);
|
|
if (EFI_ERROR (Status)) {
|
|
FreePool (Ucs2StrTemp);
|
|
return EFI_INVALID_PARAMETER;
|
|
}
|
|
|
|
Ucs2StrIndex++;
|
|
Utf8StrIndex += Utf8BufferSize;
|
|
}
|
|
}
|
|
|
|
*Ucs2StrAddr = AllocateZeroPool ((Ucs2StrIndex + 1) * sizeof (CHAR16));
|
|
if (*Ucs2StrAddr == NULL) {
|
|
FreePool (Ucs2StrTemp);
|
|
return EFI_OUT_OF_RESOURCES;
|
|
}
|
|
|
|
StrCpyS (*Ucs2StrAddr, Ucs2StrIndex + 1, Ucs2StrTemp);
|
|
*(*Ucs2StrAddr + Ucs2StrIndex) = L'\0';
|
|
FreePool (Ucs2StrTemp);
|
|
|
|
return EFI_SUCCESS;
|
|
}
|