2020-12-08 02:56:56 +01:00
|
|
|
/** @file
  UCS2 to UTF8 manipulation library.

  Copyright (c) 2018 - 2019, Intel Corporation. All rights reserved.<BR>
  (C) Copyright 2020 Hewlett Packard Enterprise Development LP<BR>

  SPDX-License-Identifier: BSD-2-Clause-Patent

**/
|
|
|
|
#include <Uefi.h>
|
|
|
|
#include <Library/BaseLib.h>
|
|
|
|
#include <Library/BaseMemoryLib.h>
|
|
|
|
#include <Library/BaseUcs2Utf8Lib.h>
|
|
|
|
#include <Library/DebugLib.h>
|
|
|
|
#include <Library/MemoryAllocationLib.h>
|
|
|
|
|
|
|
|
/**
|
|
|
|
Since each UCS2 character can be represented by 1-3 UTF8 encoded characters,
|
|
|
|
this function is used to retrieve the UTF8 encoding size for a UCS2 character.
|
|
|
|
|
|
|
|
@param[in] Utf8Buffer The buffer for UTF8 encoded data.
|
|
|
|
|
|
|
|
@retval Return the size of UTF8 encoding string or 0 if it is not for
|
|
|
|
UCS2 format.
|
|
|
|
|
|
|
|
**/
|
|
|
|
UINT8
|
|
|
|
GetUTF8SizeForUCS2 (
|
|
|
|
IN CHAR8 *Utf8Buffer
|
|
|
|
)
|
|
|
|
{
|
|
|
|
CHAR8 TempChar;
|
|
|
|
UINT8 Utf8Size;
|
|
|
|
|
|
|
|
ASSERT (Utf8Buffer != NULL);
|
|
|
|
|
|
|
|
TempChar = *Utf8Buffer;
|
|
|
|
if ((TempChar & 0xF0) == 0xF0) {
|
|
|
|
//
|
|
|
|
// This format is not for UCS2.
|
|
|
|
//
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
Utf8Size = 1;
|
|
|
|
if ((TempChar & 0x80) == 0x80) {
|
|
|
|
if ((TempChar & 0xC0) == 0xC0) {
|
|
|
|
Utf8Size++;
|
|
|
|
if ((TempChar & 0xE0) == 0xE0) {
|
|
|
|
Utf8Size++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return Utf8Size;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
Since each UCS2 character can be represented by the format: \uXXXX, this function
|
|
|
|
is used to retrieve the UCS2 character from a Unicode format.
|
|
|
|
Call MUST make sure there are at least 6 Bytes in the input UTF8 buffer.
|
|
|
|
|
|
|
|
@param[in] Utf8Buffer The buffer for UTF8 encoded data.
|
|
|
|
@param[out] Ucs2Char The converted UCS2 character.
|
|
|
|
|
|
|
|
@retval EFI_INVALID_PARAMETER Non-Ascii characters found in the hexadecimal
|
|
|
|
digits string, and can't be converted to a UCS2
|
|
|
|
character.
|
|
|
|
@retval EFI_SUCCESS The UCS2 character has been retrieved.
|
|
|
|
|
|
|
|
**/
|
|
|
|
EFI_STATUS
|
|
|
|
GetUCS2CharByFormat (
|
|
|
|
IN CHAR8 *Utf8Buffer,
|
|
|
|
OUT CHAR16 *Ucs2Char
|
|
|
|
)
|
|
|
|
{
|
|
|
|
UINT8 Num1;
|
|
|
|
UINT8 Num2;
|
|
|
|
UINT8 Index;
|
|
|
|
CHAR8 Ucs2CharFormat[UNICODE_FORMAT_CHAR_SIZE]; /// two Hexadecimal digits Ascii string, like "3F"
|
|
|
|
|
|
|
|
for (Index = 0; Index < 4; Index++) {
|
|
|
|
if ((*(Utf8Buffer + 2 + Index) & 0x80) != 0x00) {
|
|
|
|
return EFI_INVALID_PARAMETER;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
ZeroMem (Ucs2CharFormat, UNICODE_FORMAT_CHAR_SIZE);
|
|
|
|
|
|
|
|
//
|
|
|
|
// Get the First Number, Offset is 2
|
|
|
|
//
|
|
|
|
CopyMem (Ucs2CharFormat, Utf8Buffer + 2, UNICODE_FORMAT_CHAR_LEN);
|
|
|
|
Num1 = (UINT8)AsciiStrHexToUintn (Ucs2CharFormat);
|
|
|
|
|
|
|
|
//
|
|
|
|
// Get the Second Number, Offset is 4
|
|
|
|
//
|
|
|
|
CopyMem (Ucs2CharFormat, Utf8Buffer + 4, UNICODE_FORMAT_CHAR_LEN);
|
|
|
|
Num2 = (UINT8)AsciiStrHexToUintn (Ucs2CharFormat);
|
|
|
|
|
|
|
|
//
|
|
|
|
// Ucs2Char is Little-Endian
|
|
|
|
//
|
|
|
|
*((CHAR8 *)Ucs2Char) = Num2;
|
|
|
|
*(((CHAR8 *)Ucs2Char) + 1) = Num1;
|
|
|
|
|
|
|
|
return EFI_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
Convert a UCS2 character to UTF8 encoding string.
|
|
|
|
|
|
|
|
@param[in] Ucs2Char The provided UCS2 character.
|
|
|
|
@param[out] Utf8Buffer The converted UTF8 encoded data.
|
|
|
|
|
|
|
|
@retval Return the size of UTF8 encoding data for this UCS2 character.
|
|
|
|
|
|
|
|
**/
|
|
|
|
UINT8
|
|
|
|
UCS2CharToUTF8 (
|
|
|
|
IN CHAR16 Ucs2Char,
|
|
|
|
OUT CHAR8 *Utf8Buffer
|
|
|
|
)
|
|
|
|
{
|
|
|
|
UINT16 Ucs2Number;
|
|
|
|
|
|
|
|
ASSERT (Utf8Buffer != NULL);
|
|
|
|
|
|
|
|
Ucs2Number = (UINT16)Ucs2Char;
|
|
|
|
if (Ucs2Number <= 0x007F) {
|
|
|
|
//
|
|
|
|
// UTF8 format: 0xxxxxxx
|
|
|
|
//
|
|
|
|
*Utf8Buffer = Ucs2Char & 0x7F;
|
|
|
|
return 1;
|
|
|
|
} else if ((Ucs2Number >= 0x0080) && (Ucs2Number <= 0x07FF)) {
|
|
|
|
//
|
|
|
|
// UTF8 format: 110xxxxx 10xxxxxx
|
|
|
|
//
|
|
|
|
*(Utf8Buffer + 1) = (Ucs2Char & 0x3F) | 0x80;
|
|
|
|
*Utf8Buffer = ((Ucs2Char >> 6) & 0x1F) | 0xC0;
|
|
|
|
return 2;
|
|
|
|
} else {
|
|
|
|
/// Ucs2Number >= 0x0800 && Ucs2Number <= 0xFFFF
|
|
|
|
|
|
|
|
//
|
|
|
|
// UTF8 format: 1110xxxx 10xxxxxx 10xxxxxx
|
|
|
|
//
|
|
|
|
*(Utf8Buffer + 2) = (Ucs2Char & 0x3F) | 0x80;
|
|
|
|
*(Utf8Buffer + 1) = ((Ucs2Char >> 6) & 0x3F) | 0x80;
|
|
|
|
*Utf8Buffer = ((Ucs2Char >> 12) & 0x0F) | 0xE0;
|
|
|
|
return 3;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
Convert a UTF8 encoded data to a UCS2 character.
|
|
|
|
|
|
|
|
@param[in] Utf8Buffer The provided UTF8 encoded data.
|
|
|
|
@param[out] Ucs2Char The converted UCS2 character.
|
|
|
|
|
|
|
|
@retval EFI_INVALID_PARAMETER The UTF8 encoded string is not valid or
|
|
|
|
not for UCS2 character.
|
|
|
|
@retval EFI_SUCCESS The converted UCS2 character.
|
|
|
|
|
|
|
|
**/
|
|
|
|
EFI_STATUS
|
|
|
|
UTF8ToUCS2Char (
|
|
|
|
IN CHAR8 *Utf8Buffer,
|
|
|
|
OUT CHAR16 *Ucs2Char
|
|
|
|
)
|
|
|
|
{
|
|
|
|
UINT8 Utf8Size;
|
|
|
|
CHAR8 *Ucs2Buffer;
|
|
|
|
CHAR8 TempChar1;
|
|
|
|
CHAR8 TempChar2;
|
|
|
|
CHAR8 TempChar3;
|
|
|
|
|
|
|
|
ASSERT (Utf8Buffer != NULL && Ucs2Char != NULL);
|
|
|
|
ZeroMem (Ucs2Char, sizeof (CHAR16));
|
|
|
|
Ucs2Buffer = (CHAR8 *)Ucs2Char;
|
|
|
|
|
|
|
|
Utf8Size = GetUTF8SizeForUCS2 (Utf8Buffer);
|
|
|
|
switch (Utf8Size) {
|
|
|
|
case 1:
|
|
|
|
|
|
|
|
//
|
|
|
|
// UTF8 format: 0xxxxxxx
|
|
|
|
//
|
|
|
|
TempChar1 = *Utf8Buffer;
|
|
|
|
if ((TempChar1 & 0x80) != 0x00) {
|
|
|
|
return EFI_INVALID_PARAMETER;
|
|
|
|
}
|
|
|
|
|
|
|
|
*Ucs2Buffer = TempChar1;
|
|
|
|
*(Ucs2Buffer + 1) = 0;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 2:
|
|
|
|
|
|
|
|
//
|
|
|
|
// UTF8 format: 110xxxxx 10xxxxxx
|
|
|
|
//
|
|
|
|
TempChar1 = *Utf8Buffer;
|
|
|
|
if ((TempChar1 & 0xE0) != 0xC0) {
|
|
|
|
return EFI_INVALID_PARAMETER;
|
|
|
|
}
|
|
|
|
|
|
|
|
TempChar2 = *(Utf8Buffer + 1);
|
|
|
|
if ((TempChar2 & 0xC0) != 0x80) {
|
|
|
|
return EFI_INVALID_PARAMETER;
|
|
|
|
}
|
|
|
|
|
|
|
|
*Ucs2Buffer = (TempChar1 << 6) + (TempChar2 & 0x3F);
|
|
|
|
*(Ucs2Buffer + 1) = (TempChar1 >> 2) & 0x07;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 3:
|
|
|
|
|
|
|
|
//
|
|
|
|
// UTF8 format: 1110xxxx 10xxxxxx 10xxxxxx
|
|
|
|
//
|
|
|
|
TempChar1 = *Utf8Buffer;
|
|
|
|
if ((TempChar1 & 0xF0) != 0xE0) {
|
|
|
|
return EFI_INVALID_PARAMETER;
|
|
|
|
}
|
|
|
|
|
|
|
|
TempChar2 = *(Utf8Buffer + 1);
|
|
|
|
if ((TempChar2 & 0xC0) != 0x80) {
|
|
|
|
return EFI_INVALID_PARAMETER;
|
|
|
|
}
|
|
|
|
|
|
|
|
TempChar3 = *(Utf8Buffer + 2);
|
|
|
|
if ((TempChar3 & 0xC0) != 0x80) {
|
|
|
|
return EFI_INVALID_PARAMETER;
|
|
|
|
}
|
|
|
|
|
|
|
|
*Ucs2Buffer = (TempChar2 << 6) + (TempChar3 & 0x3F);
|
|
|
|
*(Ucs2Buffer + 1) = (TempChar1 << 4) + ((TempChar2 >> 2) & 0x0F);
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
|
|
|
return EFI_INVALID_PARAMETER;
|
|
|
|
}
|
|
|
|
|
|
|
|
return EFI_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
Convert a UCS2 string to a UTF8 encoded string.
|
|
|
|
|
|
|
|
@param[in] Ucs2Str The provided UCS2 string.
|
|
|
|
@param[out] Utf8StrAddr The converted UTF8 string address. Caller
|
|
|
|
is responsible for Free this string.
|
|
|
|
|
|
|
|
@retval EFI_INVALID_PARAMETER One or more parameters are invalid.
|
|
|
|
@retval EFI_OUT_OF_RESOURCES System runs out of resources.
|
|
|
|
@retval EFI_SUCCESS The UTF8 encoded string has been converted.
|
|
|
|
|
|
|
|
**/
|
|
|
|
EFI_STATUS
|
|
|
|
UCS2StrToUTF8 (
|
|
|
|
IN CHAR16 *Ucs2Str,
|
|
|
|
OUT CHAR8 **Utf8StrAddr
|
|
|
|
)
|
|
|
|
{
|
|
|
|
UINTN Ucs2StrIndex;
|
|
|
|
UINTN Ucs2StrLength;
|
|
|
|
CHAR8 *Utf8Str;
|
|
|
|
UINTN Utf8StrLength;
|
|
|
|
UINTN Utf8StrIndex;
|
|
|
|
CHAR8 Utf8Buffer[UTF8_BUFFER_FOR_UCS2_MAX_SIZE];
|
|
|
|
UINT8 Utf8BufferSize;
|
2021-12-05 23:54:11 +01:00
|
|
|
|
2020-12-08 02:56:56 +01:00
|
|
|
if ((Ucs2Str == NULL) || (Utf8StrAddr == NULL)) {
|
|
|
|
return EFI_INVALID_PARAMETER;
|
|
|
|
}
|
|
|
|
|
|
|
|
Ucs2StrLength = StrLen (Ucs2Str);
|
|
|
|
Utf8StrLength = 0;
|
|
|
|
|
|
|
|
for (Ucs2StrIndex = 0; Ucs2StrIndex < Ucs2StrLength; Ucs2StrIndex++) {
|
|
|
|
ZeroMem (Utf8Buffer, sizeof (Utf8Buffer));
|
|
|
|
Utf8BufferSize = UCS2CharToUTF8 (Ucs2Str[Ucs2StrIndex], Utf8Buffer);
|
|
|
|
Utf8StrLength += Utf8BufferSize;
|
|
|
|
}
|
|
|
|
|
|
|
|
Utf8Str = AllocateZeroPool (Utf8StrLength + 1);
|
|
|
|
if (Utf8Str == NULL) {
|
|
|
|
return EFI_OUT_OF_RESOURCES;
|
|
|
|
}
|
|
|
|
|
|
|
|
Utf8StrIndex = 0;
|
|
|
|
for (Ucs2StrIndex = 0; Ucs2StrIndex < Ucs2StrLength; Ucs2StrIndex++) {
|
|
|
|
ZeroMem (Utf8Buffer, sizeof (Utf8Buffer));
|
|
|
|
Utf8BufferSize = UCS2CharToUTF8 (Ucs2Str[Ucs2StrIndex], Utf8Buffer);
|
|
|
|
|
|
|
|
CopyMem (Utf8Str + Utf8StrIndex, Utf8Buffer, Utf8BufferSize);
|
|
|
|
Utf8StrIndex += Utf8BufferSize;
|
|
|
|
}
|
|
|
|
|
|
|
|
Utf8Str[Utf8StrIndex] = '\0';
|
|
|
|
*Utf8StrAddr = Utf8Str;
|
|
|
|
|
|
|
|
return EFI_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
Convert a UTF8 encoded string to a UCS2 string.
|
|
|
|
|
|
|
|
@param[in] Utf8Str The provided UTF8 encoded string.
|
|
|
|
@param[out] Ucs2StrAddr The converted UCS2 string address. Caller
|
|
|
|
is responsible for Free this string.
|
|
|
|
|
|
|
|
@retval EFI_INVALID_PARAMETER The UTF8 encoded string is not valid to
|
|
|
|
convert to UCS2 string.
|
|
|
|
One or more parameters are invalid.
|
|
|
|
@retval EFI_OUT_OF_RESOURCES System runs out of resources.
|
|
|
|
@retval EFI_SUCCESS The UCS2 string has been converted.
|
|
|
|
|
|
|
|
**/
|
|
|
|
EFI_STATUS
|
|
|
|
UTF8StrToUCS2 (
|
|
|
|
IN CHAR8 *Utf8Str,
|
|
|
|
OUT CHAR16 **Ucs2StrAddr
|
|
|
|
)
|
|
|
|
{
|
|
|
|
EFI_STATUS Status;
|
|
|
|
UINTN Utf8StrIndex;
|
|
|
|
UINTN Utf8StrLength;
|
|
|
|
UINTN Ucs2StrIndex;
|
|
|
|
UINT8 Utf8BufferSize;
|
|
|
|
CHAR16 *Ucs2StrTemp;
|
2021-12-05 23:54:11 +01:00
|
|
|
|
2020-12-08 02:56:56 +01:00
|
|
|
if ((Utf8Str == NULL) || (Ucs2StrAddr == NULL)) {
|
|
|
|
return EFI_INVALID_PARAMETER;
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
// It is not an Ascii string, calculate string length.
|
|
|
|
//
|
|
|
|
Utf8StrLength = 0;
|
|
|
|
while (*(Utf8Str + Utf8StrLength) != '\0') {
|
|
|
|
Utf8StrLength++;
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
// UCS2 string shall not be longer than the UTF8 string.
|
|
|
|
//
|
|
|
|
Ucs2StrTemp = AllocateZeroPool ((Utf8StrLength + 1) * sizeof (CHAR16));
|
|
|
|
if (Ucs2StrTemp == NULL) {
|
|
|
|
return EFI_OUT_OF_RESOURCES;
|
|
|
|
}
|
|
|
|
|
|
|
|
Utf8StrIndex = 0;
|
|
|
|
Ucs2StrIndex = 0;
|
|
|
|
while (Utf8Str[Utf8StrIndex] != '\0') {
|
|
|
|
if ((CompareMem (Utf8Str + Utf8StrIndex, "\\u", 2) == 0) &&
|
|
|
|
(Utf8StrLength - Utf8StrIndex >= UNICODE_FORMAT_LEN))
|
|
|
|
{
|
|
|
|
Status = GetUCS2CharByFormat (Utf8Str + Utf8StrIndex, Ucs2StrTemp + Ucs2StrIndex);
|
|
|
|
if (!EFI_ERROR (Status)) {
|
|
|
|
Utf8StrIndex += UNICODE_FORMAT_LEN;
|
|
|
|
Ucs2StrIndex++;
|
|
|
|
} else {
|
|
|
|
StrCpyS (Ucs2StrTemp + Ucs2StrIndex, 3, L"\\u");
|
|
|
|
|
|
|
|
Ucs2StrIndex += 2;
|
|
|
|
Utf8StrIndex += 2;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
Utf8BufferSize = GetUTF8SizeForUCS2 (Utf8Str + Utf8StrIndex);
|
|
|
|
if ((Utf8BufferSize == 0) || (Utf8StrLength - Utf8StrIndex < Utf8BufferSize)) {
|
|
|
|
FreePool (Ucs2StrTemp);
|
|
|
|
return EFI_INVALID_PARAMETER;
|
|
|
|
}
|
|
|
|
|
|
|
|
Status = UTF8ToUCS2Char (Utf8Str + Utf8StrIndex, Ucs2StrTemp + Ucs2StrIndex);
|
|
|
|
if (EFI_ERROR (Status)) {
|
|
|
|
FreePool (Ucs2StrTemp);
|
|
|
|
return EFI_INVALID_PARAMETER;
|
|
|
|
}
|
|
|
|
|
|
|
|
Ucs2StrIndex++;
|
|
|
|
Utf8StrIndex += Utf8BufferSize;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
*Ucs2StrAddr = AllocateZeroPool ((Ucs2StrIndex + 1) * sizeof (CHAR16));
|
|
|
|
if (*Ucs2StrAddr == NULL) {
|
|
|
|
FreePool (Ucs2StrTemp);
|
|
|
|
return EFI_OUT_OF_RESOURCES;
|
|
|
|
}
|
|
|
|
|
|
|
|
StrCpyS (*Ucs2StrAddr, Ucs2StrIndex + 1, Ucs2StrTemp);
|
|
|
|
*(*Ucs2StrAddr + Ucs2StrIndex) = L'\0';
|
|
|
|
FreePool (Ucs2StrTemp);
|
|
|
|
|
|
|
|
return EFI_SUCCESS;
|
|
|
|
}
|