Utility::ValidateUTF8(): use UTF8-CPP

This commit is contained in:
Alexander A. Klimov 2019-03-14 09:34:51 +01:00
parent 85c0d2c2cc
commit a72f4db5c9

View File

@ -20,8 +20,11 @@
#include <ios> #include <ios>
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
#include <iterator>
#include <stdlib.h> #include <stdlib.h>
#include <future> #include <future>
#include <utf8.h>
#include <vector>
#ifdef __FreeBSD__ #ifdef __FreeBSD__
# include <pthread_np.h> # include <pthread_np.h>
@ -1701,40 +1704,20 @@ String Utility::GetPlatformArchitecture()
#endif /* _WIN32 */ #endif /* _WIN32 */
} }
const char l_Utf8Replacement[] = "\xEF\xBF\xBD";
String Utility::ValidateUTF8(const String& input) String Utility::ValidateUTF8(const String& input)
{ {
String output; std::vector<char> output;
size_t length = input.GetLength(); output.reserve(input.GetLength() * 3u);
for (size_t i = 0; i < length; i++) { try {
if ((input[i] & 0x80) == 0) { utf8::replace_invalid(input.Begin(), input.End(), std::back_inserter(output));
output += input[i]; } catch (const utf8::not_enough_room&) {
continue; output.insert(output.end(), (const char*)l_Utf8Replacement, (const char*)l_Utf8Replacement + 3);
}
if ((input[i] & 0xE0) == 0xC0 && length > i + 1 &&
(input[i + 1] & 0xC0) == 0x80) {
output += input[i];
output += input[i + 1];
i++;
continue;
}
if ((input[i] & 0xF0) == 0xE0 && length > i + 2 &&
(input[i + 1] & 0xC0) == 0x80 && (input[i + 2] & 0xC0) == 0x80) {
output += input[i];
output += input[i + 1];
output += input[i + 2];
i += 2;
continue;
}
output += '\xEF';
output += '\xBF';
output += '\xBD';
} }
return output; return String(output.begin(), output.end());
} }
String Utility::CreateTempFile(const String& path, int mode, std::fstream& fp) String Utility::CreateTempFile(const String& path, int mode, std::fstream& fp)