JsonEncoder: let the serializer replace invalid UTF-8 characters

Replacing invalid UTF-8 characters beforehand by ourselves doesn't make any sense: the serializer can perform the very same replacement, with the exact same Unicode replacement character (U+FFFD), on its own. So, why not just use it directly? Instead of wasting memory on a temporary `String` object to UTF-8 validate each and every value, we just let the serializer dump the replaced char (if any) directly into the output writer. No memory waste, no fuss!
2025-07-31 01:24:19 +02:00 · 2025-07-04 16:51:26 +02:00 · 2025-07-04 16:51:26 +02:00 · 89418f38ee
commit 89418f38ee
parent dad4c0889f
2 changed files with 9 additions and 8 deletions
--- a/lib/base/json.cpp
+++ b/lib/base/json.cpp
@ -5,6 +5,7 @@
 #include "base/dictionary.hpp"
 #include "base/namespace.hpp"
 #include "base/objectlock.hpp"
 #include "base/utility.hpp"
 #include <boost/numeric/conversion/cast.hpp>
 #include <stack>
 #include <utility>
@ -56,7 +57,7 @@ void JsonEncoder::Encode(const Value& value, boost::asio::yield_context* yc)
 			Write(value.ToBool() ? "true" : "false");
 			break;
 		case ValueString:
-			EncodeNlohmannJson(Utility::ValidateUTF8(value.Get<String>()));
+			EncodeNlohmannJson(value.Get<String>());
 			break;
 		case ValueNumber:
 			EncodeNumber(value.Get<double>());
@ -76,7 +77,7 @@ void JsonEncoder::Encode(const Value& value, boost::asio::yield_context* yc)
 				EncodeValueGenerator(gen, yc);
 			} else {
 				// Some other non-serializable object type!
-				EncodeNlohmannJson(Utility::ValidateUTF8(obj->ToString()));
+				EncodeNlohmannJson(obj->ToString());
 			}
 			break;
 		}
@ -166,7 +167,7 @@ void JsonEncoder::EncodeObject(const Iterable& container, const ValExtractor& ex
 		WriteSeparatorAndIndentStrIfNeeded(!isEmpty);
 		isEmpty = false;
-		EncodeNlohmannJson(Utility::ValidateUTF8(key));
+		EncodeNlohmannJson(key);
 		Write(m_Pretty ? ": " : ":");
 		Encode(extractor(val), yc);
@ -179,13 +180,15 @@ void JsonEncoder::EncodeObject(const Iterable& container, const ValExtractor& ex
 * Dumps a nlohmann::json object to the output stream using the serializer.
 *
 * This function uses the @c nlohmann::detail::serializer to dump the provided @c nlohmann::json
- * object to the output stream managed by the @c JsonEncoder.
+ * object to the output stream managed by the @c JsonEncoder. Strings will be properly escaped, and
 * if any invalid UTF-8 sequences are encountered, it will replace them with the Unicode replacement
 * character (U+FFFD).
 *
 * @param json The nlohmann::json object to encode.
 */
 void JsonEncoder::EncodeNlohmannJson(const nlohmann::json& json) const
 {
-	nlohmann::detail::serializer<nlohmann::json> s(m_Writer, ' ', nlohmann::json::error_handler_t::strict);
+	nlohmann::detail::serializer<nlohmann::json> s(m_Writer, ' ', nlohmann::json::error_handler_t::replace);
 	s.dump(json, m_Pretty, true, 0, 0);
 }
--- a/lib/base/json.hpp
+++ b/lib/base/json.hpp
@ -6,7 +6,6 @@
 #include "base/i2-base.hpp"
 #include "base/array.hpp"
 #include "base/generator.hpp"
 #include "base/utility.hpp"
 #include <boost/asio/spawn.hpp>
 #include <json.hpp>
@ -58,8 +57,7 @@ class Value;
 * The JSON encoder generates most of the low level JSON tokens, but it still relies on the already existing
 * @c nlohmann::detail::serializer<> class to dump numbers and ASCII validated JSON strings. This means that the
 * encoder doesn't perform any kind of JSON validation or escaping on its own, but simply delegates all this kind
- * of work to serializer<>. However, Strings are UTF-8 validated beforehand using the @c Utility::ValidateUTF8()
+ * of work to serializer<>.
 * function and only the validated (copy of the original) String is passed to the serializer.
 *
 * The generated JSON can be either prettified or compact, depending on your needs. The prettified JSON object
 * is indented with 4 spaces and grows linearly with the depth of the object tree.