Utility: add a function to truncate strings while avoiding collisions

This commit is contained in:
Julian Brost 2021-06-17 08:49:54 +02:00
parent 5a114a6bf2
commit 97d6876803
3 changed files with 61 additions and 0 deletions

View File

@ -7,7 +7,9 @@
#include "base/string.hpp"
#include "base/array.hpp"
#include "base/threadpool.hpp"
#include "base/tlsutility.hpp"
#include <boost/thread/tss.hpp>
#include <openssl/sha.h>
#include <functional>
#include <typeinfo>
#include <vector>
@ -146,6 +148,40 @@ public:
static void IncrementTime(double);
#endif /* I2_DEBUG */
/**
* TruncateUsingHash truncates a given string to an allowed maximum length while avoiding collisions in the output
* using a hash function (SHA1).
*
* For inputs shorter than the maximum output length, the output will be the same as the input. If the input has at
* least the maximum output length, it is hashed using SHA1 and the output has the format "A...B" where A is a prefix
* of the input and B is the hex-encoded SHA1 hash of the input. The length of A is chosen so that the result has
* the maximum allowed output length.
*
* @tparam maxLength Maximum length of the output string (must be at least 44)
* @param in String to truncate
* @return A truncated string derived from in of at most length maxLength
*/
template<size_t maxLength>
static String TruncateUsingHash(const String &in) {
	/*
	 * Caution: the exact output of this function is relied upon for deriving file names
	 * (/var/lib/icinga2/api/packages/_api). Those names should stay stable across versions, so any change to
	 * the format produced here would need special handling.
	 */

	/* Number of characters in a hex-encoded SHA1 digest (two hex digits per digest byte). */
	constexpr size_t hashHexLen = SHA_DIGEST_LENGTH * 2;

	/* Marker placed between the kept prefix of the input and the hash. */
	constexpr char separator[] = "...";
	constexpr size_t separatorLen = sizeof(separator) - 1; /* exclude the terminating NUL */

	static_assert(maxLength >= 1 + 3 + hashHexLen,
		"maxLength must be at least 44 to hold one character, '...', and a hex-encoded SHA1 hash");

	/*
	 * Inputs of exactly maxLength characters are hashed as well: otherwise they could be indistinguishable
	 * from the truncated form of some longer input.
	 */
	if (in.GetLength() >= maxLength) {
		/* Keep as much of the input as still fits in front of the separator and the hash. */
		const size_t prefixLen = maxLength - hashHexLen - separatorLen;

		return in.SubStr(0, prefixLen) + separator + SHA1(in);
	}

	/* Strictly shorter than the limit: pass the input through unchanged. */
	return in;
}
private:
Utility();

View File

@ -117,6 +117,7 @@ add_boost_test(base
base_utility/comparepasswords_issafe
base_utility/validateutf8
base_utility/EscapeCreateProcessArg
base_utility/TruncateUsingHash
base_value/scalar
base_value/convert
base_value/format

View File

@ -111,4 +111,28 @@ BOOST_AUTO_TEST_CASE(EscapeCreateProcessArg)
#endif /* _WIN32 */
}
BOOST_AUTO_TEST_CASE(TruncateUsingHash)
{
	/*
	 * Caution: the expected values below pin the exact output of TruncateUsingHash. That output is used to derive
	 * file names (/var/lib/icinga2/api/packages/_api) which should not change between versions, or would need
	 * special handling if they do.
	 */

	/* Builds an input consisting of `count` repetitions of the character 'a'. */
	const auto repeatA = [](std::string::size_type count) { return std::string(count, 'a'); };

	/* 44 is the smallest maxLength accepted by the template: a single input character is kept */
	BOOST_CHECK_EQUAL(Utility::TruncateUsingHash<44>(repeatA(64)),
		"a...0098ba824b5c16427bd7a1122a5a442a25ec644d");

	/* over-long input: 37 kept characters + "..." + 40 hex characters = 80 */
	BOOST_CHECK_EQUAL(Utility::TruncateUsingHash<80>(repeatA(100)),
		repeatA(37) + "...7f9000257a4918d7072655ea468540cdcbd42e0c");

	/* inputs strictly shorter than maxLength must come back unchanged */
	BOOST_CHECK_EQUAL(Utility::TruncateUsingHash<80>(""), "");
	BOOST_CHECK_EQUAL(Utility::TruncateUsingHash<80>(repeatA(60)), repeatA(60));
	BOOST_CHECK_EQUAL(Utility::TruncateUsingHash<80>(repeatA(79)), repeatA(79));

	/* an input of exactly maxLength characters is hashed too, to avoid collisions */
	BOOST_CHECK_EQUAL(Utility::TruncateUsingHash<80>(repeatA(80)),
		repeatA(37) + "...86f33652fcffd7fa1443e246dd34fe5d00e25ffd");
}
BOOST_AUTO_TEST_SUITE_END()