Browse Source

Use U+FFFD on Debug code too, and finish test cases for replacement

Michael Hansen 10 years ago
parent
commit
56553c8271
  1. 20
      Sources/Plasma/CoreLib/plString.cpp
  2. 26
      Sources/Tests/CoreTests/test_plString.cpp

20
Sources/Plasma/CoreLib/plString.cpp

@ -171,19 +171,25 @@ void plString::IConvertFromUtf16(const uint16_t *utf16, size_t size)
plUniChar unichar = 0x10000;
if (sp + 1 >= utf16 + size) {
hsAssert(0, "Incomplete surrogate pair in UTF-16 data");
// Incomplete surrogate pair
unichar = BADCHAR_REPLACEMENT;
} else if (*sp < 0xDC00) {
unichar += (*sp++ & 0x3FF) << 10;
hsAssert(*sp >= 0xDC00 && *sp <= 0xDFFF,
"Invalid surrogate pair in UTF-16 data");
if (*sp < 0xDC00 || *sp > 0xDFFF) {
// Invalid surrogate pair
unichar = BADCHAR_REPLACEMENT;
} else {
unichar += (*sp & 0x3FF);
}
} else {
unichar += (*sp++ & 0x3FF);
hsAssert(*sp >= 0xD800 && *sp < 0xDC00,
"Invalid surrogate pair in UTF-16 data");
if (*sp < 0xD800 || *sp >= 0xDC00) {
// Invalid surrogate pair
unichar = BADCHAR_REPLACEMENT;
} else {
unichar += (*sp & 0x3FF) << 10;
}
}
*dp++ = 0xF0 | ((unichar >> 18) & 0x07);
*dp++ = 0x80 | ((unichar >> 12) & 0x3F);
*dp++ = 0x80 | ((unichar >> 6) & 0x3F);
@ -229,8 +235,8 @@ void plString::IConvertFromUtf32(const plUniChar *ustr, size_t size)
const plUniChar *sp = ustr;
while (sp < ustr + size) {
if (*sp > 0x10FFFF) {
hsAssert(0, "UTF-32 character out of range");
convlen += 3; // Use U+FFFD for release builds
// Invalid character gets replaced with U+FFFD
convlen += 3;
}
else if (*sp > 0xFFFF)
convlen += 4;

26
Sources/Tests/CoreTests/test_plString.cpp

@ -123,18 +123,30 @@ TEST(PlStringTest, ConvertInvalid)
{
// The following should encode replacement characters for invalid chars
const plUniChar unicode_replacement[] = { 0xfffd, 0 };
const char latin1_replacement[] = "?";
const plUniChar char_too_big[] = { 0xffffff, 0 };
plUnicodeBuffer too_big = plString::FromUtf32(char_too_big).GetUnicodeArray();
// Character outside of Unicode specification range
const plUniChar too_big_c[] = { 0xffffff, 0 };
plUnicodeBuffer too_big = plString::FromUtf32(too_big_c).GetUnicodeArray();
EXPECT_EQ(0, T_strcmp(unicode_replacement, too_big.GetData()));
// TODO: Invalid surrogate pairs can encode to 0xfffd, but it's handled
// by an assert right now.
// Invalid surrogate pairs
const uint16_t incomplete_surr_c[] = { 0xd800, 0 };
plString incomplete_surr = plString::FromUtf16(incomplete_surr_c);
EXPECT_EQ(0, T_strcmp(unicode_replacement,
incomplete_surr.GetUnicodeArray().GetData()));
const uint16_t double_low_c[] = { 0xd800, 0xd801, 0 };
plString double_low = plString::FromUtf16(double_low_c);
EXPECT_EQ(0, T_strcmp(unicode_replacement, double_low.GetUnicodeArray().GetData()));
const uint16_t bad_combo_c[] = { 0xdc00, 0x20, 0 };
plString bad_combo = plString::FromUtf16(bad_combo_c);
EXPECT_EQ(0, T_strcmp(unicode_replacement, bad_combo.GetUnicodeArray().GetData()));
// ISO-8859-1 doesn't have \ufffd, so it uses '?' instead
const plUniChar high_char[] = { 0x1ff, 0 };
const char latin1_replacement[] = "?";
plStringBuffer<char> non_latin1 = plString::FromUtf32(high_char).ToIso8859_1();
const plUniChar non_latin1_c[] = { 0x1ff, 0 };
plStringBuffer<char> non_latin1 = plString::FromUtf32(non_latin1_c).ToIso8859_1();
EXPECT_STREQ(latin1_replacement, non_latin1.GetData());
}

Loading…
Cancel
Save