From 66fe6ed73e1f6b80b83ee27a4b8eee7025f82c1a Mon Sep 17 00:00:00 2001 From: Michael Hansen Date: Tue, 18 Dec 2012 18:32:35 -0800 Subject: [PATCH] Add UnicodeBuffer as first class data type and allow plStrings to be constructed with it --- Sources/Plasma/CoreLib/plString.cpp | 33 ++++++++++++++++++++--------- Sources/Plasma/CoreLib/plString.h | 20 +++++++++++++---- 2 files changed, 39 insertions(+), 14 deletions(-) diff --git a/Sources/Plasma/CoreLib/plString.cpp b/Sources/Plasma/CoreLib/plString.cpp index d56d2e4c..398d5073 100644 --- a/Sources/Plasma/CoreLib/plString.cpp +++ b/Sources/Plasma/CoreLib/plString.cpp @@ -204,19 +204,25 @@ void plString::IConvertFromWchar(const wchar_t *wstr, size_t size) // We assume that if sizeof(wchar_t) == 2, the data is UTF-16 already IConvertFromUtf16((const uint16_t *)wstr, size); #else + IConvertFromUtf32((const UniChar *)wstr, size); +#endif +} + +void plString::IConvertFromUtf32(const UniChar *ustr, size_t size) +{ fUtf8Buffer = plStringBuffer<char>(); - if (wstr == nil) + if (ustr == nil) return; if ((int32_t)size < 0) - size = wcsnlen(wstr, -(int32_t)size); + size = ustrlen(ustr, -(int32_t)size); // Calculate the UTF-8 size size_t convlen = 0; - const wchar_t *sp = wstr; - while (sp < wstr + size) { + const UniChar *sp = ustr; + while (sp < ustr + size) { if (*sp > 0x10FFFF) { - hsAssert(0, "UCS-4 character out of range"); + hsAssert(0, "UTF-32 character out of range"); convlen += 3; // Use U+FFFD for release builds } else if (*sp > 0xFFFF) @@ -233,8 +239,8 @@ void plString::IConvertFromWchar(const wchar_t *wstr, size_t size) // And perform the actual conversion char *utf8 = fUtf8Buffer.CreateWritableBuffer(convlen); char *dp = utf8; - sp = wstr; - while (sp < wstr + size) { + sp = ustr; + while (sp < ustr + size) { if (*sp > 0x10FFFF) { // Character out of range; Use U+FFFD instead *dp++ = 0xE0 | ((BADCHAR_REPLACEMENT >> 12) & 0x0F); @@ -258,7 +264,6 @@ void plString::IConvertFromWchar(const wchar_t *wstr, size_t size) ++sp; }
utf8[convlen] = 0; -#endif } void plString::IConvertFromIso8859_1(const char *astr, size_t size) @@ -462,9 +467,9 @@ plStringBuffer<char> plString::ToIso8859_1() const return result; } -plStringBuffer<UniChar> plString::GetUnicodeArray() const +plUnicodeBuffer plString::GetUnicodeArray() const { - plStringBuffer<UniChar> result; + plUnicodeBuffer result; if (IsEmpty()) return result; @@ -822,3 +827,11 @@ plStringStream &plStringStream::operator<<(unsigned int num) snprintf(buffer, 12, "%u", num); return operator<<(buffer); } + +size_t ustrlen(const UniChar *ustr, size_t max) +{ + size_t length = 0; + while (length < max && ustr[length]) /* bound is tested first so ustr[max] is never read */ + ++length; + return length; +} diff --git a/Sources/Plasma/CoreLib/plString.h b/Sources/Plasma/CoreLib/plString.h index a8efd713..76a45087 100644 --- a/Sources/Plasma/CoreLib/plString.h +++ b/Sources/Plasma/CoreLib/plString.h @@ -121,7 +121,6 @@ public: const _Ch *GetData() const { return IHaveACow() ? fData->fStringData : fShort; } size_t GetSize() const { return fSize; } - operator const _Ch *() const { return GetData(); } // From Haxxia with love @@ -142,14 +141,15 @@ public: } }; +typedef plStringBuffer<UniChar> plUnicodeBuffer; + class plString { +public: enum { kSizeAuto = (size_t)(0x80000000) }; - -public: static const plString Null; private: @@ -158,6 +158,7 @@ private: void IConvertFromUtf8(const char *utf8, size_t size); void IConvertFromUtf16(const uint16_t *utf16, size_t size); void IConvertFromWchar(const wchar_t *wstr, size_t size); + void IConvertFromUtf32(const UniChar *ustr, size_t size); void IConvertFromIso8859_1(const char *astr, size_t size); public: @@ -166,10 +167,12 @@ public: plString(const char *cstr) { IConvertFromUtf8(cstr, kSizeAuto); } plString(const plString &copy) : fUtf8Buffer(copy.fUtf8Buffer) { } plString(const plStringBuffer<char> &init) { operator=(init); } + plString(const plUnicodeBuffer &init) { IConvertFromUtf32(init.GetData(), init.GetSize()); } plString &operator=(const char *cstr) { IConvertFromUtf8(cstr, kSizeAuto); return *this; }
plString &operator=(const plString &copy) { fUtf8Buffer = copy.fUtf8Buffer; return *this; } plString &operator=(const plStringBuffer<char> &init); + plString &operator=(const plUnicodeBuffer &init) { IConvertFromUtf32(init.GetData(), init.GetSize()); return *this; } plString &operator+=(const char *cstr) { return operator=(*this + cstr); } plString &operator+=(const plString &str) { return operator=(*this + str); } @@ -213,7 +216,7 @@ public: plStringBuffer<char> ToIso8859_1() const; // For use in displaying characters in a GUI - plStringBuffer<UniChar> GetUnicodeArray() const; + plUnicodeBuffer GetUnicodeArray() const; size_t GetSize() const { return fUtf8Buffer.GetSize(); } bool IsEmpty() const { return fUtf8Buffer.GetSize() == 0; } @@ -399,6 +402,13 @@ public: return ch; } + UniChar operator[](size_t offset) const + { + iterator copy(*this); + copy += offset; + return *copy; + } + bool AtEnd() const { return m_ptr >= m_end; } bool IsValid() const { return m_ptr != 0; } @@ -464,4 +474,6 @@ private: size_t fLength; }; +size_t ustrlen(const UniChar *ustr, size_t max = plString::kSizeAuto); + #endif //plString_Defined