diff --git a/Sources/Plasma/CoreLib/plString.cpp b/Sources/Plasma/CoreLib/plString.cpp
index c60440c9..1c544a22 100644
--- a/Sources/Plasma/CoreLib/plString.cpp
+++ b/Sources/Plasma/CoreLib/plString.cpp
@@ -700,6 +700,78 @@ plString plString::ToLower() const
     return str;
 }
 
+// Returns true when ch matches any character of the NUL-terminated string
+// set.  The terminating NUL itself is never treated as a member.
+static bool ch_in_set(char ch, const char *set)
+{
+    for (const char *s = set; *s; ++s) {
+        if (ch == *s)
+            return true;
+    }
+    return false;
+}
+
+// Breaks the string into the non-empty runs of characters that lie between
+// delimiters.  Every char found in delims is a delimiter in its own right;
+// leading, trailing and repeated delimiters are skipped, so no empty token
+// is ever produced.  delims must be a NUL-terminated string.
+std::vector<plString> plString::Tokenize(const char *delims)
+{
+    std::vector<plString> result;
+
+    const char *next = c_str();
+    const char *end = next + GetSize();  // So binary strings work
+    while (next != end) {
+        // Advance cur to the next delimiter, or to the end of the buffer
+        const char *cur = next;
+        while (cur != end && !ch_in_set(*cur, delims))
+            ++cur;
+
+        // [next, cur) is one token; it is empty only when the string
+        // starts with a delimiter
+        if (cur != next)
+            result.push_back(plString::FromUtf8(next, cur - next));
+
+        // Skip the whole run of delimiters that follows the token
+        next = cur;
+        while (next != end && ch_in_set(*next, delims))
+            ++next;
+    }
+
+    return result;
+}
+
+// Splits the string at each occurrence of the separator split, keeping the
+// (possibly empty) pieces in between.  At most maxSplits separators are
+// consumed; whatever follows the last one is returned as the final element,
+// so the result always holds at least one string.  An empty separator can
+// never be consumed, so in that case the whole string is returned unsplit.
+//TODO: Not binary safe (strstr and strlen stop at the first NUL)
+std::vector<plString> plString::Split(const char *split, size_t maxSplits)
+{
+    std::vector<plString> result;
+
+    const char *next = c_str();
+    size_t splitlen = strlen(split);
+
+    // strstr() matches an empty needle at every position without advancing,
+    // which would add one empty string per remaining split; skip the loop.
+    while (splitlen > 0 && maxSplits > 0) {
+        const char *sp = strstr(next, split);
+
+        if (!sp)
+            break;
+
+        result.push_back(plString::FromUtf8(next, sp - next));
+        next = sp + splitlen;
+        --maxSplits;
+    }
+
+    // Everything after the last consumed separator
+    result.push_back(plString::FromUtf8(next));
+    return result;
+}
+
 plString operator+(const plString &left, const plString &right)
 {
     plString cat;
diff --git a/Sources/Plasma/CoreLib/plString.h b/Sources/Plasma/CoreLib/plString.h
index 2014abb3..28b18430 100644
--- a/Sources/Plasma/CoreLib/plString.h
+++ b/Sources/Plasma/CoreLib/plString.h
@@ -44,7 +44,7 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com
 #define plString_Defined
 
 #include "HeadSpin.h"
-#include <stddef.h>
+#include <vector>
 
 typedef unsigned int UniChar;
 
@@ -287,6 +287,15 @@ public:
     plString ToUpper() const;
     plString ToLower() const;
 
+    // Should replace other tokenization methods.  The difference between Split
+    // and Tokenize is that Tokenize never returns a blank string (it strips
+    // all delimiters and only returns the pieces left between them), whereas
+    // Split will split on a full string, returning whatever is left between.
+    // Split performs at most maxSplits splits and returns the string unsplit
+    // when the separator is empty.
+    std::vector<plString> Split(const char *split, size_t maxSplits = kSizeAuto);
+    std::vector<plString> Tokenize(const char *delims = " \t\r\n\f\v");
+
 public:
     struct less
     {