Add string split and tokenize functions

2025-07-19 19:59:09 +00:00 · 2012-11-18 02:56:47 -08:00
parent 1b57055fd3
commit 188171235e
2 changed files with 62 additions and 1 deletions
--- a/Sources/Plasma/CoreLib/plString.cpp
+++ b/Sources/Plasma/CoreLib/plString.cpp
@ -700,6 +700,60 @@ plString plString::ToLower() const
    return str;
 }
 // Reports whether `ch` equals any character of the NUL-terminated string
 // `set`.  The scan stops at the terminator without comparing it, so a NUL
 // `ch` is never considered a member of the set.
 static bool ch_in_set(char ch, const char *set)
 {
    const char *cursor = set;
    while (*cursor != '\0') {
        if (*cursor == ch)
            return true;
        ++cursor;
    }
    return false;
 }
 // Breaks this string into the non-empty pieces found between delimiter
 // characters.  Every character of `delims` counts as a separator and runs
 // of separators are collapsed, so no empty token is ever produced.
 std::vector<plString> plString::Tokenize(const char *delims)
 {
    std::vector<plString> result;

    // Walk by explicit length rather than to the first NUL, so embedded
    // NUL bytes are treated as ordinary token characters.
    const char *pos = c_str();
    const char *const stop = pos + GetSize();

    for (;;) {
        // Skip any separators in front of the next token.
        while (pos != stop && ch_in_set(*pos, delims))
            ++pos;
        if (pos == stop)
            break;

        // `pos` is on a non-delimiter: consume the token up to the next
        // separator or the end of the buffer.
        const char *tokenStart = pos;
        while (pos != stop && !ch_in_set(*pos, delims))
            ++pos;
        result.push_back(plString::FromUtf8(tokenStart, pos - tokenStart));
    }

    return result;
 }
 //TODO: Not binary safe
 // Splits this string at each occurrence of the full separator string
 // `split`, performing at most `maxSplits` cuts.  The text after the last
 // cut (or the whole string when no cut is made) is always appended as the
 // final element, so the result has at least one entry and may contain
 // empty strings (e.g. for adjacent separators).
 //
 // A null or empty `split` performs no splitting: the whole string is
 // returned as the single element.
 std::vector<plString> plString::Split(const char *split, size_t maxSplits)
 {
    std::vector<plString> result;
    const char *next = c_str();

    // Guard against an empty separator: strstr() would match it at `next`
    // without ever advancing, so the loop below would keep pushing empty
    // strings until maxSplits was exhausted.
    const size_t splitlen = split ? strlen(split) : 0;
    if (splitlen == 0) {
        result.push_back(plString::FromUtf8(next));
        return result;
    }

    while (maxSplits > 0) {
        const char *sp = strstr(next, split);
        if (!sp)
            break;

        // Emit the piece before the separator, then resume just past it.
        result.push_back(plString::FromUtf8(next, sp - next));
        next = sp + splitlen;
        --maxSplits;
    }

    // Whatever remains (possibly empty) is the last element.
    result.push_back(plString::FromUtf8(next));
    return result;
 }
 plString operator+(const plString &left, const plString &right)
 {
    plString cat;
--- a/Sources/Plasma/CoreLib/plString.h
+++ b/Sources/Plasma/CoreLib/plString.h
@ -44,7 +44,7 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com
 #define plString_Defined
 #include "HeadSpin.h"
-#include <stddef.h>
+#include <vector>
 typedef unsigned int UniChar;
@ -287,6 +287,13 @@ public:
    plString ToUpper() const;
    plString ToLower() const;
    // Should replace other tokenization methods.  The difference between Split
    // and Tokenize is that Tokenize never returns a blank string (it strips
    // all delimiters and only returns the pieces left between them), whereas
    // Split will split on a full string, returning whatever is left between.
    //
    // Split:    `split` is matched as one whole separator string.  At most
    //           `maxSplits` cuts are made; the remaining text is always
    //           returned as the final element, so the result is never empty
    //           and may contain blank strings.
    // Tokenize: every character in `delims` is a separator, and consecutive
    //           separators are collapsed.  The default set is the ASCII
    //           whitespace characters.
    std::vector<plString> Split(const char *split, size_t maxSplits = kSizeAuto);
    std::vector<plString> Tokenize(const char *delims = " \t\r\n\f\v");
 public:
    struct less
    {