Add a Doxyfile and some plString documentation

13 years ago · dbb3b1a5b6
3 changed files with 2155 additions and 13 deletions
--- a/.gitignore
+++ b/.gitignore
@ -17,6 +17,7 @@ cmake_install.cmake
 install_manifest.txt
 build/
 build-*/
+Docs/Doxygen/

 *.aps
 *.user
--- a/1869
+++ b/1869
--- a/Sources/Plasma/CoreLib/plString.h
+++ b/Sources/Plasma/CoreLib/plString.h
@ -46,12 +46,19 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com
 #include "HeadSpin.h"
 #include <vector>

+/** Single Unicode character, stored as one UTF-32 code unit */
 typedef unsigned int UniChar;

 #define SSO_CHARS (16)
 #define STRING_STACK_SIZE (256)
 #define WHITESPACE_CHARS " \t\n\r"

+/** Ref-counted string data buffer.
+ *  This is used to store actual string data in any (unchecked) encoding format,
+ *  including both the internal UTF-8 data of plString itself as well as the
+ *  temporaries returned in the conversion operators.
+ *  \sa plString
+ */
 template <typename _Ch>
 class plStringBuffer
 {
@ -83,8 +90,10 @@ private:
    bool IHaveACow() const { return fSize >= SSO_CHARS; }

 public:
+    /** Construct an empty string buffer. */
    plStringBuffer() : fSize(0) { memset(fShort, 0, sizeof(fShort)); }

+    /** Copy constructor - adds a reference to the copied buffer */
    plStringBuffer(const plStringBuffer<_Ch> &copy) : fSize(copy.fSize)
    {
        memcpy(fShort, copy.fShort, sizeof(fShort));
@ -92,6 +101,11 @@ public:
            fData->AddRef();
    }

+    /** Construct a string buffer which holds a COPY of the \a data, up to
+     *  \a size characters.  The terminating '\0' is added automatically,
+     *  meaning this constructor is safe to use on buffers which are not
+     *  already null-terminated.
+     */
    plStringBuffer(const _Ch *data, size_t size) : fSize(size)
    {
        memset(fShort, 0, sizeof(fShort));
@ -103,12 +117,18 @@ public:
            fData = new StringRef(copyData);
    }

+    /** Destructor.  The ref-counted data will only be freed if no other
+     *  string buffers still reference it.
+     */
    ~plStringBuffer<_Ch>()
    {
        if (IHaveACow())
            fData->DecRef();
    }

+    /** Assignment operator.  Changes the reference to point to the
+     *  copied buffer in \a copy.
+     */
    plStringBuffer<_Ch> &operator=(const plStringBuffer<_Ch> &copy)
    {
        if (copy.IHaveACow())
@ -121,12 +141,29 @@ public:
        return *this;
    }

+    /** Returns a pointer to the referenced string buffer. */
    const _Ch *GetData() const { return IHaveACow() ? fData->fStringData : fShort; }
+
+    /** Returns the number of characters (not including the '\0') in the
+     *  referenced string buffer.
+     */
    size_t GetSize() const { return fSize; }
+
+    /** Cast operator.  This removes the need to call GetData() explicitly when
+     *  passing a buffer to methods or objects expecting a C-style string.
+     */
    operator const _Ch *() const { return GetData(); }

-    // From Haxxia with love
-    // NOTE:  The client is expected to nul-terminate the returned buffer!
+    /** Create a writable buffer for \a size characters.
+     *  From Haxxia with love!  This will release the current string buffer
+     *  reference and then create a new buffer with space for \a size
+     *  characters, plus one extra for the terminating '\0'.  The newly
+     *  allocated buffer is returned as a non-const pointer, so it can be
+     *  written to without having to use a \c const_cast.
+     *  \warning The caller is expected to null-terminate the returned buffer.
+     *           Not doing so may cause problems for functions and objects
+     *           expecting a null-terminated C-style string.
+     */
    _Ch *CreateWritableBuffer(size_t size)
    {
        if (IHaveACow())
@ -143,15 +180,30 @@ public:
    }
 };

+/** A plStringBuffer for storing fully-expanded Unicode data */
 typedef plStringBuffer<UniChar> plUnicodeBuffer;

-
+/** Unicode-capable and (mostly) binary safe string class.
+ *  plString stores SSO-optimized or reference counted strings (automatically
+ *  determined based on string length) for easy and performant string data
+ *  storage and manipulation.  The internal format of plString is UTF-8,
+ *  meaning it keeps all Unicode information from conversions.  plStrings
+ *  are safe to share without making explicit copies, since plStrings
+ *  follow the strings-are-immutable philosophy.  Anything which mutates
+ *  a plString object will do so in a new string buffer, allowing other
+ *  string objects to retain the old data without getting unexpected changes.
+ */
 class plString
 {
 public:
    enum {
+        /** Automatically determine the size of input (Requires input to be
+         *  correctly null-terminated).
+         */
        kSizeAuto = (size_t)(0x80000000)
    };
+
+    /** Represents a "null" plString object. */
    static const plString Null;

 private:
@ -164,21 +216,51 @@ private:
    void IConvertFromIso8859_1(const char *astr, size_t size);

 public:
+    /** Construct a valid, empty string. */
    plString() { }

+    /** Construct a string from a C-style string.
+     *  \note This constructor expects the input to be UTF-8 encoded.  For
+     *        conversion from ISO-8859-1 8-bit data, use FromIso8859_1().
+     */
    plString(const char *cstr) { IConvertFromUtf8(cstr, kSizeAuto); }
+
+    /** Copy constructor. */
    plString(const plString &copy) : fUtf8Buffer(copy.fUtf8Buffer) { }
+
+    /** Copy constructor from plStringBuffer<char>.
+     *  \note This constructor expects the input to be UTF-8 encoded.  For
+     *        conversion from ISO-8859-1 8-bit data, use FromIso8859_1().
+     */
    plString(const plStringBuffer<char> &init) { operator=(init); }
+
+    /** Construct a string from expanded Unicode data. */
    plString(const plUnicodeBuffer &init) { IConvertFromUtf32(init.GetData(), init.GetSize()); }

+    /** Assignment operator.  Same as plString(const char *). */
    plString &operator=(const char *cstr) { IConvertFromUtf8(cstr, kSizeAuto); return *this; }
+
+    /** Assignment operator.  Same as plString(const plString &). */
    plString &operator=(const plString &copy) { fUtf8Buffer = copy.fUtf8Buffer; return *this; }
+
+    /** Assignment operator.  Same as plString(const plStringBuffer<char> &). */
    plString &operator=(const plStringBuffer<char> &init);
+
+    /** Assignment operator.  Same as plString(const plUnicodeBuffer &). */
    plString &operator=(const plUnicodeBuffer &init) { IConvertFromUtf32(init.GetData(), init.GetSize()); return *this; }

+    /** Append UTF-8 data from a C-style string pointer to the end of this
+     *  string object.
+     *  \sa plStringStream
+     */
    plString &operator+=(const char *cstr) { return operator=(*this + cstr); }
+
+    /** Append the string \a str to the end of this string object.
+     *  \sa plStringStream
+     */
    plString &operator+=(const plString &str) { return operator=(*this + str); }

+    /** Create a new plString object from the UTF-8 formatted data in \a utf8. */
    static inline plString FromUtf8(const char *utf8, size_t size = kSizeAuto)
    {
        plString str;
@ -186,6 +268,7 @@ public:
        return str;
    }

+    /** Create a new plString object from the UTF-16 formatted data in \a utf16. */
    static inline plString FromUtf16(const uint16_t *utf16, size_t size = kSizeAuto)
    {
        plString str;
@ -193,6 +276,7 @@ public:
        return str;
    }

+    /** Create a new plString object from the \p wchar_t data in \a wstr. */
    static inline plString FromWchar(const wchar_t *wstr, size_t size = kSizeAuto)
    {
        plString str;
@ -200,6 +284,7 @@ public:
        return str;
    }

+    /** Create a new plString object from the ISO-8859-1 formatted data in \a astr. */
    static inline plString FromIso8859_1(const char *astr, size_t size = kSizeAuto)
    {
        plString str;
@ -207,123 +292,275 @@ public:
        return str;
    }

+    /** Return the internal UTF-8 data pointer for use in functions and objects
+     *  expecting C-style string pointers.  If this string is empty, returns
+     *  \a substitute instead.
+     */
    const char *c_str(const char *substitute = "") const
    { return IsEmpty() ? substitute : fUtf8Buffer.GetData(); }

+    /** Return the byte at position \a position.  Note that this may be in
+     *  the middle of a UTF-8 sequence -- if you want an actual Unicode
+     *  character, use the buffer returned from GetUnicodeArray() instead.
+     */
    char CharAt(size_t position) const { return c_str()[position]; }

+    /** Returns the internal UTF-8 data buffer object. */
    plStringBuffer<char> ToUtf8() const { return fUtf8Buffer; }
+
+    /** Convert this string's data to a UTF-16 string buffer. */
    plStringBuffer<uint16_t> ToUtf16() const;
+
+    /** Convert this string's data to a wchar_t string buffer.
+     *  \note Depending on your platform and compiler configuration, this
+     *        will either return UTF-16 or UTF-32 data -- it will never
+     *        return a non-Unicode data buffer.
+     */
    plStringBuffer<wchar_t> ToWchar() const;
+
+    /** Convert this string's data as closely as possible to ISO-8859-1.
+     *  Unicode characters outside of the ISO-8859-1 range will be stored
+     *  in the buffer as a question mark ('?').
+     */
    plStringBuffer<char> ToIso8859_1() const;

-    // For use in displaying characters in a GUI
+    /** Convert the string's data to a fully expanded UTF-32 buffer.  This
+     *  makes it easy to operate on actual Unicode characters instead of
+     *  UTF-8 bytes (e.g. for use in rendering characters to a display).
+     */
    plUnicodeBuffer GetUnicodeArray() const;

+    /** Returns the size in number of bytes (excluding the null-terminator) of
+     *  this string.
+     */
    size_t GetSize() const { return fUtf8Buffer.GetSize(); }
+
+    /** Returns \c true if this string is empty (""). */
    bool IsEmpty() const { return fUtf8Buffer.GetSize() == 0; }

-    // TODO: Evaluate whether Plasma actually needs to distinguish between
-    // empty and NULL strings.  Ideally, only IsEmpty should be required.
+    /** Returns \c true if this string is "null".  Currently, this is just
+     *  a synonym for IsEmpty(), as plString makes no distinction between
+     *  null and empty strings.
+     *  \todo Evaluate whether Plasma actually needs to distinguish between
+     *        empty and NULL strings.  Ideally, only IsEmpty should be required.
+     */
    bool IsNull() const { return IsEmpty(); }

+    /** Convert the string data to an integer in base \a base.
+     *  If base is set to 0, this function behaves like strtol, which checks
+     *  for hex or octal prefixes (e.g. 0777 or 0x1234), and assumes base 10
+     *  if none are found.
+     */
    int ToInt(int base = 0) const;
+
+    /** Convert the string to an unsigned integer in base \a base.
+     *  If base is set to 0, this function behaves like strtoul, which checks
+     *  for hex or octal prefixes (e.g. 0777 or 0x1234), and assumes base 10
+     *  if none are found.
+     */
    unsigned int ToUInt(int base = 0) const;
+
+    /** Convert the string to a floating point value. */
    float ToFloat() const;
+
+    /** Convert the string to a double precision floating point value. */
    double ToDouble() const;

+    /** Construct a plString using a printf-like format string. */
    static plString Format(const char *fmt, ...);
+
+    /** Construct a plString using a printf-like format string.
+     *  This function should be called inside of vararg functions, such as
+     *  plString::Format().
+     */
    static plString IFormat(const char *fmt, va_list vptr);

    enum CaseSensitivity {
        kCaseSensitive, kCaseInsensitive
    };

+    /** Compare this string with \a str.
+     *  \return an integer which indicates:
+     *    \li \p =0 - the strings are equal
+     *    \li \p \<0 - this string is lexicographically less than \a str
+     *    \li \p \>0 - this string is lexicographically greater than \a str
+     */
    int Compare(const plString &str, CaseSensitivity sense = kCaseSensitive) const
    {
        return (sense == kCaseSensitive) ? strcmp(c_str(), str.c_str())
                                         : stricmp(c_str(), str.c_str());
    }

+    /** Compare this string with \a str.
+     *  \return an integer which indicates:
+     *    \li \p =0 - the strings are equal
+     *    \li \p \<0 - this string is lexicographically less than \a str
+     *    \li \p \>0 - this string is lexicographically greater than \a str
+     */
    int Compare(const char *str, CaseSensitivity sense = kCaseSensitive) const
    {
        return (sense == kCaseSensitive) ? strcmp(c_str(), str)
                                         : stricmp(c_str(), str);
    }

+    /** Compare up to but never exceeding the first \a count bytes of this
+     *  string with \a str.
+     *  \sa Compare(const plString &, CaseSensitivity) const
+     */
    int CompareN(const plString &str, size_t count, CaseSensitivity sense = kCaseSensitive) const
    {
        return (sense == kCaseSensitive) ? strncmp(c_str(), str.c_str(), count)
                                         : strnicmp(c_str(), str.c_str(), count);
    }

+    /** Compare up to but never exceeding the first \a count bytes of this
+     *  string with \a str.
+     *  \sa Compare(const char *, CaseSensitivity) const
+     */
    int CompareN(const char *str, size_t count, CaseSensitivity sense = kCaseSensitive) const
    {
        return (sense == kCaseSensitive) ? strncmp(c_str(), str, count)
                                         : strnicmp(c_str(), str, count);
    }

+    /** Shortcut for Compare(str, kCaseInsensitive). */
    int CompareI(const plString &str) const { return Compare(str, kCaseInsensitive); }
+
+    /** Shortcut for Compare(str, kCaseInsensitive). */
    int CompareI(const char *str) const { return Compare(str, kCaseInsensitive); }
+
+    /** Shortcut for CompareN(str, count, kCaseInsensitive). */
    int CompareNI(const plString &str, size_t count) const { return CompareN(str, count, kCaseInsensitive); }
+
+    /** Shortcut for CompareN(str, count, kCaseInsensitive). */
    int CompareNI(const char *str, size_t count) const { return CompareN(str, count, kCaseInsensitive); }

+    /** Operator overload for use in containers which depend on \c std::less. */
    bool operator<(const plString &other) const { return Compare(other) < 0; }
+
+    /** Test if this string contains the same string data as \a other. */
    bool operator==(const char *other) const { return Compare(other) == 0; }
+
+    /** Test if this string contains the same string data as \a other. */
    bool operator==(const plString &other) const { return Compare(other) == 0; }
+
+    /** Inverse of operator==(const char *) const. */
    bool operator!=(const char *other) const { return Compare(other) != 0; }
+
+    /** Inverse of operator==(const plString &) const. */
    bool operator!=(const plString &other) const { return Compare(other) != 0; }

+    /** Find the index of the first instance of \a ch in this string.
+     *  \return -1 if the character was not found.
+     */
    int Find(char ch, CaseSensitivity sense = kCaseSensitive) const;
+
+    /** Find the index of the last instance of \a ch in this string.
+     *  \return -1 if the character was not found.
+     */
    int FindLast(char ch, CaseSensitivity sense = kCaseSensitive) const;

+    /** Find the index of the first instance of \a str in this string.
+     *  \return -1 if the substring was not found.
+     */
    int Find(const char *str, CaseSensitivity sense = kCaseSensitive) const;
+
+    /** Find the index of the first instance of \a str in this string.
+     *  \return -1 if the substring was not found.
+     */
    int Find(const plString &str, CaseSensitivity sense = kCaseSensitive) const
    { return Find(str.c_str(), sense); }

+    /** Trim any characters in the supplied \a charset from the left of
+     *  this string.
+     */
    plString TrimLeft(const char *charset = WHITESPACE_CHARS) const;
+
+    /** Trim any characters in the supplied \a charset from the right of
+     *  this string.
+     */
    plString TrimRight(const char *charset = WHITESPACE_CHARS) const;
+
+    /** Trim any characters in the supplied \a charset from both ends of
+     *  this string.  Logically equivalent to (but more efficient than)
+     *  str.TrimLeft(charset).TrimRight(charset)
+     */
    plString Trim(const char *charset = WHITESPACE_CHARS) const;

+    /** Return a substring starting at index \a start, with up to \a size
+     *  characters from the start position.  If \a size is greater than the
+     *  number of characters left in the string after \a start, Substr will
+     *  return the remainder of the string.
+     */
    plString Substr(int start, size_t size = kSizeAuto) const;
+
+    /** Return a substring containing at most \a size characters from the left
+     *  of the string.  Equivalent to Substr(0, size).
+     */
    plString Left(size_t size) const { return Substr(0, size); }
+
+    /** Return a substring containing at most \a size characters from the right
+     *  of the string.  Equivalent to Substr(GetSize() - size, size).
+     */
    plString Right(size_t size) const { return Substr(GetSize() - size, size); }

+    /** Return a copy of this string with all occurrences of \a from replaced
+     *  with \a to. */
    plString Replace(const char *from, const char *to) const;

-    // NOTE:  Does Compare(blah, kCaseInsensitive) make more sense?  If
-    //        so, use that instead -- it's faster and more efficient!
+    /** Return a copy of this string with all Latin-1 alphabetic characters
+     *  converted to upper case.
+     *  \sa CompareI()
+     */
    plString ToUpper() const;
+
+    /** Return a copy of this string with all Latin-1 alphabetic characters
+     *  converted to lower case.
+     *  \sa CompareI()
+     */
    plString ToLower() const;

-    // Should replace other tokenization methods.  The difference between Split
-    // and Tokenize is that Tokenize never returns a blank string (it strips
-    // all delimiters and only returns the pieces left between them), whereas
-    // Split will split on a full string, returning whatever is left between.
+    /** Split this string into pieces separated by the substring \a split.
+     *  This will return the complete contents of everything between split
+     *  markers, meaning that two subsequent markers will produce an empty
+     *  string in the returned vector.
+     *  \sa Tokenize()
+     */
    std::vector<plString> Split(const char *split, size_t maxSplits = kSizeAuto) const;
+
+    /** Split this string into tokens, delimited by \a delims.
+     *  Note that, unlike Split(), Tokenize will return only non-blank strings
+     *  after stripping out all delimiters between tokens.
+     *  \sa Split()
+     */
    std::vector<plString> Tokenize(const char *delims = WHITESPACE_CHARS) const;

+    /** Create a string initialized with \a count copies of the character \a c. */
    static plString Fill(size_t count, char c);

 public:
+    /** Functor which compares two strings case-sensitively for sorting. */
    struct less
    {
        bool operator()(const plString &_L, const plString &_R) const
        { return _L.Compare(_R, kCaseSensitive) < 0; }
    };

+    /** Functor which compares two strings case-insensitively for sorting. */
    struct less_i
    {
        bool operator()(const plString &_L, const plString &_R) const
        { return _L.Compare(_R, kCaseInsensitive) < 0; }
    };

+    /** Functor which compares two strings case-sensitively for equality. */
    struct equal
    {
        bool operator()(const plString &_L, const plString &_R) const
        { return _L.Compare(_R, kCaseSensitive) == 0; }
    };

+    /** Functor which compares two strings case-insensitively for equality. */
    struct equal_i
    {
        bool operator()(const plString &_L, const plString &_R) const
@ -336,37 +573,71 @@ private:
    friend plString operator+(const char *left, const plString &right);
 };

+/** Concatenation operator for plStrings. */
 plString operator+(const plString &left, const plString &right);
+
+/** Concatenation operator for plStrings and UTF-8 C-style string data. */
 plString operator+(const plString &left, const char *right);
+
+/** Concatenation operator for plStrings and UTF-8 C-style string data. */
 plString operator+(const char *left, const plString &right);


+/** Helper class for efficiently writing many pieces of data to a text buffer.
+ *  This should be used instead of plString::operator+=() for constructing
+ *  string data in pieces, as it keeps a running buffer instead of allocating
+ *  new storage for each append result.
+ */
 class plStringStream
 {
 public:
+    /** Construct a new empty string stream.  The first STRING_STACK_SIZE
+     *  bytes are allocated on the stack for further efficiency.
+     */
    plStringStream() : fLength(0) { }
+
+    /** Destructor, frees any allocated heap memory owned by the stream. */
    ~plStringStream() { if (ICanHasHeap()) delete [] fBuffer; }

+    /** Append string data to the end of the stream. */
    plStringStream &append(const char *data, size_t length);

+    /** Append UTF-8 C-style string data to the stream. */
    plStringStream &operator<<(const char *text);
+
+    /** Append a base-10 formatted signed integer to the stream. */
    plStringStream &operator<<(int num);
+
+    /** Append a base-10 formatted unsigned integer to the stream. */
    plStringStream &operator<<(unsigned int num);
+
+    /** Append a base-10 formatted float to the stream. */
    plStringStream &operator<<(float num) { return operator<<(static_cast<double>(num)); }
+
+    /** Append a base-10 formatted double to the stream. */
    plStringStream &operator<<(double num);
+
+    /** Append a single Latin-1 character to the stream. */
    plStringStream &operator<<(char ch) { return append(&ch, 1); }

+    /** Append the contents of \a text to the stream. */
    plStringStream &operator<<(const plString &text)
    {
        return append(text.c_str(), text.GetSize());
    }

-    const char *GetRawBuffer() const // WARNING:  Not null-terminated!
+    /** Returns a pointer to the beginning of the stream buffer.
+     *  \warning The returned buffer is not null-terminated; use GetLength().
+     */
+    const char *GetRawBuffer() const
    {
        return ICanHasHeap() ? fBuffer : fShort;
    }

+    /** Return the size (in bytes) of the stream's data. */
    size_t GetLength() const { return fLength; }
+
+    /** Convert the stream's data to a UTF-8 string. */
    plString GetString() { return plString::FromUtf8(GetRawBuffer(), fLength); }

 private:
@ -382,6 +653,7 @@ private:
    bool ICanHasHeap() const { return fLength > STRING_STACK_SIZE; }
 };

+/** \p strlen implementation for UniChar based C-style string buffers. */
 size_t ustrlen(const UniChar *ustr, size_t max = plString::kSizeAuto);

 #endif //plString_Defined