Browse Source

Add a Doxyfile and some plString documentation

Michael Hansen 12 years ago
parent
commit
dbb3b1a5b6
  1. 1
      .gitignore
  2. 1869
      Doxyfile
  3. 298
      Sources/Plasma/CoreLib/plString.h

1
.gitignore vendored

@ -17,6 +17,7 @@ cmake_install.cmake
install_manifest.txt
build/
build-*/
Docs/Doxygen/
*.aps
*.user

1869
Doxyfile

File diff suppressed because it is too large Load Diff

298
Sources/Plasma/CoreLib/plString.h

@ -46,12 +46,19 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com
#include "HeadSpin.h"
#include <vector>
/** Single Unicode character code unit */
typedef unsigned int UniChar;
#define SSO_CHARS (16)
#define STRING_STACK_SIZE (256)
#define WHITESPACE_CHARS " \t\n\r"
/** Ref-counted string data buffer.
* This is used to store actual string data in any (unchecked) encoding format,
* including both the internal UTF-8 data of plString itself as well as the
* temporaries returned in the conversion operators.
* \sa plString
*/
template <typename _Ch>
class plStringBuffer
{
@ -83,8 +90,10 @@ private:
bool IHaveACow() const { return fSize >= SSO_CHARS; }
public:
/** Construct an empty string buffer. */
plStringBuffer() : fSize(0) { memset(fShort, 0, sizeof(fShort)); }
/** Copy constructor - adds a reference to the copied buffer */
plStringBuffer(const plStringBuffer<_Ch> &copy) : fSize(copy.fSize)
{
memcpy(fShort, copy.fShort, sizeof(fShort));
@ -92,6 +101,11 @@ public:
fData->AddRef();
}
/** Construct a string buffer which holds a COPY of the \a data, up to
* \a size characters. The terminating '\0' is added automatically,
* meaning this constructor is safe to use on buffers which are not
* already null-terminated.
*/
plStringBuffer(const _Ch *data, size_t size) : fSize(size)
{
memset(fShort, 0, sizeof(fShort));
@ -103,12 +117,18 @@ public:
fData = new StringRef(copyData);
}
/** Destructor. The ref-counted data will only be freed if no other
* string buffers still reference it.
*/
~plStringBuffer<_Ch>()
{
if (IHaveACow())
fData->DecRef();
}
/** Assignment operator. Changes the reference to point to the
* copied buffer in \a copy.
*/
plStringBuffer<_Ch> &operator=(const plStringBuffer<_Ch> &copy)
{
if (copy.IHaveACow())
@ -121,12 +141,29 @@ public:
return *this;
}
/** Returns a pointer to the referenced string buffer. */
const _Ch *GetData() const { return IHaveACow() ? fData->fStringData : fShort; }
/** Returns the number of characters (not including the '\0') in the
* referenced string buffer.
*/
size_t GetSize() const { return fSize; }
/** Cast operator. This is a shortcut for not needing to call GetData on
* buffer objects passed to methods or objects expecting a C-style string.
*/
operator const _Ch *() const { return GetData(); }
// From Haxxia with love
// NOTE: The client is expected to nul-terminate the returned buffer!
/** Create a writable buffer for \a size characters.
* From Haxxia with love! This will release the current string buffer
* reference and then create a new buffer with space for \a size
* characters, plus one extra for the terminating '\0'. The newly
* allocated buffer is returned as a non-const pointer, so it can be
* written to without having to use a \c const_cast.
* \warning The caller is expected to null-terminate the returned buffer.
* Not doing so may cause problems for functions and objects
* expecting a null-terminated C-style string.
*/
_Ch *CreateWritableBuffer(size_t size)
{
if (IHaveACow())
@ -143,15 +180,30 @@ public:
}
};
/** A plStringBuffer for storing fully-expanded Unicode data */
typedef plStringBuffer<UniChar> plUnicodeBuffer;
/** Unicode-capable and (mostly) binary safe string class.
* plString stores SSO-optimized or reference counted strings (automatically
* determined based on string length) for easy and performant string data
* storage and manipulation. The internal format of plString is UTF-8,
* meaning it keeps all Unicode information from conversions. plStrings
* are safe to share without making explicit copies, since plStrings
* follow the strings-are-immutable philosophy. Anything which mutates
* a plString object will do so in a new string buffer, allowing other
* string objects to retain the old data without getting unexpected changes.
*/
class plString
{
public:
enum {
/** Automatically determine the size of input (Requires input to be
* correctly null-terminated).
*/
kSizeAuto = (size_t)(0x80000000)
};
/** Represents a "null" plString object. */
static const plString Null;
private:
@ -164,21 +216,51 @@ private:
void IConvertFromIso8859_1(const char *astr, size_t size);
public:
/** Construct a valid, empty string. */
plString() { }
/** Construct a string from a C-style string.
* \note This constructor expects the input to be UTF-8 encoded. For
* conversion from ISO-8859-1 8-bit data, use FromIso8859_1().
*/
plString(const char *cstr) { IConvertFromUtf8(cstr, kSizeAuto); }
/** Copy constructor. */
plString(const plString &copy) : fUtf8Buffer(copy.fUtf8Buffer) { }
/** Copy constructor from plStringBuffer<char>.
* \note This constructor expects the input to be UTF-8 encoded. For
* conversion from ISO-8859-1 8-bit data, use FromIso8859_1().
*/
plString(const plStringBuffer<char> &init) { operator=(init); }
/** Construct a string from expanded Unicode data. */
plString(const plUnicodeBuffer &init) { IConvertFromUtf32(init.GetData(), init.GetSize()); }
/** Assignment operator. Same as plString(const char *). */
plString &operator=(const char *cstr) { IConvertFromUtf8(cstr, kSizeAuto); return *this; }
/** Assignment operator. Same as plString(const plString &). */
plString &operator=(const plString &copy) { fUtf8Buffer = copy.fUtf8Buffer; return *this; }
/** Assignment operator. Same as plString(const plStringBuffer<char> &). */
plString &operator=(const plStringBuffer<char> &init);
/** Assignment operator. Same as plString(const plUnicodeBuffer &). */
plString &operator=(const plUnicodeBuffer &init) { IConvertFromUtf32(init.GetData(), init.GetSize()); return *this; }
/** Append UTF-8 data from a C-style string pointer to the end of this
* string object.
* \sa plStringStream
*/
plString &operator+=(const char *cstr) { return operator=(*this + cstr); }
/** Append the string \a str to the end of this string object.
* \sa plStringStream
*/
plString &operator+=(const plString &str) { return operator=(*this + str); }
/** Create a new plString object from the UTF-8 formatted data in \a utf8. */
static inline plString FromUtf8(const char *utf8, size_t size = kSizeAuto)
{
plString str;
@ -186,6 +268,7 @@ public:
return str;
}
/** Create a new plString object from the UTF-16 formatted data in \a utf16. */
static inline plString FromUtf16(const uint16_t *utf16, size_t size = kSizeAuto)
{
plString str;
@ -193,6 +276,7 @@ public:
return str;
}
/** Create a new plString object from the \p wchar_t data in \a wstr. */
static inline plString FromWchar(const wchar_t *wstr, size_t size = kSizeAuto)
{
plString str;
@ -200,6 +284,7 @@ public:
return str;
}
/** Create a new plString object from the ISO-8859-1 formatted data in \a astr. */
static inline plString FromIso8859_1(const char *astr, size_t size = kSizeAuto)
{
plString str;
@ -207,123 +292,275 @@ public:
return str;
}
/** Return the internal UTF-8 data pointer for use in functions and objects
* expecting C-style string pointers. If this string is empty, returns
* \a substitute instead.
*/
const char *c_str(const char *substitute = "") const
{ return IsEmpty() ? substitute : fUtf8Buffer.GetData(); }
/** Return the byte at position \a position. Note that this may be in
* the middle of a UTF-8 sequence -- if you want an actual Unicode
* character, use the buffer returned from GetUnicodeArray() instead.
*/
char CharAt(size_t position) const { return c_str()[position]; }
/** Returns the internal UTF-8 data buffer object. */
plStringBuffer<char> ToUtf8() const { return fUtf8Buffer; }
/** Convert this string's data to a UTF-16 string buffer. */
plStringBuffer<uint16_t> ToUtf16() const;
/** Convert this string's data to a wchar_t string buffer.
* \note Depending on your platform and compiler configuration, this
* will either return UTF-16 or UTF-32 data -- it will never
* return a non-unicode data buffer.
*/
plStringBuffer<wchar_t> ToWchar() const;
/** Convert this string's data as closely as possible to ISO-8859-1.
* Unicode characters outside of the ISO-8859-1 range will be stored
* in the buffer as a question mark ('?').
*/
plStringBuffer<char> ToIso8859_1() const;
// For use in displaying characters in a GUI
/** Convert the string's data to a fully expanded UTF-32 buffer. This
* makes it easy to operate on actual Unicode characters instead of
* UTF-8 bytes (e.g. for use in rendering characters to a display).
*/
plUnicodeBuffer GetUnicodeArray() const;
/** Returns the size in number of bytes (excluding the null-terminator) of
* this string.
*/
size_t GetSize() const { return fUtf8Buffer.GetSize(); }
/** Returns \c true if this string is empty (""). */
bool IsEmpty() const { return fUtf8Buffer.GetSize() == 0; }
// TODO: Evaluate whether Plasma actually needs to distinguish between
// empty and NULL strings. Ideally, only IsEmpty should be required.
/** Returns \c true if this string is "null". Currently, this is just
* a synonym for IsEmpty(), as plString makes no distinction between
* null and empty strings.
* \todo Evaluate whether Plasma actually needs to distinguish between
* empty and NULL strings. Ideally, only IsEmpty should be required.
*/
bool IsNull() const { return IsEmpty(); }
/** Convert the string data to an integer in base \a base.
* If base is set to 0, this function behaves like strtol, which checks
* for hex or octal prefixes (e.g. 0777 or 0x1234), and assumes base 10
* if none are found.
*/
int ToInt(int base = 0) const;
/** Convert the string to an unsigned integer in base \a base.
* If base is set to 0, this function behaves like strtoul, which checks
* for hex or octal prefixes (e.g. 0777 or 0x1234), and assumes base 10
* if none are found.
*/
unsigned int ToUInt(int base = 0) const;
/** Convert the string to a floating point value. */
float ToFloat() const;
/** Convert the string to a double precision floating point value. */
double ToDouble() const;
/** Construct a plString using a printf-like format string. */
static plString Format(const char *fmt, ...);
/** Construct a plString using a printf-like format string.
* This function should be called inside of vararg functions, such as
* plString::Format().
*/
static plString IFormat(const char *fmt, va_list vptr);
enum CaseSensitivity {
kCaseSensitive, kCaseInsensitive
};
/** Compare this string with \a str.
* \return an integer which indicates:
* \li \p =0 - the strings are equal
* \li \p \<0 - this string is lexicographically less than \a str
* \li \p \>0 - this string is lexicographically greater than \a str
*/
int Compare(const plString &str, CaseSensitivity sense = kCaseSensitive) const
{
return (sense == kCaseSensitive) ? strcmp(c_str(), str.c_str())
: stricmp(c_str(), str.c_str());
}
/** Compare this string with the UTF-8 C-style string \a str.
 * A null \a str is treated as an empty string instead of being handed to
 * the C comparison routines, which require valid pointers.
 * \param str   null-terminated string to compare against (may be null).
 * \param sense kCaseSensitive compares with strcmp, kCaseInsensitive
 *              compares with stricmp.
 * \return an integer which indicates:
 * \li \p =0 - the strings are equal
 * \li \p \<0 - this string is lexicographically less than \a str
 * \li \p \>0 - this string is lexicographically greater than \a str
 */
int Compare(const char *str, CaseSensitivity sense = kCaseSensitive) const
{
    // strcmp/stricmp have undefined behavior on a null pointer.
    const char *rhs = str ? str : "";
    return (sense == kCaseSensitive) ? strcmp(c_str(), rhs)
                                     : stricmp(c_str(), rhs);
}
/** Compare up to but never exceeding the first \a count bytes of this
* string with \a str.
* \sa Compare(const plString &, CaseSensitivity) const
*/
int CompareN(const plString &str, size_t count, CaseSensitivity sense = kCaseSensitive) const
{
return (sense == kCaseSensitive) ? strncmp(c_str(), str.c_str(), count)
: strnicmp(c_str(), str.c_str(), count);
}
/** Compare up to but never exceeding the first \a count bytes of this
 * string with \a str.
 * A null \a str is treated as an empty string instead of being handed to
 * the C comparison routines, which require valid pointers.
 * \param str   null-terminated string to compare against (may be null).
 * \param count maximum number of bytes to compare.
 * \param sense kCaseSensitive compares with strncmp, kCaseInsensitive
 *              compares with strnicmp.
 * \sa Compare(const char *, CaseSensitivity) const
 */
int CompareN(const char *str, size_t count, CaseSensitivity sense = kCaseSensitive) const
{
    // strncmp/strnicmp have undefined behavior on a null pointer.
    const char *rhs = str ? str : "";
    return (sense == kCaseSensitive) ? strncmp(c_str(), rhs, count)
                                     : strnicmp(c_str(), rhs, count);
}
/** Shortcut for Compare(str, kCaseInsensitive). */
int CompareI(const plString &str) const { return Compare(str, kCaseInsensitive); }
/** Shortcut for Compare(str, kCaseInsensitive). */
int CompareI(const char *str) const { return Compare(str, kCaseInsensitive); }
/** Shortcut for CompareN(str, count, kCaseInsensitive). */
int CompareNI(const plString &str, size_t count) const { return CompareN(str, count, kCaseInsensitive); }
/** Shortcut for CompareN(str, count, kCaseInsensitive). */
int CompareNI(const char *str, size_t count) const { return CompareN(str, count, kCaseInsensitive); }
/** Operator overload for use in containers which depend on \c std::less. */
bool operator<(const plString &other) const { return Compare(other) < 0; }
/** Test if this string contains the same string data as \a other. */
bool operator==(const char *other) const { return Compare(other) == 0; }
/** Test if this string contains the same string data as \a other. */
bool operator==(const plString &other) const { return Compare(other) == 0; }
/** Inverse of operator==(const char *) const. */
bool operator!=(const char *other) const { return Compare(other) != 0; }
/** Inverse of operator==(const plString &) const. */
bool operator!=(const plString &other) const { return Compare(other) != 0; }
/** Find the index of the first instance of \a ch in this string.
* \return -1 if the character was not found.
*/
int Find(char ch, CaseSensitivity sense = kCaseSensitive) const;
/** Find the index of the last instance of \a ch in this string.
* \return -1 if the character was not found.
*/
int FindLast(char ch, CaseSensitivity sense = kCaseSensitive) const;
/** Find the index of the first instance of \a str in this string.
* \return -1 if the substring was not found.
*/
int Find(const char *str, CaseSensitivity sense = kCaseSensitive) const;
/** Find the index of the first instance of \a str in this string.
* \return -1 if the substring was not found.
*/
int Find(const plString &str, CaseSensitivity sense = kCaseSensitive) const
{ return Find(str.c_str(), sense); }
/** Trim any characters in the supplied \a charset from the left of
* this string.
*/
plString TrimLeft(const char *charset = WHITESPACE_CHARS) const;
/** Trim any characters in the supplied \a charset from the right of
* this string.
*/
plString TrimRight(const char *charset = WHITESPACE_CHARS) const;
/** Trim any characters in the supplied \a charset from both ends of
* this string. Logically equivalent to (but more efficient than)
* str.TrimLeft(charset).TrimRight(charset)
*/
plString Trim(const char *charset = WHITESPACE_CHARS) const;
/** Return a substring starting at index \a start, with up to \a size
* characters from the start position. If \a size is greater than the
* number of characters left in the string after \a start, Substr will
* return the remainder of the string.
*/
plString Substr(int start, size_t size = kSizeAuto) const;
/** Return a substring containing at most \a size characters from the left
* of the string. Equivalent to Substr(0, size).
*/
plString Left(size_t size) const { return Substr(0, size); }
/** Return a substring containing at most \a size characters from the right
 * of the string. When \a size is smaller than GetSize() this is equivalent
 * to Substr(GetSize() - size, size); when \a size is greater than or equal
 * to GetSize() a copy of the whole string is returned.
 * \param size maximum number of bytes to keep from the end of the string.
 */
plString Right(size_t size) const
{
    const size_t total = GetSize();
    // (total - size) is unsigned and would wrap around if size > total,
    // so the "whole string" case is handled without doing the subtraction.
    if (size >= total)
        return *this;
    return Substr(total - size, size);
}
/** Return a copy of this string with all occurrences of \a from replaced
* with \a to. */
plString Replace(const char *from, const char *to) const;
// NOTE: Does Compare(blah, kCaseInsensitive) make more sense? If
// so, use that instead -- it's faster and more efficient!
/** Return a copy of this string with all Latin-1 alphabetic characters
* converted to upper case.
* \sa CompareI()
*/
plString ToUpper() const;
/** Return a copy of this string with all Latin-1 alphabetic characters
* converted to lower case.
* \sa CompareI()
*/
plString ToLower() const;
// Should replace other tokenization methods. The difference between Split
// and Tokenize is that Tokenize never returns a blank string (it strips
// all delimiters and only returns the pieces left between them), whereas
// Split will split on a full string, returning whatever is left between.
/** Split this string into pieces separated by the substring \a split.
* This will return the complete contents of everything between split
* markers, meaning that two subsequent markers will produce an empty
* string in the returned vector.
* \sa Tokenize()
*/
std::vector<plString> Split(const char *split, size_t maxSplits = kSizeAuto) const;
/** Split this string into tokens, delimited by \a delims.
* Note that, unlike Split(), Tokenize will return only non-blank strings
* after stripping out all delimiters between tokens.
* \sa Split()
*/
std::vector<plString> Tokenize(const char *delims = WHITESPACE_CHARS) const;
/** Create a string initialized with \a count copies of the character \a c. */
static plString Fill(size_t count, char c);
public:
/** Functor which compares two strings case-sensitively for sorting. */
struct less
{
bool operator()(const plString &_L, const plString &_R) const
{ return _L.Compare(_R, kCaseSensitive) < 0; }
};
/** Functor which compares two strings case-insensitively for sorting. */
struct less_i
{
bool operator()(const plString &_L, const plString &_R) const
{ return _L.Compare(_R, kCaseInsensitive) < 0; }
};
/** Functor which compares two strings case-sensitively for equality. */
struct equal
{
bool operator()(const plString &_L, const plString &_R) const
{ return _L.Compare(_R, kCaseSensitive) == 0; }
};
/** Functor which compares two strings case-insensitively for equality. */
struct equal_i
{
bool operator()(const plString &_L, const plString &_R) const
@ -336,37 +573,71 @@ private:
friend plString operator+(const char *left, const plString &right);
};
/** Concatenation operator for plStrings. */
plString operator+(const plString &left, const plString &right);
/** Concatenation operator for plStrings and UTF-8 C-style string data. */
plString operator+(const plString &left, const char *right);
/** Concatenation operator for plStrings and UTF-8 C-style string data. */
plString operator+(const char *left, const plString &right);
/** Helper class for writing frequent data to a text buffer efficiently.
* This should be used instead of plString::operator+=() for constructing
* string data in pieces, as it keeps a running buffer instead of allocating
* new storage for each append result.
*/
class plStringStream
{
public:
/** Construct a new empty string stream. The first STRING_STACK_SIZE
* bytes are allocated on the stack for further efficiency.
*/
plStringStream() : fLength(0) { }
/** Destructor, frees any allocated heap memory owned by the stream. */
~plStringStream() { if (ICanHasHeap()) delete [] fBuffer; }
/** Append string data to the end of the stream. */
plStringStream &append(const char *data, size_t length);
/** Append UTF-8 C-style string data to the stream. */
plStringStream &operator<<(const char *text);
/** Append a base-10 formatted signed integer to the stream. */
plStringStream &operator<<(int num);
/** Append a base-10 formatted unsigned integer to the stream. */
plStringStream &operator<<(unsigned int num);
/** Append a base-10 formatted float to the stream. */
plStringStream &operator<<(float num) { return operator<<(static_cast<double>(num)); }
/** Append a base-10 formatted double to the stream. */
plStringStream &operator<<(double num);
/** Append a single Latin-1 character to the stream. */
plStringStream &operator<<(char ch) { return append(&ch, 1); }
/** Append the contents of \a text to the stream. */
plStringStream &operator<<(const plString &text)
{
return append(text.c_str(), text.GetSize());
}
const char *GetRawBuffer() const // WARNING: Not null-terminated!
/** Returns a pointer to the beginning of the stream buffer.
* \warning This pointer is not null-terminated.
*/
const char *GetRawBuffer() const
{
return ICanHasHeap() ? fBuffer : fShort;
}
/** Return the size (in bytes) of the stream's data. */
size_t GetLength() const { return fLength; }
/** Convert the stream's data to a plString.
 * The bytes accumulated so far are interpreted as UTF-8 by
 * plString::FromUtf8(). The length is passed explicitly because the raw
 * buffer is not null-terminated. This does not modify the stream, so it
 * may be called on const streams.
 */
plString GetString() const { return plString::FromUtf8(GetRawBuffer(), fLength); }
private:
@ -382,6 +653,7 @@ private:
bool ICanHasHeap() const { return fLength > STRING_STACK_SIZE; }
};
/** \p strlen implementation for UniChar based C-style string buffers. */
size_t ustrlen(const UniChar *ustr, size_t max = plString::kSizeAuto);
#endif //plString_Defined

Loading…
Cancel
Save