From 56e332a3c37e9eb96b4d99835a9d8545466e35e8 Mon Sep 17 00:00:00 2001 From: Michael Hansen Date: Sun, 20 Jan 2013 20:44:17 -0800 Subject: [PATCH] Add basic regex functionality to plString --- CMakeLists.txt | 1 + README.rst | 1 + Sources/Plasma/Apps/plClient/CMakeLists.txt | 4 -- Sources/Plasma/CoreLib/CMakeLists.txt | 2 + Sources/Plasma/CoreLib/plString.cpp | 66 +++++++++++++++++++++ Sources/Plasma/CoreLib/plString.h | 14 +++++ cmake/FindPCRE.cmake | 21 +++++++ 7 files changed, 105 insertions(+), 4 deletions(-) create mode 100644 cmake/FindPCRE.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index e1cd6b3b..5459d713 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,6 +44,7 @@ find_package(Ogg REQUIRED) #TODO: Not required if we aren't building the clie find_package(Vorbis REQUIRED) #TODO: Not required if we aren't building the client find_package(Speex REQUIRED) #TODO: Not required if we aren't building the client find_package(CURL REQUIRED) +find_package(PCRE REQUIRED) if(WIN32) find_package(PhysX REQUIRED) #TODO: Not required if we aren't building the client diff --git a/README.rst b/README.rst index 5ae44015..fa8b8e4e 100644 --- a/README.rst +++ b/README.rst @@ -31,6 +31,7 @@ Plasma currently utilizes the following third-party libraries: - libPNG - http://www.libpng.org/ - speex - http://www.speex.org/downloads/ - zlib - http://zlib.net/ +- PCRE - http://www.pcre.org/ - PyGTK - http://www.pygtk.org/downloads.html - PIL - http://www.pythonware.com/products/pil/ diff --git a/Sources/Plasma/Apps/plClient/CMakeLists.txt b/Sources/Plasma/Apps/plClient/CMakeLists.txt index 9d09f5d8..1a7a308a 100644 --- a/Sources/Plasma/Apps/plClient/CMakeLists.txt +++ b/Sources/Plasma/Apps/plClient/CMakeLists.txt @@ -172,10 +172,6 @@ target_link_libraries(plClient ${Vorbis_LIBRARIES}) target_link_libraries(plClient ${DirectX_LIBRARIES}) target_link_libraries(plClient ${CURL_LIBRARY}) -if(Bink_SDK_AVAILABLE) - target_link_libraries(plClient ${Bink_LIBRARIES}) -endif() - 
if(USE_VLD) target_link_libraries(plClient ${VLD_LIBRARY}) endif() diff --git a/Sources/Plasma/CoreLib/CMakeLists.txt b/Sources/Plasma/CoreLib/CMakeLists.txt index 8ffe5708..4b027e7f 100644 --- a/Sources/Plasma/CoreLib/CMakeLists.txt +++ b/Sources/Plasma/CoreLib/CMakeLists.txt @@ -97,6 +97,8 @@ set(CoreLib_HEADERS ) add_library(CoreLib STATIC ${CoreLib_SOURCES} ${CoreLib_HEADERS}) +target_link_libraries(CoreLib ${PCRE_LIBRARY}) + if(UNIX) target_link_libraries(CoreLib pthread) endif(UNIX) diff --git a/Sources/Plasma/CoreLib/plString.cpp b/Sources/Plasma/CoreLib/plString.cpp index 5b0be23b..573b4897 100644 --- a/Sources/Plasma/CoreLib/plString.cpp +++ b/Sources/Plasma/CoreLib/plString.cpp @@ -46,6 +46,8 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com #include #include #include +#include +#include const plString plString::Null; @@ -600,6 +602,70 @@ int plString::Find(const char *str, CaseSensitivity sense) const } } +bool plString::REMatch(const char *pattern, CaseSensitivity sense) const +{ + int opts = PCRE_UTF8; + if (sense == kCaseInsensitive) + opts |= PCRE_CASELESS; + + plString pat_full = plString::Format("(?:%s)\\z", pattern); + const char *errptr; + int erroffs; + std::unique_ptr> + re(pcre_compile(pat_full.c_str(), opts, &errptr, &erroffs, nullptr), pcre_free); + if (!re.get()) { + hsAssert(0, plString::Format("Invalid Regex pattern: %s (at %d)", errptr, erroffs).c_str()); + return false; + } + + int result = pcre_exec(re.get(), nullptr, c_str(), GetSize(), 0, + PCRE_ANCHORED, nullptr, 0); + if (result >= 0) + return true; + + hsAssert(result == PCRE_ERROR_NOMATCH, plString::Format("Regex match error: %d", result).c_str()); + return false; +} + +std::vector plString::RESearch(const char *pattern, + CaseSensitivity sense) const +{ + int opts = PCRE_UTF8; + if (sense == kCaseInsensitive) + opts |= PCRE_CASELESS; + + const char *errptr; + int erroffs; + std::unique_ptr> + re(pcre_compile(pattern, opts, &errptr, &erroffs, nullptr), pcre_free); + if 
(!re.get()) { + hsAssert(0, plString::Format("Invalid Regex pattern: %s (at %d)", errptr, erroffs).c_str()); + return std::vector(); + } + + int ncaps = 0; + pcre_fullinfo(re.get(), nullptr, PCRE_INFO_CAPTURECOUNT, &ncaps); + + ncaps += 1; // For the whole-pattern capture + std::unique_ptr outvec(new int[ncaps * 3]); + memset(outvec.get(), -1, sizeof(int) * ncaps * 3); + int result = pcre_exec(re.get(), nullptr, c_str(), GetSize(), 0, 0, + outvec.get(), ncaps * 3); + if (result >= 0) { + std::vector caps; + caps.resize(ncaps); + for (int i = 0; i < ncaps; ++i) { + int start = outvec.get()[i*2], end = outvec.get()[i*2+1]; + if (start >= 0) + caps[i] = Substr(start, end - start); + } + return caps; + } + + hsAssert(result == PCRE_ERROR_NOMATCH, plString::Format("Regex search error: %d", result).c_str()); + return std::vector(); +} + static bool in_set(char key, const char *charset) { for (const char *cs = charset; *cs; ++cs) { diff --git a/Sources/Plasma/CoreLib/plString.h b/Sources/Plasma/CoreLib/plString.h index 66467297..27b4352b 100644 --- a/Sources/Plasma/CoreLib/plString.h +++ b/Sources/Plasma/CoreLib/plString.h @@ -479,6 +479,20 @@ public: int Find(const plString &str, CaseSensitivity sense = kCaseSensitive) const { return Find(str.c_str(), sense); } + /** Check that this string matches the specified regular expression. + * This will only return true if the whole string can be matched + * by \a pattern. + */ + bool REMatch(const char *pattern, CaseSensitivity sense = kCaseSensitive) const; + + /** Search for substrings which match the specified regular expression. + * If capture groups are specified in the pattern, they will be + * returned as additional strings in the returned vector, starting at + * index 1 (index 0 contains the whole match). If the pattern was not + * found, this returns an empty vector. 
+ */ + std::vector RESearch(const char *pattern, CaseSensitivity sense = kCaseSensitive) const; + /** Trim any characters in the supplied \a charset from the left of * this string. */ diff --git a/cmake/FindPCRE.cmake b/cmake/FindPCRE.cmake new file mode 100644 index 00000000..e1d3745f --- /dev/null +++ b/cmake/FindPCRE.cmake @@ -0,0 +1,21 @@ +if(PCRE_INCLUDE_DIR AND PCRE_LIBRARY) + set(PCRE_FIND_QUIETLY TRUE) +endif() + +find_path(PCRE_INCLUDE_DIR pcre.h) +find_library(PCRE_LIBRARY NAMES pcre) +set(PCRE_LIBRARIES ${PCRE_LIBRARY}) + +if(PCRE_INCLUDE_DIR AND PCRE_LIBRARY) + set(PCRE_FOUND TRUE) +endif() + +if(PCRE_FOUND) + if(NOT PCRE_FIND_QUIETLY) + message(STATUS "Found Perl Compatible Regular Expressions library: ${PCRE_INCLUDE_DIR}") + endif() +else() + if(PCRE_FIND_REQUIRED) + message(FATAL_ERROR "Could not find Perl Compatible Regular Expressions library") + endif() +endif()