From 48d2b7c814d37e423fab2fa90066d9ab333166c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Branimir=20Karad=C5=BEi=C4=87?=
Date: Sat, 30 Sep 2017 20:30:47 -0700
Subject: [PATCH] Added URL parser.

---
 include/bx/url.h    |  62 ++++++++++++++++
 scripts/bx.lua      |   3 +
 src/amalgamated.cpp |   1 +
 src/url.cpp         | 163 ++++++++++++++++++++++++++++++++++++++++++++
 tests/url_test.cpp  |  89 ++++++++++++++++++++++++
 5 files changed, 318 insertions(+)
 create mode 100644 include/bx/url.h
 create mode 100644 src/url.cpp
 create mode 100644 tests/url_test.cpp

diff --git a/include/bx/url.h b/include/bx/url.h
new file mode 100644
index 0000000..e18bd1f
--- /dev/null
+++ b/include/bx/url.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2010-2017 Branimir Karadzic. All rights reserved.
+ * License: https://github.com/bkaradzic/bx#license-bsd-2-clause
+ */
+
+#ifndef BX_URL_H_HEADER_GUARD
+#define BX_URL_H_HEADER_GUARD
+
+#include "string.h"
+
+namespace bx
+{
+	/// Identifiers of the URL components (tokens) extracted by UrlView.
+	struct UrlToken
+	{
+		enum Enum
+		{
+			Scheme,
+			UserName,
+			Password,
+			Host,
+			Port,
+			Path,
+			Query,
+			Fragment,
+
+			Count
+		};
+	};
+
+	/// Splits a URL into tokens. Each token is a StringView set from pointers
+	/// into the string handed to parse().
+	class UrlView
+	{
+	public:
+		/// Default constructor.
+		UrlView();
+
+		/// Clears every token.
+		void clear();
+
+		/// Clears the view, then tokenizes `_url`. Returns false when `_url` has
+		/// neither a "://" scheme separator nor a '/' starting a path, or when
+		/// the scheme fails the isAlpha() check.
+		bool parse(const StringView& _url);
+
+		/// Returns the token selected by `_token`.
+		const StringView& get(UrlToken::Enum _token) const;
+
+	private:
+		StringView m_tokens[UrlToken::Count];
+	};
+
+	/// Percent-encodes zero-terminated `_str` into `_buf`, which holds `_bufSize`
+	/// bytes including the zero terminator. Characters accepted by isAlphaNum()
+	/// and '-', '_', '.', '~' are copied as is; every other character becomes %XX.
+	/// Output is truncated when it doesn't fit; nothing is written if `_bufSize` is 0.
+	void urlEncode(const char* _str, char* _buf, uint32_t _bufSize);
+
+} // namespace bx
+
+#endif // BX_URL_H_HEADER_GUARD
diff --git a/scripts/bx.lua b/scripts/bx.lua
index f7021fb..f7bc1dc 100644
--- a/scripts/bx.lua
+++ b/scripts/bx.lua
@@ -31,8 +31,10 @@ project "bx"
 			path.join(BX_DIR, "src/crtnone.cpp"),
 			path.join(BX_DIR, "src/debug.cpp"),
 			path.join(BX_DIR, "src/dtoa.cpp"),
+			path.join(BX_DIR, "src/easing.cpp"),
 			path.join(BX_DIR, "src/file.cpp"),
 			path.join(BX_DIR, "src/filepath.cpp"),
+			path.join(BX_DIR, "src/hash.cpp"),
 			path.join(BX_DIR, "src/math.cpp"),
 			path.join(BX_DIR, "src/mutex.cpp"),
 			path.join(BX_DIR, "src/os.cpp"),
@@ -42,6 +44,7 @@ project "bx"
 			path.join(BX_DIR, "src/string.cpp"),
 			path.join(BX_DIR, "src/thread.cpp"),
 			path.join(BX_DIR, "src/timer.cpp"),
+			path.join(BX_DIR, "src/url.cpp"),
 		}
 	else
 		excludes {
diff --git a/src/amalgamated.cpp b/src/amalgamated.cpp
index 6ea0796..c931472 100644
--- a/src/amalgamated.cpp
+++ b/src/amalgamated.cpp
@@ -22,3 +22,4 @@
 #include "string.cpp"
 #include "thread.cpp"
 #include "timer.cpp"
+#include "url.cpp"
diff --git a/src/url.cpp b/src/url.cpp
new file mode 100644
index 0000000..b0b3da1
--- /dev/null
+++ b/src/url.cpp
@@ -0,0 +1,163 @@
+/*
+ * Copyright 2011-2017 Branimir Karadzic. All rights reserved.
+ * License: https://github.com/bkaradzic/bnet#license-bsd-2-clause
+ */
+
+#include <bx/url.h>
+
+namespace bx
+{
+	UrlView::UrlView()
+	{
+	}
+
+	void UrlView::clear()
+	{
+		for (uint32_t ii = 0; ii < UrlToken::Count; ++ii)
+		{
+			m_tokens[ii].clear();
+		}
+	}
+
+	// Tokenizes [scheme://][user[:password]@]host[:port]/path[?query][#fragment].
+	// All tokens are set from pointers into _url.
+	bool UrlView::parse(const StringView& _url)
+	{
+		clear();
+
+		const char* start = _url.getPtr();
+		const char* term  = _url.getTerm();
+		const char* schemeEnd = strFind(StringView(start, term), "://");
+		const char* hostStart = NULL != schemeEnd ? schemeEnd+3 : start;
+		const char* pathStart = strFind(StringView(hostStart, term), '/');
+
+		if (NULL == schemeEnd
+		&&  NULL == pathStart)
+		{
+			return false;
+		}
+
+		if (NULL != schemeEnd
+		&& (NULL == pathStart || pathStart > schemeEnd) )
+		{
+			StringView scheme(start, schemeEnd);
+
+			if (!isAlpha(scheme) )
+			{
+				return false;
+			}
+
+			m_tokens[UrlToken::Scheme].set(scheme);
+		}
+
+		if (NULL != pathStart)
+		{
+			// A fragment may legally contain '?' (RFC 3986, section 3.5), so
+			// the query delimiter is only looked for in front of the '#'.
+			const char* fragmentStart = strFind(StringView(pathStart, term), '#');
+			const char* queryTerm     = NULL != fragmentStart ? fragmentStart : term;
+			const char* queryStart    = strFind(StringView(pathStart, queryTerm), '?');
+
+			m_tokens[UrlToken::Path].set(pathStart
+				, NULL != queryStart ? queryStart
+				: queryTerm
+				);
+
+			if (NULL != queryStart)
+			{
+				m_tokens[UrlToken::Query].set(queryStart+1, queryTerm);
+			}
+
+			if (NULL != fragmentStart)
+			{
+				m_tokens[UrlToken::Fragment].set(fragmentStart+1, term);
+			}
+
+			// Path, query and fragment are done; [hostStart, term) now only
+			// covers user name, password, host and port.
+			term = pathStart;
+		}
+
+		const char* userPassEnd   = strFind(StringView(hostStart, term), '@');
+		const char* userPassStart = NULL != userPassEnd ? hostStart : NULL;
+		hostStart = NULL != userPassEnd ? userPassEnd+1 : hostStart;
+		const char* portStart = strFind(StringView(hostStart, term), ':');
+
+		m_tokens[UrlToken::Host].set(hostStart, NULL != portStart ? portStart : term);
+
+		if (NULL != portStart)
+		{
+			m_tokens[UrlToken::Port].set(portStart+1, term);
+		}
+
+		if (NULL != userPassStart)
+		{
+			const char* passStart = strFind(StringView(userPassStart, userPassEnd), ':');
+
+			m_tokens[UrlToken::UserName].set(userPassStart
+				, NULL != passStart ? passStart
+				: userPassEnd
+				);
+
+			if (NULL != passStart)
+			{
+				m_tokens[UrlToken::Password].set(passStart+1, userPassEnd);
+			}
+		}
+
+		return true;
+	}
+
+	const StringView& UrlView::get(UrlToken::Enum _token) const
+	{
+		return m_tokens[_token];
+	}
+
+	// Returns the upper-case hex digit for the low 4 bits of _nible.
+	static char toHex(char _nible)
+	{
+		return "0123456789ABCDEF"[_nible&0xf];
+	}
+
+	// https://secure.wikimedia.org/wikipedia/en/wiki/URL_encoding
+	void urlEncode(const char* _str, char* _buf, uint32_t _bufSize)
+	{
+		if (0 == _bufSize)
+		{
+			// No room for the zero terminator; decrementing would wrap around.
+			return;
+		}
+
+		_bufSize--; // need space for zero terminator
+
+		uint32_t ii = 0;
+		for (char ch = *_str++
+			; '\0' != ch && ii < _bufSize
+			; ch = *_str++
+			)
+		{
+			if (isAlphaNum(ch)
+			||  ch == '-'
+			||  ch == '_'
+			||  ch == '.'
+			||  ch == '~')
+			{
+				_buf[ii++] = ch;
+			}
+			else if (ii+3 <= _bufSize)
+			{
+				_buf[ii++] = '%';
+				_buf[ii++] = toHex(ch>>4);
+				_buf[ii++] = toHex(ch);
+			}
+			else
+			{
+				// The escape doesn't fit: truncate here rather than drop this
+				// character and keep emitting the ones after it.
+				break;
+			}
+		}
+
+		_buf[ii] = '\0';
+	}
+
+} // namespace bx
diff --git a/tests/url_test.cpp b/tests/url_test.cpp
new file mode 100644
index 0000000..74d1ac0
--- /dev/null
+++ b/tests/url_test.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2010-2017 Branimir Karadzic. All rights reserved.
+ * License: https://github.com/bkaradzic/bx#license-bsd-2-clause
+ */
+
+#include "test.h"
+#include <bx/string.h>
+#include <bx/url.h>
+
+struct UrlTest
+{
+	bool result;
+	const char* url;
+	const char* tokens[bx::UrlToken::Count];
+};
+
+static const UrlTest s_urlTest[] =
+{
+	{ true
+	, "scheme://username:password@host.rs:80/this/is/path/index.php?query=\"value\"#fragment",
+	{ "scheme", "username", "password", "host.rs", "80", "/this/is/path/index.php", "query=\"value\"", "fragment" }
+	},
+	{ true
+	, "scheme://host.rs/",
+	{ "scheme", "", "", "host.rs", "", "/", "", "" },
+	},
+	{ true
+	, "scheme://host.rs:1389/",
+	{ "scheme", "", "", "host.rs", "1389", "/", "", "" },
+	},
+	{ true
+	, "host.rs/abvgd.html",
+	{ "", "", "", "host.rs", "", "/abvgd.html", "", "" },
+	},
+	{ true
+	, "https://192.168.0.1:8080/",
+	{ "https", "", "", "192.168.0.1", "8080", "/", "", "" },
+	},
+
+	{ true
+	, "file:///d:/tmp/archive.tar.gz",
+	{ "file", "", "", "", "", "/d:/tmp/archive.tar.gz", "", "" },
+	},
+	{ true
+	, "scheme://host.rs/path#fragment?with=question",
+	{ "scheme", "", "", "host.rs", "", "/path", "", "fragment?with=question" },
+	},
+};
+
+TEST_CASE("tokenizeUrl", "")
+{
+	bx::UrlView url;
+
+	for (uint32_t ii = 0; ii < BX_COUNTOF(s_urlTest); ++ii)
+	{
+		const UrlTest& urlTest = s_urlTest[ii];
+
+		bool result = url.parse(urlTest.url);
+		REQUIRE(urlTest.result == result);
+
+		if (result)
+		{
+			for (uint32_t token = 0; token < bx::UrlToken::Count; ++token)
+			{
+//				char tmp[1024];
+//				strCopy(tmp, BX_COUNTOF(tmp), url.get(bx::UrlToken::Enum(token)) );
+//				printf("`%s`, expected: `%s`\n", tmp, urlTest.tokens[token]);
+
+				REQUIRE(0 == bx::strCmp(urlTest.tokens[token], url.get(bx::UrlToken::Enum(token)) ) );
+			}
+		}
+	}
+}
+
+TEST_CASE("urlEncode", "")
+{
+	char tmp[16];
+
+	bx::urlEncode("a b~", tmp, BX_COUNTOF(tmp) );
+	REQUIRE(0 == bx::strCmp("a%20b~", bx::StringView(tmp) ) );
+
+	// An escape that exactly fills the buffer must not be dropped.
+	bx::urlEncode("a ", tmp, 5);
+	REQUIRE(0 == bx::strCmp("a%20", bx::StringView(tmp) ) );
+
+	// An escape that doesn't fit truncates the output at that point.
+	bx::urlEncode("a b", tmp, 3);
+	REQUIRE(0 == bx::strCmp("a", bx::StringView(tmp) ) );
+}