Added URL parser.

This commit is contained in:
Branimir Karadžić
2017-09-30 20:30:47 -07:00
parent 3e132964d2
commit 48d2b7c814
5 changed files with 283 additions and 0 deletions

56
include/bx/url.h Normal file
View File

@@ -0,0 +1,56 @@
/*
* Copyright 2010-2017 Branimir Karadzic. All rights reserved.
* License: https://github.com/bkaradzic/bx#license-bsd-2-clause
*/
#ifndef BX_URL_H_HEADER_GUARD
#define BX_URL_H_HEADER_GUARD
#include "string.h"
namespace bx
{
/// Names the components that UrlView::parse() splits a URL into. The
/// enumerators follow the order in which the components appear in
/// `scheme://username:password@host:port/path?query#fragment`.
struct UrlToken
{
enum Enum
{
// Text in front of the "://" separator.
Scheme,
// User name in front of '@' (up to ':' when a password is present).
UserName,
// Text between ':' and '@' in the user-info part.
Password,
// Host name, without user-info and without the port.
Host,
// Text following ':' after the host, up to the start of the path.
Port,
// Path, starting at (and including) the first '/' after the host.
Path,
// Text following '?', without the '?' itself.
Query,
// Text following '#', without the '#' itself.
Fragment,
// Number of tokens; used to size UrlView::m_tokens.
Count
};
};
/// Splits a URL into its components and exposes each one as a StringView.
///
/// The tokens are set from pointers into the string handed to parse(), so
/// the caller presumably has to keep that string alive while the tokens are
/// in use (NOTE(review): confirm StringView does not copy).
class UrlView
{
public:
/// Constructs a view. The constructor body is empty; the tokens are
/// whatever StringView's default constructor produces.
UrlView();
/// Calls clear() on every token.
void clear();
/// Clears all tokens, then splits `_url` into scheme, user name,
/// password, host, port, path, query and fragment.
///
/// @param _url URL to split.
/// @returns True when `_url` was split, false when it was rejected.
bool parse(const StringView& _url);
/// Returns the token stored for `_token`. No range check is done, so
/// `_token` must be less than UrlToken::Count.
const StringView& get(UrlToken::Enum _token) const;
private:
// One view per UrlToken::Enum value, indexed by the enumerator.
StringView m_tokens[UrlToken::Count];
};
/// Percent-encodes the zero-terminated string `_str` into `_buf`.
///
/// Alphanumeric characters and `-`, `_`, `.`, `~` are copied unchanged;
/// every other character is written as `%XX` with uppercase hex digits.
/// The result is zero-terminated.
///
/// @param _str Zero-terminated input string.
/// @param _buf Destination buffer.
/// @param _bufSize Size of `_buf` in bytes, including room for the terminator.
void urlEncode(const char* _str, char* _buf, uint32_t _bufSize);
} // namespace bx
#endif // BX_URL_H_HEADER_GUARD

View File

@@ -31,8 +31,10 @@ project "bx"
path.join(BX_DIR, "src/crtnone.cpp"),
path.join(BX_DIR, "src/debug.cpp"),
path.join(BX_DIR, "src/dtoa.cpp"),
path.join(BX_DIR, "src/easing.cpp"),
path.join(BX_DIR, "src/file.cpp"),
path.join(BX_DIR, "src/filepath.cpp"),
path.join(BX_DIR, "src/hash.cpp"),
path.join(BX_DIR, "src/math.cpp"),
path.join(BX_DIR, "src/mutex.cpp"),
path.join(BX_DIR, "src/os.cpp"),
@@ -42,6 +44,7 @@ project "bx"
path.join(BX_DIR, "src/string.cpp"),
path.join(BX_DIR, "src/thread.cpp"),
path.join(BX_DIR, "src/timer.cpp"),
path.join(BX_DIR, "src/url.cpp"),
}
else
excludes {

View File

@@ -22,3 +22,4 @@
#include "string.cpp"
#include "thread.cpp"
#include "timer.cpp"
#include "url.cpp"

154
src/url.cpp Normal file
View File

@@ -0,0 +1,154 @@
/*
* Copyright 2011-2017 Branimir Karadzic. All rights reserved.
* License: https://github.com/bkaradzic/bx#license-bsd-2-clause
*/
#include <bx/url.h>
namespace bx
{
/// Constructs a UrlView. Nothing is assigned here; m_tokens holds whatever
/// StringView's default constructor produces.
UrlView::UrlView()
{
}
void UrlView::clear()
{
for (uint32_t ii = 0; ii < UrlToken::Count; ++ii)
{
m_tokens[ii].clear();
}
}
/// Splits `_url` into scheme, user name, password, host, port, path, query
/// and fragment. All tokens are cleared first, so tokens of a component that
/// is absent stay cleared.
///
/// Expected layout: `[scheme://][username[:password]@]host[:port][/path][?query][#fragment]`
///
/// The tokens are set from pointers into `_url`; nothing is copied here.
///
/// @param _url URL to split.
/// @returns False when `_url` has neither a "://" scheme separator nor a '/',
///   or when the text in front of "://" is rejected by isAlpha(). True
///   otherwise.
///
/// NOTE(review): the port is taken from the first ':' after the user-info, so
/// bracketed IPv6 literals (`[::1]:80`) are not handled. Query and fragment
/// are only looked for when a path is present.
bool UrlView::parse(const StringView& _url)
{
	clear();

	const char* start = _url.getPtr();
	const char* term  = _url.getTerm();

	const char* schemeEnd = strFind(StringView(start, term), "://");

	if (NULL != schemeEnd)
	{
		// A real scheme separator is the first place a '/' shows up. If a
		// '/' occurs earlier, the "://" belongs to the path or query of a
		// scheme-less URL (e.g. `host/redirect?to=http://other/`) and must
		// not be treated as a scheme separator.
		const char* firstSlash = strFind(StringView(start, term), '/');

		if (NULL != firstSlash
		&&  firstSlash < schemeEnd)
		{
			schemeEnd = NULL;
		}
	}

	const char* hostStart = NULL != schemeEnd ? schemeEnd+3 : start;
	const char* pathStart = strFind(StringView(hostStart, term), '/');

	if (NULL == schemeEnd
	&&  NULL == pathStart)
	{
		return false;
	}

	if (NULL != schemeEnd)
	{
		StringView scheme(start, schemeEnd);

		if (!isAlpha(scheme) )
		{
			return false;
		}

		m_tokens[UrlToken::Scheme].set(scheme);
	}

	if (NULL != pathStart)
	{
		// The fragment runs to the end of the URL and may itself contain
		// '?' (RFC 3986, section 3.5), so the query is only searched for in
		// front of the '#'.
		const char* fragmentStart = strFind(StringView(pathStart, term), '#');
		const char* queryTerm     = NULL != fragmentStart ? fragmentStart : term;
		const char* queryStart    = strFind(StringView(pathStart, queryTerm), '?');

		m_tokens[UrlToken::Path].set(pathStart
			, NULL != queryStart ? queryStart
			: queryTerm
			);

		if (NULL != queryStart)
		{
			m_tokens[UrlToken::Query].set(queryStart+1, queryTerm);
		}

		if (NULL != fragmentStart)
		{
			m_tokens[UrlToken::Fragment].set(fragmentStart+1, term);
		}

		// Everything below only looks at the authority part in front of the path.
		term = pathStart;
	}

	const char* userPassEnd   = strFind(StringView(hostStart, term), '@');
	const char* userPassStart = NULL != userPassEnd ? hostStart : NULL;
	hostStart = NULL != userPassEnd ? userPassEnd+1 : hostStart;

	const char* portStart = strFind(StringView(hostStart, term), ':');

	m_tokens[UrlToken::Host].set(hostStart, NULL != portStart ? portStart : term);

	if (NULL != portStart)
	{
		m_tokens[UrlToken::Port].set(portStart+1, term);
	}

	if (NULL != userPassStart)
	{
		const char* passStart = strFind(StringView(userPassStart, userPassEnd), ':');

		m_tokens[UrlToken::UserName].set(userPassStart
			, NULL != passStart ? passStart
			: userPassEnd
			);

		if (NULL != passStart)
		{
			m_tokens[UrlToken::Password].set(passStart+1, userPassEnd);
		}
	}

	return true;
}
/// Returns a reference to the token stored for `_token`. The index is not
/// range checked.
const StringView& UrlView::get(UrlToken::Enum _token) const
{
	const StringView& token = m_tokens[_token];
	return token;
}
/// Maps the low four bits of `_nible` to the matching uppercase hexadecimal
/// digit; the upper bits are ignored.
static char toHex(char _nible)
{
	static const char s_digits[] = "0123456789ABCDEF";
	const int index = _nible & 0xf;
	return s_digits[index];
}
// https://en.wikipedia.org/wiki/Percent-encoding
//
/// Percent-encodes the zero-terminated string `_str` into `_buf`.
///
/// Alphanumeric characters and `-`, `_`, `.`, `~` are copied unchanged; every
/// other byte is written as `%XX` with uppercase hex digits. The output is
/// always zero-terminated when `_bufSize` is at least 1. When the buffer is too
/// small the output is truncated at a character boundary: a `%XX` escape is
/// either written completely or not at all, and nothing after the first
/// character that does not fit is written.
///
/// @param _str Zero-terminated input string.
/// @param _buf Destination buffer.
/// @param _bufSize Size of `_buf` in bytes, including room for the terminator.
///   When 0, nothing is written.
void urlEncode(const char* _str, char* _buf, uint32_t _bufSize)
{
	if (0 == _bufSize)
	{
		// No room even for the terminator. Decrementing the size below would
		// wrap around and allow writes past the end of the buffer.
		return;
	}

	const uint32_t capacity = _bufSize-1; // need space for zero terminator

	uint32_t ii = 0;
	for (char ch = *_str++; '\0' != ch; ch = *_str++)
	{
		const bool unreserved = isAlphaNum(ch)
			|| ch == '-'
			|| ch == '_'
			|| ch == '.'
			|| ch == '~'
			;

		const uint32_t needed = unreserved ? 1 : 3;

		// ii never exceeds capacity, so the subtraction cannot wrap.
		if (capacity - ii < needed)
		{
			// Stop instead of skipping this character; appending later
			// characters would silently drop data from the middle.
			break;
		}

		if (unreserved)
		{
			_buf[ii++] = ch;
		}
		else
		{
			// Go through an unsigned byte so the shift is well defined for
			// characters with the high bit set.
			const uint8_t byte = uint8_t(ch);

			_buf[ii++] = '%';
			_buf[ii++] = toHex(char(byte>>4) );
			_buf[ii++] = toHex(char(byte&0xf) );
		}
	}

	_buf[ii] = '\0';
}
} // namespace bx

69
tests/url_test.cpp Normal file
View File

@@ -0,0 +1,69 @@
/*
* Copyright 2010-2017 Branimir Karadzic. All rights reserved.
* License: https://github.com/bkaradzic/bx#license-bsd-2-clause
*/
#include "test.h"
#include <bx/string.h>
#include <bx/url.h>
// One URL parsing test vector.
struct UrlTest
{
// Expected return value of bx::UrlView::parse() for `url`.
bool result;
// URL handed to the parser.
const char* url;
// Expected text of every token, indexed by bx::UrlToken::Enum. Only
// compared when parsing succeeded.
const char* tokens[bx::UrlToken::Count];
};
// Test vectors for bx::UrlView::parse(). The token lists are ordered as
// scheme, user name, password, host, port, path, query, fragment.
static const UrlTest s_urlTest[] =
{
// Every component present.
{ true
, "scheme://username:password@host.rs:80/this/is/path/index.php?query=\"value\"#fragment",
{ "scheme", "username", "password", "host.rs", "80", "/this/is/path/index.php", "query=\"value\"", "fragment" }
},
// Scheme, host and root path only.
{ true
, "scheme://host.rs/",
{ "scheme", "", "", "host.rs", "", "/", "", "" },
},
// Host followed by a port.
{ true
, "scheme://host.rs:1389/",
{ "scheme", "", "", "host.rs", "1389", "/", "", "" },
},
// No scheme; the URL starts directly with the host.
{ true
, "host.rs/abvgd.html",
{ "", "", "", "host.rs", "", "/abvgd.html", "", "" },
},
// Dotted IPv4 address as host.
{ true
, "https://192.168.0.1:8080/",
{ "https", "", "", "192.168.0.1", "8080", "/", "", "" },
},
// Empty host; the ':' of the drive letter is part of the path, not a port.
{ true
, "file:///d:/tmp/archive.tar.gz",
{ "file", "", "", "", "", "/d:/tmp/archive.tar.gz", "", "" },
},
};
// Parses every entry of s_urlTest with a single, reused bx::UrlView and
// checks the parse result and, on success, the text of every token.
TEST_CASE("tokenizeUrl", "")
{
bx::UrlView url;
for (uint32_t ii = 0; ii < BX_COUNTOF(s_urlTest); ++ii)
{
const UrlTest& urlTest = s_urlTest[ii];
bool result = url.parse(urlTest.url);
REQUIRE(urlTest.result == result);
// Tokens are only compared for URLs that parsed successfully.
if (result)
{
for (uint32_t token = 0; token < bx::UrlToken::Count; ++token)
{
// Debug aid (disabled): prints the parsed token next to the expected one.
// char tmp[1024];
// strCopy(tmp, BX_COUNTOF(tmp), url.get(bx::UrlToken::Enum(token)) );
// printf("`%s`, expected: `%s`\n", tmp, urlTest.tokens[token]);
REQUIRE(0 == bx::strCmp(urlTest.tokens[token], url.get(bx::UrlToken::Enum(token)) ) );
}
}
}
}