183 lines
6.9 KiB
Plaintext
183 lines
6.9 KiB
Plaintext
|
#ifndef BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_WIDE_ENCODING_HPP
|
||
|
#define BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_WIDE_ENCODING_HPP
|
||
|
|
||
|
#include <boost/assert.hpp>
|
||
|
#include <boost/range/iterator_range_core.hpp>
|
||
|
|
||
|
#include <utility>
|
||
|
|
||
|
namespace boost { namespace property_tree {
|
||
|
namespace json_parser { namespace detail
|
||
|
{
|
||
|
|
||
|
struct external_wide_encoding
|
||
|
{
|
||
|
typedef wchar_t external_char;
|
||
|
|
||
|
bool is_nl(wchar_t c) const { return c == L'\n'; }
|
||
|
bool is_ws(wchar_t c) const {
|
||
|
return c == L' ' || c == L'\t' || c == L'\n' || c == L'\r';
|
||
|
}
|
||
|
|
||
|
bool is_minus(wchar_t c) const { return c == L'-'; }
|
||
|
bool is_plusminus(wchar_t c) const { return c == L'+' || c == L'-'; }
|
||
|
bool is_dot(wchar_t c) const { return c == L'.'; }
|
||
|
bool is_eE(wchar_t c) const { return c == L'e' || c == L'E'; }
|
||
|
bool is_0(wchar_t c) const { return c == L'0'; }
|
||
|
bool is_digit(wchar_t c) const { return c >= L'0' && c <= L'9'; }
|
||
|
bool is_digit0(wchar_t c) const { return c >= L'1' && c <= L'9'; }
|
||
|
|
||
|
bool is_quote(wchar_t c) const { return c == L'"'; }
|
||
|
bool is_backslash(wchar_t c) const { return c == L'\\'; }
|
||
|
bool is_slash(wchar_t c) const { return c == L'/'; }
|
||
|
|
||
|
bool is_comma(wchar_t c) const { return c == L','; }
|
||
|
bool is_open_bracket(wchar_t c) const { return c == L'['; }
|
||
|
bool is_close_bracket(wchar_t c) const { return c == L']'; }
|
||
|
bool is_colon(wchar_t c) const { return c == L':'; }
|
||
|
bool is_open_brace(wchar_t c) const { return c == L'{'; }
|
||
|
bool is_close_brace(wchar_t c) const { return c == L'}'; }
|
||
|
|
||
|
bool is_a(wchar_t c) const { return c == L'a'; }
|
||
|
bool is_b(wchar_t c) const { return c == L'b'; }
|
||
|
bool is_e(wchar_t c) const { return c == L'e'; }
|
||
|
bool is_f(wchar_t c) const { return c == L'f'; }
|
||
|
bool is_l(wchar_t c) const { return c == L'l'; }
|
||
|
bool is_n(wchar_t c) const { return c == L'n'; }
|
||
|
bool is_r(wchar_t c) const { return c == L'r'; }
|
||
|
bool is_s(wchar_t c) const { return c == L's'; }
|
||
|
bool is_t(wchar_t c) const { return c == L't'; }
|
||
|
bool is_u(wchar_t c) const { return c == L'u'; }
|
||
|
|
||
|
int decode_hexdigit(wchar_t c) {
|
||
|
if (c >= L'0' && c <= L'9') return c - L'0';
|
||
|
if (c >= L'A' && c <= L'F') return c - L'A' + 10;
|
||
|
if (c >= L'a' && c <= L'f') return c - L'a' + 10;
|
||
|
return -1;
|
||
|
}
|
||
|
};
|
||
|
|
||
|
template <bool B> struct is_utf16 {};
|
||
|
|
||
|
class wide_wide_encoding : public external_wide_encoding
|
||
|
{
|
||
|
typedef is_utf16<sizeof(wchar_t) == 2> test_utf16;
|
||
|
public:
|
||
|
typedef wchar_t internal_char;
|
||
|
|
||
|
template <typename Iterator>
|
||
|
boost::iterator_range<Iterator>
|
||
|
to_internal(Iterator first, Iterator last) const {
|
||
|
return boost::make_iterator_range(first, last);
|
||
|
}
|
||
|
|
||
|
wchar_t to_internal_trivial(wchar_t c) const {
|
||
|
BOOST_ASSERT(!is_surrogate_high(c) && !is_surrogate_low(c));
|
||
|
return c;
|
||
|
}
|
||
|
|
||
|
template <typename Iterator, typename Sentinel,
|
||
|
typename EncodingErrorFn>
|
||
|
void skip_codepoint(Iterator& cur, Sentinel end,
|
||
|
EncodingErrorFn error_fn) const {
|
||
|
transcode_codepoint(cur, end, DoNothing(), error_fn);
|
||
|
}
|
||
|
|
||
|
template <typename Iterator, typename Sentinel, typename TranscodedFn,
|
||
|
typename EncodingErrorFn>
|
||
|
void transcode_codepoint(Iterator& cur, Sentinel end,
|
||
|
TranscodedFn transcoded_fn, EncodingErrorFn error_fn) const {
|
||
|
return transcode_codepoint(cur, end, transcoded_fn, error_fn,
|
||
|
test_utf16());
|
||
|
}
|
||
|
|
||
|
template <typename TranscodedFn>
|
||
|
void feed_codepoint(unsigned codepoint,
|
||
|
TranscodedFn transcoded_fn) const {
|
||
|
feed_codepoint(codepoint, transcoded_fn, test_utf16());
|
||
|
}
|
||
|
|
||
|
template <typename Iterator, typename Sentinel>
|
||
|
void skip_introduction(Iterator& cur, Sentinel end) const {
|
||
|
// Endianness is already decoded at this level.
|
||
|
if (cur != end && *cur == 0xfeff) {
|
||
|
++cur;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
private:
|
||
|
struct DoNothing {
|
||
|
void operator ()(wchar_t) const {}
|
||
|
};
|
||
|
|
||
|
template <typename Iterator, typename Sentinel, typename TranscodedFn,
|
||
|
typename EncodingErrorFn>
|
||
|
void transcode_codepoint(Iterator& cur, Sentinel,
|
||
|
TranscodedFn transcoded_fn,
|
||
|
EncodingErrorFn error_fn,
|
||
|
is_utf16<false>) const {
|
||
|
wchar_t c = *cur;
|
||
|
if (c < 0x20) {
|
||
|
error_fn();
|
||
|
}
|
||
|
transcoded_fn(c);
|
||
|
++cur;
|
||
|
}
|
||
|
template <typename Iterator, typename Sentinel, typename TranscodedFn,
|
||
|
typename EncodingErrorFn>
|
||
|
void transcode_codepoint(Iterator& cur, Sentinel end,
|
||
|
TranscodedFn transcoded_fn,
|
||
|
EncodingErrorFn error_fn,
|
||
|
is_utf16<true>) const {
|
||
|
wchar_t c = *cur;
|
||
|
if (c < 0x20) {
|
||
|
error_fn();
|
||
|
}
|
||
|
if (is_surrogate_low(c)) {
|
||
|
error_fn();
|
||
|
}
|
||
|
transcoded_fn(c);
|
||
|
++cur;
|
||
|
if (is_surrogate_high(c)) {
|
||
|
if (cur == end) {
|
||
|
error_fn();
|
||
|
}
|
||
|
c = *cur;
|
||
|
if (!is_surrogate_low(c)) {
|
||
|
error_fn();
|
||
|
}
|
||
|
transcoded_fn(c);
|
||
|
++cur;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
template <typename TranscodedFn>
|
||
|
void feed_codepoint(unsigned codepoint, TranscodedFn transcoded_fn,
|
||
|
is_utf16<false>) const {
|
||
|
transcoded_fn(static_cast<wchar_t>(codepoint));
|
||
|
}
|
||
|
template <typename TranscodedFn>
|
||
|
void feed_codepoint(unsigned codepoint, TranscodedFn transcoded_fn,
|
||
|
is_utf16<true>) const {
|
||
|
if (codepoint < 0x10000) {
|
||
|
transcoded_fn(static_cast<wchar_t>(codepoint));
|
||
|
} else {
|
||
|
codepoint -= 0x10000;
|
||
|
transcoded_fn(static_cast<wchar_t>((codepoint >> 10) | 0xd800));
|
||
|
transcoded_fn(static_cast<wchar_t>(
|
||
|
(codepoint & 0x3ff) | 0xdc00));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static bool is_surrogate_high(unsigned codepoint) {
|
||
|
return (codepoint & 0xfc00) == 0xd800;
|
||
|
}
|
||
|
static bool is_surrogate_low(unsigned codepoint) {
|
||
|
return (codepoint & 0xfc00) == 0xdc00;
|
||
|
}
|
||
|
};
|
||
|
|
||
|
}}}}
|
||
|
|
||
|
#endif
|