// Source listing metadata: 531 lines, 17 KiB, plaintext.
#ifndef BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_PARSER_HPP
 | 
						|
#define BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_PARSER_HPP
 | 
						|
 | 
						|
#include <boost/property_tree/json_parser/error.hpp>
 | 
						|
 | 
						|
#include <boost/ref.hpp>
 | 
						|
#include <boost/bind.hpp>
 | 
						|
#include <boost/format.hpp>
 | 
						|
 | 
						|
#include <iterator>
 | 
						|
#include <sstream>
 | 
						|
#include <string>
 | 
						|
 | 
						|
namespace boost { namespace property_tree {
 | 
						|
    namespace json_parser { namespace detail
 | 
						|
{
 | 
						|
 | 
						|
    template <typename Encoding, typename Iterator, typename Sentinel>
 | 
						|
    class source
 | 
						|
    {
 | 
						|
    public:
 | 
						|
        typedef typename std::iterator_traits<Iterator>::value_type
 | 
						|
            code_unit;
 | 
						|
        typedef bool (Encoding::*encoding_predicate)(code_unit c) const;
 | 
						|
 | 
						|
        explicit source(Encoding& encoding) : encoding(encoding) {}
 | 
						|
 | 
						|
        template <typename Range>
 | 
						|
        void set_input(const std::string& filename, const Range& r)
 | 
						|
        {
 | 
						|
            this->filename = filename;
 | 
						|
            cur = r.begin();
 | 
						|
            end = r.end();
 | 
						|
            // Note that there is no backtracking, so if e.g. a UTF-8 file
 | 
						|
            // starts with something that initially looks like a BOM but isn't,
 | 
						|
            // there's trouble.
 | 
						|
            // However, no valid JSON file can start with a UTF-8 EF byte.
 | 
						|
            encoding.skip_introduction(cur, end);
 | 
						|
            line = 1;
 | 
						|
            offset = 0;
 | 
						|
        }
 | 
						|
 | 
						|
        bool done() const { return cur == end; }
 | 
						|
 | 
						|
        void parse_error(const char* msg) {
 | 
						|
            BOOST_PROPERTY_TREE_THROW(
 | 
						|
                json_parser_error(msg, filename, line));
 | 
						|
        }
 | 
						|
 | 
						|
        void next() {
 | 
						|
            if (encoding.is_nl(*cur)) {
 | 
						|
                ++line;
 | 
						|
                offset = 0;
 | 
						|
            } else {
 | 
						|
                ++offset;
 | 
						|
            }
 | 
						|
            ++cur;
 | 
						|
        }
 | 
						|
 | 
						|
        template <typename Action>
 | 
						|
        bool have(encoding_predicate p, Action& a) {
 | 
						|
            bool found = cur != end && (encoding.*p)(*cur);
 | 
						|
            if (found) {
 | 
						|
                a(*cur);
 | 
						|
                next();
 | 
						|
            }
 | 
						|
            return found;
 | 
						|
        }
 | 
						|
 | 
						|
        bool have(encoding_predicate p) {
 | 
						|
            DoNothing n;
 | 
						|
            return have(p, n);
 | 
						|
        }
 | 
						|
 | 
						|
        template <typename Action>
 | 
						|
        void expect(encoding_predicate p, const char* msg, Action& a) {
 | 
						|
            if (!have(p, a)) {
 | 
						|
                parse_error(msg);
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        void expect(encoding_predicate p, const char* msg) {
 | 
						|
            DoNothing n;
 | 
						|
            expect(p, msg, n);
 | 
						|
        }
 | 
						|
 | 
						|
        code_unit need_cur(const char* msg) {
 | 
						|
            if (cur == end) {
 | 
						|
                parse_error(msg);
 | 
						|
            }
 | 
						|
            return *cur;
 | 
						|
        }
 | 
						|
 | 
						|
        Iterator& raw_cur() { return cur; }
 | 
						|
        Sentinel raw_end() { return end; }
 | 
						|
 | 
						|
    private:
 | 
						|
        struct DoNothing {
 | 
						|
            void operator ()(code_unit) const {}
 | 
						|
        };
 | 
						|
 | 
						|
        Encoding& encoding;
 | 
						|
        Iterator cur;
 | 
						|
        Sentinel end;
 | 
						|
        std::string filename;
 | 
						|
        int line;
 | 
						|
        int offset;
 | 
						|
    };
 | 
						|
 | 
						|
    // Reports a parsed number to the callbacks.
    //
    // Primary template, selected for every iterator category except
    // std::input_iterator_tag (which has its own specialization). It
    // remembers where the number started and, in finish(), hands the whole
    // range [start, current position) to the callbacks in one call.
    template <typename Callbacks, typename Encoding, typename Iterator,
        typename = typename std::iterator_traits<Iterator>
            ::iterator_category>
    class number_callback_adapter
    {
    public:
        // cur is kept by reference so that finish() sees how far the caller
        // has advanced; a copy of its value at construction marks the start.
        number_callback_adapter(Callbacks& callbacks, Encoding& encoding,
                                Iterator& cur)
            : callbacks(callbacks), encoding(encoding), first(cur), cur(cur)
        {}

        // Individual code units are ignored; the range is converted at once
        // in finish().
        void operator ()(typename Encoding::external_char) {}

        // Converts the code units between the start and the current
        // position with Encoding::to_internal and passes the result to
        // Callbacks::on_number.
        void finish() const {
            callbacks.on_number(encoding.to_internal(first, cur));
        }

    private:
        // Declared but not defined: the adapter is not copyable.
        number_callback_adapter(const number_callback_adapter&);

        Callbacks& callbacks;
        Encoding& encoding;
        Iterator first;
        Iterator& cur;
    };
 | 
						|
 | 
						|
    template <typename Callbacks, typename Encoding, typename Iterator>
 | 
						|
    class number_callback_adapter<Callbacks, Encoding, Iterator,
 | 
						|
                                  std::input_iterator_tag>
 | 
						|
    {
 | 
						|
    public:
 | 
						|
        number_callback_adapter(Callbacks& callbacks, Encoding& encoding,
 | 
						|
                                Iterator&)
 | 
						|
            : callbacks(callbacks), encoding(encoding), first(true)
 | 
						|
        {}
 | 
						|
 | 
						|
        void operator ()(typename Encoding::external_char c) {
 | 
						|
            if (first) {
 | 
						|
                callbacks.on_begin_number();
 | 
						|
                first = false;
 | 
						|
            }
 | 
						|
            callbacks.on_digit(encoding.to_internal_trivial(c));
 | 
						|
        }
 | 
						|
 | 
						|
        void finish() const {
 | 
						|
            callbacks.on_end_number();
 | 
						|
        }
 | 
						|
    private:
 | 
						|
        number_callback_adapter(const number_callback_adapter&);
 | 
						|
 | 
						|
        Callbacks& callbacks;
 | 
						|
        Encoding& encoding;
 | 
						|
        bool first;
 | 
						|
    };
 | 
						|
 | 
						|
    // Reports the literal (non-escape) parts of a string to the callbacks.
    //
    // Primary template, selected for every iterator category except
    // std::input_iterator_tag (which has its own specialization). Code
    // points are only skipped over; the text between start_run() and
    // finish_run() is converted and reported as one piece.
    template <typename Callbacks, typename Encoding, typename Iterator,
        typename = typename std::iterator_traits<Iterator>
            ::iterator_category>
    class string_callback_adapter
    {
    public:
        // cur is kept by reference; the first run begins at its value at
        // construction time.
        string_callback_adapter(Callbacks& callbacks, Encoding& encoding,
                                Iterator& cur)
            : callbacks(callbacks), encoding(encoding), cur(cur),
              run_begin(cur)
        {}

        // Marks the current position as the beginning of a new run.
        void start_run() {
            run_begin = cur;
        }

        // Converts the code units from the beginning of the run up to the
        // current position with Encoding::to_internal and passes the result
        // to Callbacks::on_code_units.
        void finish_run() {
            callbacks.on_code_units(encoding.to_internal(run_begin, cur));
        }

        // Lets Encoding::skip_codepoint move the shared position forward;
        // error_fn is handed through to the encoding unchanged.
        template <typename Sentinel, typename EncodingErrorFn>
        void process_codepoint(Sentinel end, EncodingErrorFn error_fn) {
            encoding.skip_codepoint(cur, end, error_fn);
        }

    private:
        // Declared but not defined: the adapter is not copyable.
        string_callback_adapter(const string_callback_adapter&);

        Callbacks& callbacks;
        Encoding& encoding;
        Iterator& cur;
        Iterator run_begin;
    };
 | 
						|
 | 
						|
    template <typename Callbacks, typename Encoding, typename Iterator>
 | 
						|
    class string_callback_adapter<Callbacks, Encoding, Iterator,
 | 
						|
                                  std::input_iterator_tag>
 | 
						|
    {
 | 
						|
    public:
 | 
						|
        string_callback_adapter(Callbacks& callbacks, Encoding& encoding,
 | 
						|
                                Iterator& cur)
 | 
						|
            : callbacks(callbacks), encoding(encoding), cur(cur)
 | 
						|
        {}
 | 
						|
 | 
						|
        void start_run() {}
 | 
						|
 | 
						|
        void finish_run() {}
 | 
						|
 | 
						|
        template <typename Sentinel, typename EncodingErrorFn>
 | 
						|
        void process_codepoint(Sentinel end, EncodingErrorFn error_fn) {
 | 
						|
            encoding.transcode_codepoint(cur, end,
 | 
						|
                boost::bind(&Callbacks::on_code_unit,
 | 
						|
                            boost::ref(callbacks), _1),
 | 
						|
                error_fn);
 | 
						|
        }
 | 
						|
 | 
						|
    private:
 | 
						|
        string_callback_adapter(const string_callback_adapter&);
 | 
						|
 | 
						|
        Callbacks& callbacks;
 | 
						|
        Encoding& encoding;
 | 
						|
        Iterator& cur;
 | 
						|
    };
 | 
						|
 | 
						|
    template <typename Callbacks, typename Encoding, typename Iterator,
 | 
						|
              typename Sentinel>
 | 
						|
    class parser
 | 
						|
    {
 | 
						|
        typedef detail::number_callback_adapter<Callbacks, Encoding, Iterator>
 | 
						|
            number_adapter;
 | 
						|
        typedef detail::string_callback_adapter<Callbacks, Encoding, Iterator>
 | 
						|
            string_adapter;
 | 
						|
        typedef detail::source<Encoding, Iterator, Sentinel> source;
 | 
						|
        typedef typename source::code_unit code_unit;
 | 
						|
 | 
						|
    public:
 | 
						|
        parser(Callbacks& callbacks, Encoding& encoding)
 | 
						|
            : callbacks(callbacks), encoding(encoding), src(encoding)
 | 
						|
        {}
 | 
						|
 | 
						|
        template <typename Range>
 | 
						|
        void set_input(const std::string& filename, const Range& r) {
 | 
						|
            src.set_input(filename, r);
 | 
						|
        }
 | 
						|
 | 
						|
        void finish() {
 | 
						|
            skip_ws();
 | 
						|
            if (!src.done()) {
 | 
						|
                parse_error("garbage after data");
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        void parse_value() {
 | 
						|
            if (parse_object()) return;
 | 
						|
            if (parse_array()) return;
 | 
						|
            if (parse_string()) return;
 | 
						|
            if (parse_boolean()) return;
 | 
						|
            if (parse_null()) return;
 | 
						|
            if (parse_number()) return;
 | 
						|
            parse_error("expected value");
 | 
						|
        }
 | 
						|
 | 
						|
        bool parse_null() {
 | 
						|
            skip_ws();
 | 
						|
            if (!have(&Encoding::is_n)) {
 | 
						|
                return false;
 | 
						|
            }
 | 
						|
            expect(&Encoding::is_u, "expected 'null'");
 | 
						|
            expect(&Encoding::is_l, "expected 'null'");
 | 
						|
            expect(&Encoding::is_l, "expected 'null'");
 | 
						|
            callbacks.on_null();
 | 
						|
            return true;
 | 
						|
        }
 | 
						|
 | 
						|
        bool parse_boolean() {
 | 
						|
            skip_ws();
 | 
						|
            if (have(&Encoding::is_t)) {
 | 
						|
                expect(&Encoding::is_r, "expected 'true'");
 | 
						|
                expect(&Encoding::is_u, "expected 'true'");
 | 
						|
                expect(&Encoding::is_e, "expected 'true'");
 | 
						|
                callbacks.on_boolean(true);
 | 
						|
                return true;
 | 
						|
            }
 | 
						|
            if (have(&Encoding::is_f)) {
 | 
						|
                expect(&Encoding::is_a, "expected 'false'");
 | 
						|
                expect(&Encoding::is_l, "expected 'false'");
 | 
						|
                expect(&Encoding::is_s, "expected 'false'");
 | 
						|
                expect(&Encoding::is_e, "expected 'false'");
 | 
						|
                callbacks.on_boolean(false);
 | 
						|
                return true;
 | 
						|
            }
 | 
						|
            return false;
 | 
						|
        }
 | 
						|
 | 
						|
        bool parse_number() {
 | 
						|
            skip_ws();
 | 
						|
 | 
						|
            number_adapter adapter(callbacks, encoding, src.raw_cur());
 | 
						|
            bool started = false;
 | 
						|
            if (have(&Encoding::is_minus, adapter)) {
 | 
						|
                started = true;
 | 
						|
            }
 | 
						|
            if (!have(&Encoding::is_0, adapter) && !parse_int_part(adapter)) {
 | 
						|
                if (started) {
 | 
						|
                    parse_error("expected digits after -");
 | 
						|
                }
 | 
						|
                return false;
 | 
						|
            }
 | 
						|
            parse_frac_part(adapter);
 | 
						|
            parse_exp_part(adapter);
 | 
						|
            adapter.finish();
 | 
						|
            return true;
 | 
						|
        }
 | 
						|
 | 
						|
        bool parse_string() {
 | 
						|
            skip_ws();
 | 
						|
 | 
						|
            if (!have(&Encoding::is_quote)) {
 | 
						|
                return false;
 | 
						|
            }
 | 
						|
 | 
						|
            callbacks.on_begin_string();
 | 
						|
            string_adapter adapter(callbacks, encoding, src.raw_cur());
 | 
						|
            while (!encoding.is_quote(need_cur("unterminated string"))) {
 | 
						|
                if (encoding.is_backslash(*src.raw_cur())) {
 | 
						|
                    adapter.finish_run();
 | 
						|
                    next();
 | 
						|
                    parse_escape();
 | 
						|
                    adapter.start_run();
 | 
						|
                } else {
 | 
						|
                    adapter.process_codepoint(src.raw_end(),
 | 
						|
                        boost::bind(&parser::parse_error,
 | 
						|
                                    this, "invalid code sequence"));
 | 
						|
                }
 | 
						|
            }
 | 
						|
            adapter.finish_run();
 | 
						|
            callbacks.on_end_string();
 | 
						|
            next();
 | 
						|
            return true;
 | 
						|
        }
 | 
						|
 | 
						|
        bool parse_array() {
 | 
						|
            skip_ws();
 | 
						|
 | 
						|
            if (!have(&Encoding::is_open_bracket)) {
 | 
						|
                return false;
 | 
						|
            }
 | 
						|
 | 
						|
            callbacks.on_begin_array();
 | 
						|
            skip_ws();
 | 
						|
            if (have(&Encoding::is_close_bracket)) {
 | 
						|
                callbacks.on_end_array();
 | 
						|
                return true;
 | 
						|
            }
 | 
						|
            do {
 | 
						|
                parse_value();
 | 
						|
                skip_ws();
 | 
						|
            } while (have(&Encoding::is_comma));
 | 
						|
            expect(&Encoding::is_close_bracket, "expected ']' or ','");
 | 
						|
            callbacks.on_end_array();
 | 
						|
            return true;
 | 
						|
        }
 | 
						|
 | 
						|
        bool parse_object() {
 | 
						|
            skip_ws();
 | 
						|
 | 
						|
            if (!have(&Encoding::is_open_brace)) {
 | 
						|
                return false;
 | 
						|
            }
 | 
						|
 | 
						|
            callbacks.on_begin_object();
 | 
						|
            skip_ws();
 | 
						|
            if (have(&Encoding::is_close_brace)) {
 | 
						|
                callbacks.on_end_object();
 | 
						|
                return true;
 | 
						|
            }
 | 
						|
            do {
 | 
						|
                if (!parse_string()) {
 | 
						|
                    parse_error("expected key string");
 | 
						|
                }
 | 
						|
                skip_ws();
 | 
						|
                expect(&Encoding::is_colon, "expected ':'");
 | 
						|
                parse_value();
 | 
						|
                skip_ws();
 | 
						|
            } while (have(&Encoding::is_comma));
 | 
						|
            expect(&Encoding::is_close_brace, "expected '}' or ','");
 | 
						|
            callbacks.on_end_object();
 | 
						|
            return true;
 | 
						|
        }
 | 
						|
 | 
						|
    private:
 | 
						|
        typedef typename source::encoding_predicate encoding_predicate;
 | 
						|
 | 
						|
        void parse_error(const char* msg) { src.parse_error(msg); }
 | 
						|
        void next() { src.next(); }
 | 
						|
        template <typename Action>
 | 
						|
        bool have(encoding_predicate p, Action& a) { return src.have(p, a); }
 | 
						|
        bool have(encoding_predicate p) { return src.have(p); }
 | 
						|
        template <typename Action>
 | 
						|
        void expect(encoding_predicate p, const char* msg, Action& a) {
 | 
						|
            src.expect(p, msg, a);
 | 
						|
        }
 | 
						|
        void expect(encoding_predicate p, const char* msg) {
 | 
						|
            src.expect(p, msg);
 | 
						|
        }
 | 
						|
        code_unit need_cur(const char* msg) { return src.need_cur(msg); }
 | 
						|
 | 
						|
        void skip_ws() {
 | 
						|
            while (have(&Encoding::is_ws)) {
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        bool parse_int_part(number_adapter& action) {
 | 
						|
            if (!have(&Encoding::is_digit0, action)) {
 | 
						|
                return false;
 | 
						|
            }
 | 
						|
            parse_digits(action);
 | 
						|
            return true;
 | 
						|
        }
 | 
						|
 | 
						|
        void parse_frac_part(number_adapter& action) {
 | 
						|
            if (!have(&Encoding::is_dot, action)) {
 | 
						|
                return;
 | 
						|
            }
 | 
						|
            expect(&Encoding::is_digit, "need at least one digit after '.'",
 | 
						|
                   action);
 | 
						|
            parse_digits(action);
 | 
						|
        }
 | 
						|
 | 
						|
        void parse_exp_part(number_adapter& action) {
 | 
						|
            if (!have(&Encoding::is_eE, action)) {
 | 
						|
                return;
 | 
						|
            }
 | 
						|
            have(&Encoding::is_plusminus, action);
 | 
						|
            expect(&Encoding::is_digit, "need at least one digit in exponent",
 | 
						|
                   action);
 | 
						|
            parse_digits(action);
 | 
						|
        }
 | 
						|
 | 
						|
        void parse_digits(number_adapter& action) {
 | 
						|
            while (have(&Encoding::is_digit, action)) {
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        void parse_escape() {
 | 
						|
            if (have(&Encoding::is_quote)) {
 | 
						|
                feed(0x22);
 | 
						|
            } else if (have(&Encoding::is_backslash)) {
 | 
						|
                feed(0x5c);
 | 
						|
            } else if (have(&Encoding::is_slash)) {
 | 
						|
                feed(0x2f);
 | 
						|
            } else if (have(&Encoding::is_b)) {
 | 
						|
                feed(0x08); // backspace
 | 
						|
            } else if (have(&Encoding::is_f)) {
 | 
						|
                feed(0x0c); // formfeed
 | 
						|
            } else if (have(&Encoding::is_n)) {
 | 
						|
                feed(0x0a); // line feed
 | 
						|
            } else if (have(&Encoding::is_r)) {
 | 
						|
                feed(0x0d); // carriage return
 | 
						|
            } else if (have(&Encoding::is_t)) {
 | 
						|
                feed(0x09); // horizontal tab
 | 
						|
            } else if (have(&Encoding::is_u)) {
 | 
						|
                parse_codepoint_ref();
 | 
						|
            } else {
 | 
						|
                parse_error("invalid escape sequence");
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        unsigned parse_hex_quad() {
 | 
						|
            unsigned codepoint = 0;
 | 
						|
            for (int i = 0; i < 4; ++i) {
 | 
						|
                int value = encoding.decode_hexdigit(
 | 
						|
                    need_cur("invalid escape sequence"));
 | 
						|
                if (value < 0) {
 | 
						|
                    parse_error("invalid escape sequence");
 | 
						|
                }
 | 
						|
                codepoint *= 16;
 | 
						|
                codepoint += value;
 | 
						|
                next();
 | 
						|
            }
 | 
						|
            return codepoint;
 | 
						|
        }
 | 
						|
 | 
						|
        static bool is_surrogate_high(unsigned codepoint) {
 | 
						|
            return (codepoint & 0xfc00) == 0xd800;
 | 
						|
        }
 | 
						|
        static bool is_surrogate_low(unsigned codepoint) {
 | 
						|
            return (codepoint & 0xfc00) == 0xdc00;
 | 
						|
        }
 | 
						|
        static unsigned combine_surrogates(unsigned high, unsigned low) {
 | 
						|
            return 0x010000 + (((high & 0x3ff) << 10) | (low & 0x3ff));
 | 
						|
        }
 | 
						|
 | 
						|
        void parse_codepoint_ref() {
 | 
						|
            unsigned codepoint = parse_hex_quad();
 | 
						|
            if (is_surrogate_low(codepoint)) {
 | 
						|
                parse_error("invalid codepoint, stray low surrogate");
 | 
						|
            }
 | 
						|
            if (is_surrogate_high(codepoint)) {
 | 
						|
                expect(&Encoding::is_backslash,
 | 
						|
                    "invalid codepoint, stray high surrogate");
 | 
						|
                expect(&Encoding::is_u,
 | 
						|
                    "expected codepoint reference after high surrogate");
 | 
						|
                int low = parse_hex_quad();
 | 
						|
                if (!is_surrogate_low(low)) {
 | 
						|
                    parse_error("expected low surrogate after high surrogate");
 | 
						|
                }
 | 
						|
                codepoint = combine_surrogates(codepoint, low);
 | 
						|
            }
 | 
						|
            feed(codepoint);
 | 
						|
        }
 | 
						|
 | 
						|
        void feed(unsigned codepoint) {
 | 
						|
            encoding.feed_codepoint(codepoint,
 | 
						|
                                    boost::bind(&Callbacks::on_code_unit,
 | 
						|
                                                boost::ref(callbacks), _1));
 | 
						|
        }
 | 
						|
 | 
						|
        Callbacks& callbacks;
 | 
						|
        Encoding& encoding;
 | 
						|
        source src;
 | 
						|
    };
 | 
						|
 | 
						|
}}}}
 | 
						|
 | 
						|
#endif
 |