531 lines
		
	
	
		
			17 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
		
		
			
		
	
	
			531 lines
		
	
	
		
			17 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
|   | #ifndef BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_PARSER_HPP | ||
|  | #define BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_PARSER_HPP | ||
|  | 
 | ||
|  | #include <boost/property_tree/json_parser/error.hpp> | ||
|  | 
 | ||
|  | #include <boost/ref.hpp> | ||
|  | #include <boost/bind.hpp> | ||
|  | #include <boost/format.hpp> | ||
|  | 
 | ||
|  | #include <iterator> | ||
|  | #include <sstream> | ||
|  | #include <string> | ||
|  | 
 | ||
|  | namespace boost { namespace property_tree { | ||
|  |     namespace json_parser { namespace detail | ||
|  | { | ||
|  | 
 | ||
|  |     template <typename Encoding, typename Iterator, typename Sentinel> | ||
|  |     class source | ||
|  |     { | ||
|  |     public: | ||
|  |         typedef typename std::iterator_traits<Iterator>::value_type | ||
|  |             code_unit; | ||
|  |         typedef bool (Encoding::*encoding_predicate)(code_unit c) const; | ||
|  | 
 | ||
|  |         explicit source(Encoding& encoding) : encoding(encoding) {} | ||
|  | 
 | ||
|  |         template <typename Range> | ||
|  |         void set_input(const std::string& filename, const Range& r) | ||
|  |         { | ||
|  |             this->filename = filename; | ||
|  |             cur = r.begin(); | ||
|  |             end = r.end(); | ||
|  |             // Note that there is no backtracking, so if e.g. a UTF-8 file | ||
|  |             // starts with something that initially looks like a BOM but isn't, | ||
|  |             // there's trouble. | ||
|  |             // However, no valid JSON file can start with a UTF-8 EF byte. | ||
|  |             encoding.skip_introduction(cur, end); | ||
|  |             line = 1; | ||
|  |             offset = 0; | ||
|  |         } | ||
|  | 
 | ||
|  |         bool done() const { return cur == end; } | ||
|  | 
 | ||
|  |         void parse_error(const char* msg) { | ||
|  |             BOOST_PROPERTY_TREE_THROW( | ||
|  |                 json_parser_error(msg, filename, line)); | ||
|  |         } | ||
|  | 
 | ||
|  |         void next() { | ||
|  |             if (encoding.is_nl(*cur)) { | ||
|  |                 ++line; | ||
|  |                 offset = 0; | ||
|  |             } else { | ||
|  |                 ++offset; | ||
|  |             } | ||
|  |             ++cur; | ||
|  |         } | ||
|  | 
 | ||
|  |         template <typename Action> | ||
|  |         bool have(encoding_predicate p, Action& a) { | ||
|  |             bool found = cur != end && (encoding.*p)(*cur); | ||
|  |             if (found) { | ||
|  |                 a(*cur); | ||
|  |                 next(); | ||
|  |             } | ||
|  |             return found; | ||
|  |         } | ||
|  | 
 | ||
|  |         bool have(encoding_predicate p) { | ||
|  |             DoNothing n; | ||
|  |             return have(p, n); | ||
|  |         } | ||
|  | 
 | ||
|  |         template <typename Action> | ||
|  |         void expect(encoding_predicate p, const char* msg, Action& a) { | ||
|  |             if (!have(p, a)) { | ||
|  |                 parse_error(msg); | ||
|  |             } | ||
|  |         } | ||
|  | 
 | ||
|  |         void expect(encoding_predicate p, const char* msg) { | ||
|  |             DoNothing n; | ||
|  |             expect(p, msg, n); | ||
|  |         } | ||
|  | 
 | ||
|  |         code_unit need_cur(const char* msg) { | ||
|  |             if (cur == end) { | ||
|  |                 parse_error(msg); | ||
|  |             } | ||
|  |             return *cur; | ||
|  |         } | ||
|  | 
 | ||
|  |         Iterator& raw_cur() { return cur; } | ||
|  |         Sentinel raw_end() { return end; } | ||
|  | 
 | ||
|  |     private: | ||
|  |         struct DoNothing { | ||
|  |             void operator ()(code_unit) const {} | ||
|  |         }; | ||
|  | 
 | ||
|  |         Encoding& encoding; | ||
|  |         Iterator cur; | ||
|  |         Sentinel end; | ||
|  |         std::string filename; | ||
|  |         int line; | ||
|  |         int offset; | ||
|  |     }; | ||
|  | 
 | ||
|  |     template <typename Callbacks, typename Encoding, typename Iterator, | ||
|  |         typename = typename std::iterator_traits<Iterator> | ||
|  |             ::iterator_category> | ||
|  |     class number_callback_adapter | ||
|  |     { | ||
|  |     public: | ||
|  |         number_callback_adapter(Callbacks& callbacks, Encoding& encoding, | ||
|  |                                 Iterator& cur) | ||
|  |             : callbacks(callbacks), encoding(encoding), first(cur), cur(cur) | ||
|  |         {} | ||
|  | 
 | ||
|  |         void operator ()(typename Encoding::external_char) {} | ||
|  | 
 | ||
|  |         void finish() const { | ||
|  |             callbacks.on_number(encoding.to_internal(first, cur)); | ||
|  |         } | ||
|  | 
 | ||
|  |     private: | ||
|  |         number_callback_adapter(const number_callback_adapter&); | ||
|  | 
 | ||
|  |         Callbacks& callbacks; | ||
|  |         Encoding& encoding; | ||
|  |         Iterator first; | ||
|  |         Iterator& cur; | ||
|  |     }; | ||
|  | 
 | ||
|  |     template <typename Callbacks, typename Encoding, typename Iterator> | ||
|  |     class number_callback_adapter<Callbacks, Encoding, Iterator, | ||
|  |                                   std::input_iterator_tag> | ||
|  |     { | ||
|  |     public: | ||
|  |         number_callback_adapter(Callbacks& callbacks, Encoding& encoding, | ||
|  |                                 Iterator&) | ||
|  |             : callbacks(callbacks), encoding(encoding), first(true) | ||
|  |         {} | ||
|  | 
 | ||
|  |         void operator ()(typename Encoding::external_char c) { | ||
|  |             if (first) { | ||
|  |                 callbacks.on_begin_number(); | ||
|  |                 first = false; | ||
|  |             } | ||
|  |             callbacks.on_digit(encoding.to_internal_trivial(c)); | ||
|  |         } | ||
|  | 
 | ||
|  |         void finish() const { | ||
|  |             callbacks.on_end_number(); | ||
|  |         } | ||
|  |     private: | ||
|  |         number_callback_adapter(const number_callback_adapter&); | ||
|  | 
 | ||
|  |         Callbacks& callbacks; | ||
|  |         Encoding& encoding; | ||
|  |         bool first; | ||
|  |     }; | ||
|  | 
 | ||
|  |     template <typename Callbacks, typename Encoding, typename Iterator, | ||
|  |         typename = typename std::iterator_traits<Iterator> | ||
|  |             ::iterator_category> | ||
|  |     class string_callback_adapter | ||
|  |     { | ||
|  |     public: | ||
|  |         string_callback_adapter(Callbacks& callbacks, Encoding& encoding, | ||
|  |                                 Iterator& cur) | ||
|  |             : callbacks(callbacks), encoding(encoding), cur(cur), | ||
|  |               run_begin(cur) | ||
|  |         {} | ||
|  | 
 | ||
|  |         void start_run() { | ||
|  |             run_begin = cur; | ||
|  |         } | ||
|  | 
 | ||
|  |         void finish_run() { | ||
|  |             callbacks.on_code_units(encoding.to_internal(run_begin, cur)); | ||
|  |         } | ||
|  | 
 | ||
|  |         template <typename Sentinel, typename EncodingErrorFn> | ||
|  |         void process_codepoint(Sentinel end, EncodingErrorFn error_fn) { | ||
|  |             encoding.skip_codepoint(cur, end, error_fn); | ||
|  |         } | ||
|  | 
 | ||
|  |     private: | ||
|  |         string_callback_adapter(const string_callback_adapter&); | ||
|  | 
 | ||
|  |         Callbacks& callbacks; | ||
|  |         Encoding& encoding; | ||
|  |         Iterator& cur; | ||
|  |         Iterator run_begin; | ||
|  |     }; | ||
|  | 
 | ||
|  |     template <typename Callbacks, typename Encoding, typename Iterator> | ||
|  |     class string_callback_adapter<Callbacks, Encoding, Iterator, | ||
|  |                                   std::input_iterator_tag> | ||
|  |     { | ||
|  |     public: | ||
|  |         string_callback_adapter(Callbacks& callbacks, Encoding& encoding, | ||
|  |                                 Iterator& cur) | ||
|  |             : callbacks(callbacks), encoding(encoding), cur(cur) | ||
|  |         {} | ||
|  | 
 | ||
|  |         void start_run() {} | ||
|  | 
 | ||
|  |         void finish_run() {} | ||
|  | 
 | ||
|  |         template <typename Sentinel, typename EncodingErrorFn> | ||
|  |         void process_codepoint(Sentinel end, EncodingErrorFn error_fn) { | ||
|  |             encoding.transcode_codepoint(cur, end, | ||
|  |                 boost::bind(&Callbacks::on_code_unit, | ||
|  |                             boost::ref(callbacks), _1), | ||
|  |                 error_fn); | ||
|  |         } | ||
|  | 
 | ||
|  |     private: | ||
|  |         string_callback_adapter(const string_callback_adapter&); | ||
|  | 
 | ||
|  |         Callbacks& callbacks; | ||
|  |         Encoding& encoding; | ||
|  |         Iterator& cur; | ||
|  |     }; | ||
|  | 
 | ||
|  |     template <typename Callbacks, typename Encoding, typename Iterator, | ||
|  |               typename Sentinel> | ||
|  |     class parser | ||
|  |     { | ||
|  |         typedef detail::number_callback_adapter<Callbacks, Encoding, Iterator> | ||
|  |             number_adapter; | ||
|  |         typedef detail::string_callback_adapter<Callbacks, Encoding, Iterator> | ||
|  |             string_adapter; | ||
|  |         typedef detail::source<Encoding, Iterator, Sentinel> source; | ||
|  |         typedef typename source::code_unit code_unit; | ||
|  | 
 | ||
|  |     public: | ||
|  |         parser(Callbacks& callbacks, Encoding& encoding) | ||
|  |             : callbacks(callbacks), encoding(encoding), src(encoding) | ||
|  |         {} | ||
|  | 
 | ||
|  |         template <typename Range> | ||
|  |         void set_input(const std::string& filename, const Range& r) { | ||
|  |             src.set_input(filename, r); | ||
|  |         } | ||
|  | 
 | ||
|  |         void finish() { | ||
|  |             skip_ws(); | ||
|  |             if (!src.done()) { | ||
|  |                 parse_error("garbage after data"); | ||
|  |             } | ||
|  |         } | ||
|  | 
 | ||
|  |         void parse_value() { | ||
|  |             if (parse_object()) return; | ||
|  |             if (parse_array()) return; | ||
|  |             if (parse_string()) return; | ||
|  |             if (parse_boolean()) return; | ||
|  |             if (parse_null()) return; | ||
|  |             if (parse_number()) return; | ||
|  |             parse_error("expected value"); | ||
|  |         } | ||
|  | 
 | ||
|  |         bool parse_null() { | ||
|  |             skip_ws(); | ||
|  |             if (!have(&Encoding::is_n)) { | ||
|  |                 return false; | ||
|  |             } | ||
|  |             expect(&Encoding::is_u, "expected 'null'"); | ||
|  |             expect(&Encoding::is_l, "expected 'null'"); | ||
|  |             expect(&Encoding::is_l, "expected 'null'"); | ||
|  |             callbacks.on_null(); | ||
|  |             return true; | ||
|  |         } | ||
|  | 
 | ||
|  |         bool parse_boolean() { | ||
|  |             skip_ws(); | ||
|  |             if (have(&Encoding::is_t)) { | ||
|  |                 expect(&Encoding::is_r, "expected 'true'"); | ||
|  |                 expect(&Encoding::is_u, "expected 'true'"); | ||
|  |                 expect(&Encoding::is_e, "expected 'true'"); | ||
|  |                 callbacks.on_boolean(true); | ||
|  |                 return true; | ||
|  |             } | ||
|  |             if (have(&Encoding::is_f)) { | ||
|  |                 expect(&Encoding::is_a, "expected 'false'"); | ||
|  |                 expect(&Encoding::is_l, "expected 'false'"); | ||
|  |                 expect(&Encoding::is_s, "expected 'false'"); | ||
|  |                 expect(&Encoding::is_e, "expected 'false'"); | ||
|  |                 callbacks.on_boolean(false); | ||
|  |                 return true; | ||
|  |             } | ||
|  |             return false; | ||
|  |         } | ||
|  | 
 | ||
|  |         bool parse_number() { | ||
|  |             skip_ws(); | ||
|  | 
 | ||
|  |             number_adapter adapter(callbacks, encoding, src.raw_cur()); | ||
|  |             bool started = false; | ||
|  |             if (have(&Encoding::is_minus, adapter)) { | ||
|  |                 started = true; | ||
|  |             } | ||
|  |             if (!have(&Encoding::is_0, adapter) && !parse_int_part(adapter)) { | ||
|  |                 if (started) { | ||
|  |                     parse_error("expected digits after -"); | ||
|  |                 } | ||
|  |                 return false; | ||
|  |             } | ||
|  |             parse_frac_part(adapter); | ||
|  |             parse_exp_part(adapter); | ||
|  |             adapter.finish(); | ||
|  |             return true; | ||
|  |         } | ||
|  | 
 | ||
|  |         bool parse_string() { | ||
|  |             skip_ws(); | ||
|  | 
 | ||
|  |             if (!have(&Encoding::is_quote)) { | ||
|  |                 return false; | ||
|  |             } | ||
|  | 
 | ||
|  |             callbacks.on_begin_string(); | ||
|  |             string_adapter adapter(callbacks, encoding, src.raw_cur()); | ||
|  |             while (!encoding.is_quote(need_cur("unterminated string"))) { | ||
|  |                 if (encoding.is_backslash(*src.raw_cur())) { | ||
|  |                     adapter.finish_run(); | ||
|  |                     next(); | ||
|  |                     parse_escape(); | ||
|  |                     adapter.start_run(); | ||
|  |                 } else { | ||
|  |                     adapter.process_codepoint(src.raw_end(), | ||
|  |                         boost::bind(&parser::parse_error, | ||
|  |                                     this, "invalid code sequence")); | ||
|  |                 } | ||
|  |             } | ||
|  |             adapter.finish_run(); | ||
|  |             callbacks.on_end_string(); | ||
|  |             next(); | ||
|  |             return true; | ||
|  |         } | ||
|  | 
 | ||
|  |         bool parse_array() { | ||
|  |             skip_ws(); | ||
|  | 
 | ||
|  |             if (!have(&Encoding::is_open_bracket)) { | ||
|  |                 return false; | ||
|  |             } | ||
|  | 
 | ||
|  |             callbacks.on_begin_array(); | ||
|  |             skip_ws(); | ||
|  |             if (have(&Encoding::is_close_bracket)) { | ||
|  |                 callbacks.on_end_array(); | ||
|  |                 return true; | ||
|  |             } | ||
|  |             do { | ||
|  |                 parse_value(); | ||
|  |                 skip_ws(); | ||
|  |             } while (have(&Encoding::is_comma)); | ||
|  |             expect(&Encoding::is_close_bracket, "expected ']' or ','"); | ||
|  |             callbacks.on_end_array(); | ||
|  |             return true; | ||
|  |         } | ||
|  | 
 | ||
|  |         bool parse_object() { | ||
|  |             skip_ws(); | ||
|  | 
 | ||
|  |             if (!have(&Encoding::is_open_brace)) { | ||
|  |                 return false; | ||
|  |             } | ||
|  | 
 | ||
|  |             callbacks.on_begin_object(); | ||
|  |             skip_ws(); | ||
|  |             if (have(&Encoding::is_close_brace)) { | ||
|  |                 callbacks.on_end_object(); | ||
|  |                 return true; | ||
|  |             } | ||
|  |             do { | ||
|  |                 if (!parse_string()) { | ||
|  |                     parse_error("expected key string"); | ||
|  |                 } | ||
|  |                 skip_ws(); | ||
|  |                 expect(&Encoding::is_colon, "expected ':'"); | ||
|  |                 parse_value(); | ||
|  |                 skip_ws(); | ||
|  |             } while (have(&Encoding::is_comma)); | ||
|  |             expect(&Encoding::is_close_brace, "expected '}' or ','"); | ||
|  |             callbacks.on_end_object(); | ||
|  |             return true; | ||
|  |         } | ||
|  | 
 | ||
|  |     private: | ||
|  |         typedef typename source::encoding_predicate encoding_predicate; | ||
|  | 
 | ||
|  |         void parse_error(const char* msg) { src.parse_error(msg); } | ||
|  |         void next() { src.next(); } | ||
|  |         template <typename Action> | ||
|  |         bool have(encoding_predicate p, Action& a) { return src.have(p, a); } | ||
|  |         bool have(encoding_predicate p) { return src.have(p); } | ||
|  |         template <typename Action> | ||
|  |         void expect(encoding_predicate p, const char* msg, Action& a) { | ||
|  |             src.expect(p, msg, a); | ||
|  |         } | ||
|  |         void expect(encoding_predicate p, const char* msg) { | ||
|  |             src.expect(p, msg); | ||
|  |         } | ||
|  |         code_unit need_cur(const char* msg) { return src.need_cur(msg); } | ||
|  | 
 | ||
|  |         void skip_ws() { | ||
|  |             while (have(&Encoding::is_ws)) { | ||
|  |             } | ||
|  |         } | ||
|  | 
 | ||
|  |         bool parse_int_part(number_adapter& action) { | ||
|  |             if (!have(&Encoding::is_digit0, action)) { | ||
|  |                 return false; | ||
|  |             } | ||
|  |             parse_digits(action); | ||
|  |             return true; | ||
|  |         } | ||
|  | 
 | ||
|  |         void parse_frac_part(number_adapter& action) { | ||
|  |             if (!have(&Encoding::is_dot, action)) { | ||
|  |                 return; | ||
|  |             } | ||
|  |             expect(&Encoding::is_digit, "need at least one digit after '.'", | ||
|  |                    action); | ||
|  |             parse_digits(action); | ||
|  |         } | ||
|  | 
 | ||
|  |         void parse_exp_part(number_adapter& action) { | ||
|  |             if (!have(&Encoding::is_eE, action)) { | ||
|  |                 return; | ||
|  |             } | ||
|  |             have(&Encoding::is_plusminus, action); | ||
|  |             expect(&Encoding::is_digit, "need at least one digit in exponent", | ||
|  |                    action); | ||
|  |             parse_digits(action); | ||
|  |         } | ||
|  | 
 | ||
|  |         void parse_digits(number_adapter& action) { | ||
|  |             while (have(&Encoding::is_digit, action)) { | ||
|  |             } | ||
|  |         } | ||
|  | 
 | ||
|  |         void parse_escape() { | ||
|  |             if (have(&Encoding::is_quote)) { | ||
|  |                 feed(0x22); | ||
|  |             } else if (have(&Encoding::is_backslash)) { | ||
|  |                 feed(0x5c); | ||
|  |             } else if (have(&Encoding::is_slash)) { | ||
|  |                 feed(0x2f); | ||
|  |             } else if (have(&Encoding::is_b)) { | ||
|  |                 feed(0x08); // backspace | ||
|  |             } else if (have(&Encoding::is_f)) { | ||
|  |                 feed(0x0c); // formfeed | ||
|  |             } else if (have(&Encoding::is_n)) { | ||
|  |                 feed(0x0a); // line feed | ||
|  |             } else if (have(&Encoding::is_r)) { | ||
|  |                 feed(0x0d); // carriage return | ||
|  |             } else if (have(&Encoding::is_t)) { | ||
|  |                 feed(0x09); // horizontal tab | ||
|  |             } else if (have(&Encoding::is_u)) { | ||
|  |                 parse_codepoint_ref(); | ||
|  |             } else { | ||
|  |                 parse_error("invalid escape sequence"); | ||
|  |             } | ||
|  |         } | ||
|  | 
 | ||
|  |         unsigned parse_hex_quad() { | ||
|  |             unsigned codepoint = 0; | ||
|  |             for (int i = 0; i < 4; ++i) { | ||
|  |                 int value = encoding.decode_hexdigit( | ||
|  |                     need_cur("invalid escape sequence")); | ||
|  |                 if (value < 0) { | ||
|  |                     parse_error("invalid escape sequence"); | ||
|  |                 } | ||
|  |                 codepoint *= 16; | ||
|  |                 codepoint += value; | ||
|  |                 next(); | ||
|  |             } | ||
|  |             return codepoint; | ||
|  |         } | ||
|  | 
 | ||
|  |         static bool is_surrogate_high(unsigned codepoint) { | ||
|  |             return (codepoint & 0xfc00) == 0xd800; | ||
|  |         } | ||
|  |         static bool is_surrogate_low(unsigned codepoint) { | ||
|  |             return (codepoint & 0xfc00) == 0xdc00; | ||
|  |         } | ||
|  |         static unsigned combine_surrogates(unsigned high, unsigned low) { | ||
|  |             return 0x010000 + (((high & 0x3ff) << 10) | (low & 0x3ff)); | ||
|  |         } | ||
|  | 
 | ||
|  |         void parse_codepoint_ref() { | ||
|  |             unsigned codepoint = parse_hex_quad(); | ||
|  |             if (is_surrogate_low(codepoint)) { | ||
|  |                 parse_error("invalid codepoint, stray low surrogate"); | ||
|  |             } | ||
|  |             if (is_surrogate_high(codepoint)) { | ||
|  |                 expect(&Encoding::is_backslash, | ||
|  |                     "invalid codepoint, stray high surrogate"); | ||
|  |                 expect(&Encoding::is_u, | ||
|  |                     "expected codepoint reference after high surrogate"); | ||
|  |                 int low = parse_hex_quad(); | ||
|  |                 if (!is_surrogate_low(low)) { | ||
|  |                     parse_error("expected low surrogate after high surrogate"); | ||
|  |                 } | ||
|  |                 codepoint = combine_surrogates(codepoint, low); | ||
|  |             } | ||
|  |             feed(codepoint); | ||
|  |         } | ||
|  | 
 | ||
|  |         void feed(unsigned codepoint) { | ||
|  |             encoding.feed_codepoint(codepoint, | ||
|  |                                     boost::bind(&Callbacks::on_code_unit, | ||
|  |                                                 boost::ref(callbacks), _1)); | ||
|  |         } | ||
|  | 
 | ||
|  |         Callbacks& callbacks; | ||
|  |         Encoding& encoding; | ||
|  |         source src; | ||
|  |     }; | ||
|  | 
 | ||
|  | }}}} | ||
|  | 
 | ||
|  | #endif |