js8call/.svn/pristine/6d/6d16ed6e90c0046e3af97f103bcc3354573f8b99.svn-base

/*
 *
 * Copyright (c) 1998-2002
 * John Maddock
 *
 * Use, modification and distribution are subject to the 
 * Boost Software License, Version 1.0. (See accompanying file 
 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 *
 */

 /*
  *   LOCATION:    see http://www.boost.org for most recent version.
  *   FILE         states.hpp
  *   VERSION      see <boost/version.hpp>
  *   DESCRIPTION: Declares internal state machine structures.
  */

#ifndef BOOST_REGEX_V4_STATES_HPP
#define BOOST_REGEX_V4_STATES_HPP

#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable: 4103)
#endif
#ifdef BOOST_HAS_ABI_HEADERS
#  include BOOST_ABI_PREFIX
#endif
#ifdef BOOST_MSVC
#pragma warning(pop)
#endif

namespace boost{
namespace BOOST_REGEX_DETAIL_NS{

/*** mask_type *******************************************************
Flag bits kept in the per-character byte arrays that accompany every
choice between two alternatives.  For a given input character:
   mask_take set - the next state can be taken;
   mask_skip set - the alternative can be taken.
***********************************************************************/
enum mask_type
{
   mask_take = 1 << 0,
   mask_skip = 1 << 1,
   mask_init = 1 << 2,
   mask_any  = mask_take | mask_skip,   // both of the branch bits
   mask_all  = mask_any                 // same value as mask_any
};

/*** helpers **********************************************************
Tag types and a small trait used to pick overloads by character width:
is_byte<charT>::width_type is _narrow_type for char, unsigned char and
signed char, and _wide_type for every other type.
***********************************************************************/
struct _narrow_type{};
struct _wide_type{};

// Primary template: anything not specialised below counts as wide.
template <class charT>
struct is_byte
{
   typedef _wide_type width_type;
};

// The three single-byte character types are narrow.
template<>
struct is_byte<char>
{
   typedef _narrow_type width_type;
};

template<>
struct is_byte<unsigned char>
{
   typedef _narrow_type width_type;
};

template<>
struct is_byte<signed char>
{
   typedef _narrow_type width_type;
};

/*** enum syntax_element_type ******************************************
The kind of each record in the state machine.  Enumerators are numbered
consecutively from zero in the order they are listed.
***********************************************************************/
enum syntax_element_type
{
   // opens a marked sub-expression, or a perl-style (?...) extension
   syntax_element_startmark = 0,
   // closes a marked sub-expression, or a perl-style (?...) extension
   syntax_element_endmark,
   // a sequence of literal characters
   syntax_element_literal,
   // ^ : start of line assertion
   syntax_element_start_line,
   // $ : end of line assertion
   syntax_element_end_line,
   // . : match any character
   syntax_element_wild,
   // end of the expression: reaching this state means we have a match
   syntax_element_match,
   // \b : perl style word boundary
   syntax_element_word_boundary,
   // \B : perl style within word boundary
   syntax_element_within_word,
   // \< : start of word assertion
   syntax_element_word_start,
   // \> : end of word assertion
   syntax_element_word_end,
   // \` : start of buffer assertion
   syntax_element_buffer_start,
   // \' : end of buffer assertion
   syntax_element_buffer_end,
   // backreference to a previously matched sub-expression
   syntax_element_backref,
   // a wide character set [..], or a set with multicharacter collating elements
   syntax_element_long_set,
   // [...] : narrow character set
   syntax_element_set,
   // jump to a new state in the machine
   syntax_element_jump,
   // choice between two production states
   syntax_element_alt,
   // a repeat
   syntax_element_rep,
   // match a combining character sequence
   syntax_element_combining,
   // \z : perl style soft buffer end
   syntax_element_soft_buffer_end,
   // \G : perl style continuation
   syntax_element_restart_continue,
   // single character repeats:
   syntax_element_dot_rep,
   syntax_element_char_rep,
   syntax_element_short_set_rep,
   syntax_element_long_set_rep,
   // backstep used by lookbehind repeats
   syntax_element_backstep,
   // assertion that a mark was matched
   syntax_element_assert_backref,
   syntax_element_toggle_case,
   // a recursive expression
   syntax_element_recurse,
   // verbs:
   syntax_element_fail,
   syntax_element_accept,
   syntax_element_commit,
   syntax_element_then
};

#ifdef BOOST_REGEX_DEBUG
// Only present when BOOST_REGEX_DEBUG is defined: declares a stream inserter
// for syntax_element_type (declaration only; no definition in this header).
// dwa 09/26/00 - This is needed to suppress warnings about an ambiguous conversion
std::ostream& operator<<(std::ostream&, syntax_element_type);
#endif

// Forward declaration: offset_type below stores a pointer to this type
// before the struct itself is defined.
struct re_syntax_base;

/*** union offset_type ************************************************
Points to another state in the machine.  During machine construction
we use integral offsets, but these are converted to pointers before
execution of the machine.
***********************************************************************/
union offset_type
{
   re_syntax_base*   p;   // pointer form, used when the machine is executed
   std::ptrdiff_t    i;   // integral offset form, used during construction
};

/*** struct re_syntax_base ********************************************
Base class for all states in the machine.  Every other state structure
in this header derives from it, directly or through re_jump / re_alt.
***********************************************************************/
struct re_syntax_base
{
   syntax_element_type   type;         // what kind of state this is
   offset_type           next;         // next state in the machine
};

/*** struct re_brace **************************************************
A marked parenthesis.
***********************************************************************/
struct re_brace : public re_syntax_base
{
   // The index to match, can be zero (don't mark the sub-expression)
   // or negative (for perl style (?...) extensions):
   int index;
   // NOTE(review): presumably the case-insensitivity setting that applies
   // at this parenthesis - confirm against the code that reads it.
   bool icase;
};

/*** struct re_dot **************************************************
Match anything.
***********************************************************************/
// Unnamed constants declared alongside re_dot.  Note that force_newline
// and test_not_newline share the value 2.
// NOTE(review): the first group looks like values for re_dot::mask and the
// test_* group like masks applied to it - confirm against the matcher.
enum
{
   force_not_newline = 0,
   dont_care         = 1,
   force_newline     = 2,

   test_not_newline  = 2,
   test_newline      = 3
};
// State for the "match anything" element.
struct re_dot : public re_syntax_base
{
   // NOTE(review): presumably one of dont_care / force_not_newline /
   // force_newline from the unnamed enum declared just before this struct - confirm.
   unsigned char mask;
};

/*** struct re_literal ************************************************
A string of literals, following this structure will be an 
array of characters: charT[length]
***********************************************************************/
struct re_literal : public re_syntax_base
{
   unsigned int length;   // number of characters in the array that follows
};

/*** struct re_case ************************************************
Indicates whether we are moving to a case insensitive block or not
***********************************************************************/
struct re_case : public re_syntax_base
{
   bool icase;   // the case-insensitivity indication described above
};

/*** struct re_set_long ***********************************************
A wide character set.  The structure is followed in memory by an array
of charT holding, in this order:
   csingles        null-terminated strings,
   2 * cranges     null-terminated strings,
   cequivalents    null-terminated strings.
The template parameter is the type used for the two class fields.
***********************************************************************/
template <class char_classT>
struct re_set_long : public re_syntax_base
{
   unsigned int   csingles;       // count used for the first group of trailing strings
   unsigned int   cranges;        // count of ranges; two trailing strings each
   unsigned int   cequivalents;   // count used for the last group of trailing strings
   char_classT    cclasses;       // NOTE(review): presumably classes the set accepts - confirm
   char_classT    cnclasses;      // NOTE(review): presumably classes the set rejects - confirm
   bool           isnot;          // NOTE(review): presumably true for a negated set - confirm
   bool           singleton;      // NOTE(review): meaning not evident from this header
};

/*** struct re_set ****************************************************
A set of narrow-characters, matches any of _map which is non-zero
***********************************************************************/
struct re_set : public re_syntax_base
{
   // 1 << CHAR_BIT entries, i.e. one per possible unsigned char value.
   unsigned char _map[1 << CHAR_BIT];
};

/*** struct re_jump ***************************************************
Jump to a new location in the machine (not next).
Also serves as the base of re_alt and re_recurse.
***********************************************************************/
struct re_jump : public re_syntax_base
{
   offset_type     alt;                 // location to jump to
};

/*** struct re_alt ***************************************************
Jump to a new location in the machine (possibly next).
_map has 1 << CHAR_BIT entries, i.e. one per possible unsigned char value.
NOTE(review): the entries presumably hold mask_type bits (mask_take /
mask_skip) - confirm against the code that fills and reads _map.
***********************************************************************/
struct re_alt : public re_jump
{
   unsigned char   _map[1 << CHAR_BIT]; // which characters can take the jump
   unsigned int    can_be_null;         // true if we match a NULL string
};

/*** struct re_repeat *************************************************
A repeated section of the machine.  Extends re_alt with the repeat
bounds and per-repeat bookkeeping.
***********************************************************************/
struct re_repeat : public re_alt
{
   std::size_t   min;        // fewest repeats allowed
   std::size_t   max;        // most repeats allowed
   int           state_id;   // identifier unique to this repeat
   bool          leading;    // set when the repeat is at the start of the machine (lets some searches be optimized)
   bool          greedy;     // set when the repeat is greedy
};

/*** struct re_recurse ************************************************
Recurse to a particular subexpression.
The target is held in the inherited re_jump::alt member.
NOTE(review): that alt is the recursion target is inferred from the
inheritance - confirm against the matcher.
**********************************************************************/
struct re_recurse : public re_jump
{
   int state_id;             // identifier of first nested repeat within the recursion.
};

/*** struct re_commit *************************************************
Used for the PRUNE, SKIP and COMMIT verbs which basically differ only in what happens
if no match is found and we start searching forward.
**********************************************************************/
// Distinguishes the three verbs that share the re_commit state
// (PRUNE, SKIP and COMMIT).
enum commit_type
{
   commit_prune  = 0,
   commit_skip   = 1,
   commit_commit = 2
};
// State shared by the PRUNE, SKIP and COMMIT verbs.
struct re_commit : public re_syntax_base
{
   commit_type action;   // which of the verbs this state stands for
};

/*** enum re_jump_size_type *******************************************
Provides compiled size of re_jump structure (allowing for trailing alignment).
We provide this so we know how many bytes to insert when constructing the machine
(The value of padding_mask is defined in regex_raw_buffer.hpp).
Each value is (sizeof(T) + padding_mask) & ~padding_mask.
NOTE(review): this rounds sizeof(T) up to a multiple of padding_mask + 1
provided padding_mask is one less than a power of two - confirm in
regex_raw_buffer.hpp.
***********************************************************************/
enum re_jump_size_type
{
   re_jump_size = (sizeof(re_jump) + padding_mask) & ~(padding_mask),
   // note the name: re_repeater_size is the padded size of re_repeat
   re_repeater_size = (sizeof(re_repeat) + padding_mask) & ~(padding_mask),
   re_alt_size = (sizeof(re_alt) + padding_mask) & ~(padding_mask)
};

/*** proc re_is_set_member *********************************************
Forward declaration: we'll need this one later...
Both regex_data and re_is_set_member are only declared here; neither is
defined in this header.
***********************************************************************/

// Declared so the signature below can name regex_data<charT, traits_type>.
template<class charT, class traits>
struct regex_data;

// Takes an iterator pair [next, last), a long set, the regex data and a
// case flag, and returns an iterator.
// NOTE(review): presumably tests the character(s) at next for membership of
// *set_; the meaning of the returned iterator is not visible here - confirm
// at the definition.
template <class iterator, class charT, class traits_type, class char_classT>
iterator BOOST_REGEX_CALL re_is_set_member(iterator next, 
                          iterator last, 
                          const re_set_long<char_classT>* set_, 
                          const regex_data<charT, traits_type>& e, bool icase);

} // namespace BOOST_REGEX_DETAIL_NS

} // namespace boost

#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable: 4103)
#endif
#ifdef BOOST_HAS_ABI_HEADERS
#  include BOOST_ABI_SUFFIX
#endif
#ifdef BOOST_MSVC
#pragma warning(pop)
#endif

#endif
Initial Commit 2018-02-08 21:28:33 -05:00			`/*`
			`*`
			`* Copyright (c) 1998-2002`
			`* John Maddock`
			`*`
			`* Use, modification and distribution are subject to the`
			`* Boost Software License, Version 1.0. (See accompanying file`
			`* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)`
			`*`
			`*/`

			`/*`
			`* LOCATION: see http://www.boost.org for most recent version.`
			`* FILE states.cpp`
			`* VERSION see <boost/version.hpp>`
			`* DESCRIPTION: Declares internal state machine structures.`
			`*/`

			`#ifndef BOOST_REGEX_V4_STATES_HPP`
			`#define BOOST_REGEX_V4_STATES_HPP`

			`#ifdef BOOST_MSVC`
			`#pragma warning(push)`
			`#pragma warning(disable: 4103)`
			`#endif`
			`#ifdef BOOST_HAS_ABI_HEADERS`
			`# include BOOST_ABI_PREFIX`
			`#endif`
			`#ifdef BOOST_MSVC`
			`#pragma warning(pop)`
			`#endif`

			`namespace boost{`
			`namespace BOOST_REGEX_DETAIL_NS{`

			`/* mask_type *****************************************************`
			`Whenever we have a choice of two alternatives, we use an array of bytes`
			`to indicate which of the two alternatives it is possible to take for any`
			`given input character. If mask_take is set, then we can take the next`
			`state, and if mask_skip is set then we can take the alternative.`
			`***********************************************************************/`
			`enum mask_type`
			`{`
			`mask_take = 1,`
			`mask_skip = 2,`
			`mask_init = 4,`
			`mask_any = mask_skip \| mask_take,`
			`mask_all = mask_any`
			`};`

			`/* helpers ********************************************************`
			`These helpers let us use function overload resolution to detect whether`
			`we have narrow or wide character strings:`
			`***********************************************************************/`
			`struct _narrow_type{};`
			`struct _wide_type{};`
			`template <class charT> struct is_byte;`
			`template<> struct is_byte<char> { typedef _narrow_type width_type; };`
			`template<> struct is_byte<unsigned char>{ typedef _narrow_type width_type; };`
			`template<> struct is_byte<signed char> { typedef _narrow_type width_type; };`
			`template <class charT> struct is_byte { typedef _wide_type width_type; };`

			`/* enum syntax_element_type ****************************************`
			`Every record in the state machine falls into one of the following types:`
			`***********************************************************************/`
			`enum syntax_element_type`
			`{`
			`// start of a marked sub-expression, or perl-style (?...) extension`
			`syntax_element_startmark = 0,`
			`// end of a marked sub-expression, or perl-style (?...) extension`
			`syntax_element_endmark = syntax_element_startmark + 1,`
			`// any sequence of literal characters`
			`syntax_element_literal = syntax_element_endmark + 1,`
			`// start of line assertion: ^`
			`syntax_element_start_line = syntax_element_literal + 1,`
			`// end of line assertion $`
			`syntax_element_end_line = syntax_element_start_line + 1,`
			`// match any character: .`
			`syntax_element_wild = syntax_element_end_line + 1,`
			`// end of expression: we have a match when we get here`
			`syntax_element_match = syntax_element_wild + 1,`
			`// perl style word boundary: \b`
			`syntax_element_word_boundary = syntax_element_match + 1,`
			`// perl style within word boundary: \B`
			`syntax_element_within_word = syntax_element_word_boundary + 1,`
			`// start of word assertion: \<`
			`syntax_element_word_start = syntax_element_within_word + 1,`
			`// end of word assertion: \>`
			`syntax_element_word_end = syntax_element_word_start + 1,`
			// start of buffer assertion: \`
			`syntax_element_buffer_start = syntax_element_word_end + 1,`
			`// end of buffer assertion: \'`
			`syntax_element_buffer_end = syntax_element_buffer_start + 1,`
			`// backreference to previously matched sub-expression`
			`syntax_element_backref = syntax_element_buffer_end + 1,`
			`// either a wide character set [..] or one with multicharacter collating elements:`
			`syntax_element_long_set = syntax_element_backref + 1,`
			`// narrow character set: [...]`
			`syntax_element_set = syntax_element_long_set + 1,`
			`// jump to a new state in the machine:`
			`syntax_element_jump = syntax_element_set + 1,`
			`// choose between two production states:`
			`syntax_element_alt = syntax_element_jump + 1,`
			`// a repeat`
			`syntax_element_rep = syntax_element_alt + 1,`
			`// match a combining character sequence`
			`syntax_element_combining = syntax_element_rep + 1,`
			`// perl style soft buffer end: \z`
			`syntax_element_soft_buffer_end = syntax_element_combining + 1,`
			`// perl style continuation: \G`
			`syntax_element_restart_continue = syntax_element_soft_buffer_end + 1,`
			`// single character repeats:`
			`syntax_element_dot_rep = syntax_element_restart_continue + 1,`
			`syntax_element_char_rep = syntax_element_dot_rep + 1,`
			`syntax_element_short_set_rep = syntax_element_char_rep + 1,`
			`syntax_element_long_set_rep = syntax_element_short_set_rep + 1,`
			`// a backstep for lookbehind repeats:`
			`syntax_element_backstep = syntax_element_long_set_rep + 1,`
			`// an assertion that a mark was matched:`
			`syntax_element_assert_backref = syntax_element_backstep + 1,`
			`syntax_element_toggle_case = syntax_element_assert_backref + 1,`
			`// a recursive expression:`
			`syntax_element_recurse = syntax_element_toggle_case + 1,`
			`// Verbs:`
			`syntax_element_fail = syntax_element_recurse + 1,`
			`syntax_element_accept = syntax_element_fail + 1,`
			`syntax_element_commit = syntax_element_accept + 1,`
			`syntax_element_then = syntax_element_commit + 1`
			`};`

			`#ifdef BOOST_REGEX_DEBUG`
			`// dwa 09/26/00 - This is needed to suppress warnings about an ambiguous conversion`
			`std::ostream& operator<<(std::ostream&, syntax_element_type);`
			`#endif`

			`struct re_syntax_base;`

			`/* union offset_type **********************************************`
			`Points to another state in the machine. During machine construction`
			`we use integral offsets, but these are converted to pointers before`
			`execution of the machine.`
			`***********************************************************************/`
			`union offset_type`
			`{`
			`re_syntax_base* p;`
			`std::ptrdiff_t i;`
			`};`

			`/* struct re_syntax_base ******************************************`
			`Base class for all states in the machine.`
			`***********************************************************************/`
			`struct re_syntax_base`
			`{`
			`syntax_element_type type; // what kind of state this is`
			`offset_type next; // next state in the machine`
			`};`

			`/* struct re_brace ************************************************`
			`A marked parenthesis.`
			`***********************************************************************/`
			`struct re_brace : public re_syntax_base`
			`{`
			`// The index to match, can be zero (don't mark the sub-expression)`
			`// or negative (for perl style (?...) extentions):`
			`int index;`
			`bool icase;`
			`};`

			`/* struct re_dot ************************************************`
			`Match anything.`
			`***********************************************************************/`
			`enum`
			`{`
			`dont_care = 1,`
			`force_not_newline = 0,`
			`force_newline = 2,`

			`test_not_newline = 2,`
			`test_newline = 3`
			`};`
			`struct re_dot : public re_syntax_base`
			`{`
			`unsigned char mask;`
			`};`

			`/* struct re_literal **********************************************`
			`A string of literals, following this structure will be an`
			`array of characters: charT[length]`
			`***********************************************************************/`
			`struct re_literal : public re_syntax_base`
			`{`
			`unsigned int length;`
			`};`

			`/* struct re_case **********************************************`
			`Indicates whether we are moving to a case insensive block or not`
			`***********************************************************************/`
			`struct re_case : public re_syntax_base`
			`{`
			`bool icase;`
			`};`

			`/* struct re_set_long *********************************************`
			`A wide character set of characters, following this structure will be`
			`an array of type charT:`
			`First csingles null-terminated strings`
			`Then 2 * cranges NULL terminated strings`
			`Then cequivalents NULL terminated strings`
			`***********************************************************************/`
			`template <class mask_type>`
			`struct re_set_long : public re_syntax_base`
			`{`
			`unsigned int csingles, cranges, cequivalents;`
			`mask_type cclasses;`
			`mask_type cnclasses;`
			`bool isnot;`
			`bool singleton;`
			`};`

			`/* struct re_set **************************************************`
			`A set of narrow-characters, matches any of _map which is none-zero`
			`***********************************************************************/`
			`struct re_set : public re_syntax_base`
			`{`
			`unsigned char _map[1 << CHAR_BIT];`
			`};`

			`/* struct re_jump *************************************************`
			`Jump to a new location in the machine (not next).`
			`***********************************************************************/`
			`struct re_jump : public re_syntax_base`
			`{`
			`offset_type alt; // location to jump to`
			`};`

			`/* struct re_alt *************************************************`
			`Jump to a new location in the machine (possibly next).`
			`***********************************************************************/`
			`struct re_alt : public re_jump`
			`{`
			`unsigned char _map[1 << CHAR_BIT]; // which characters can take the jump`
			`unsigned int can_be_null; // true if we match a NULL string`
			`};`

			`/* struct re_repeat ***********************************************`
			`Repeat a section of the machine`
			`***********************************************************************/`
			`struct re_repeat : public re_alt`
			`{`
			`std::size_t min, max; // min and max allowable repeats`
			`int state_id; // Unique identifier for this repeat`
			`bool leading; // True if this repeat is at the start of the machine (lets us optimize some searches)`
			`bool greedy; // True if this is a greedy repeat`
			`};`

			`/* struct re_recurse **********************************************`
			`Recurse to a particular subexpression.`
			`**********************************************************************/`
			`struct re_recurse : public re_jump`
			`{`
			`int state_id; // identifier of first nested repeat within the recursion.`
			`};`

			`/* struct re_commit ***********************************************`
			`Used for the PRUNE, SKIP and COMMIT verbs which basically differ only in what happens`
			`if no match is found and we start searching forward.`
			`**********************************************************************/`
			`enum commit_type`
			`{`
			`commit_prune,`
			`commit_skip,`
			`commit_commit`
			`};`
			`struct re_commit : public re_syntax_base`
			`{`
			`commit_type action;`
			`};`

			`/* enum re_jump_size_type *****************************************`
			`Provides compiled size of re_jump structure (allowing for trailing alignment).`
			`We provide this so we know how manybytes to insert when constructing the machine`
			`(The value of padding_mask is defined in regex_raw_buffer.hpp).`
			`***********************************************************************/`
			`enum re_jump_size_type`
			`{`
			`re_jump_size = (sizeof(re_jump) + padding_mask) & ~(padding_mask),`
			`re_repeater_size = (sizeof(re_repeat) + padding_mask) & ~(padding_mask),`
			`re_alt_size = (sizeof(re_alt) + padding_mask) & ~(padding_mask)`
			`};`

			`/* proc re_is_set_member *******************************************`
			`Forward declaration: we'll need this one later...`
			`***********************************************************************/`

			`template<class charT, class traits>`
			`struct regex_data;`

			`template <class iterator, class charT, class traits_type, class char_classT>`
			`iterator BOOST_REGEX_CALL re_is_set_member(iterator next,`
			`iterator last,`
			`const re_set_long<char_classT>* set_,`
			`const regex_data<charT, traits_type>& e, bool icase);`

			`} // namespace BOOST_REGEX_DETAIL_NS`

			`} // namespace boost`

			`#ifdef BOOST_MSVC`
			`#pragma warning(push)`
			`#pragma warning(disable: 4103)`
			`#endif`
			`#ifdef BOOST_HAS_ABI_HEADERS`
			`# include BOOST_ABI_SUFFIX`
			`#endif`
			`#ifdef BOOST_MSVC`
			`#pragma warning(pop)`
			`#endif`

			`#endif`