bug4556.xquery - Saxon - Saxonica Developer Community

Bug #4556 » bug4556.xquery

Gunther Rademacher, 2020-05-17 22:03
    
    xquery version "1.0" encoding "UTF-8";

    (: This file was generated on Sun May 17, 2020 21:59 (UTC+02) by REx v5.50 which is Copyright (c) 1979-2020 by Gunther Rademacher <grd@gmx.net> :)

    (: REx command line: -backtrack -xquery -a xq -tree bug4556.ebnf :)

                                                                          (: line 1 "bug4556.ebnf" :)

                                                                          declare namespace p="bug4556";

                                                                          (: line 9 "bug4556.xquery" :)

    (:
     : Lexer state layout. The parser passes its state from function to
     : function as one flat sequence; the variables below name the 1-based
     : positions of the individual fields inside that sequence.
     :)

    (:~
     : Position of the combined lookahead code. p:lookahead1W stores the
     : level-1 token code here, p:lookahead2W stores l2 * 16 + l1, and
     : p:lookahead3W stores l3 * 256 + the previous combined code.
     :)

    declare variable $p:lk as xs:integer := 1;

    (:~
     : Position of the input-string index at which the most recently
     : consumed token begins.
     :)

    declare variable $p:b0 as xs:integer := 2;

    (:~
     : Position of the input-string index at which the most recently
     : consumed token ends.
     :)

    declare variable $p:e0 as xs:integer := 3;

    (:~
     : Position of the token code of the level-1 lookahead token.
     : A value of 0 means that no level-1 token has been fetched yet
     : (see p:lookahead1W).
     :)

    declare variable $p:l1 as xs:integer := 4;

    (:~
     : Position of the input-string index at which the level-1 lookahead
     : token begins.
     :)

    declare variable $p:b1 as xs:integer := 5;

    (:~
     : Position of the input-string index at which the level-1 lookahead
     : token ends.
     :)

    declare variable $p:e1 as xs:integer := 6;

    (:~
     : Position of the token code of the level-2 lookahead token.
     : A value of 0 means that no level-2 token has been fetched yet
     : (see p:lookahead2W).
     :)

    declare variable $p:l2 as xs:integer := 7;

    (:~
     : Position of the input-string index at which the level-2 lookahead
     : token begins.
     :)

    declare variable $p:b2 as xs:integer := 8;

    (:~
     : Position of the input-string index at which the level-2 lookahead
     : token ends.
     :)

    declare variable $p:e2 as xs:integer := 9;

    (:~
     : Position of the token code of the level-3 lookahead token.
     : A value of 0 means that no level-3 token has been fetched yet
     : (see p:lookahead3W).
     :)

    declare variable $p:l3 as xs:integer := 10;

    (:~
     : Position of the input-string index at which the level-3 lookahead
     : token begins.
     :)

    declare variable $p:b3 as xs:integer := 11;

    (:~
     : Position of the input-string index at which the level-3 lookahead
     : token ends.
     :)

    declare variable $p:e3 as xs:integer := 12;

    (:~
     : Position of the error indicator. p:consume and p:consumeT place an
     : "error" element here when the lookahead token does not match the
     : expected one; all parsing functions test this field and return the
     : state unchanged once it is set.
     :)

    declare variable $p:error as xs:integer := 13;

    (:~
     : Position of the memoization of backtracking results. p:consume and
     : p:consumeT read the @e attribute of this item when building an
     : error element.
     :)

    declare variable $p:memo as xs:integer := 14;

    (:~
     : Position of the first entry used for collecting action results;
     : p:consume and p:whitespace append result nodes at the end of the
     : state sequence.
     :)

    declare variable $p:result as xs:integer := 15;

    (:~
     : The codepoint to charclass mapping for 7 bit codepoints.
     :
     : p:transition reads it as $p:MAP0[1 + $codepoint] for codepoints
     : below 128. When there is no character at the current position,
     : p:transition substitutes codepoint 0, which selects the first
     : entry (class 12).
     :)

    declare variable $p:MAP0 as xs:integer+ :=

    (

      12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 4, 4, 4, 4, 4,

      5, 4, 4, 4, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,

      4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 9, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,

      4, 4, 4, 4, 4, 4, 10, 4, 11, 4, 4

    );

    (:~
     : The codepoint to charclass mapping for codepoints below the surrogate block.
     :
     : p:transition uses it for codepoints from 128 up to (but excluding)
     : 55296 as a three-level lookup stored in this single sequence:
     : the entry selected by (codepoint idiv 16) idiv 64 is an offset for
     : the lookup by (codepoint idiv 16) mod 64, whose entry in turn is an
     : offset for the final lookup by codepoint mod 16.
     :)

    declare variable $p:MAP1 as xs:integer+ :=

    (

      54, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,

      62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 126, 140, 156,

      171, 168, 187, 168, 201, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140,

      140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140,

      140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 12,

      0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 4, 4, 4, 4, 4, 5, 4, 4,

      4, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 9, 4, 4, 4, 4,

      4, 4, 4, 4, 4, 4, 4, 10, 4, 11, 4, 4

    );

    (:~
     : The token-set-id to DFA-initial-state mapping.
     :
     : p:match reads it as $p:INITIAL[1 + $token-set] and hands the entry
     : to p:transition twice: unchanged as the initial result value, and
     : reduced mod 16 as the initial DFA state.
     :)

    declare variable $p:INITIAL as xs:integer+ :=

    (

      1, 2, 3, 4, 5, 6, 7

    );

    (:~
     : The DFA transition table.
     :
     : p:transition addresses it in two steps. With
     : i0 = 16 * charclass + state - 1, the entry at position
     : (i0 idiv 4) + 1 is an offset, and the next state is the entry at
     : position (i0 mod 4) + offset + 1. A next-state value above 15
     : carries a result annotation: value mod 16 is the DFA state to
     : continue in, and value idiv 16 is the result code that
     : p:transition later reports (minus 1) as the token code.
     :)

    declare variable $p:TRANSITION as xs:integer+ :=

    (

      95, 95, 95, 95, 116, 117, 64, 95, 80, 81, 70, 95, 52, 52, 56, 68, 95, 95, 56, 68, 95, 95, 74, 62, 90, 85, 56, 68, 96,

      97, 56, 68, 94, 95, 88, 68, 95, 95, 109, 68, 102, 101, 59, 77, 95, 95, 59, 106, 95, 113, 95, 95, 72, 72, 72, 72, 12,

      14, 0, 12, 0, 0, 12, 14, 0, 0, 80, 0, 13, 14, 0, 0, 11, 0, 13, 14, 0, 13, 0, 0, 0, 11, 11, 11, 0, 0, 112, 112, 0, 14,

      0, 0, 112, 0, 9, 0, 0, 0, 0, 128, 0, 10, 10, 10, 0, 0, 13, 48, 0, 0, 14, 0, 32, 96, 96, 0, 0, 80, 80, 80, 0

    );

    (:~
     : The DFA-state to expected-token-set mapping.
     :
     : p:expected-token-set reads the entry at position $state and passes
     : it to p:token as a bitset: bit k being set selects the token
     : string $p:TOKEN[k + 1], i.e. the token with code k.
     :)

    declare variable $p:EXPECTED as xs:integer+ :=

    (

      14, 28, 88, 152, 60, 124, 220, 8, 2, 4, 16, 2, 2, 4

    );

    (:~
     : The token-string table.
     :
     : Entries are looked up as $p:TOKEN[1 + token code], so the first
     : entry belongs to token code 0. p:consume uses the entry as the
     : element name of the token node, except for entries that start
     : with an apostrophe, which it renders as "TOKEN" elements.
     :)

    declare variable $p:TOKEN as xs:string+ :=

    (

      "(0)",

      "SetLiteral",

      "SetName",

      "Whitespace_Ch",

      "NewLine",

      "EOF",

      "'+'",

      "'='"

    );

    (:~
     : Scan one token from the input, beginning at the given index. The
     : DFA is entered in the state that belongs to the set of tokens
     : which are acceptable in the current parsing context.
     :
     : @param $input the input string.
     : @param $begin the index in the input string where scanning starts.
     : @param $token-set the id of the expected token set.
     : @return three integers: the token code, followed by the begin and
     : end positions of the token in the input string. When no token
     : matches, the first integer is the negated id of the DFA state
     : that failed, and the positions delimit the longest viable prefix.
     :)
    declare function p:match($input as xs:string,
                             $begin as xs:integer,
                             $token-set as xs:integer) as xs:integer+
    {
      (: the table entry serves both as initial result and, mod 16, as entry state :)
      let $initial := $p:INITIAL[$token-set + 1]
      let $entry-state := $initial mod 16
      return p:transition($input, $begin, $begin, $begin, $initial, $entry-state, 0)
    };

    (:~
     : The DFA state transition function. While the DFA is in a valid
     : (non-zero) state, classify the codepoint at the current position,
     : look up the next state, remember its result annotation if it has
     : one, and recurse. Once state 0 is reached, report the remembered
     : result, or the negated id of the failing state if there is none.
     :
     : NOTE(review): no return type is declared, as in the generated
     : original - presumably so that the recursive calls stay plain tail
     : calls; confirm before adding one.
     :
     : @param $input the input string.
     : @param $begin the begin index of the current token in the input string.
     : @param $current the index of the current position in the input string.
     : @param $end the end index of the result in the input string.
     : @param $result the result code.
     : @param $current-state the current DFA state.
     : @param $previous-state the previous DFA state.
     : @return a sequence of three: the token code of the result token,
     : with input string begin and end positions. If there is no valid
     : token, the negative id of the DFA state that failed, along with
     : begin and end positions of the longest viable prefix.
     :)
    declare function p:transition($input as xs:string,
                                  $begin as xs:integer,
                                  $current as xs:integer,
                                  $end as xs:integer,
                                  $result as xs:integer,
                                  $current-state as xs:integer,
                                  $previous-state as xs:integer)
    {
      if ($current-state ne 0) then
        (: a position past the end of the input is treated as codepoint 0 :)
        let $codepoint := (string-to-codepoints(substring($input, $current, 1)), 0)[1]
        let $charclass :=
          if ($codepoint lt 128) then
            $p:MAP0[1 + $codepoint]
          else if ($codepoint lt 55296) then
            (: three-level lookup, innermost index first :)
            let $upper := $codepoint idiv 16
            let $top := $upper idiv 64
            let $level2 := $p:MAP1[1 + $top]
            let $level1 := $p:MAP1[1 + $upper mod 64 + $level2]
            return $p:MAP1[1 + $codepoint mod 16 + $level1]
          else
            0
        let $position := $current + 1
        let $cell := 16 * $charclass + $current-state - 1
        let $offset := $p:TRANSITION[$cell idiv 4 + 1]
        let $target := $p:TRANSITION[$cell mod 4 + $offset + 1]
        return
          if ($target gt 15) then
            (: the target carries a result annotation: remember it and the end position :)
            p:transition($input, $begin, $position, $position, $target, $target mod 16, $current-state)
          else
            p:transition($input, $begin, $position, $end, $result, $target, $current-state)
      else
        let $token := $result idiv 16
        let $length := string-length($input)
        (: never report an end position beyond one past the end of the input :)
        let $limit := if ($end gt $length) then $length + 1 else $end
        return
          if ($token eq 0) then
            (- $previous-state, $begin, $current - 1)
          else
            ($token - 1, $begin, $limit)
    };

    (:~
     : Translate one 32-bit chunk of an expected-token bitset into the
     : corresponding token strings, handling one bit per recursion level,
     : lowest bit first.
     :
     : @param $result the token strings collected by previous recursion levels.
     : @param $chunk the (remaining) 32-bit chunk of the expected token bitset.
     : @param $base-token-code the $p:TOKEN index that belongs to bit 0 of $chunk.
     : @return the set of token strings.
     :)
    declare function p:token($result as xs:string*,
                             $chunk as xs:integer,
                             $base-token-code as xs:integer)
    {
      if ($chunk eq 0) then
        $result
      else
        let $name := if ($chunk mod 2 eq 0) then () else $p:TOKEN[$base-token-code]
        let $halved := $chunk idiv 2
        (: a negative chunk gets 2147483648 added after halving, making the remainder non-negative :)
        let $remaining := if ($chunk lt 0) then $halved + 2147483648 else $halved
        return p:token(($result, $name), $remaining, $base-token-code + 1)
    };

    (:~
     : Compute the names of the tokens that are acceptable in a given
     : DFA state. This grammar needs only a single bitset chunk per state,
     : so the chunk is simply the $p:EXPECTED entry of that state.
     :
     : @param $state the DFA state.
     : @return the set of token strings; empty for a non-positive state.
     :)
    declare function p:expected-token-set($state as xs:integer) as xs:string*
    {
      if ($state le 0) then
        ()
      else
        p:token((), $p:EXPECTED[$state], 1)
    };

    (:~
     : Parse the 1st loop of production nl (one or more NewLine tokens).
     : Each recursion level consumes one NewLine and then peeks at the
     : following token to decide whether to go round again.
     :
     : @param $input the input string.
     : @param $state lexer state, error indicator, and result stack.
     : @return the updated state.
     :)
    declare function p:parse-nl-1($input as xs:string, $state as item()+)
    {
      if ($state[$p:error]) then
        $state
      else
        let $consumed := p:consume(4, $input, $state)           (: NewLine :)
        let $ahead := p:lookahead1W(4, $input, $consumed)       (: SetName | Whitespace_Ch | NewLine | EOF :)
        return
          if ($ahead[$p:l1] = 4) then                           (: NewLine :)
            p:parse-nl-1($input, $ahead)
          else
            $ahead
    };

    (:~
     : Parse nl and wrap what was produced into an "nl" element.
     :
     : @param $input the input string.
     : @param $state lexer state, error indicator, and result stack.
     : @return the updated state.
     :)
    declare function p:parse-nl($input as xs:string, $state as item()+) as item()+
    {
      let $parsed := p:parse-nl-1($input, $state)
      return p:reduce($parsed, "nl", count($state), $state[$p:e0], $parsed[$p:e0])
    };

    (:~
     : Parse Set_Item, which is either a SetLiteral or a SetName token,
     : and wrap the result into a "Set_Item" element.
     :
     : @param $input the input string.
     : @param $state lexer state, error indicator, and result stack.
     : @return the updated state.
     :)
    declare function p:parse-Set_Item($input as xs:string, $state as item()+) as item()+
    {
      let $parsed :=
        if ($state[$p:error]) then
          $state
        else
          (: SetLiteral when that is the lookahead token, SetName otherwise :)
          p:consume(if ($state[$p:l1] = 1) then 1 else 2, $input, $state)
      return p:reduce($parsed, "Set_Item", count($state), $state[$p:e0], $parsed[$p:e0])
    };

    (:~
     : Try parsing Set_Item: same token decisions as p:parse-Set_Item,
     : but tokens are consumed with p:consumeT, so no result nodes are
     : created and nothing is reduced.
     :
     : @param $input the input string.
     : @param $state lexer state, error indicator, and result stack.
     : @return the updated state.
     :)
    declare function p:try-Set_Item($input as xs:string, $state as item()+) as item()+
    {
      if ($state[$p:error]) then
        $state
      else
        (: SetLiteral when that is the lookahead token, SetName otherwise :)
        p:consumeT(if ($state[$p:l1] = 1) then 1 else 2, $input, $state)
    };

    (:~
     : Parse the 1st loop of production Set_Exp (zero or more repetitions
     : of: nl_opt '+' Set_Item). Use tail recursion for iteratively
     : updating the lexer state.
     :
     : Up to three tokens of lookahead decide whether another repetition
     : follows. When the lookahead is NewLine NewLine NewLine, the
     : decision is made by trial: the repetition is attempted with the
     : non-constructing try-/consumeT functions and the outcome is handed
     : to p:memoize. p:memoized, p:strip-result and p:memoize are defined
     : outside this function; presumably the -1 / -2 codes passed to
     : p:memoize end up in the lookahead code field, -1 meaning that the
     : trial succeeded - verify against those helpers.
     :
     : @param $input the input string.
     : @param $state lexer state, error indicator, and result stack.
     : @return the updated state.
     :)
    declare function p:parse-Set_Exp-1($input as xs:string, $state as item()+)
    {
      if ($state[$p:error]) then
        $state
      else
        let $first := p:lookahead1W(2, $input, $state)          (: Whitespace_Ch | NewLine | '+' :)
        let $predicted :=
          if ($first[$p:l1] eq 4) then                          (: NewLine :)
            let $second := p:lookahead2W(5, $input, $first)     (: SetName | Whitespace_Ch | NewLine | EOF | '+' :)
            return
              if ($second[$p:lk] eq 68) then                    (: NewLine NewLine :)
                p:lookahead3W(5, $input, $second)               (: SetName | Whitespace_Ch | NewLine | EOF | '+' :)
              else
                $second
          else
            (: one token suffices: the lookahead code is just the level-1 token code :)
            ($first[$p:l1], subsequence($first, $p:lk + 1))
        let $decided :=
          if ($predicted[$p:error]) then
            $predicted
          else if ($predicted[$p:lk] = 1092) then               (: NewLine NewLine NewLine :)
            let $recalled := p:memoized($predicted, 0)
            return
              if ($recalled[$p:lk] != 0) then
                $recalled
              else
                (: no decision available yet: attempt the repetition without building results :)
                let $stripped := p:strip-result($recalled)
                let $after-nl :=
                  if ($stripped[$p:error]) then
                    $stripped
                  else
                    p:try-nl_opt($input, $stripped)
                let $after-plus := p:consumeT(6, $input, $after-nl)       (: '+' :)
                let $ahead := p:lookahead1W(0, $input, $after-plus)       (: SetLiteral | SetName | Whitespace_Ch :)
                let $attempt :=
                  if ($ahead[$p:error]) then
                    $ahead
                  else
                    p:try-Set_Item($input, $ahead)
                let $verdict := if ($attempt[$p:error]) then -2 else -1
                return p:memoize($recalled, $attempt, 0, $recalled[$p:e0], $verdict, $verdict)
          else
            $predicted
        return
          (: -1, '+', NewLine '+', NewLine NewLine '+' :)
          if ($decided[$p:lk] = (-1, 6, 100, 1604)) then
            let $spaced := p:whitespace($input, $decided)
            let $newlines :=
              if ($spaced[$p:error]) then
                $spaced
              else
                p:parse-nl_opt($input, $spaced)
            let $plus := p:consume(6, $input, $newlines)        (: '+' :)
            let $next := p:lookahead1W(0, $input, $plus)        (: SetLiteral | SetName | Whitespace_Ch :)
            let $spaced-again := p:whitespace($input, $next)
            let $item :=
              if ($spaced-again[$p:error]) then
                $spaced-again
              else
                p:parse-Set_Item($input, $spaced-again)
            return p:parse-Set_Exp-1($input, $item)
          else
            $decided
    };

    (:~
     : Parse Set_Exp: one Set_Item followed by the repetition handled in
     : p:parse-Set_Exp-1; the result is wrapped into a "Set_Exp" element.
     :
     : @param $input the input string.
     : @param $state lexer state, error indicator, and result stack.
     : @return the updated state.
     :)
    declare function p:parse-Set_Exp($input as xs:string, $state as item()+) as item()+
    {
      let $item :=
        if ($state[$p:error]) then
          $state
        else
          p:parse-Set_Item($input, $state)
      let $parsed := p:parse-Set_Exp-1($input, $item)
      return p:reduce($parsed, "Set_Exp", count($state), $state[$p:e0], $parsed[$p:e0])
    };

    (:~
     : Parse Set_Decl: SetName nl_opt '=' Set_Exp nl, with skipped
     : whitespace recorded between the parts. The result is wrapped into
     : a "Set_Decl" element.
     :
     : @param $input the input string.
     : @param $state lexer state, error indicator, and result stack.
     : @return the updated state.
     :)
    declare function p:parse-Set_Decl($input as xs:string, $state as item()+) as item()+
    {
      let $name := p:consume(2, $input, $state)                 (: SetName :)
      let $ahead1 := p:lookahead1W(3, $input, $name)            (: Whitespace_Ch | NewLine | '=' :)
      let $spaced1 := p:whitespace($input, $ahead1)
      let $newlines :=
        if ($spaced1[$p:error]) then
          $spaced1
        else
          p:parse-nl_opt($input, $spaced1)
      let $equals := p:consume(7, $input, $newlines)            (: '=' :)
      let $ahead2 := p:lookahead1W(0, $input, $equals)          (: SetLiteral | SetName | Whitespace_Ch :)
      let $spaced2 := p:whitespace($input, $ahead2)
      let $expression :=
        if ($spaced2[$p:error]) then
          $spaced2
        else
          p:parse-Set_Exp($input, $spaced2)
      let $spaced3 := p:whitespace($input, $expression)
      let $parsed :=
        if ($spaced3[$p:error]) then
          $spaced3
        else
          p:parse-nl($input, $spaced3)
      return p:reduce($parsed, "Set_Decl", count($state), $state[$p:e0], $parsed[$p:e0])
    };

    (:~
     : Parse the 1st loop of production nl_opt (zero or more NewLine
     : tokens). Each recursion level peeks at the next token and, if it
     : is a NewLine, consumes it and goes round again.
     :
     : @param $input the input string.
     : @param $state lexer state, error indicator, and result stack.
     : @return the updated state.
     :)
    declare function p:parse-nl_opt-1($input as xs:string, $state as item()+)
    {
      if ($state[$p:error]) then
        $state
      else
        let $ahead := p:lookahead1W(6, $input, $state)          (: SetName | Whitespace_Ch | NewLine | '+' | '=' :)
        return
          if ($ahead[$p:l1] = 4) then                           (: NewLine :)
            p:parse-nl_opt-1($input, p:consume(4, $input, $ahead))
          else
            $ahead
    };

    (:~
     : Try parsing the 1st loop of production nl_opt (zero or more
     : NewLine tokens). Same loop as p:parse-nl_opt-1, but NewLine tokens
     : are consumed with p:consumeT, i.e. without creating result nodes.
     :
     : @param $input the input string.
     : @param $state lexer state, error indicator, and result stack.
     : @return the updated state.
     :)
    declare function p:try-nl_opt-1($input as xs:string, $state as item()+)
    {
      if ($state[$p:error]) then
        $state
      else
        let $ahead := p:lookahead1W(6, $input, $state)          (: SetName | Whitespace_Ch | NewLine | '+' | '=' :)
        return
          if ($ahead[$p:l1] = 4) then                           (: NewLine :)
            p:try-nl_opt-1($input, p:consumeT(4, $input, $ahead))
          else
            $ahead
    };

    (:~
     : Parse nl_opt and wrap what was produced into an "nl_opt" element.
     :
     : @param $input the input string.
     : @param $state lexer state, error indicator, and result stack.
     : @return the updated state.
     :)
    declare function p:parse-nl_opt($input as xs:string, $state as item()+) as item()+
    {
      let $parsed := p:parse-nl_opt-1($input, $state)
      return p:reduce($parsed, "nl_opt", count($state), $state[$p:e0], $parsed[$p:e0])
    };

    (:~
     : Try parsing nl_opt. This only runs the non-constructing loop
     : p:try-nl_opt-1; no element is reduced.
     :
     : @param $input the input string.
     : @param $state lexer state, error indicator, and result stack.
     : @return the updated state.
     :)
    declare function p:try-nl_opt($input as xs:string, $state as item()+) as item()+
    {
      p:try-nl_opt-1($input, $state)
    };

    (:~
     : Parse the 1st loop of production Grammar (one or more Set_Decl).
     : Each recursion level parses one Set_Decl and repeats as long as
     : the level-1 lookahead token is a SetName.
     :
     : @param $input the input string.
     : @param $state lexer state, error indicator, and result stack.
     : @return the updated state.
     :)
    declare function p:parse-Grammar-1($input as xs:string, $state as item()+)
    {
      if ($state[$p:error]) then
        $state
      else
        let $spaced := p:whitespace($input, $state)
        let $declared :=
          if ($spaced[$p:error]) then
            $spaced
          else
            p:parse-Set_Decl($input, $spaced)
        return
          if ($declared[$p:l1] = 2) then                        (: SetName :)
            p:parse-Grammar-1($input, $declared)
          else
            $declared
    };

    (:~
     : Parse Grammar: nl_opt, one or more Set_Decl, then EOF. The result
     : is wrapped into a "Grammar" element.
     :
     : @param $input the input string.
     : @param $state lexer state, error indicator, and result stack.
     : @return the updated state.
     :)
    declare function p:parse-Grammar($input as xs:string, $state as item()+) as item()+
    {
      let $ahead := p:lookahead1W(1, $input, $state)            (: SetName | Whitespace_Ch | NewLine :)
      let $spaced := p:whitespace($input, $ahead)
      let $newlines :=
        if ($spaced[$p:error]) then
          $spaced
        else
          p:parse-nl_opt($input, $spaced)
      let $declarations := p:parse-Grammar-1($input, $newlines)
      let $parsed := p:consume(5, $input, $declarations)        (: EOF :)
      return p:reduce($parsed, "Grammar", count($state), $state[$p:e0], $parsed[$p:e0])
    };

    (:~
     : Create a textual error message from a parsing error.
     :
     : The error descriptor carries the begin (@b) and end (@e) position
     : of the offending input and either the offending and expected token
     : codes (@o, @x: a syntax error) or the id of the failing DFA state
     : (@s: a lexical error).
     :
     : @param $input the input string.
     : @param $error the parsing error descriptor.
     : @return the error message.
     :)
    declare function p:error-message($input as xs:string, $error as element(error)) as xs:string
    {
      let $begin := xs:integer($error/@b)
      let $context := string-to-codepoints(substring($input, 1, $begin - 1))
      let $linefeeds := index-of($context, 10)
      let $line := count($linefeeds) + 1
      (: the column counts from the last linefeed before $begin, or from the start of the input :)
      let $column := ($begin - $linefeeds[last()], $begin)[1]
      return
        string-join
        (
          (
            if ($error/@o) then
              ("syntax error, found ", $p:TOKEN[$error/@o + 1])
            else
              "lexical analysis failed",
            "&#10;",
            "while expecting ",
            if ($error/@x) then
              $p:TOKEN[$error/@x + 1]
            else
              let $expected := p:expected-token-set($error/@s)
              return
              (
                (: brackets only when more than one token is expected :)
                "["[exists($expected[2])],
                string-join($expected, ", "),
                "]"[exists($expected[2])]
              ),
            "&#10;",
            if ($error/@o or $error/@e = $begin) then
              ()
            else
              (: @e is untyped; subtracting from it directly would give an xs:double, :)
              (: which string() renders as e.g. "1.0E6" from one million upwards, so  :)
              (: the attribute is cast to xs:integer first.                           :)
              ("after successfully scanning ", string(xs:integer($error/@e) - $begin), " characters beginning "),
            "at line ", string($line), ", column ", string($column), ":&#10;",
            "...", substring($input, $begin, 64), "..."
          ),
          ""
        )
    };

    (:~
     : Consume one token: compare lookahead token 1 with the expected
     : token. On a match, shift the lookahead fields down so that l1
     : becomes the current token and higher lookahead tokens move down,
     : and append result nodes: a text node for any input between the
     : previous token and this one, then an element for the token itself.
     : On a mismatch, record an "error" element in the error field of
     : the lexer state.
     :
     : @param $code the expected token.
     : @param $input the input string.
     : @param $state lexer state, error indicator, and result stack.
     : @return the updated state.
     :)
    declare function p:consume($code as xs:integer, $input as xs:string, $state as item()+) as item()+
    {
      if ($state[$p:error]) then
        $state
      else if ($state[$p:l1] ne $code) then
        let $memo := $state[$p:memo]
        let $failure :=
          element error
          {
            (: prefer the memoized error data when it reaches further into the input :)
            if ($state[$p:e1] < $memo/@e) then
              $memo/@*
            else
            (
              attribute b {$state[$p:b1]},
              attribute e {$state[$p:e1]},
              (: a negative l1 is the negated id of the DFA state that failed :)
              if ($state[$p:l1] lt 0) then
                attribute s {- $state[$p:l1]}
              else
                (attribute o {$state[$p:l1]}, attribute x {$code})
            )
          }
        return
        (
          subsequence($state, 1, $p:error - 1),
          $failure,
          subsequence($state, $p:error + 1)
        )
      else
        let $gap-begin := $state[$p:e0]
        let $token-begin := $state[$p:b1]
        let $token-end := $state[$p:e1]
        let $token := $p:TOKEN[1 + $state[$p:l1]]
        let $name := if (starts-with($token, "'")) then "TOKEN" else $token
        return
        (
          (: l1..e3 move to lk..e2, the level-3 lookahead is cleared :)
          subsequence($state, $p:l1, 9),
          0, 0, 0,
          subsequence($state, 13),
          if ($gap-begin ne $token-begin) then
            text
            {
              substring($input, $gap-begin, $token-begin - $gap-begin)
            }
          else
            (),
          element {$name}
          {
            substring($input, $token-begin, $token-end - $token-begin)
          }
        )
    };

    (:~
     : Consume one token without creating any output: compare lookahead
     : token 1 with the expected token. On a match, shift the lookahead
     : fields down so that l1 becomes the current token and higher
     : lookahead tokens move down. On a mismatch, record an "error"
     : element in the error field of the lexer state. Unlike p:consume,
     : no result nodes are appended.
     :
     : @param $code the expected token.
     : @param $input the input string.
     : @param $state lexer state, error indicator, and result stack.
     : @return the updated state.
     :)
    declare function p:consumeT($code as xs:integer, $input as xs:string, $state as item()+) as item()+
    {
      if ($state[$p:error]) then
        $state
      else if ($state[$p:l1] ne $code) then
        let $memo := $state[$p:memo]
        let $failure :=
          element error
          {
            (: prefer the memoized error data when it reaches further into the input :)
            if ($state[$p:e1] < $memo/@e) then
              $memo/@*
            else
            (
              attribute b {$state[$p:b1]},
              attribute e {$state[$p:e1]},
              (: a negative l1 is the negated id of the DFA state that failed :)
              if ($state[$p:l1] lt 0) then
                attribute s {- $state[$p:l1]}
              else
                (attribute o {$state[$p:l1]}, attribute x {$code})
            )
          }
        return
        (
          subsequence($state, 1, $p:error - 1),
          $failure,
          subsequence($state, $p:error + 1)
        )
      else
      (
        (: l1..e3 move to lk..e2, the level-3 lookahead is cleared :)
        subsequence($state, $p:l1, 9),
        0, 0, 0,
        subsequence($state, 13)
      )
    };

    (:~
     : Consume whitespace: when there is input between the end of the
     : consumed token and the begin of the level-1 lookahead token, append
     : it to the results as a text node, advance the end position to the
     : begin of the lookahead token, and reset the lookahead code to 0.
     :
     : @param $input the input string.
     : @param $state lexer state, error indicator, and result stack.
     : @return the updated state.
     :)
    declare function p:whitespace($input as xs:string,
                                  $state as item()+) as item()+
    {
      let $from := $state[$p:e0]
      let $to := $state[$p:b1]
      return
        if ($from = $to) then
          $state
        else
        (
          0,
          $state[$p:b0],
          $to,
          subsequence($state, $p:e0 + 1),
          text
          {
            substring($input, $from, $to - $from)
          }
        )
    };

    (:~
     : Fetch the next token with p:match, skipping any leading
     : whitespace: as long as the matched token is Whitespace_Ch,
     : matching is repeated from the end of that token.
     :
     : @param $input the input string.
     : @param $begin the index where to start.
     : @param $token-set the valid token set id.
     : @return a sequence of three values: the token code of the result
     : token, with input string positions of token begin and end.
     :)
    declare function p:matchW($input as xs:string,
                              $begin as xs:integer,
                              $token-set as xs:integer)
    {
      let $found := p:match($input, $begin, $token-set)
      return
        if ($found[1] != 3) then
          $found
        else                                                    (: Whitespace_Ch :)
          p:matchW($input, $found[3], $token-set)
    };

    (:~
     : Lookahead one token on level 1 with whitespace skipping. Nothing
     : happens when a level-1 token is already present; otherwise the
     : token is fetched, its code also becomes the lookahead code, and
     : the level-2 lookahead fields are cleared.
     :
     : @param $set the code of the DFA entry state for the set of valid tokens.
     : @param $input the input string.
     : @param $state lexer state, error indicator, and result stack.
     : @return the updated state.
     :)
    declare function p:lookahead1W($set as xs:integer, $input as xs:string, $state as item()+) as item()+
    {
      if ($state[$p:l1] eq 0) then
        let $token := p:matchW($input, $state[$p:e0], $set)
        return
        (
          $token[1],
          $state[$p:b0],
          $state[$p:e0],
          $token,
          0, 0, 0,
          subsequence($state, 10)
        )
      else
        $state
    };

    (:~
     : Lookahead one token on level 2 with whitespace skipping. An
     : already present level-2 token (together with the level-3 fields)
     : is reused; otherwise the token is fetched from the end of the
     : level-1 token and the level-3 fields are cleared. The lookahead
     : code becomes l2 * 16 + l1.
     :
     : @param $set the code of the DFA entry state for the set of valid tokens.
     : @param $input the input string.
     : @param $state lexer state, error indicator, and result stack.
     : @return the updated state.
     :)
    declare function p:lookahead2W($set as xs:integer, $input as xs:string, $state as item()+) as item()+
    {
      let $pending :=
        if ($state[$p:l2] eq 0) then
          (p:matchW($input, $state[$p:e1], $set), 0, 0, 0)
        else
          subsequence($state, $p:l2, 6)
      let $combined := $pending[1] * 16 + $state[$p:l1]
      return
      (
        $combined,
        subsequence($state, $p:b0, 5),
        $pending,
        subsequence($state, 13)
      )
    };

    (:~
     : Make sure a level-3 lookahead token is available, skipping
     : whitespace, and fold its code into the combined lookahead code.
     :
     : If $state[$p:l3] is non-zero, the three items starting at $p:l3
     : are reused. Otherwise the next token is matched starting at
     : $state[$p:e2]. The new item 1 is the level-3 token code times 256
     : plus the current $state[$p:lk]. The eight items starting at $p:b0
     : and everything from item 13 onwards are copied unchanged.
     :
     : @param $set the code of the DFA entry state for the set of valid tokens.
     : @param $input the input string.
     : @param $state lexer state, error indicator, and result stack.
     : @return the updated state.
     :)
    declare function p:lookahead3W($set as xs:integer, $input as xs:string, $state as item()+) as item()+
    {
      let $level3 :=
        if ($state[$p:l3] ne 0) then
          subsequence($state, $p:l3, 3)
        else
          p:matchW($input, $state[$p:e2], $set)
      let $combined := $level3[1] * 256 + $state[$p:lk]
      let $untouched-head := subsequence($state, $p:b0, 8)
      let $untouched-tail := subsequence($state, 13)
      return ($combined, $untouched-head, $level3, $untouched-tail)
    };

    (:~
     : Reduce the result stack, creating a nonterminal element. The
     : first $count items of $state are kept as they are; every item
     : after them is wrapped, in order, in a new element named $name,
     : which becomes the last item of the returned state.
     :
     : @param $state lexer state, error indicator, and result stack.
     : @param $name the name of the result node.
     : @param $count the number of leading items of $state that stay
     : outside the new element.
     : @param $begin the input index where the nonterminal begins
     : (not used by this function).
     : @param $end the input index where the nonterminal ends
     : (not used by this function).
     : @return the updated state.
     :)
    declare function p:reduce($state as item()+, $name as xs:string, $count as xs:integer, $begin as xs:integer, $end as xs:integer) as item()+
    {
      let $kept := $state[position() le $count]
      let $children := $state[position() gt $count]
      return
      (
        $kept,
        element {$name} {$children}
      )
    };

    (:~
     : Drop everything that follows the memo item from the state, so
     : that the result does not have to be carried along while
     : backtracking. Only items 1 through $p:memo are kept.
     :
     : @param $state the lexer state after an alternative failed.
     : @return the updated state.
     :)
    declare function p:strip-result($state as item()+) as item()+
    {
      $state[position() le $p:memo]
    };

    (:~
     : Memoize the backtracking result that was computed at decision
     : point $dpi for input position $e0, and rebuild the state.
     :
     : The returned state is $lk, followed by the items of $state from
     : $p:b0 up to (not including) $p:memo, a freshly built memo
     : element, and the items of $state after $p:memo.
     :
     : The new memo element contains:
     :   - the attributes of whichever of $update's memo item and
     :     $update's error item has the greatest integer @e (the last
     :     one if several tie); items whose effective boolean value is
     :     false are ignored,
     :   - all value children of $update's memo item,
     :   - a new value child with key $e0 * 1 + $dpi and content $v.
     :
     : @param $state the lexer state to be restored.
     : @param $update the lexer state containing updates.
     : @param $dpi the decision point id.
     : @param $e0 the input position.
     : @param $v the id of the successful alternative.
     : @param $lk the new lookahead code.
     : @return the reconstructed state.
     :)
    declare function p:memoize($state as item()+,
                               $update as item()+,
                               $dpi as xs:integer,
                               $e0 as xs:integer,
                               $v as xs:integer,
                               $lk as xs:integer) as item()+
    {
      let $previous := $update[$p:memo]
      let $candidates := ($previous, $update[$p:error])[.]
      let $furthest := max($candidates/xs:integer(@e))
      let $entry :=
        element memo
        {
          $candidates[@e = $furthest][last()]/@*,
          $previous/value,
          element value {attribute key {$e0 * 1 + $dpi}, $v}
        }
      return
      (
        $lk,
        subsequence($state, $p:b0, $p:memo - $p:b0),
        $entry,
        subsequence($state, $p:memo + 1)
      )
    };

    (:~
     : Look up the memoized backtracking result for decision point $dpi
     : at input position $state[$p:e0] and place it in $state[$p:lk].
     :
     : The lookup key is $state[$p:e0] * 1 + $dpi; it is compared with
     : the key attribute of the value children of the memo item. When
     : the lookup yields nothing (or a value whose effective boolean
     : value is false), 0 is stored instead.
     :
     : @param $state lexer state, error indicator, and result stack.
     : @param $dpi the decision point id.
     : @return the updated state.
     :)
    declare function p:memoized($state as item()+, $dpi as xs:integer) as item()+
    {
      let $key := $state[$p:e0] * 1 + $dpi
      let $hit := data($state[$p:memo]/value[@key = $key])
      let $code := if ($hit) then $hit else 0
      return ($code, subsequence($state, $p:lk + 1))
    };

    (:~
     : Parse start symbol Grammar from given string. Builds the initial
     : state, hands it to the two-argument p:parse-Grammar, and then
     : inspects the error item of the resulting state.
     :
     : @param $s the string to be parsed.
     : @return an ERROR element (carrying the attributes of the error
     : item and the text produced by p:error-message) when an error was
     : recorded; otherwise the items of the final state from $p:result
     : onwards.
     :)
    declare function p:parse-Grammar($s as xs:string) as item()*
    {
      let $initial :=
        (
          0, 1, 1,                       (: items at $p:lk, $p:b0, $p:e0 :)
          0, 0, 0,                       (: items at $p:l1, $p:b1, $p:e1 :)
          0, 0, 0,                       (: item at $p:l2 and the two after it :)
          0, 0, 0,
          false(),
          <memo/>
        )
      let $final := p:parse-Grammar($s, $initial)
      let $failure := $final[$p:error]
      return
        if (not($failure)) then
          subsequence($final, $p:result)
        else
          element ERROR {$failure/@*, p:error-message($s, $failure)}
    };

                                                                          (: line 26 "bug4556.ebnf" :)

                                                                          (: Query body: run the one-argument p:parse-Grammar on a
                                                                             two-line sample input. The string literal spans several
                                                                             lines, so its line breaks and leading blanks are part
                                                                             of the parsed input. :)
                                                                          p:parse-Grammar(

                                                                          "{HT} = {#09}

                                                                          {Whitespace} = {Space}

                                                                          "

                                                                          )

                                                                          (: line 1035 "bug4556.xquery" :)

    (: End :)
« Previous
1
2
Next »
(2-2/2)
Project

Profile

Help

Saxon

Bug #4556 » bug4556.xquery