// Copyright (C)2009 Laurence Tratt http://tratt.net/laurie/
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.

import Builtins, CEI, CPK::Earley::DSL, CPK::Traverser, Exceptions, Strings, Sys


//
// This file implements an ultra-simple PEG parser generator. It can handle direct left-recursion
// correctly (including when direct right-recursion is also present in a rule).
//
// But it is fairly easy to understand. It uses fairly standard PEG syntax (as per Ford's original
// papers) apart from character classes are written ["..."] instead of [...] i.e. the characters
// in the character class must be enclosed in quotes (this is due to a limitation in the Converge
// tokenizer). And it produces parse trees so one can see what happened.
//

//
// This parser is structured into several sub-rules to make it more convenient for processing during
// the translation stage.
//

parse := $<<DSL::mk_parser("peg", ["state", "transition", "and", "or", "from", "to"], [])>>:
    peg ::= defn ( "NEWLINE" defn )*
    
    defn ::= "ID" "<" "-" seq_expr ( "/" seq_expr )*
    
    seq_expr ::= ( expr )*
    
    expr ::= match
           | ref
           | predicate
           | operator
           | "(" seq_expr ")"
           
    match ::= "STRING"
        | "[" "STRING" "]"
        | "."
    
    ref ::= "ID"

    predicate ::= "&" expr
        | "!" expr

    operator ::= expr "*"
        | expr "?"
        | expr "+"


func peg(dsl_block, src_infos):

    pn := parse(dsl_block, src_infos)
    
    nullables := Nullables.new().nullables(pn)
    Nullable_Calls.new().nullable_calls(pn, nullables)
    
    lrrs := Left_Recursive_Rules.new().lrrs(pn, nullables)
    Right_Recursive_Rules.new().rrrs(pn, nullables, lrrs)
    
    defns, defn_names := Peg_Translator.new().translate(pn)
    growing_init := []
    for defn_name := defn_names.iter():
        growing_init.append(CEI::idict_elem(CEI::istring(defn_name), CEI::idict([])))

    it := [|
        class:
            $c{defns}
            
            func init(self):
                self._growing := ${CEI::idict(growing_init)}
                self._limit := Set{}
        
            func parse_with_tree(self, s):
                return self.$c{defns[0].name.name}(s, 0)
    |]
    Sys::println(CEI::elided_pp_itree(it, self_module.mod_id))
    
    return it


class Non_Term:

    func init(self, name, children):
        
        self.name := name
        self.children := children


    func to_str(self):
    
        return self.pp(0)


    func pp(self, indent):
    
        pp := [self.name]
        for c := self.children.iter():
            pp.append("\n" + "  " * (indent + 1) + "-> ")
            pp.append(c.pp(indent + 1))
        
        return Strings::join(pp, "")


class Term:

    func init(self, type, val):
    
        self.type := type
        self.val := val


    func to_str(self):
    
        return self.pp(0)


    func pp(self, indent):
    
        return "<" + self.type + " " + self.val + ">"


class Nullables:

    func nullables(self, node):
    
        self._nullables := Set{}
        while 1:
            prev_len := self._nullables.len()
            self._preorder(node)
            if self._nullables.len() == prev_len:
                break
        
        return self._nullables


    func _preorder(self, node):
    
        return self.get_slot("_t_" + node.name)(node)


    func _t_peg(self, pn):

        // peg ::= defn ( "NEWLINE" defn )*

        i := 0
        while i < pn.len():
            self._preorder(pn[i])
            i += 2


    func _t_defn(self, pn):

        // defn ::= "ID" "<" "-" seq_expr ( "/" seq_expr )*

        i := 3
        while i < pn.len():
            if self._preorder(pn[i]) == 1:
                self._nullables.add(pn[0].value)
                break
            i += 2


    func _t_seq_expr(self, pn):

        // seq_expr ::= ( expr )+

        for expr := pn.iter():
            if self._preorder(expr) == 0:
                return 0
        
        return 1


    func _t_expr(self, pn):

        // expr ::= match
        //        | ref
        //        | predicate
        //        | operator
        //        | "(" seq_expr ")"
        //        | choice

        if pn.len() == 1:
            return self._preorder(pn[0])
        else:
            // pn ::= "(" seq_expr ")"
            return self._preorder(pn[1])


    func _t_match(self, pn):

        // match ::= "STRING"
        //         | "[" "STRING" "]"
        //         | "."
        
        return 0


    func _t_ref(self, pn):
    
        // ref ::= "ID"

        if self._nullables.find(pn[0].value):
            return 1
        else:
            return 0


    func _t_predicate(self, pn):

        // predicate ::= "&" expr
        //             | "!" expr
        
        return 1


    func _t_operator(self, pn):

        // operator ::= expr "*"
        //            | expr "?"
        //            | expr "+"
        
        ndif pn[1].type == "*":
            return 1
        elif pn[1].type == "?":
            return 1
        elif pn[1].type == "+":
            return 0


class Nullable_Calls:

    func nullable_calls(self, node, nullables):
    
        self._nullables := nullables
        self._preorder(node)


    func _preorder(self, node):
    
        return self.get_slot("_t_" + node.name)(node)


    func _t_peg(self, pn):

        // peg ::= defn ( "NEWLINE" defn )*

        i := 0
        while i < pn.len():
            self._preorder(pn[i])
            i += 2


    func _t_defn(self, pn):

        // defn ::= "ID" "<" "-" seq_expr ( "/" seq_expr )*

        i := 3
        while i < pn.len():
            self._preorder(pn[i])
            i += 2


    func _t_seq_expr(self, pn):

        // seq_expr ::= ( expr )+
        
        for expr := pn.riter():
            if self._preorder(expr) == 0:
                return 0

        return 1


    func _t_expr(self, pn):

        // expr ::= match
        //        | ref
        //        | predicate
        //        | operator
        //        | "(" seq_expr ")"
        //        | choice

        if pn.len() == 1:
            return self._preorder(pn[0])
        else:
            // pn ::= "(" seq_expr ")"
            return self._preorder(pn[1])


    func _t_match(self, pn):

        // match ::= "STRING"
        //         | "[" "STRING" "]"
        //         | "."
        
        return 0


    func _t_ref(self, pn):
    
        // ref ::= "ID"

        pn.rr := 1
        if self._nullables.find(pn[0].value):
            return 1
        else:
            return 0


    func _t_predicate(self, pn):

        // predicate ::= "&" expr
        //             | "!" expr
        
        return 1


    func _t_operator(self, pn):

        // operator ::= expr "*"
        //            | expr "?"
        //            | expr "+"
        
        ndif pn[1].type == "*":
            self._preorder(pn[0])
            return 1
        elif pn[1].type == "?":
            self._preorder(pn[0])
            return 1
        elif pn[1].type == "+":
            self._preorder(pn[0])
            return 0


class Left_Recursive_Rules:

    func lrrs(self, node, nullables):
    
        self._nullables := nullables
        self._lrr := Set{}
        self._preorder(node)
        
        return self._lrr


    func _preorder(self, node):
    
        return self.get_slot("_t_" + node.name)(node)


    func _t_peg(self, pn):

        // peg ::= defn ( "NEWLINE" defn )*

        i := 0
        while i < pn.len():
            self._preorder(pn[i])
            i += 2


    func _t_defn(self, pn):

        // defn ::= "ID" "<" "-" seq_expr ( "/" seq_expr )*

        self._rule := pn[0].value
        i := 3
        while i < pn.len():
            self._preorder(pn[i])
            i += 2


    func _t_seq_expr(self, pn):

        // seq_expr ::= ( expr )+
        
        for expr := pn.iter():
            if self._preorder(expr) == 0:
                return 0

        return 1


    func _t_expr(self, pn):

        // expr ::= match
        //        | ref
        //        | predicate
        //        | operator
        //        | "(" seq_expr ")"
        //        | choice

        if pn.len() == 1:
            return self._preorder(pn[0])
        else:
            // pn ::= "(" seq_expr ")"
            return self._preorder(pn[1])


    func _t_match(self, pn):

        // match ::= "STRING"
        //         | "[" "STRING" "]"
        //         | "."
        
        return 0


    func _t_ref(self, pn):
    
        // ref ::= "ID"

        if self._rule == pn[0].value:
            self._lrr.add(self._rule)

        if self._nullables.find(pn[0].value):
            return 1
        else:
            return 0


    func _t_predicate(self, pn):

        // predicate ::= "&" expr
        //             | "!" expr
        
        return 1


    func _t_operator(self, pn):

        // operator ::= expr "*"
        //            | expr "?"
        //            | expr "+"
        
        ndif pn[1].type == "*":
            self._preorder(pn[0])
            return 1
        elif pn[1].type == "?":
            self._preorder(pn[0])
            return 1
        elif pn[1].type == "+":
            self._preorder(pn[0])
            return 0


class Right_Recursive_Rules:

    func rrrs(self, node, nullables, lrrs):
    
        self._nullables := nullables
        self._lrrs := lrrs
        self._preorder(node)


    func _preorder(self, node):
    
        return self.get_slot("_t_" + node.name)(node)


    func _t_peg(self, pn):

        // peg ::= defn ( "NEWLINE" defn )*

        i := 0
        while i < pn.len():
            self._preorder(pn[i])
            i += 2


    func _t_defn(self, pn):

        // defn ::= "ID" "<" "-" seq_expr ( "/" seq_expr )*

        self._rule := pn[0].value
        i := 3
        while i < pn.len():
            self._preorder(pn[i])
            i += 2


    func _t_seq_expr(self, pn):

        // seq_expr ::= ( expr )+
        
        self._potential := 0
        for expr := pn.riter():
            if self._preorder(expr) == 0:
                return 0

        return 1


    func _t_expr(self, pn):

        // expr ::= match
        //        | ref
        //        | predicate
        //        | operator
        //        | "(" seq_expr ")"
        //        | choice

        if pn.len() == 1:
            return self._preorder(pn[0])
        else:
            // pn ::= "(" seq_expr ")"
            return self._preorder(pn[1])


    func _t_match(self, pn):

        // match ::= "STRING"
        //         | "[" "STRING" "]"
        //         | "."
        
        return 0


    func _t_ref(self, pn):
    
        // ref ::= "ID"

        if self._potential == 1 & self._rule == pn[0].value & self._lrrs.find(pn[0].value):
            CEI::error("Left-recursive rules can not contain potentially right-recursive calls", \
              pn.src_infos)

        if self._nullables.find(pn[0].value):
            return 1
        else:
            return 0


    func _t_predicate(self, pn):

        // predicate ::= "&" expr
        //             | "!" expr
        
        return 1


    func _t_operator(self, pn):

        // operator ::= expr "*"
        //            | expr "?"
        //            | expr "+"
        
        ndif pn[1].type == "*":
            self._potential := 1
            self._preorder(pn[0])
            return 1
        elif pn[1].type == "?":
            self._potential := 1
            self._preorder(pn[0])
            return 1
        elif pn[1].type == "+":
            self._preorder(pn[0])
            return 0


//
// The basic way we translate PEGs is as follows. All matching occurs on the variables &s (the string
// being matched) and &p (our current position in it). We maintain a list of the current parse tree
// in the variable whose name comes from col_nm (note that col_nm is a *pointer* to a variable name,
// not the variable itself - this is to allow for PEG nesting). If an expression is successful, it
// advances &p and updates the value pointed to by col_nm. If it is unsuccessful, it sets &p to null.
// Each subsequent part of the PEG checks to see if &p is null; if it is, it realises that an earlier
// part of the match failed, and that it should fail itself.
//
// What this means is that we manually encode backtracking in a "flat" manner. It's not entirely
// pretty, but it is efficient.
//

class Peg_Translator:

    func translate(self, node):
    
        self._defn_names := Set{}

        return [self._preorder(node), self._defn_names]


    func _preorder(self, node):
    
        return self.get_slot("_t_" + node.name)(node)


    func _col_preorder(self, node, col):
    
        return self.get_slot("_t_" + node.name)(node, col)


    func _t_peg(self, pn):

        // peg ::= defn ( "NEWLINE" defn )*

        defns := []
        i := 0
        while i < pn.len():
            defns.append(self._preorder(pn[i]))
            i += 2
        
        return defns


    func _t_defn(self, pn):

        // defn ::= "ID" "<" "-" seq_expr ( "/" seq_expr )*

        self._defn_name := pn[0].value
        self._defn_names.add(self._defn_name)
        col_nm := CEI::fresh_name()

        suc_var := CEI::ivar(CEI::fresh_name())
        exprs := []
        i := 3
        while i < pn.len():
            exprs.append([|
                old_p := &p
                if $c{suc_var} == 0:
                    $c{self._col_preorder(pn[i], col_nm)}
                    if &p is null:
                        &p := old_p
                    else:
                        $c{suc_var} := 1
            |])
            i += 2
        
        return [|
            bound_func $c{CEI::ivar("_p_" + pn[0].value)}(&self, &s, &p):
                &orig_p := &p
                $c{CEI::ivar(col_nm)} := []
                
                $c{suc_var} := 0
                $c{exprs.flattened()}
                if $c{suc_var} == 0:
                    return null
                else:
                    val := Non_Term.new(${CEI::lift(pn[0].value)}, $c{CEI::ivar(col_nm)})
                    return [&p, val]
        |]


    func _t_seq_expr(self, pn, col_nm):

        // seq_expr ::= ( expr )+

        sub_col_nm := CEI::fresh_name()
        exprs := []
        for expr := pn.iter():
            exprs.append([|
                if not &p is null:
                    $c{self._col_preorder(expr, sub_col_nm)}
            |])
        
        return [|
            $c{CEI::ivar(sub_col_nm)} := []
            $c{exprs}
            if not &p is null:
                $c{CEI::ivar(col_nm)}.extend($c{CEI::ivar(sub_col_nm)})
        |]


    func _t_expr(self, pn, col_nm):

        // expr ::= match
        //        | ref
        //        | predicate
        //        | operator
        //        | "(" seq_expr ")"
        //        | choice

        if pn.len() == 1:
            return self._col_preorder(pn[0], col_nm)
        else:
            // pn ::= "(" seq_expr ")"
            return self._col_preorder(pn[1], col_nm)


    func _t_match(self, pn, col_nm):

        // match ::= "STRING"
        //         | "[" "STRING" "]"
        //         | "."
        
        if pn[0].type == "STRING":
            return [|
                if &p + ${CEI::lift(pn[0].value.len())} <= &s.len() & \
                  &s.prefixed_by(${CEI::lift(pn[0].value)}, &p):
                    val := Term.new(${CEI::lift(pn[0].value)}, ${CEI::lift(pn[0].value)})
                    $c{CEI::ivar(col_nm)}.append(val)
                    &p += ${CEI::lift(pn[0].value.len())}
                else:
                    &p := null
            |]
        elif pn.len() == 3:
            // match ::= "[" "STRING" "]"
            cs := pn[1].value
            if cs.len() > 0 & cs[0] == "^":
                neg := 1
                i := 1
            else:
                neg := 0
                i := 0

            alts := []
            while i < cs.len():
                if i + 2 < cs.len() & cs[i + 1] == "-":
                    alts.append([| &s[&p].int_val() >= ${CEI::lift(cs[i].int_val())} & \
                      &s[&p].int_val() <= ${CEI::lift(cs[i + 2].int_val())} |])
                    i += 3
                else:
                    alts.append([| &s[&p] == ${CEI::lift(cs[i])} |])
                    i += 1

            if neg == 0:
                return [|
                    if &p < &s.len() & $c{CEI::ialternation(alts)}:
                        val := Term.new("[" + ${CEI::lift(pn[1].value)} + "]", &s[&p])
                        $c{CEI::ivar(col_nm)}.append(val)
                        &p += 1
                    else:
                        &p := null
                |]
            else:
                return [|
                    if &p < &s.len() & not $c{CEI::ialternation(alts)}:
                        val := Term.new("[" + ${CEI::lift(pn[1].value)} + "]", &s[&p])
                        $c{CEI::ivar(col_nm)}.append(val)
                        &p += 1
                    else:
                        &p := null
                |]
        else:
            // match ::= "."
            return [|
                if &p < &s.len():
                    val := Term.new(".", &s[&p])
                    $c{CEI::ivar(col_nm)}.append(val)
                    &p += 1
                else:
                    &p := null
            |]


    func _t_ref(self, pn, col_nm):
    
        // ref ::= "ID"

        normal := [|
            rtn := &self.${"_p_" + pn[0].value}(&s, &p)
            if rtn is null:
                &p := null
            else:
                &p := rtn[0]
                $c{CEI::ivar(col_nm)}.append(rtn[1])
        |]

        if self._defn_name != pn[0].value:
            return normal
        elif not pn.find_slot("rr"):
            return [|
                if seed := &self._growing[${CEI::lift(pn[0].value)}].find(&p):
                    if seed is null:
                        &p := null
                    else:
                        $c{CEI::ivar(col_nm)}.append(seed[1])
                        &p := seed[0]
                elif &orig_p == &p:
                    &self._growing[${CEI::lift(pn[0].value)}][&p] := null
                    while 1:
                        rtn := &self.${"_p_" + self._defn_name}(&s, &p)
                        if rtn is null | \
                          (not &self._growing[${CEI::lift(pn[0].value)}][&p] is null & \
                          rtn[0] <= &self._growing[${CEI::lift(pn[0].value)}][&p][0]):
                            seed := &self._growing[${CEI::lift(pn[0].value)}][&p]
                            &self._growing[${CEI::lift(pn[0].value)}].del(&p)
                            if seed is null:
                                &p := null
                                break
                            else:
                                return seed

                        &self._growing[${CEI::lift(pn[0].value)}][&p] := rtn
                else:
                    if &self._limit.find(${CEI::lift(pn[0].value)}):
                        &self._limit.del(${CEI::lift(pn[0].value)})
                        add := 1
                    else:
                        add := 0
                
                    $c{normal}

                    if add == 1:
                        &self._limit.add(${CEI::lift(pn[0].value)})
            |]
        else:
            return [|
                if &self._limit.find(${CEI::lift(pn[0].value)}):
                    &p := null
                else:
                    if &self._growing[${CEI::lift(pn[0].value)}].find(&orig_p):
                        del := 1
                        &self._limit.add(${CEI::lift(pn[0].value)})
                    else:
                        del := 0

                    $c{normal}
                    
                    if del == 1:
                        &self._limit.del(${CEI::lift(pn[0].value)})
            |]


    func _t_predicate(self, pn, col_nm):

        // predicate ::= "&" expr
        //             | "!" expr
        
        ndif pn[0].type == "&":
            dummy_col_nm := CEI::fresh_name("dummy_col_nm")
            return [|
                old_p := &p
                $c{CEI::ivar(dummy_col_nm)} := []
                $c{self._col_preorder(pn[1], dummy_col_nm)}
                if not &p is null:
                    &p := old_p
            |]

        elif pn[0].type == "!":
            dummy_col_nm := CEI::fresh_name("dummy_col_nm")
            return [|
                old_p := &p
                $c{CEI::ivar(dummy_col_nm)} := []
                $c{self._col_preorder(pn[1], dummy_col_nm)}
                if &p is null:
                    &p := old_p
                else:
                    &p := null
            |]


    func _t_operator(self, pn, col_nm):

        // operator ::= expr "*"
        //            | expr "?"
        //            | expr "+"
        
        ndif pn[1].type == "*":
            return [|
                while 1:
                    old_p := &p
                    $c{self._col_preorder(pn[0], col_nm)}
                    if &p is null:
                        &p := old_p
                        break
            |]
        elif pn[1].type == "?":
            return [|
                old_p := &p
                $c{self._col_preorder(pn[0], col_nm)}
                if &p is null:
                    &p := old_p
            |]
        elif pn[1].type == "+":
            return [|
                j := 0
                while 1:
                    old_p := &p
                    $c{self._col_preorder(pn[0], col_nm)}
                    if &p is null:
                        if j > 0:
                            &p := old_p
                        break

                    j += 1
            |]


calc := $<<peg>>:
    S <- Term
    Term <- Term "+" Term / Term "-" Term / Fact
    Fact <- Fact "*" Fact / Fact "/" Fact / Int
    
    Int <- ["0-9"]+


calc2 := $<<peg>>:
    S <- Term ! .
    
    Term <- Term "+" Fact / Term "-" Fact / Fact
    Fact <- Fact "*" Int / Fact "/" Int / Assign / List / Int
    
    Assign <- ID ":=" Term
    
    List <- "[" "]" / "[" Term "]" / "[" Term ( Entry )+ "]"
    Entry <- "," Term
    
    ID <- ["a-zA-Z_"]+
    Int <- ["0-9"]+

// If you uncomment the following PEG, you will get a compile-time error as it is both potentially
// left and and potentially right recursive.
//
//s3 := $<<peg>>:
//    expr <- "m"? expr "-" expr "M"? / num
//    num  <- ["0-9"]


func tryp(peg, s):

    rtn := peg.new().parse_with_tree(s)
    if not rtn is null:
        Sys::println("  ", rtn[1].pp(1))
    else:
        Sys::println("  <failed>")


func main():
    tryp(calc, "1-2-3")
    tryp(calc2, "1-2-3")
    tryp(calc2, "123+23")
    tryp(calc2, "123+45*67")
    tryp(calc2, "123*45+67")
    tryp(calc2, "x:=[1,2,3]+[1]")
    tryp(calc2, "x:=[1,2,3]+[z:=[2]]")