Alexander Myltsev alexander-myltsev@githublinkedin.com/in/alexandermyltsevalex_myltsev@twitter This talk: http://myltsev.name/ScalaDays2014
scala?!> val input = "1 + 2 * 3"
scala?!> new Parser(input).run.arithmetical_expression?
true
scala?!> new Parser(input).run.AST_Structure
☂ + ✂
♎ / \ ☎
Some ☭ 1 * ☢
♎ / \ ❄
❤ 2 3 ❤
can't parse recursive data: arithmetical expr, json, etc.
scala?!> new RegExParser({
"location": "Kosmos Belin",
"presentation": {
"name" : "parboiled2",
"geeky": true
}
}).run
libraryDependencies+= "org.parboiled" %% "parboiled" % "2.0.0" import org.parboiled2._
abstract class Parser {
def input: ParserInput
}
class ToyParser(val input: ParserInput) extends Parser {
def InputLine = rule { "a" ~ ("b" | "c") }
}
def InputLine = rule { "a" ~ ("b" | "c") }
> new ToyParser("ab").InputLine.run()
Try[Unit] = Success(())
> new ToyParser("ac").InputLine.run()
Try[Unit] = Success(())
> new ToyParser("d").InputLine.run()
Try[Unit] = Failure(org.parboiled2.ParseError)
> val p = new ToyParser("d")
p: ToyParser = ToyParser@12345
> val Failure(err: ParseError) = p.InputLine.run()
err: ParseError = org.parboiled2.ParseError
> p.formatError(err)
String =
Invalid input 'd', expected 'a' (line 1, column 1):
d
^
def InputLine = rule { "a" ~ ("b" | "c") }
input: "ac"
InputLine starts — calls "~" subrule — cursor at 0 pos
"~" matches left "a", and if succeeds then right "|"
rule "a" is matched against input(0) — cursor is at 1
"|" matches left "b", and if fails then matches "c"
"b" is failed to match — cursor stays at 1
"c" is succeeded to match — cursor is moved to 2
match is successful
class Rule[-I <: HList, +O <: HList]
type RuleN[L <: HList] = Rule[HNil, L]
type Rule0 = RuleN[HNil]
type Rule1[T] = RuleN[T :: HNil]
// B is pushed after A type Rule2[A, B] = RuleN[A :: B :: HNil]
type PopRule[L <: HList] = Rule[L, HNil]
implicit def ch(c: Char): Rule0
rule { 'a' } // rule { ch('a') }
implicit def str(s: String): Rule0
rule { "ab" } // rule { str("ab") }
rule { 42 } // compile-time error
implicit def predicate(p: CharPredicate): Rule0 CharPredicate.Digit, CharPredicate.LowerHexLetter, etc.
def EOI: Char
def a: Rule1[Int] = // ...
def b: Rule1[String] = // ...
def c: Rule2[Int, String] = rule { a ~ b }
Rule[ , A ] ~ Rule[ , B ] = Rule[ , A:B ] Rule[A:B:C, D:E:F] ~ Rule[F , G:H] = Rule[A:B:C, D:E:G:H] Rule[A , B:C ] ~ Rule[D:B:C, E:F] = Rule[D:A , E:F ] Rule[A , B:C ] ~ Rule[D:C , E:F] - is Illegal if D != B
optional(e: Rule0) : Rule0 optional(e: Rule1[T]) : Rule1[Option[T]]
zeroOrMore(e: Rule0) : Rule0 zeroOrMore(e: Rule1[T]) : Rule1[Seq[T]] oneOrMore(e: Rule0) : Rule0 oneOrMore(e: Rule1[T]) : Rule1[Seq[T]]
pushes value onto ValueStack
rule { "true" ~ push(true) }
push(e: Unit) : Rule0 - pushes nothing
push(e: L <: HList) : RuleN[L] - pushes all values of L
push(e: T) : Rule1[T] - pushes a value of T
pushes additional matched string onto ValueStack
rule { capture("42") } - if input matches "42" then pushes "42"
capture(zeroOrMore(CharPredicate.Digit)) : Rule1[String]
capture("true" ~ push(true)) : Rule2[Boolean, String]
transforms top elements of the Value Stack into some other object(s)
(foo: Rule1[String]) ~> ((s: String) ⇒ s.toInt) : Rule1[Int]
(foo: Rule1[String]) ~> (s ⇒ s.toInt) : Rule1[Int]
(foo: Rule1[String]) ~> (_.toInt) : Rule1[Int]
(foo: Rule2[Int, String]) ~> ((i, s) ⇒ i + s.toInt) : Rule1[Int]
(foo: Rule1[String]) ~> (println(_)) : Rule0
(foo: Rule1[String]) ~> (() ⇒ 17) : Rule2[String, Int] (foo: Rule1[String]) ~ push(17) : Rule2[String, Int]
(foo: Rule1[String]) ~> (s ⇒ s.toInt :: 3.14 :: HNil) : Rule2[Int, Double]
case class Person(name: String, age: Int) (foo: Rule2[String, Int]) ~> Person : Rule1[Person]
parser makes progress leaving value stack unchanged
reduce "1+2+3+4+5" to (15:Int)
(number: Rule1[Int]) ~ zeroOrMore("+" ~ number) : Rule2[Int, Seq[Int]]
def reductor = "+" ~ number ~> ((a:Int, b: Int) => a + b) : Rule[Int::HNil, Int::HNil] (number: Rule1[Int]) ~ zeroOrMore(reductor) : Rule1[Int]
capture(CharPredicate.Digit) ~ optional("h" ~> ((s: String) ⇒ s + "hex"))
(factor: Rule1[Int]) ~ oneOrMore("*" ~ factor ~> ((a: Int, b) ⇒ a * b))
> new Calculator("1+(2+4)*3").InputLine.run()
scala.util.Success(25)
class Calculator(val input: ParserInput) extends Parser {
def Digits = rule { oneOrMore(CharPredicate.Digit) } // Rule1[Stirng]
def Number = rule { capture(Digits) ~> (_.toInt) } // Rule1[Int]
def Parens = rule { '(' ~ Expression ~ ')' }
def Factor = rule { Number | Parens }
def Term = rule { Factor ~ zeroOrMore('*' ~ Factor ~> ((_: Int) * _)
| '/' ~ Factor ~> ((_: Int) / _)) }
def Expression: Rule1[Int] = rule {
Term ~ zeroOrMore('+' ~ Term ~> ((_: Int) + _)
| '-' ~ Term ~> ((_: Int) - _)) }
def InputLine = rule { Expression ~ EOI }
}
case class Rule(matched: Boolean)
abstract class Parser {
def rule(r: Rule): Rule = macro ParserMacros.ruleImpl
def input: ParserInput
def __nextChar(): Char = // ...
def __cursor: Int = // ...
}
object ParserMacros {
def ruleImpl(ctx: ParserContext)(r: ctx.Expr[Rule]): ctx.Expr[Rule] = {
// ...
}
}
def ruleImpl(ctx: ParserContext)(r: ctx.Expr[Rule]): ctx.Expr[Rule] =
reify {
OpTree(r.tree).render().splice
}
input: Scala compiler AST
IR: parboiled2 OpTree
output: effective Scala code AST
def r = rule { "a" }
def r = rule { str("a") }
def r = rule { CurrentParser.this.str("a") }
r.tree = Apply(Select(This(TypeName("CurrentParser")), TermName("str")),
List(Literal(Constant("a"))))
r.tree match {
case Apply(Select(_, TermName("str")), List(Literal(Constant(s)))) ⇒
StringMatch(s) // s is "a"
}
r.tree match {
case q"$a.this.str($s)" ⇒ StringMatch(s) // a is CurrentParser
// s is "a"
}
object OpTree {
def apply(tree: Tree): OpTree =
r.tree match {
case q"$a.this.str($s)" ⇒ StringMatch(s)
case q"$a.this.ch($ch)" ⇒ CharMatch(ch)
case q"$lhs.|($rhs)" ⇒ FirstOf(OpTree(lhs), OpTree(rhs))
case q"$lhs.~($rhs)" ⇒ Sequence(OpTree(lhs), OpTree(rhs))
}
}
abstract class OpTree {
def render(): Expr[Rule]
}
case class StringMatch(strTree: Tree) extends OpTree
case class CharMatch(charTree: Tree) extends OpTree
case class FirstOf(lhs: OpTree, rhs: OpTree) extends OpTree
case class Sequence(lhs: OpTree, rhs: OpTree) extends OpTree
// (rule { ch('a') }).tree match { case q"$a.this.ch($charTree)" ⇒ ... }
case class CharMatch(charTree: Tree) extends OpTree {
def render(): Expr[Rule] = reify {
val p = c.prefix.splice // Current parser
val char = c.Expr[Char](charTree).splice // : Char
Rule(p.__nextChar() == char)
}
}
// (rule { str('a') }).tree match { case q"$a.this.str($strTree)" ⇒ ... }
case class StringMatch(strTree: Tree) extends OpTree {
def render(): Expr[Rule] = c.Expr[Rule](q''
val p = ${c.prefix} // Current parser
val str = $strTree // : String
val res = str.forall(c ⇒ c == p.__nextChar())
Rule(res)
'')
// (rule { str('a') }).tree match { case q"$a.this.str($strTree)" ⇒ ... }
case class StringMatch(strTree: Tree) extends OpTree {
def render(): Expr[Rule] = c.Expr[Rule](q''
val p = ${c.prefix} // Current parser
val str = $strTree // : String
var ix = 0
while (ix < str.length && str.charAt(ix) == p.__nextChar()) {
ix += 1
}
Rule(ix == str.length)
'')
// case q"$lhs.~($rhs)" ⇒ Sequence(OpTree(lhs), OpTree(rhs))
case class Sequence(lhs: OpTree, rhs: OpTree) extends OpTree {
def render(): Expr[Rule] = c.Expr[Rule](q''
val res = lhs.render().matched && rhs.render().matched
Rule(res)
'')
}
// case q"$lhs.|($rhs)" ⇒ FirstOf(OpTree(lhs), OpTree(rhs))
case class FirstOf(lhs: OpTree, rhs: OpTree) extends OpTree {
def render(): Expr[Rule] = c.Expr[Rule](q''
val mark = p.__cursor
if (lhs.render().matched) {
Rule(true)
} else {
p.__cursor = mark
val res = rhs.render().matched
Rule(res)
}
'')
}
> cappi::benchmark
benchmark ms linear runtime
Parboiled1JsonParser 85.64 ==============================
arboiled2JsonParser 13.17 ====
Argonaut 7.01 ==
Json4SNative 8.06 ==
Json4SJackson 4.09 =