Alexander Myltsev alexander-myltsev@githublinkedin.com/in/alexandermyltsevalex_myltsev@twitter This talk: http://myltsev.name/ScalaDays2014
scala?!> val input = "1 + 2 * 3" scala?!> new Parser(input).run.arithmetical_expression? true scala?!> new Parser(input).run.AST_Structure ☂ + ✂ ♎ / \ ☎ Some ☭ 1 * ☢ ♎ / \ ❄ ❤ 2 3 ❤
can't parse recursive data: arithmetical expr, json, etc.
scala?!> new RegExParser({ "location": "Kosmos Belin", "presentation": { "name" : "parboiled2", "geeky": true } }).run
libraryDependencies+= "org.parboiled" %% "parboiled" % "2.0.0" import org.parboiled2._
abstract class Parser { def input: ParserInput } class ToyParser(val input: ParserInput) extends Parser { def InputLine = rule { "a" ~ ("b" | "c") } }
def InputLine = rule { "a" ~ ("b" | "c") }
> new ToyParser("ab").InputLine.run() Try[Unit] = Success(())
> new ToyParser("ac").InputLine.run() Try[Unit] = Success(())
> new ToyParser("d").InputLine.run() Try[Unit] = Failure(org.parboiled2.ParseError)
> val p = new ToyParser("d") p: ToyParser = ToyParser@12345 > val Failure(err: ParseError) = p.InputLine.run() err: ParseError = org.parboiled2.ParseError > p.formatError(err) String = Invalid input 'd', expected 'a' (line 1, column 1): d ^
def InputLine = rule { "a" ~ ("b" | "c") } input: "ac"InputLine starts — calls "~" subrule — cursor at 0 pos "~" matches left "a", and if succeeds then right "|" rule "a" is matched against input(0) — cursor is at 1 "|" matches left "b", and if fails then matches "c" "b" is failed to match — cursor stays at 1 "c" is succeeded to match — cursor is moved to 2 match is successful
class Rule[-I <: HList, +O <: HList]
type RuleN[L <: HList] = Rule[HNil, L]
type Rule0 = RuleN[HNil]
type Rule1[T] = RuleN[T :: HNil]
// B is pushed after A type Rule2[A, B] = RuleN[A :: B :: HNil]
type PopRule[L <: HList] = Rule[L, HNil]
implicit def ch(c: Char): Rule0 rule { 'a' } // rule { ch('a') }
implicit def str(s: String): Rule0 rule { "ab" } // rule { str("ab") } rule { 42 } // compile-time error
implicit def predicate(p: CharPredicate): Rule0 CharPredicate.Digit, CharPredicate.LowerHexLetter, etc.
def EOI: Char
def a: Rule1[Int] = // ... def b: Rule1[String] = // ... def c: Rule2[Int, String] = rule { a ~ b }
Rule[ , A ] ~ Rule[ , B ] = Rule[ , A:B ] Rule[A:B:C, D:E:F] ~ Rule[F , G:H] = Rule[A:B:C, D:E:G:H] Rule[A , B:C ] ~ Rule[D:B:C, E:F] = Rule[D:A , E:F ] Rule[A , B:C ] ~ Rule[D:C , E:F] - is Illegal if D != B
optional(e: Rule0) : Rule0 optional(e: Rule1[T]) : Rule1[Option[T]]
zeroOrMore(e: Rule0) : Rule0 zeroOrMore(e: Rule1[T]) : Rule1[Seq[T]] oneOrMore(e: Rule0) : Rule0 oneOrMore(e: Rule1[T]) : Rule1[Seq[T]]
pushes value onto ValueStack
rule { "true" ~ push(true) }
push(e: Unit) : Rule0 - pushes nothing
push(e: L <: HList) : RuleN[L] - pushes all values of L
push(e: T) : Rule1[T] - pushes a value of T
pushes additional matched string onto ValueStack
rule { capture("42") } - if input matches "42" then pushes "42"
capture(zeroOrMore(CharPredicate.Digit)) : Rule1[String]
capture("true" ~ push(true)) : Rule2[Boolean, String]
transforms top elements of the Value Stack into some other object(s)
(foo: Rule1[String]) ~> ((s: String) ⇒ s.toInt) : Rule1[Int]
(foo: Rule1[String]) ~> (s ⇒ s.toInt) : Rule1[Int]
(foo: Rule1[String]) ~> (_.toInt) : Rule1[Int]
(foo: Rule2[Int, String]) ~> ((i, s) ⇒ i + s.toInt) : Rule1[Int]
(foo: Rule1[String]) ~> (println(_)) : Rule0
(foo: Rule1[String]) ~> (() ⇒ 17) : Rule2[String, Int] (foo: Rule1[String]) ~ push(17) : Rule2[String, Int]
(foo: Rule1[String]) ~> (s ⇒ s.toInt :: 3.14 :: HNil) : Rule2[Int, Double]
case class Person(name: String, age: Int) (foo: Rule2[String, Int]) ~> Person : Rule1[Person]
parser makes progress leaving value stack unchanged
reduce "1+2+3+4+5" to (15:Int)
(number: Rule1[Int]) ~ zeroOrMore("+" ~ number) : Rule2[Int, Seq[Int]]
def reductor = "+" ~ number ~> ((a:Int, b: Int) => a + b) : Rule[Int::HNil, Int::HNil] (number: Rule1[Int]) ~ zeroOrMore(reductor) : Rule1[Int]
capture(CharPredicate.Digit) ~ optional("h" ~> ((s: String) ⇒ s + "hex")) (factor: Rule1[Int]) ~ oneOrMore("*" ~ factor ~> ((a: Int, b) ⇒ a * b))
> new Calculator("1+(2+4)*3").InputLine.run() scala.util.Success(25)
class Calculator(val input: ParserInput) extends Parser {
def Digits = rule { oneOrMore(CharPredicate.Digit) } // Rule1[Stirng] def Number = rule { capture(Digits) ~> (_.toInt) } // Rule1[Int]
def Parens = rule { '(' ~ Expression ~ ')' }
def Factor = rule { Number | Parens }
def Term = rule { Factor ~ zeroOrMore('*' ~ Factor ~> ((_: Int) * _) | '/' ~ Factor ~> ((_: Int) / _)) }
def Expression: Rule1[Int] = rule { Term ~ zeroOrMore('+' ~ Term ~> ((_: Int) + _) | '-' ~ Term ~> ((_: Int) - _)) }
def InputLine = rule { Expression ~ EOI } }
case class Rule(matched: Boolean)
abstract class Parser { def rule(r: Rule): Rule = macro ParserMacros.ruleImpl def input: ParserInput def __nextChar(): Char = // ... def __cursor: Int = // ... }
object ParserMacros { def ruleImpl(ctx: ParserContext)(r: ctx.Expr[Rule]): ctx.Expr[Rule] = { // ... } }
def ruleImpl(ctx: ParserContext)(r: ctx.Expr[Rule]): ctx.Expr[Rule] = reify { OpTree(r.tree).render().splice }input: Scala compiler AST IR: parboiled2 OpTree output: effective Scala code AST
def r = rule { "a" } def r = rule { str("a") } def r = rule { CurrentParser.this.str("a") }
r.tree = Apply(Select(This(TypeName("CurrentParser")), TermName("str")), List(Literal(Constant("a"))))
r.tree match { case Apply(Select(_, TermName("str")), List(Literal(Constant(s)))) ⇒ StringMatch(s) // s is "a" }
r.tree match { case q"$a.this.str($s)" ⇒ StringMatch(s) // a is CurrentParser // s is "a" }
object OpTree { def apply(tree: Tree): OpTree = r.tree match { case q"$a.this.str($s)" ⇒ StringMatch(s) case q"$a.this.ch($ch)" ⇒ CharMatch(ch) case q"$lhs.|($rhs)" ⇒ FirstOf(OpTree(lhs), OpTree(rhs)) case q"$lhs.~($rhs)" ⇒ Sequence(OpTree(lhs), OpTree(rhs)) } }
abstract class OpTree { def render(): Expr[Rule] } case class StringMatch(strTree: Tree) extends OpTree case class CharMatch(charTree: Tree) extends OpTree case class FirstOf(lhs: OpTree, rhs: OpTree) extends OpTree case class Sequence(lhs: OpTree, rhs: OpTree) extends OpTree
// (rule { ch('a') }).tree match { case q"$a.this.ch($charTree)" ⇒ ... } case class CharMatch(charTree: Tree) extends OpTree { def render(): Expr[Rule] = reify { val p = c.prefix.splice // Current parser val char = c.Expr[Char](charTree).splice // : Char Rule(p.__nextChar() == char) } }
// (rule { str('a') }).tree match { case q"$a.this.str($strTree)" ⇒ ... } case class StringMatch(strTree: Tree) extends OpTree { def render(): Expr[Rule] = c.Expr[Rule](q'' val p = ${c.prefix} // Current parser val str = $strTree // : String val res = str.forall(c ⇒ c == p.__nextChar()) Rule(res) '')
// (rule { str('a') }).tree match { case q"$a.this.str($strTree)" ⇒ ... } case class StringMatch(strTree: Tree) extends OpTree { def render(): Expr[Rule] = c.Expr[Rule](q'' val p = ${c.prefix} // Current parser val str = $strTree // : String var ix = 0 while (ix < str.length && str.charAt(ix) == p.__nextChar()) { ix += 1 } Rule(ix == str.length) '')
// case q"$lhs.~($rhs)" ⇒ Sequence(OpTree(lhs), OpTree(rhs)) case class Sequence(lhs: OpTree, rhs: OpTree) extends OpTree { def render(): Expr[Rule] = c.Expr[Rule](q'' val res = lhs.render().matched && rhs.render().matched Rule(res) '') }
// case q"$lhs.|($rhs)" ⇒ FirstOf(OpTree(lhs), OpTree(rhs)) case class FirstOf(lhs: OpTree, rhs: OpTree) extends OpTree { def render(): Expr[Rule] = c.Expr[Rule](q'' val mark = p.__cursor if (lhs.render().matched) { Rule(true) } else { p.__cursor = mark val res = rhs.render().matched Rule(res) } '') }
> cappi::benchmark benchmark ms linear runtime Parboiled1JsonParser 85.64 ============================== arboiled2JsonParser 13.17 ==== Argonaut 7.01 == Json4SNative 8.06 == Json4SJackson 4.09 =