Skip to content

Commit

Permalink
Merge pull request #80 from uuverifiers/automaton-parser
Browse files Browse the repository at this point in the history
Automaton parser
  • Loading branch information
pruemmer committed Mar 5, 2024
2 parents 144d54f + b4d948a commit 24d7ad3
Show file tree
Hide file tree
Showing 6 changed files with 175 additions and 6 deletions.
1 change: 1 addition & 0 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ lazy val root = (project in file(".")).
libraryDependencies += "org.sat4j" % "org.sat4j.core" % "2.3.1",
libraryDependencies += "org.scalacheck" %% "scalacheck" % "1.14.0" % "test",
libraryDependencies += "dk.brics.automaton" % "automaton" % "1.11-8",
libraryDependencies += "com.lihaoyi" %% "fastparse" % "3.0.2",
)


74 changes: 74 additions & 0 deletions src/main/scala/ostrich/AutomatonParser.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package ostrich

import fastparse.Parsed
import ostrich.automata.{AtomicStateAutomaton, AtomicStateAutomatonAdapter, Automaton, BricsAutomaton, BricsAutomatonBuilder}

import scala.collection.mutable
import fastparse._
import MultiLineWhitespace._

case class Transition(from: String, to: String, range: (Int, Int))
case class AutomatonFragment(name: String, initStates: String, transitions: Seq[Transition], acceptingStates: Seq[String])


class AutomatonParser {
def identifier[_: P]: P[String] = P(CharsWhileIn("a-zA-Z_0-9").!)

def number[_: P]: P[Int] = P(CharsWhileIn("0-9").!).map(_.toInt)

def range[_: P]: P[(Int, Int)] = P("[" ~/ number ~ "," ~ number ~ "]").map { case (start, end) => (start, end) }

def transition[_: P]: P[Transition] = P(identifier ~ "->" ~ identifier ~ range ~ ";").map {
case (from, to, range) => Transition(from, to, range)
}
def init[_: P]: P[String] = P("init" ~/ identifier ~ ";")
def accepting[_: P]: P[Seq[String]] = P("accepting" ~/ identifier.rep(1, sep = ",") ~ ";").map(_.toSeq)

def automaton[_: P]: P[AutomatonFragment] = P("automaton" ~/ identifier ~ "{" ~ init ~ transition.rep ~ accepting ~ "}").map {
case (name, initStates, transitions, acceptingStates) => AutomatonFragment(name, initStates, transitions, acceptingStates)
}

def parseAutomaton(input: String): Either[String, Automaton] = parse(input, automaton(_)) match {
case Parsed.Success(value, _) => Right(automatonFromFragment(value))
case f: Parsed.Failure => Left(f.msg)
}

def automatonFromFragment(fragment: AutomatonFragment) : Automaton = {
val builder = new BricsAutomatonBuilder()
val nameToState = new mutable.HashMap[String, BricsAutomaton#State]()
val init = builder.getNewState
nameToState.put(fragment.initStates, init)
builder.setInitialState(init)

for (transition <- fragment.transitions) {
val source_state = nameToState.getOrElseUpdate(transition.from, builder.getNewState)
val destination_state = nameToState.getOrElseUpdate(transition.to, builder.getNewState)
builder.addTransition(source_state, builder.LabelOps.interval(transition.range._1.toChar,transition.range._2.toChar), destination_state)
}

for (accepting <- fragment.acceptingStates) {
val accepting_state = nameToState.getOrElseUpdate(accepting, builder.getNewState)
builder.setAccept(accepting_state, isAccepting = true)
}


val result = builder.getAutomaton
Console.err.println("Result at \n ", result)
result
}
}


/*
// Example usage
val exampleInput = """automaton value_0 {
init s0;
s0 -> s0 [0, 31];
s0 -> s1 [32, 32];
s0 -> s0 [33, 65535];
s1 -> s1 [0, 65535];
accepting s0,s1;
};"""
val result = AutomatonParser.parseAutomaton(exampleInput)
println(result)*/
6 changes: 4 additions & 2 deletions src/main/scala/ostrich/OstrichStringTheory.scala
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ class OstrichStringTheory(transducers : Seq[(String, Transducer)],
MonoSortedIFunction("re.from_ecma2020", List(SSo), RSo, true, false)
val re_from_ecma2020_flags =
MonoSortedIFunction("re.from_ecma2020_flags", List(SSo, SSo), RSo, true, false)
val re_from_automaton =
MonoSortedIFunction("re.from_automaton", List(SSo), RSo, true, false)
val re_case_insensitive =
MonoSortedIFunction("re.case_insensitive", List(RSo), RSo, true, false)
val str_at_right =
Expand Down Expand Up @@ -200,7 +202,7 @@ class OstrichStringTheory(transducers : Seq[(String, Transducer)],

val extraRegexFunctions =
List(re_begin_anchor, re_end_anchor,
re_from_ecma2020, re_from_ecma2020_flags,
re_from_ecma2020, re_from_ecma2020_flags, re_from_automaton,
re_case_insensitive,
str_at_right, str_trim,
str_replacere_longest, str_replaceallre_longest,
Expand Down Expand Up @@ -306,7 +308,7 @@ class OstrichStringTheory(transducers : Seq[(String, Transducer)],
re_comp, re_loop, re_loop_?, re_from_str, re_capture,
re_reference,
re_begin_anchor, re_end_anchor,
re_from_ecma2020, re_from_ecma2020_flags,
re_from_ecma2020, re_from_ecma2020_flags, re_from_automaton,
re_case_insensitive))
yield functionPredicateMap(f)) ++
(for (f <- List(str_len); if theoryFlags.useLength != OFlags.LengthOptions.Off)
Expand Down
20 changes: 16 additions & 4 deletions src/main/scala/ostrich/automata/Regex2Aut.scala
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

package ostrich.automata

import ostrich.{ECMARegexParser, OFlags, OstrichStringTheory}
import ostrich.{AutomatonParser, ECMARegexParser, OFlags, OstrichStringTheory}
import ap.basetypes.IdealInt
import ap.parser.IExpression.Const
import ap.parser._
Expand Down Expand Up @@ -333,7 +333,7 @@ class Regex2Aut(theory : OstrichStringTheory) {
re_opt_?, re_loop_?,
re_opt, re_comp, re_loop, str_to_re, re_from_str, re_capture,
re_begin_anchor, re_end_anchor,
re_from_ecma2020, re_from_ecma2020_flags,
re_from_ecma2020, re_from_ecma2020_flags, re_from_automaton,
re_case_insensitive}

import Regex2Aut._
Expand Down Expand Up @@ -857,9 +857,21 @@ class Regex2Aut(theory : OstrichStringTheory) {
toBAutomaton(t, true)

def buildAut(t : ITerm,
minimize : Boolean = true) : Automaton =
new BricsAutomaton(toBAutomaton(t, minimize))
minimize : Boolean = true) : Automaton = t match
{
case IFunApp(`re_from_automaton`, Seq(EncodedString(str))) => {
val parser = new AutomatonParser()
val res = parser.parseAutomaton(str)
res match {
case Left(a) => throw new Exception("Automata parsing re.from_automata went wrong " + a)
case Right(b) => b

}
}
case _ => {
new BricsAutomaton(toBAutomaton(t, minimize))
}
}
private def numToUnicode(num : Int) : String =
new String(Character.toChars(num))

Expand Down
77 changes: 77 additions & 0 deletions src/test/scala/ostrich/automata/AutomatonParserTest.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package ostrich.automata

import dk.brics.automaton.{Transition, Automaton => BAutomaton}
import org.scalacheck.Prop._
import org.scalacheck.Properties
import ostrich.AutomatonParser

object AutomatonParserTest
extends Properties("AutomatonParser"){

val anyStringAut = BricsAutomaton.makeAnyString()

val aPlusbPlusAut = {
// Transducer encoding the capture group (a*)a*
val builder = new BricsAutomatonBuilder()
val init = builder.getNewState
val aplus = builder.getNewState
val bplus = builder.getNewState
builder.setInitialState(init)

builder.addTransition(init,builder.LabelOps singleton 'a', aplus)
builder.addTransition(aplus,builder.LabelOps singleton 'a', aplus)

builder.addTransition(aplus,builder.LabelOps singleton 'b', bplus)
builder.addTransition(bplus,builder.LabelOps singleton 'b', bplus)
builder.setAccept(bplus, isAccepting = true)
builder.getAutomaton
}

property("parse success") = {

val exampleInput = "automaton value_0 {init s0; s0 -> s0 [0, 31]; s0 -> s1 [32, 32]; s0 -> s0 [33, 65535]; s1 -> s1 [0, 65535]; accepting s0,s1;};"
val result = new AutomatonParser().parseAutomaton(exampleInput)
println(result)
result.isRight
}
property("parse two initial states") = {
// exactly one initial state
val exampleInput = "automaton value_0 {init s0,s1; s0 -> s0 [0, 31]; s0 -> s1 [32, 32]; s0 -> s0 [33, 65535]; s1 -> s1 [0, 65535]; accepting s0,s1;};"
val result = new AutomatonParser().parseAutomaton(exampleInput)
result.isLeft
}

property("parse no initial state") = {
// exactly one initial state
val exampleInput = "automaton value_0 {init; s0 -> s0 [0, 31]; s0 -> s1 [32, 32]; s0 -> s0 [33, 65535]; s1 -> s1 [0, 65535]; accepting s0,s1;};"
val result = new AutomatonParser().parseAutomaton(exampleInput)
result.isLeft
}

property("parse equiv check") = {
val exampleInput = "automaton value_0 {init s0; s0 -> s0 [0, 65535]; accepting s0;};"
val result = new AutomatonParser().parseAutomaton(exampleInput)
val res1 = result.isRight
val aut = result.right.get
val aut2 = !aut & anyStringAut
val res2 = aut2.isEmpty
val aut3 = aut & !anyStringAut
val res3 = aut3.isEmpty
res1 & res2 & res3
}

property("parse equiv check 2") = {
val exampleInput = "automaton value_0 {init s0; s0 -> s1 [97, 97]; s1 -> s1 [97, 97]; s1 -> s2 [98, 98]; s2 -> s2 [98, 98];accepting s2;};"
val result = new AutomatonParser().parseAutomaton(exampleInput)
val res1 = result.isRight
val aut = result.right.get

val aut2 = !aut & aPlusbPlusAut
val res2 = aut2.isEmpty

val aut3 = aut & !aPlusbPlusAut
val res3 = aut3.isEmpty

res1 & res2 & res3
}
}
3 changes: 3 additions & 0 deletions tests/automata.smt2
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
(declare-const x String)
(assert (= x "ab"))
(assert (str.in.re x (re.from_automaton "automaton value_0 {init s0; s0 -> s1 [0, 100]; s1 -> s1 [0, 65535];accepting s1;};")))

0 comments on commit 24d7ad3

Please sign in to comment.