Skip to content

Commit

Permalink
Use JSONx to transfer parse tree between processes
Browse files Browse the repository at this point in the history
By default, the language of the parsers generated from grammars is
Python and they are executed in-process. However, the generation of
Java parsers is also supported for performance reasons, in which
case the parsers are executed in a separate process and their
result must be communicated back to the Python part of the reducer.

The data exchange format used to be JSON, but JSON is not a
first-class citizen in Java and the standard edition contains no
packages to read/write JSON. Unfortunately, the latest releases of
ANTLR have stopped bundling a JSON implementation with the tool
jar.

To avoid adding new dependencies just to be able to use Java
parsers, the data exchange format is changed to JSONx.
Serialization to JSONx in Java is supported by a minimal partial
implementation, while deserialization in Python is supported by the
XSON project.
  • Loading branch information
akosthekiss authored and renatahodovan committed May 14, 2023
1 parent e6d46e2 commit fc4dd29
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 34 deletions.
11 changes: 6 additions & 5 deletions picireny/antlr4/hdd_tree_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
# This file may not be copied, modified, or distributed except
# according to those terms.

import json
import logging
import re
import shutil
Expand All @@ -18,6 +17,8 @@
from string import Template
from subprocess import CalledProcessError, PIPE, run, STDOUT

import xson

from antlr4 import CommonTokenStream, error, InputStream, Token
from antlr4.Token import CommonToken

Expand Down Expand Up @@ -372,7 +373,7 @@ def set_replacement(node):
logger.debug('Parse input with %s rule', start_rule)
if lang != 'python':

def hdd_tree_from_json(node_dict):
def hdd_tree_from_dict(node_dict):
# Convert interval dictionaries to Position objects.
if 'start' in node_dict:
node_dict['start'] = Position(**node_dict['start'])
Expand All @@ -386,7 +387,7 @@ def hdd_tree_from_json(node_dict):

if children:
for child in children:
node.add_child(hdd_tree_from_json(child))
node.add_child(hdd_tree_from_dict(child))
elif name:
if name in grammar['islands']:
island_nodes.append(node)
Expand All @@ -398,8 +399,8 @@ def hdd_tree_from_json(node_dict):
input=src, stdout=PIPE, stderr=PIPE, universal_newlines=True, cwd=current_workdir, check=True)
if proc.stderr:
logger.debug(proc.stderr)
result = json.loads(proc.stdout)
tree_root = hdd_tree_from_json(result)
result = xson.loads(proc.stdout)
tree_root = hdd_tree_from_dict(result)
except CalledProcessError as e:
logger.error('Java parser failed!\n%s\n%s', e.stdout, e.stderr)
raise
Expand Down
109 changes: 80 additions & 29 deletions picireny/antlr4/resources/ExtendedTargetParser.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021 Renata Hodovan, Akos Kiss.
* Copyright (c) 2016-2023 Renata Hodovan, Akos Kiss.
*
* Licensed under the BSD 3-Clause License
* <LICENSE.rst or https://opensource.org/licenses/BSD-3-Clause>.
Expand All @@ -9,8 +9,7 @@

import java.io.*;
import java.util.*;

import javax.json.*;
import javax.xml.stream.*;

import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.tree.*;
Expand Down Expand Up @@ -57,15 +56,75 @@ public static void main(String[] args) {
parser.addParseListener(listener);
Extended$parser_class.class.getMethod(args[0]).invoke(parser);
parser.syntaxErrorWarning();
try (JsonWriter w = Json.createWriter(System.out)) {
w.write(listener.root.createJsonObjectBuilder().build());

try (XsonStreamWriter w = new XsonStreamWriter(System.out)) {
w.write(null, listener.root);
}
} catch(Exception e) {
e.printStackTrace(System.err);
System.exit(1);
}
}

private static interface XsonObject {
public void writeXsonMembers(XsonStreamWriter w) throws XMLStreamException;
}

/**
 * XsonStreamWriter is a partial implementation for writing JSONx documents.
 * It only implements the minimum required to dump HDDNode objects.
 *
 * Every write method emits exactly one JSONx element (object, array, number
 * or string) in the JSONx namespace. The name argument, if not null, becomes
 * the "name" attribute of that element; null is used for unnamed values,
 * i.e., for the document root and for the items of an array.
 */
private static class XsonStreamWriter implements AutoCloseable {
    public static final String JSONX_PREFIX = "json";
    public static final String JSONX_NS_URI = "http://www.ibm.com/xmlns/prod/2009/jsonx";

    private final XMLStreamWriter w;

    /**
     * Creates a JSONx writer on top of the given output stream.
     *
     * @param o the stream that receives the serialized document
     * @throws XMLStreamException if the underlying XML writer cannot be set up
     */
    public XsonStreamWriter(OutputStream o) throws XMLStreamException {
        XMLOutputFactory factory = XMLOutputFactory.newInstance();
        // In repairing mode the XML writer takes care of declaring the
        // JSONx namespace, so no explicit writeNamespace calls are needed.
        factory.setProperty(XMLOutputFactory.IS_REPAIRING_NAMESPACES, true);
        w = factory.createXMLStreamWriter(o);
        w.setPrefix(JSONX_PREFIX, JSONX_NS_URI);
    }

    /**
     * Opens a JSONx element of the given type and adds the optional "name"
     * attribute to it. The caller is responsible for closing the element.
     */
    private void writeStart(String type, String name) throws XMLStreamException {
        w.writeStartElement(JSONX_PREFIX, type, JSONX_NS_URI);
        if (name != null) {
            w.writeAttribute("name", name);
        }
    }

    /**
     * Writes an object element and lets the value emit its own members.
     *
     * @param name the member name, or null for an unnamed value
     * @param value the object to serialize
     * @throws XMLStreamException if the underlying XML writer fails
     */
    public void write(String name, XsonObject value) throws XMLStreamException {
        writeStart("object", name);
        value.writeXsonMembers(this);
        w.writeEndElement();
    }

    /**
     * Writes an array element with one unnamed object element per item.
     *
     * @param name the member name, or null for an unnamed value
     * @param value the objects to serialize, in iteration order
     * @throws XMLStreamException if the underlying XML writer fails
     */
    public void write(String name, Iterable<? extends XsonObject> value) throws XMLStreamException {
        writeStart("array", name);
        for (XsonObject o : value) {
            write(null, o);
        }
        w.writeEndElement();
    }

    /**
     * Writes a number element holding the decimal form of the value.
     *
     * @param name the member name, or null for an unnamed value
     * @param value the integer to serialize
     * @throws XMLStreamException if the underlying XML writer fails
     */
    public void write(String name, int value) throws XMLStreamException {
        writeStart("number", name);
        w.writeCharacters(Integer.toString(value));
        w.writeEndElement();
    }

    /**
     * Writes a string element holding the value as character data.
     *
     * @param name the member name, or null for an unnamed value
     * @param value the text to serialize
     * @throws XMLStreamException if the underlying XML writer fails
     */
    public void write(String name, String value) throws XMLStreamException {
        writeStart("string", name);
        w.writeCharacters(value);
        w.writeEndElement();
    }

    /**
     * Flushes all pending output and releases the underlying XML writer.
     * The output stream passed to the constructor is not closed.
     *
     * @throws XMLStreamException if flushing or closing the XML writer fails
     */
    @Override
    public void close() throws XMLStreamException {
        // XMLStreamWriter.close() is only specified to free the resources of
        // the writer, so flush explicitly instead of relying on the
        // implementation to push out cached data on close.
        try {
            w.flush();
        } finally {
            w.close();
        }
    }
}

/**
* ExtendedTargetLexer is a subclass of the original lexer implementation.
* It can recognize skipped tokens and instead of eliminating them from the parser
Expand Down Expand Up @@ -100,7 +159,7 @@ private static class ExtendedTargetListener extends $listener_class {
private HDDRule root;
private boolean seen_terminal;

private static class Position {
private static class Position implements XsonObject {
public int line;
public int column;

Expand All @@ -127,14 +186,13 @@ private static int countLineBreaks(String text) {
}
}

public JsonObjectBuilder createJsonObjectBuilder() {
return Json.createObjectBuilder()
.add("line", line)
.add("column", column);
public void writeXsonMembers(XsonStreamWriter w) throws XMLStreamException {
w.write("line", line);
w.write("column", column);
}
}

private static abstract class HDDNode {
private static abstract class HDDNode implements XsonObject {
public String name;
public HDDRule parent;
public Position start;
Expand All @@ -147,17 +205,14 @@ public HDDNode(String _name) {
end = null;
}

public JsonObjectBuilder createJsonObjectBuilder() {
JsonObjectBuilder builder = Json.createObjectBuilder()
.add("type", getClass().getSimpleName());
public void writeXsonMembers(XsonStreamWriter w) throws XMLStreamException {
w.write("type", getClass().getSimpleName());
if (name != null)
builder.add("name", name);
w.write("name", name);
if (start != null)
builder.add("start", start.createJsonObjectBuilder());
w.write("start", start);
if (end != null)
builder.add("end", end.createJsonObjectBuilder());

return builder;
w.write("end", end);
}
}

Expand All @@ -176,13 +231,9 @@ public void addChild(HDDNode node) {
node.parent = this;
}

public JsonObjectBuilder createJsonObjectBuilder() {
JsonArrayBuilder children_array = Json.createArrayBuilder();
for (HDDNode child : children)
children_array.add(child.createJsonObjectBuilder());

return super.createJsonObjectBuilder()
.add("children", children_array);
public void writeXsonMembers(XsonStreamWriter w) throws XMLStreamException {
super.writeXsonMembers(w);
w.write("children", children);
}
}

Expand All @@ -196,9 +247,9 @@ public HDDToken(String _name, String _text, Position _start, Position _end) {
end = _end;
}

public JsonObjectBuilder createJsonObjectBuilder() {
return super.createJsonObjectBuilder()
.add("text", text);
public void writeXsonMembers(XsonStreamWriter w) throws XMLStreamException {
super.writeXsonMembers(w);
w.write("text", text);
}
}

Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ install_requires =
importlib-metadata; python_version < "3.8"
inators
picire==21.8
xson

[options.entry_points]
console_scripts =
Expand Down

0 comments on commit fc4dd29

Please sign in to comment.