From fc4dd29393095d5a95a1aaf6dc1a20cad0381d61 Mon Sep 17 00:00:00 2001 From: Akos Kiss Date: Sun, 14 May 2023 01:34:02 +0200 Subject: [PATCH] Use JSONx to transfer parse tree between processes By default, the language of the parsers generated from grammars is Python and they are executed in-process. However, the generation of Java parsers is also supported for performance reasons, in which case the parsers are executed in a separate process and their result must be communicated back to the Python part of the reducer. The data exchange format used to be JSON, but JSON is not a first-class citizen in Java and standard edition contains no packages to read/write JSON. Unfortunately, the latest releases of ANTLR have stopped bundling a JSON implementation with the tool jar. To avoid the need for adding dependencies to the use of Java parsers, the data exchange format is changed to JSONx. Serialization to JSONx in Java is supported by a minimal partial implementation, while deserialization in Python is supported by the XSON project. --- picireny/antlr4/hdd_tree_builder.py | 11 +- .../resources/ExtendedTargetParser.java | 109 +++++++++++++----- setup.cfg | 1 + 3 files changed, 87 insertions(+), 34 deletions(-) diff --git a/picireny/antlr4/hdd_tree_builder.py b/picireny/antlr4/hdd_tree_builder.py index bb60caa..ee096e9 100644 --- a/picireny/antlr4/hdd_tree_builder.py +++ b/picireny/antlr4/hdd_tree_builder.py @@ -5,7 +5,6 @@ # This file may not be copied, modified, or distributed except # according to those terms. 
-import json import logging import re import shutil @@ -18,6 +17,8 @@ from string import Template from subprocess import CalledProcessError, PIPE, run, STDOUT +import xson + from antlr4 import CommonTokenStream, error, InputStream, Token from antlr4.Token import CommonToken @@ -372,7 +373,7 @@ def set_replacement(node): logger.debug('Parse input with %s rule', start_rule) if lang != 'python': - def hdd_tree_from_json(node_dict): + def hdd_tree_from_dict(node_dict): # Convert interval dictionaries to Position objects. if 'start' in node_dict: node_dict['start'] = Position(**node_dict['start']) @@ -386,7 +387,7 @@ def hdd_tree_from_json(node_dict): if children: for child in children: - node.add_child(hdd_tree_from_json(child)) + node.add_child(hdd_tree_from_dict(child)) elif name: if name in grammar['islands']: island_nodes.append(node) @@ -398,8 +399,8 @@ def hdd_tree_from_json(node_dict): input=src, stdout=PIPE, stderr=PIPE, universal_newlines=True, cwd=current_workdir, check=True) if proc.stderr: logger.debug(proc.stderr) - result = json.loads(proc.stdout) - tree_root = hdd_tree_from_json(result) + result = xson.loads(proc.stdout) + tree_root = hdd_tree_from_dict(result) except CalledProcessError as e: logger.error('Java parser failed!\n%s\n%s', e.stdout, e.stderr) raise diff --git a/picireny/antlr4/resources/ExtendedTargetParser.java b/picireny/antlr4/resources/ExtendedTargetParser.java index e91f4ff..6e6f103 100644 --- a/picireny/antlr4/resources/ExtendedTargetParser.java +++ b/picireny/antlr4/resources/ExtendedTargetParser.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Renata Hodovan, Akos Kiss. + * Copyright (c) 2016-2023 Renata Hodovan, Akos Kiss. * * Licensed under the BSD 3-Clause License * . 
@@ -9,8 +9,7 @@ import java.io.*; import java.util.*; - -import javax.json.*; +import javax.xml.stream.*; import org.antlr.v4.runtime.*; import org.antlr.v4.runtime.tree.*; @@ -57,8 +56,9 @@ public static void main(String[] args) { parser.addParseListener(listener); Extended$parser_class.class.getMethod(args[0]).invoke(parser); parser.syntaxErrorWarning(); - try (JsonWriter w = Json.createWriter(System.out)) { - w.write(listener.root.createJsonObjectBuilder().build()); + + try (XsonStreamWriter w = new XsonStreamWriter(System.out)) { + w.write(null, listener.root); } } catch(Exception e) { e.printStackTrace(System.err); @@ -66,6 +66,65 @@ public static void main(String[] args) { } } + private static interface XsonObject { + public void writeXsonMembers(XsonStreamWriter w) throws XMLStreamException; + } + + /** + * XsonStreamWriter is a partial implementation for writing JSONx documents. + * It only implements the minimum required to dump HDDNode objects. + */ + private static class XsonStreamWriter implements AutoCloseable { + public static final String JSONX_PREFIX = "json"; + public static final String JSONX_NS_URI = "http://www.ibm.com/xmlns/prod/2009/jsonx"; + + private XMLStreamWriter w; + + public XsonStreamWriter(OutputStream o) throws XMLStreamException { + XMLOutputFactory factory = XMLOutputFactory.newInstance(); + factory.setProperty(XMLOutputFactory.IS_REPAIRING_NAMESPACES, true); + w = factory.createXMLStreamWriter(o); + w.setPrefix(JSONX_PREFIX, JSONX_NS_URI); + } + + public void write(String name, XsonObject value) throws XMLStreamException { + w.writeStartElement(JSONX_PREFIX, "object", JSONX_NS_URI); + if (name != null) + w.writeAttribute("name", name); + value.writeXsonMembers(this); + w.writeEndElement(); + } + + public void write(String name, Iterable value) throws XMLStreamException { + w.writeStartElement(JSONX_PREFIX, "array", JSONX_NS_URI); + if (name != null) + w.writeAttribute("name", name); + for (XsonObject o : value) + write(null, o); + 
w.writeEndElement(); + } + + public void write(String name, int value) throws XMLStreamException { + w.writeStartElement(JSONX_PREFIX, "number", JSONX_NS_URI); + if (name != null) + w.writeAttribute("name", name); + w.writeCharacters(Integer.toString(value)); + w.writeEndElement(); + } + + public void write(String name, String value) throws XMLStreamException { + w.writeStartElement(JSONX_PREFIX, "string", JSONX_NS_URI); + if (name != null) + w.writeAttribute("name", name); + w.writeCharacters(value); + w.writeEndElement(); + } + + public void close() throws XMLStreamException { + w.close(); + } + } + /** * ExtendedTargetLexer is a subclass of the original lexer implementation. * It can recognize skipped tokens and instead of eliminating them from the parser @@ -100,7 +159,7 @@ private static class ExtendedTargetListener extends $listener_class { private HDDRule root; private boolean seen_terminal; - private static class Position { + private static class Position implements XsonObject { public int line; public int column; @@ -127,14 +186,13 @@ private static int countLineBreaks(String text) { } } - public JsonObjectBuilder createJsonObjectBuilder() { - return Json.createObjectBuilder() - .add("line", line) - .add("column", column); + public void writeXsonMembers(XsonStreamWriter w) throws XMLStreamException { + w.write("line", line); + w.write("column", column); } } - private static abstract class HDDNode { + private static abstract class HDDNode implements XsonObject { public String name; public HDDRule parent; public Position start; @@ -147,17 +205,14 @@ public HDDNode(String _name) { end = null; } - public JsonObjectBuilder createJsonObjectBuilder() { - JsonObjectBuilder builder = Json.createObjectBuilder() - .add("type", getClass().getSimpleName()); + public void writeXsonMembers(XsonStreamWriter w) throws XMLStreamException { + w.write("type", getClass().getSimpleName()); if (name != null) - builder.add("name", name); + w.write("name", name); if (start != 
null) - builder.add("start", start.createJsonObjectBuilder()); + w.write("start", start); if (end != null) - builder.add("end", end.createJsonObjectBuilder()); - - return builder; + w.write("end", end); } } @@ -176,13 +231,9 @@ public void addChild(HDDNode node) { node.parent = this; } - public JsonObjectBuilder createJsonObjectBuilder() { - JsonArrayBuilder children_array = Json.createArrayBuilder(); - for (HDDNode child : children) - children_array.add(child.createJsonObjectBuilder()); - - return super.createJsonObjectBuilder() - .add("children", children_array); + public void writeXsonMembers(XsonStreamWriter w) throws XMLStreamException { + super.writeXsonMembers(w); + w.write("children", children); } } @@ -196,9 +247,9 @@ public HDDToken(String _name, String _text, Position _start, Position _end) { end = _end; } - public JsonObjectBuilder createJsonObjectBuilder() { - return super.createJsonObjectBuilder() - .add("text", text); + public void writeXsonMembers(XsonStreamWriter w) throws XMLStreamException { + super.writeXsonMembers(w); + w.write("text", text); } } diff --git a/setup.cfg b/setup.cfg index c485a26..d0b7c8a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -32,6 +32,7 @@ install_requires = importlib-metadata; python_version < "3.8" inators picire==21.8 + xson [options.entry_points] console_scripts =