Skip to content

Commit

Permalink
Python adapter objects are not dicts (#18)
Browse files Browse the repository at this point in the history
* strings converted to Python objects.

* more benchmark data

* removed outlier in benchmark data, updated async iter to convert strings to JSON, fixed broken unit tests.

---------

Co-authored-by: Salaah Amin <[email protected]>
  • Loading branch information
Salaah01 and Salaah01 committed Jul 11, 2023
1 parent 15b8aca commit 08ff3c2
Show file tree
Hide file tree
Showing 9 changed files with 83 additions and 75 deletions.
2 changes: 1 addition & 1 deletion adapters/python/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
"..",
"..",
"sample_data",
"large-file2.json",
"1gb_sample.json",
)


Expand Down
5 changes: 3 additions & 2 deletions adapters/python/json_lineage/bin_interface.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import asyncio
import json
import os
import platform
import subprocess
Expand Down Expand Up @@ -99,7 +100,7 @@ def __next__(self) -> str:
if not line and self.process.poll() is not None:
raise StopIteration

return line
return json.loads(line)

def raise_err_if_stderr(self) -> None:
"""Raise an exception if the process has exited with a non-zero
Expand Down Expand Up @@ -161,7 +162,7 @@ async def __anext__(self) -> str:
if not output:
raise StopAsyncIteration

return output
return json.loads(output)

async def read_output(self, process: asyncio.subprocess.Process) -> str:
if process.stdout is None:
Expand Down
5 changes: 3 additions & 2 deletions adapters/python/json_lineage/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

import argparse
import json

from .bin_interface import BinaryReader

Expand Down Expand Up @@ -42,14 +43,14 @@ def parse_args() -> argparse.Namespace:
def print_lines(reader: BinaryReader) -> None:
"""Prints the lines from the given reader to stdout."""
for line in reader:
print(line)
print(json.dumps(line))


def write_lines(reader: BinaryReader, filepath: str) -> None:
"""Writes the lines from the given reader to the given filepath."""
with open(filepath, "w") as f:
for line in reader:
f.write(line + "\n")
f.write(json.dumps(line) + "\n")


def main() -> None:
Expand Down
8 changes: 4 additions & 4 deletions adapters/python/tests/test_bin_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,8 @@ def test_iter_next_valid(self):
correctly.
"""
iterator = iter(self.reader)
self.assertEqual(next(iterator), '{"a": {"B": 1},"b": 2}')
self.assertEqual(next(iterator), '{"a": 1,"b": 2}')
self.assertEqual(next(iterator), {"a": {"B": 1}, "b": 2})
self.assertEqual(next(iterator), {"a": 1, "b": 2})
with self.assertRaises(StopIteration):
next(iterator)

Expand Down Expand Up @@ -213,8 +213,8 @@ async def test__anext__valid(self):
correctly.
"""
iterator = self.reader.__aiter__()
self.assertEqual(await iterator.__anext__(), '{"a": {"B": 1},"b": 2}')
self.assertEqual(await iterator.__anext__(), '{"a": 1,"b": 2}')
self.assertEqual(await iterator.__anext__(), {"a": {"B": 1}, "b": 2})
self.assertEqual(await iterator.__anext__(), {"a": 1, "b": 2})
with self.assertRaises(StopAsyncIteration):
await iterator.__anext__()

Expand Down
Binary file removed docs/benchmark/chart.png
Binary file not shown.
122 changes: 61 additions & 61 deletions docs/benchmark/data.csv
Original file line number Diff line number Diff line change
@@ -1,61 +1,61 @@
Size (MB),json time (s),json_lineage time (s),json memory (MB),json_lineage memory (MB)
0.048,0.00021077,0.000845804,0.25390625,0.25390625
0.104,0.00047136,0.001612068,0.56640625,0.25390625
0.5,0.00308214,0.00314784,2.38671875,0.25390625
1,0.00581463,0.004497603,5.48046875,0.25390625
1,0.00583787,0.005626616,5.4921875,0
5,0.0262362,0.016702787,25.1875,0.76953125
5,0.02599522,0.022903366,25.47265625,0.51171875
10,0.05348607,0.030667299,49.30078125,0.76953125
10,0.05098542,0.043556251,49.30078125,0.76953125
22,0.11981642,0.094660366,109.0625,1.02734375
22.4,0.11301189,0.066785997,109.1210938,1.02734375
32,0.16630844,0.134776533,158.7421875,1.02734375
32.6,0.16941246,0.095722743,158.8476563,1.02734375
324,1.82063374,0.983030271,1580.453125,1.02734375
324,1.72193738,1.352585148,1580.398438,1.02734375
648,1.96224359,1.871552215,1893.640625,1.28515625
648,2.09448288,2.588428135,1893.636719,1.28515625
1299,3.93848725,3.616621059,3788.453125,1.28515625
1299,3.92919501,5.314683937,3788.449219,1.28515625
2599,7.89523022,7.427831112,7578.089844,1.28515625
2599,7.85149572,10.72114959,7577.972656,1.28515625
1299,3.88051242,3.603517595,3788.457031,1.03125
2599,7.7918933,7.289222741,7577.980469,1.28515625
1,0.00570846,0.00396567,5.4921875,0
5,0.02496078,0.015906791,25.4609375,0.51171875
10,0.05085998,0.030398058,49.33203125,0.76953125
22,0.11198785,0.064576183,109.0664063,1.02734375
32,0.17143015,0.096963441,158.8867188,1.02734375
324,1.72314042,0.9171876,1580.386719,1.0546875
650,1.92980589,1.831675567,1893.894531,1.02734375
32.5,0.39204991,0.268465507,158.8007813,1.3515625
32.5,0.374459068,0.25794184,158.9179688,1.33203125
32.5,0.355715643,0.264436918,159.0351563,1.265625
65.2,0.61693874,0.552779791,205.4765625,1.40234375
65.2,0.54103022,0.504108346,205.2226563,1.66015625
65.2,0.544830753,0.499249982,205.21875,1.66015625
130.5,1.410542798,1.148517098,411.34375,1.66015625
130.5,0.934719726,1.008595377,411.34375,1.66015625
130.5,1.020322171,1.048664546,411.4101563,1.59765625
261,2.007359278,1.976628878,824.2929688,1.578125
261,2.030777841,1.97742864,824.2773438,1.58984375
261,2.028148368,2.537675085,824.1875,1.68359375
522,4.564506504,3.949188775,1649.941406,1.40234375
522,3.898375265,4.742846328,1649.667969,1.671875
522,4.147978261,4.158081964,1649.945313,1.40234375
1044,11.8663244,8.044492946,3300.285156,1.76953125
1044,15.13048573,12.38840186,3300.378906,1.67578125
1044,14.38754885,13.7675003,3300.378906,1.67578125
2089,17.24552824,23.21747523,6599.136719,1.41015625
2089,16.69158588,19.35446463,6599.636719,1.55078125
2089,17.56462736,24.50044865,6587.847656,1.6015625
17.7,0.0894825869999635,0.0544712319999689,86.33203125,0.7734375
17.7,0.0906111929999724,0.0509335030000102,86.35546875,1.02734375
17.7,0.0930709350000142,0.051624127000025,86.3046875,1.02734375
9.8,0.0511048029999869,0.0286221590000081,47.8984375,0.76953125
9.8,0.0509068070000466,0.0286550430000147,47.86328125,0.76953125
9.8,0.0520795989999669,0.0290204319999816,47.82421875,0.76953125
4.75,0.0252546689999917,0.0143774709999888,23.1484375,0.51171875
4.75,0.0247309629999108,0.0140819330000567,23.13671875,0.51171875
4.75,0.025624189000041,0.0138256049999654,23.1796875,0.51171875
Size (MB),json time (s),json_lineage time (s),json memory (MB),json_lineage memory (MB)
0.048,0.000187379,0.001026665,0.25390625,0.25390625
0.048,0.000187639,0.000848341,0.25390625,0.25390625
0.048,0.000185429,0.000782916,0.25390625,0.25390625
648,1.786290982,1.764834627,1894.136719,0.76953125
648,1.748629923,1.608522877,1894.144531,0.76953125
648,1.80302736,1.635774736,1894.136719,0.76953125
0.5,0.002574576,0.002234618,2.45703125,0.25390625
0.5,0.002652391,0.002266548,2.45703125,0.25390625
0.5,0.002646586,0.002431317,2.4375,0.25390625
324,1.623446973,1.019178936,1580.472656,0.76953125
324,1.633219484,0.904567404,1580.296875,1.02734375
324,1.648815081,0.918388073,1580.375,0.875
0.1,0.000385728,0.001770351,0.34375,0.25390625
0.1,0.000391548,0.001030844,0.375,0.25390625
0.1,0.000392988,0.000955715,0.37890625,0.25390625
32,0.167733618,0.092833867,158.9726563,0.76953125
32,0.165368629,0.092652957,158.828125,0.890625
32,0.165844254,0.092207408,158.90625,0.76953125
22,0.113389825,0.06948521,109.0625,0.87109375
22,0.11228614,0.064824643,109.171875,0.76953125
22,0.116620554,0.063575595,109.2421875,0.76953125
10,0.049972365,0.031992314,49.265625,0.76953125
10,0.052166676,0.030218926,49.1796875,0.76953125
10,0.05329383,0.030508478,49.2734375,0.76953125
5,0.026881424,0.016021506,25.28125,0.76953125
5,0.026973428,0.01563254,25.2890625,0.76953125
5,0.027570056,0.01551155,25.3125,0.76953125
1,0.005546092,0.005055609,5.3046875,0.51171875
1,0.005906938,0.004042198,5.3125,0.51171875
1,0.005686951,0.004397667,5.3203125,0.51171875
2600,7.355221545,7.158330476,7578.5,0.76953125
2600,7.22098767,6.478043138,7578.480469,0.76953125
2600,7.358220967,6.567071394,7578.476563,0.76953125
1300,3.698027505,3.437218335,3788.957031,0.76953125
1300,3.567906313,3.212152002,3789.054688,0.76953125
1300,3.55010366,3.186897628,3788.949219,0.76953125
32.5,0.39026705099999504,0.4173327340000128,158.90625,0.5703125
32.5,0.41303973200001565,0.40899053300000787,159.1484375,0.82421875
32.5,0.9608042609999643,1.1909728859999973,159.0859375,0.88671875
1.2,0.017493891999947664,0.03367988400009381,4.671875,0.3671875
1.2,0.016430391000085365,0.03195728399987274,4.67578125,0.3671875
1.2,0.012793589000011707,0.030167773000130182,4.66796875,0.30859375
5.7,0.07111108200001581,0.10417817400002605,23.5,0.3671875
5.7,0.06610126099985791,0.15221917999997459,23.55859375,0.30859375
5.7,0.06116652799983058,0.11612183399984133,23.5,0.37109375
0.05,0.0007447000000411208,0.004224599000053786,0.37109375,0.37109375
0.05,0.0010039000001142995,0.003694498999948337,0.30859375,0.30859375
0.05,0.0007703000001129112,0.003491900999961217,0.37109375,0.37109375
0.17,0.0019102070000371896,0.004866018000029726,0.59375,0.37109375
0.17,0.00258639999992738,0.005157099999905768,0.58984375,0.3671875
0.17,0.0034123010000257636,0.005403102000173021,0.65234375,0.30859375
0.436,0.010152108000056614,0.015467912000076467,1.83203125,0.37109375
0.436,0.010152108000056614,0.015467912000076467,1.83203125,0.37109375
0.436,0.007171901999981856,0.008216802999868378,1.828125,0.37109375
12,0.11637160300006144,0.2233615060001739,49.9140625,0.37109375
12,0.1175036029999319,0.22289990600006604,49.84765625,0.3671875
12,0.11890860199991948,0.22787510300008762,49.91015625,0.30859375
27,0.26490470999988247,0.47356741899989174,110.703125,0.3671875
27,0.2516662139998971,0.476593628000046,110.765625,0.37109375
27,0.24983521099989048,0.48311261999992894,110.76953125,0.37109375
Binary file modified docs/benchmark/mem_diff_chart.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
16 changes: 11 additions & 5 deletions docs/benchmark/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,26 +1,28 @@
#
# This file is autogenerated by pip-compile with Python 3.11
# This file is autogenerated by pip-compile with Python 3.8
# by the following command:
#
# pip-compile
# pip-compile requirements.in
#
build==0.10.0
# via pip-tools
click==8.1.3
click==8.1.4
# via pip-tools
contourpy==1.1.0
# via matplotlib
cycler==0.11.0
# via matplotlib
fonttools==4.40.0
# via matplotlib
importlib-resources==6.0.0
# via matplotlib
kiwisolver==1.4.4
# via matplotlib
matplotlib==3.7.1
# via
# -r requirements.in
# seaborn
numpy==1.25.0
numpy==1.24.4
# via
# contourpy
# matplotlib
Expand All @@ -34,7 +36,7 @@ pandas==2.0.3
# via
# -r requirements.in
# seaborn
pillow==9.5.0
pillow==10.0.0
# via matplotlib
pip-tools==6.13.0
# via -r requirements.in
Expand All @@ -52,10 +54,14 @@ seaborn==0.12.2
# via -r requirements.in
six==1.16.0
# via python-dateutil
tomli==2.0.1
# via build
tzdata==2023.3
# via pandas
wheel==0.40.0
# via pip-tools
zipp==3.16.0
# via importlib-resources

# The following packages are considered to be unsafe in a requirements file:
# pip
Expand Down
Binary file modified docs/benchmark/time_diff_chart.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 08ff3c2

Please sign in to comment.