Skip to content

Commit

Permalink
Update all_formula_basic_metadata.py
Browse files (browse the repository at this point in the history)
  • Loading branch information
sgbaird committed Mar 7, 2022
1 parent f05e6bc commit c1b0941
Showing 1 changed file with 27 additions and 14 deletions.
41 changes: 27 additions & 14 deletions all_formula_basic_metadata.py
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
+from itertools import chain
 import requests
 import json
 import numpy as np
Expand Down Expand Up @@ -64,23 +65,20 @@ def get_data(page_start_calc_id, page_size=default_page_size):
     result, next_page_calc_id, n_iter = post_first_request(
         page_start_calc_id, page_size=page_size
     )
-    data = result["data"]
-    formulas = [
-        datum["formula"] if "formula" in datum.keys() else None for datum in data
-    ]
-    calc_ids = [datum["calc_id"] for datum in data]
+    # initialize
+    data = []
+    d = result["data"]
+    data.append(d)

     for _ in trange(n_iter):
-        result, next_page_calc_id = post_request(next_page_calc_id)
-        data = result["data"]
-        formula = [
-            datum["formula"] if "formula" in datum.keys() else "" for datum in data
-        ]
-        calc_id = [datum["calc_id"] for datum in data]
-        formulas = formulas + formula
-        calc_ids = calc_ids + calc_id
+        result, next_page_calc_id = post_request(next_page_calc_id, page_size=page_size)
+        d = result["data"]
+        data.append(d)
+
+    print(f"merging {n_iter + 1} lists")
+    data = list(chain(*data))

-    df = pd.DataFrame({"formula": formulas, "calc_id": calc_ids}).set_index("calc_id")
+    df = pd.DataFrame(data).set_index("calc_id")

     return df

Expand Down Expand Up @@ -132,3 +130,18 @@ def get_data(page_start_calc_id, page_size=default_page_size):
 # df = pd.DataFrame(data, index=[0])

 # return df
+
+# formulas = [
+# datum["formula"] if "formula" in datum.keys() else None for datum in data
+# ]
+# calc_ids = [datum["calc_id"] for datum in data]
+
+# formula = [
+# datum["formula"] if "formula" in datum.keys() else "" for datum in data
+# ]
+# calc_id = [datum["calc_id"] for datum in data]
+# formulas = formulas + formula
+# calc_ids = calc_ids + calc_id
+
+# df = pd.DataFrame({"formula": formulas, "calc_id": calc_ids}).set_index("calc_id")
+
0 comments on commit c1b0941

Please sign in to comment.