Skip to content

Commit

Permalink
unique formula tracking and counts
Browse files Browse the repository at this point in the history
  • Loading branch information
sgbaird committed Mar 7, 2022
1 parent c1b0941 commit d756719
Showing 1 changed file with 15 additions and 0 deletions.
15 changes: 15 additions & 0 deletions all_formula_basic_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,21 @@ def get_data(page_start_calc_id, page_size=default_page_size):

# Write the full table to disk before deriving the per-formula summary.
df.to_csv("all-formula.csv")

# One row per distinct formula: gather all calc_id-s that share a formula
# into a single tuple, then record how many there are.
# NOTE(review): relies on `calc_id` being a column after `reset_index()`
# (presumably it is the index of `df`) -- confirm against the code above.
by_formula = df.reset_index().groupby(by="formula")
uniq_df = by_formula.agg({"calc_id": tuple}).reset_index()
uniq_df["count"] = uniq_df["calc_id"].map(len)

# Discard the "unavailable" formula row, then use the calc_id tuples as
# the index of the summary table.
is_available = uniq_df["formula"] != "unavailable"
uniq_df = uniq_df.loc[is_available].set_index("calc_id")

uniq_df.to_csv("unique-formula.csv")

# No-op expression kept from the original script.
# NOTE(review): presumably a breakpoint anchor for interactive runs -- confirm.
1 + 1

# %% Code Graveyard
Expand Down

0 comments on commit d756719

Please sign in to comment.