vmware · yonitoo · Apr 18, 2023 · Apr 7, 2023 · Apr 12, 2023 · Apr 12, 2023
diff --git a/examples/dag-with-args-example/README.md b/examples/dag-with-args-example/README.md
diff --git a/examples/dag-with-args-example/dag-job/config.ini b/examples/dag-with-args-example/dag-job/config.ini
@@ -0,0 +1,13 @@
+; Supported format: https://docs.python.org/3/library/configparser.html#supported-ini-file-structure
+
+; This is the only file required to deploy a Data Job.
+; Read more to understand what each option means:
+
+; Information about the owner of the Data Job
+[owner]
+
+; Team is a way to group Data Jobs that belong to the same team.
+team = my-team
+
+[vdk]
+meta_jobs_max_concurrent_running_jobs = 2
diff --git a/examples/dag-with-args-example/dag-job/dag_job.py b/examples/dag-with-args-example/dag-job/dag_job.py
@@ -0,0 +1,39 @@
+# Copyright 2021-2023 VMware, Inc.
+# SPDX-License-Identifier: Apache-2.0
+from vdk.plugin.meta_jobs.meta_job_runner import MetaJobInput
+
+
+def _job(job_name, depends_on, **table_arguments):
+    """Describe one DAG node; all jobs share the team, schema and catalog."""
+    return {
+        "job_name": job_name,
+        "team_name": "my-team",
+        "fail_meta_job_on_error": True,
+        "arguments": {
+            **table_arguments,
+            "db_schema": "default",
+            "db_catalog": "memory",
+        },
+        "depends_on": depends_on,
+    }
+
+
+# The two ingest jobs list no dependencies; every read job lists both ingest
+# jobs in its "depends_on" and receives both table names as an argument.
+JOBS_RUN_ORDER = [
+    _job("ingest-job1", [], db_table="test_dag_one"),
+    _job("ingest-job2", [], db_table="test_dag_two"),
+    *(
+        _job(
+            read_job,
+            ["ingest-job1", "ingest-job2"],
+            db_tables=["test_dag_one", "test_dag_two"],
+        )
+        for read_job in ("read-job1", "read-job2", "read-job3")
+    ),
+]
+
+
+def run(job_input):
+    """Hand JOBS_RUN_ORDER to MetaJobInput.run_meta_job; job_input is unused."""
+    MetaJobInput().run_meta_job(JOBS_RUN_ORDER)
diff --git a/examples/dag-with-args-example/dag-job/requirements.txt b/examples/dag-with-args-example/dag-job/requirements.txt
diff --git a/examples/dag-with-args-example/images/dag.png b/examples/dag-with-args-example/images/dag.png
diff --git a/examples/dag-with-args-example/ingest-job1/01_drop_table.sql b/examples/dag-with-args-example/ingest-job1/01_drop_table.sql
@@ -0,0 +1 @@
+DROP TABLE IF EXISTS memory.default.test_dag_one
diff --git a/examples/dag-with-args-example/ingest-job1/10_insert_data.py b/examples/dag-with-args-example/ingest-job1/10_insert_data.py
@@ -0,0 +1,74 @@
+# Copyright 2021-2023 VMware, Inc.
+# SPDX-License-Identifier: Apache-2.0
+import json
+import pathlib
+
+from vdk.api.job_input import IJobInput
+
+# Keys read from each data.json record, in the order of the table columns.
+SOURCE_KEYS = ("id", "FirstName", "LastName", "City", "Country", "Phone")
+
+
+def _sql_literal(value) -> str:
+    """Render a JSON value as a SQL varchar literal, or NULL for None."""
+    if value is None:
+        return "NULL"
+    # All columns are varchar, so non-string values (e.g. integer ids) are
+    # stringified; embedded single quotes are doubled to keep the SQL valid.
+    return "'" + str(value).replace("'", "''") + "'"
+
+
+def run(job_input: IJobInput):
+    """
+    Create the target table if needed and load the rows of data.json into it.
+
+    The table name is built from the db_catalog, db_schema and db_table job
+    arguments. When data.json is missing the step only prints a notice.
+
+    :param job_input: the job input used to read arguments and run queries
+    :raises ValueError: if any of the three job arguments is missing or empty
+    """
+    data_file = pathlib.Path(job_input.get_job_directory()) / "data.json"
+    if not data_file.exists():
+        print("No data File Available! Exiting job execution!")
+        return
+
+    arguments = job_input.get_arguments()
+    db_catalog = arguments.get("db_catalog")
+    db_schema = arguments.get("db_schema")
+    db_table = arguments.get("db_table")
+    if not (db_catalog and db_schema and db_table):
+        raise ValueError(
+            "The db_catalog, db_schema and db_table job arguments are required."
+        )
+    table = f"{db_catalog}.{db_schema}.{db_table}"
+
+    with open(data_file, encoding="utf-8") as f:
+        data = json.load(f)
+
+    job_input.execute_query(
+        f"""
+        CREATE TABLE IF NOT EXISTS {table}
+        (
+            id varchar,
+            first_name varchar,
+            last_name varchar,
+            city varchar,
+            country varchar,
+            phone varchar
+        )
+        """
+    )
+
+    if not data:
+        # "INSERT ... VALUES" with no rows is not valid SQL.
+        print("The data file contains no rows. Nothing to insert.")
+        return
+
+    rows = ", ".join(
+        "(" + ", ".join(_sql_literal(row.get(key)) for key in SOURCE_KEYS) + ")"
+        for row in data
+    )
+    job_input.execute_query(f"INSERT INTO {table} VALUES {rows}")
+
+    print("Success! The data was sent to Trino.")
diff --git a/examples/dag-with-args-example/ingest-job1/config.ini b/examples/dag-with-args-example/ingest-job1/config.ini
@@ -0,0 +1,14 @@
+; Supported format: https://docs.python.org/3/library/configparser.html#supported-ini-file-structure
+
+; This is the only file required to deploy a Data Job.
+; Read more to understand what each option means:
+
+; Information about the owner of the Data Job
+[owner]
+
+; Team is a way to group Data Jobs that belong to the same team.
+team = my-team
+
+; Configuration related to running data jobs
+[job]
+db_default_type = TRINO
diff --git a/examples/dag-with-args-example/ingest-job1/data.json b/examples/dag-with-args-example/ingest-job1/data.json
@@ -0,0 +1 @@
+[{"id":"18","FirstName":"Michelle","LastName":"Brooks","City":"New York","Country":"USA","Phone":"+1 (212) 221-3546"},{"id":"19","FirstName":"Tim","LastName":"Goyer","City":"Cupertino","Country":"USA","Phone":"+1 (408) 996-1010"},{"id":"20","FirstName":"Dan","LastName":"Miller","City":"Mountain View","Country":"USA","Phone":"+ 1(650) 644 - 3358"},{"id":"21","FirstName":"Kathy","LastName":"Chase","City":"Reno","Country":"USA","Phone":"+1 (775) 223-7665"},{"id":"22","FirstName":"Heather","LastName":"Leacock","City":"Orlando","Country":"USA","Phone":"+1 (407) 999-7788"},{"id":"23","FirstName":"John","LastName":"Gordon","City":"Boston","Country":"USA","Phone":"+1 (617) 522-1333"},{"id":"24","FirstName":"Frank","LastName":"Ralston","City":"Chicago","Country":"USA","Phone":"+1 (312) 332-3232"},{"id":"25","FirstName":"Victor","LastName":"Stevens","City":"Madison","Country":"USA","Phone":"+1 (608) 257-0597"},{"id":"26","FirstName":"Richard","LastName":"Cunningham","City":"Fort Worth","Country":"USA","Phone":"+1 (817) 924-7272"},{"id":"27","FirstName":"Patrick","LastName":"Gray","City":"Tucson","Country":"USA","Phone":"+1 (520) 622-4200"},{"id":"28","FirstName":"Julia","LastName":"Barnett","City":"Salt Lake City","Country":"USA","Phone":"+1 (801) 531-7272"},{"id":"29","FirstName":"Robert","LastName":"Brown","City":"Toronto","Country":"Canada","Phone":"+1 (416) 363-8888"},{"id":"30","FirstName":"Edward","LastName":"Francis","City":"Ottawa","Country":"Canada","Phone":"+1 (613) 234-3322"}]
diff --git a/examples/dag-with-args-example/ingest-job1/requirements.txt b/examples/dag-with-args-example/ingest-job1/requirements.txt
@@ -0,0 +1 @@
+vdk-trino
diff --git a/examples/dag-with-args-example/ingest-job2/01_drop_table.sql b/examples/dag-with-args-example/ingest-job2/01_drop_table.sql
@@ -0,0 +1 @@
+DROP TABLE IF EXISTS memory.default.test_dag_two
diff --git a/examples/dag-with-args-example/ingest-job2/10_insert_data.py b/examples/dag-with-args-example/ingest-job2/10_insert_data.py
@@ -0,0 +1,74 @@
+# Copyright 2021-2023 VMware, Inc.
+# SPDX-License-Identifier: Apache-2.0
+import json
+import pathlib
+
+from vdk.api.job_input import IJobInput
+
+# Keys read from each data.json record, in the order of the table columns.
+SOURCE_KEYS = ("id", "FirstName", "LastName", "City", "Country", "Phone")
+
+
+def _sql_literal(value) -> str:
+    """Render a JSON value as a SQL varchar literal, or NULL for None."""
+    if value is None:
+        return "NULL"
+    # All columns are varchar, so non-string values (e.g. the integer ids in
+    # this job's data.json) are stringified; single quotes are doubled.
+    return "'" + str(value).replace("'", "''") + "'"
+
+
+def run(job_input: IJobInput):
+    """
+    Create the target table if needed and load the rows of data.json into it.
+
+    The table name is built from the db_catalog, db_schema and db_table job
+    arguments. When data.json is missing the step only prints a notice.
+
+    :param job_input: the job input used to read arguments and run queries
+    :raises ValueError: if any of the three job arguments is missing or empty
+    """
+    data_file = pathlib.Path(job_input.get_job_directory()) / "data.json"
+    if not data_file.exists():
+        print("No data File Available! Exiting job execution!")
+        return
+
+    arguments = job_input.get_arguments()
+    db_catalog = arguments.get("db_catalog")
+    db_schema = arguments.get("db_schema")
+    db_table = arguments.get("db_table")
+    if not (db_catalog and db_schema and db_table):
+        raise ValueError(
+            "The db_catalog, db_schema and db_table job arguments are required."
+        )
+    table = f"{db_catalog}.{db_schema}.{db_table}"
+
+    with open(data_file, encoding="utf-8") as f:
+        data = json.load(f)
+
+    job_input.execute_query(
+        f"""
+        CREATE TABLE IF NOT EXISTS {table}
+        (
+            id varchar,
+            first_name varchar,
+            last_name varchar,
+            city varchar,
+            country varchar,
+            phone varchar
+        )
+        """
+    )
+
+    if not data:
+        # "INSERT ... VALUES" with no rows is not valid SQL.
+        print("The data file contains no rows. Nothing to insert.")
+        return
+
+    rows = ", ".join(
+        "(" + ", ".join(_sql_literal(row.get(key)) for key in SOURCE_KEYS) + ")"
+        for row in data
+    )
+    job_input.execute_query(f"INSERT INTO {table} VALUES {rows}")
+
+    print("Success! The data was sent to Trino.")
diff --git a/examples/dag-with-args-example/ingest-job2/config.ini b/examples/dag-with-args-example/ingest-job2/config.ini
@@ -0,0 +1,14 @@
+; Supported format: https://docs.python.org/3/library/configparser.html#supported-ini-file-structure
+
+; This is the only file required to deploy a Data Job.
+; Read more to understand what each option means:
+
+; Information about the owner of the Data Job
+[owner]
+
+; Team is a way to group Data Jobs that belong to the same team.
+team = my-team
+
+; Configuration related to running data jobs
+[job]
+db_default_type = TRINO
diff --git a/examples/dag-with-args-example/ingest-job2/data.json b/examples/dag-with-args-example/ingest-job2/data.json
@@ -0,0 +1 @@
+[{"id": 31, "FirstName": "Martha", "LastName": "Silk", "City": "Halifax", "Country": "Canada", "Phone": "+1 (902) 450-0450"}, {"id": 32, "FirstName": "Aaron", "LastName": "Mitchell", "City": "Winnipeg", "Country": "Canada", "Phone": "+1 (204) 452-6452"}, {"id": 33, "FirstName": "Ellie", "LastName": "Sullivan", "City": "Yellowknife", "Country": "Canada", "Phone": "+1 (867) 920-2233"}, {"id": 34, "FirstName": "Jo\u00e3o", "LastName": "Fernandes", "City": "Lisbon", "Country": "Portugal", "Phone": "+351 (213) 466-111"}, {"id": 35, "FirstName": "Madalena", "LastName": "Sampaio", "City": "Porto", "Country": "Portugal", "Phone": "+351 (225) 022-448"}, {"id": 36, "FirstName": "Hannah", "LastName": "Schneider", "City": "Berlin", "Country": "Germany", "Phone": "+49 030 26550280"}, {"id": 37, "FirstName": "Fynn", "LastName": "Zimmermann", "City": "Frankfurt", "Country": "Germany", "Phone": "+49 069 40598889"}, {"id": 38, "FirstName": "Niklas", "LastName": "Schr\u00f6der", "City": "Berlin", "Country": "Germany", "Phone": "+49 030 2141444"}, {"id": 39, "FirstName": "Camille", "LastName": "Bernard", "City": "Paris", "Country": "France", "Phone": "+33 01 49 70 65 65"}, {"id": 40, "FirstName": "Dominique", "LastName": "Lefebvre", "City": "Paris", "Country": "France", "Phone": "+33 01 47 42 71 71"}, {"id": 41, "FirstName": "Marc", "LastName": "Dubois", "City": "Lyon", "Country": "France", "Phone": "+33 04 78 30 30 30"}, {"id": 42, "FirstName": "Wyatt", "LastName": "Girard", "City": "Bordeaux", "Country": "France", "Phone": "+33 05 56 96 96 96"}, {"id": 43, "FirstName": "Isabelle", "LastName": "Mercier", "City": "Dijon", "Country": "France", "Phone": "+33 03 80 73 66 99"}, {"id": 44, "FirstName": "Terhi", "LastName": "H\u00e4m\u00e4l\u00e4inen", "City": "Helsinki", "Country": "Finland", "Phone": "+358 09 870 2000"}, {"id": 45, "FirstName": "Ladislav", "LastName": "Kov\u00e1cs", "City": "Budapest", "Country": "Hungary", "Phone": "+123 123 456"}, {"id": 46, "FirstName": "Hugh", 
"LastName": "OReilly", "City": "Dublin", "Country": "Ireland", "Phone": "+353 01 6792424"}]
diff --git a/examples/dag-with-args-example/ingest-job2/requirements.txt b/examples/dag-with-args-example/ingest-job2/requirements.txt
@@ -0,0 +1 @@
+vdk-trino
diff --git a/examples/dag-with-args-example/read-job1/10_read.py b/examples/dag-with-args-example/read-job1/10_read.py
@@ -0,0 +1,26 @@
+# Copyright 2021-2023 VMware, Inc.
+# SPDX-License-Identifier: Apache-2.0
+from vdk.api.job_input import IJobInput
+
+
+def run(job_input: IJobInput):
+    """
+    Print the rows whose Country is 'USA' from both ingested tables.
+
+    The tables are addressed through the db_catalog, db_schema and db_tables
+    job arguments; the first two names in db_tables are queried, in order.
+    """
+    db_catalog, db_schema, db_tables = (
+        job_input.get_arguments().get(name)
+        for name in ("db_catalog", "db_schema", "db_tables")
+    )
+
+    job1_data, job2_data = [
+        job_input.execute_query(
+            f"SELECT * FROM {db_catalog}.{db_schema}.{db_tables[position]} "
+            f"WHERE Country = 'USA'"
+        )
+        for position in (0, 1)
+    ]
+
+    print(f"Job 1 Data ===> {job1_data} \n\n\n Job 2 Data ===> {job2_data}")
diff --git a/examples/dag-with-args-example/read-job1/config.ini b/examples/dag-with-args-example/read-job1/config.ini
@@ -0,0 +1,14 @@
+; Supported format: https://docs.python.org/3/library/configparser.html#supported-ini-file-structure
+
+; This is the only file required to deploy a Data Job.
+; Read more to understand what each option means:
+
+; Information about the owner of the Data Job
+[owner]
+
+; Team is a way to group Data Jobs that belong to the same team.
+team = my-team
+
+; Configuration related to running data jobs
+[job]
+db_default_type = TRINO
diff --git a/examples/dag-with-args-example/read-job1/requirements.txt b/examples/dag-with-args-example/read-job1/requirements.txt
@@ -0,0 +1 @@
+vdk-trino
diff --git a/examples/dag-with-args-example/read-job2/10_read.py b/examples/dag-with-args-example/read-job2/10_read.py
@@ -0,0 +1,26 @@
+# Copyright 2021-2023 VMware, Inc.
+# SPDX-License-Identifier: Apache-2.0
+from vdk.api.job_input import IJobInput
+
+
+def run(job_input: IJobInput):
+    """
+    Print the rows whose Country is 'Canada' from both ingested tables.
+
+    The tables are addressed through the db_catalog, db_schema and db_tables
+    job arguments; the first two names in db_tables are queried, in order.
+    """
+    db_catalog, db_schema, db_tables = (
+        job_input.get_arguments().get(name)
+        for name in ("db_catalog", "db_schema", "db_tables")
+    )
+
+    job1_data, job2_data = [
+        job_input.execute_query(
+            f"SELECT * FROM {db_catalog}.{db_schema}.{db_tables[position]} "
+            f"WHERE Country = 'Canada'"
+        )
+        for position in (0, 1)
+    ]
+
+    print(f"Job 1 Data ===> {job1_data} \n\n\n Job 2 Data ===> {job2_data}")
diff --git a/examples/dag-with-args-example/read-job2/config.ini b/examples/dag-with-args-example/read-job2/config.ini
@@ -0,0 +1,14 @@
+; Supported format: https://docs.python.org/3/library/configparser.html#supported-ini-file-structure
+
+; This is the only file required to deploy a Data Job.
+; Read more to understand what each option means:
+
+; Information about the owner of the Data Job
+[owner]
+
+; Team is a way to group Data Jobs that belong to the same team.
+team = my-team
+
+; Configuration related to running data jobs
+[job]
+db_default_type = TRINO
diff --git a/examples/dag-with-args-example/read-job2/requirements.txt b/examples/dag-with-args-example/read-job2/requirements.txt
@@ -0,0 +1 @@
+vdk-trino
diff --git a/examples/dag-with-args-example/read-job3/10_read.py b/examples/dag-with-args-example/read-job3/10_read.py
@@ -0,0 +1,26 @@
+# Copyright 2021-2023 VMware, Inc.
+# SPDX-License-Identifier: Apache-2.0
+from vdk.api.job_input import IJobInput
+
+
+def run(job_input: IJobInput):
+    """
+    Print the rows whose Country is neither 'USA' nor 'Canada' from both tables.
+
+    The tables are addressed through the db_catalog, db_schema and db_tables
+    job arguments; the first two names in db_tables are queried, in order.
+    """
+    db_catalog, db_schema, db_tables = (
+        job_input.get_arguments().get(name)
+        for name in ("db_catalog", "db_schema", "db_tables")
+    )
+
+    job1_data, job2_data = [
+        job_input.execute_query(
+            f"SELECT * FROM {db_catalog}.{db_schema}.{db_tables[position]} "
+            f"WHERE Country NOT IN ('USA', 'Canada')"
+        )
+        for position in (0, 1)
+    ]
+
+    print(f"Job 1 Data ===> {job1_data} \n\n\n Job 2 Data ===> {job2_data}")
diff --git a/examples/dag-with-args-example/read-job3/20_drop_table_one.sql b/examples/dag-with-args-example/read-job3/20_drop_table_one.sql
@@ -0,0 +1 @@
+DROP TABLE IF EXISTS memory.default.test_dag_one
diff --git a/examples/dag-with-args-example/read-job3/30_drop_table_two.sql b/examples/dag-with-args-example/read-job3/30_drop_table_two.sql
@@ -0,0 +1 @@
+DROP TABLE IF EXISTS memory.default.test_dag_two
diff --git a/examples/dag-with-args-example/read-job3/config.ini b/examples/dag-with-args-example/read-job3/config.ini
@@ -0,0 +1,14 @@
+; Supported format: https://docs.python.org/3/library/configparser.html#supported-ini-file-structure
+
+; This is the only file required to deploy a Data Job.
+; Read more to understand what each option means:
+
+; Information about the owner of the Data Job
+[owner]
+
+; Team is a way to group Data Jobs that belong to the same team.
+team = my-team
+
+; Configuration related to running data jobs
+[job]
+db_default_type = TRINO
diff --git a/examples/dag-with-args-example/read-job3/requirements.txt b/examples/dag-with-args-example/read-job3/requirements.txt
@@ -0,0 +1 @@
+vdk-trino