-
Notifications
You must be signed in to change notification settings - Fork 19
/
19-1 PySpark Example
1 lines (1 loc) · 1 KB
/
19-1 PySpark Example
1
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "19-1 PySpark Example ",
      "provenance": [],
      "collapsed_sections": [],
      "authorship_tag": "ABX9TyOEcyjioY+QXx+7KH38CKyI"
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "metadata": {
        "id": "qIUkzztZOse-"
      },
      "source": [
        "# PySpark Example\n",
        "# author: Gressling, T\n",
        "# license: MIT License # code: github.com/gressling/examples\n",
        "# activity: single example # index: 19-1 "
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "zcs_rwFrOy-X"
      },
      "source": [
        "import pyspark\n",
        "\n",
        "# getOrCreate() reuses an already-running context, so re-executing this cell\n",
        "# does not fail with a duplicate-SparkContext error.\n",
        "sc = pyspark.SparkContext.getOrCreate(pyspark.SparkConf().setMaster('local[*]'))"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "b82yxuviO0Zk"
      },
      "source": [
        "# Read /tmp/SMILES.txt into an RDD and report how many lines it holds.\n",
        "txt = sc.textFile('file:////tmp/SMILES.txt')\n",
        "print(txt.count())"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "qIwhA-9jO1am"
      },
      "source": [
        "# Keep only the lines containing the substring 'CO'.\n",
        "# The match is done on the raw line: SMILES is case-sensitive, and a\n",
        "# lowercased line could never contain the uppercase pattern 'CO'.\n",
        "CO_lines = txt.filter(lambda line: 'CO' in line)\n",
        "print(CO_lines.count())"
      ],
      "execution_count": null,
      "outputs": []
    }
  ]
}