-
Notifications
You must be signed in to change notification settings - Fork 19
/
18-2 Capture production logs with FLUME
1 lines (1 loc) · 2.33 KB
/
18-2 Capture production logs with FLUME
1
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"18-2 Capture production logs with FLUME","provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyP+v45jX0CA+wtCozev6TP2"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"kro8EAknN7co"},"source":["# Capture production logs with FLUME\n","# author: Gressling, T\n","# license: MIT License # code: github.com/gressling/examples\n","# activity: single example # index: 18-2 "],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"luQGLFMAOBnL"},"source":["# stream.py\n","from pyspark import SparkContext\n","from pyspark.streaming import StreamingContext\n","from pyspark.streaming.flume import FlumeUtils"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"kq5yUN7vOGBq"},"source":["if __name__ == \"__main__\":\n"," sc = SparkContext(appName=\"data_production_lot1\");\n"," ssc = StreamingContext(sc, 30)\n"," stream = FlumeUtils.createStream(ssc, \"127.0.0.1\", 55555)\n"," stream.pp(rint()"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"zt7GnKz_OIa2"},"source":["# capture.py\n","import requests\n","import random"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"Tsz_NXDWOLHe"},"source":["class log_capture():\n"," def __init__(self):\n"," self.url = \"https://<<YOUR SERVER>>/api/production/\"\n"," self.rand = str(random.randint(0, 99)) # random data\n"," self.r = requests.get(self.url + self.rand)\n"," \n"," def get_r(self):\n"," return(self.r.text)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"Tl_hhWOiOPmg"},"source":["if __name__ == \"__main__\":\n"," import os\n"," with open(\"log_capture.txt\", \"w\") as file_in:\n"," file_in.write(log_capture().get_r())\n"," os.system(\"cat log_capture.txt\")"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"C5t4TGBhOXva"},"source":["# flume.conf\n","agent.sources = tail-file\n","agent.channels = c1\n","agent.sinks=avro-sink\n","# define source and sink\n","agent.sources.tail-file.type = exec # important!\n","agent.sources.tail-file.command = python /home/book/capture.py\n","agent.sinks.avro-sink.type = avro\n","agent.sinks.avro-sink.hostname = 127.0.0.1\n","agent.sinks.avro-sink.port = 55555"],"execution_count":null,"outputs":[]}]}