-
Notifications
You must be signed in to change notification settings - Fork 0
/
Heatmap_Live.py
153 lines (128 loc) · 6.74 KB
/
Heatmap_Live.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# libraries for webscraping and parsing
from urllib.request import urlopen
from urllib.request import Request
from bs4 import BeautifulSoup
import yfinance as yf
import time
#for Date Time
from datetime import datetime
# for plotting and data manipulation
import pandas as pd
import matplotlib.pyplot as plt
import plotly
import plotly.express as px
# NLTK VADER for sentiment analysis
import nltk
# Fetch the VADER lexicon needed by SentimentIntensityAnalyzer.
# NOTE(review): this runs at every script start — presumably a no-op when the
# lexicon is already installed, but it still checks on each run; confirm.
nltk.downloader.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
#For importing tickers from Dow Jones Index
# Pull the Dow Jones constituents from Wikipedia (table index 1 is the
# components table on that page) and keep the ticker symbols.
df_dow = pd.read_html("https://en.wikipedia.org/wiki/Dow_Jones_Industrial_Average")[1]
tickers = df_dow['Symbol'].tolist()
# Scrape the FinViz quote page of each ticker; each page embeds a table
# (id='news-table') holding the date/time and headline of every news item.
finwiz_url = 'https://finviz.com/quote.ashx?t='
news_tables = {}
for ticker in tickers:
    url = finwiz_url + ticker
    # A browser-like User-Agent is required: FinViz blocks the default urllib agent.
    req = Request(url = url, headers = {"user-Agent" : "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15" })
    try:
        response = urlopen(req)
    except Exception:
        # Narrowed from a bare `except:` (which also swallowed KeyboardInterrupt).
        # Retry once after a 10s cooldown in case the request was throttled or blocked.
        time.sleep(10)
        response = urlopen(req)
    # Use the explicit stdlib parser name; the original feature string "html"
    # is ambiguous and can resolve to a different builder per environment.
    html = BeautifulSoup(response, "html.parser")
    # Find 'news-table' in the soup and store it keyed by ticker
    news_table = html.find(id='news-table')
    news_tables[ticker] = news_table
# Parse the date, time and news headlines into a Python list of
# [ticker, date, time, headline] rows.
parsed_news = []
for file_name, news_table in news_tables.items():
    # FinViz prints the date only on the first headline of each day; later rows
    # carry the time alone, so carry the most recent date forward. Reset per
    # ticker so a date never leaks from the previous ticker's table.
    news_date = None
    # Iterate through all <tr> tags in this ticker's news table
    for row in news_table.findAll('tr'):
        # Occasionally a row is malformed (missing <a> or <td>); skip it in a
        # try/except rather than aborting the whole run.
        try:
            # Headline text lives in the anchor tag
            text = row.a.get_text()
            # The <td> holds either "TIME" or "DATE TIME"
            date_scrape = row.td.text.split()
            if len(date_scrape) == 1:
                # Time only -> same date as the previous row
                news_time = date_scrape[0]
            else:
                news_date = date_scrape[0]
                news_time = date_scrape[1]
            # Dict keys are the plain tickers; the split is kept for
            # compatibility with file-name style keys ("AAPL_...").
            ticker = file_name.split('_')[0]
            print(ticker)
            # Skip rows seen before any date (the original raised an
            # uninitialized-variable NameError here and lost the row).
            if news_date is not None:
                parsed_news.append([ticker, news_date, news_time, text])
        except Exception as e:
            print(e)
# Perform sentiment analysis on each headline with NLTK's VADER.
vader = SentimentIntensityAnalyzer()
# Column names matching the order appended in parsed_news
columns = ['ticker', 'date', 'time', 'headline']
parsed_and_scored_news = pd.DataFrame(parsed_news, columns=columns)
# polarity_scores returns a dict {'neg', 'neu', 'pos', 'compound'} per headline
scores = parsed_and_scored_news['headline'].apply(vader.polarity_scores).tolist()
scores_df = pd.DataFrame(scores)
# Join the score columns onto the news rows (rsuffix guards against clashes)
parsed_and_scored_news = parsed_and_scored_news.join(scores_df, rsuffix='_right')
# Convert the date column from string to a plain date
parsed_and_scored_news['date'] = pd.to_datetime(parsed_and_scored_news.date).dt.date
# Mean of the sentiment scores per ticker. numeric_only=True is required:
# modern pandas raises a TypeError when mean() hits the string columns
# (time, headline) instead of silently dropping them.
mean_scores = parsed_and_scored_news.groupby(['ticker']).mean(numeric_only=True)
# Get market cap, sector and industry of each ticker via yfinance.
sectors = []
industries = []
marketcap = []
for ticker in tickers:
    print(ticker)
    tickerdata = yf.Ticker(ticker)
    # fast_info avoids the heavier .info fetch for the market cap
    marketcap.append(tickerdata.fast_info['market_cap'])
    # Hoist the .info lookup once per ticker instead of accessing the
    # attribute twice for the two fields.
    info = tickerdata.info
    sectors.append(info['sector'])
    industries.append(info['industry'])
# Combine the information above and the corresponding tickers into a DataFrame
d = {'Symbol': tickers, 'Sector': sectors, 'Industry': industries, 'Market Cap': marketcap}
df_info = pd.DataFrame(data=d)
# Attach company names from the Dow Jones DataFrame obtained earlier
df_info_name = df_info.merge(df_dow[['Company', 'Symbol']], on = 'Symbol')
# Join stock information and per-ticker mean sentiment ('ticker' is the
# groupby index of mean_scores, usable via left_on)
df = mean_scores.merge(df_info_name, left_on = 'ticker', right_on = 'Symbol')
df = df.rename(columns={"compound": "Sentiment Score", "neg": "Negative", "neu": "Neutral", "pos": "Positive"})
# Build the treemap plot: boxes are grouped sector -> industry -> ticker (the
# 'path' argument), box area is proportional to market cap, and box colour
# tracks the mean compound sentiment — red (#FF0000) for negative, black
# (#000000) for neutral, green (#00FF00) for positive, with the scale
# centred at 0. Hovering a box shows the company name and all four scores.
hover_cols = ['Company', 'Negative', 'Neutral', 'Positive', 'Sentiment Score']
fig = px.treemap(
    df,
    path=[px.Constant("Dow Jones"), 'Sector', 'Industry', 'Symbol'],
    values='Market Cap',
    color='Sentiment Score',
    hover_data=hover_cols,
    color_continuous_scale=['#FF0000', "#000000", '#00FF00'],
    color_continuous_midpoint=0,
)
# Round the hover data to 3 decimals and print each ticker label together
# with its sentiment score (customdata column 4) inside the box.
fig.data[0].customdata = df[hover_cols].round(3)
fig.data[0].texttemplate = "%{label}<br>%{customdata[4]}"
fig.update_traces(textposition="middle center")
fig.update_layout(margin=dict(t=30, l=10, r=10, b=10), font_size=20)
# Capture the current date/time and local timezone name once for the page
# header (the original called datetime.now() twice for one timestamp).
now = datetime.now()
dt_string = now.strftime("%d/%m/%Y %H:%M:%S")
timezone_string = now.astimezone().tzname()
# Generate the HTML file with the updated time and the treemap. Mode 'w'
# truncates any previous content directly (replaces the original
# 'a' + f.truncate(0) two-step).
with open('dow_jones_live_sentiment.html', 'w') as f:
    # '<center>' is the real (if deprecated) HTML tag; the original '<centre>'
    # is not an element, so the title was never actually centered.
    title = "<h1><center>Dow Jones Stock Sentiment Dashboard</center></h1>"
    updated = "<h3>Last updated: " + dt_string + " (Timezone: " + timezone_string + ")</h3>"
    description = "<h6>This dashboard is updated every half an hour with sentiment analysis performed on latest scraped news headlines from the FinViz website.</h6><br>"
    author = """<p> | Made with <3 by Rajarshi. <a href="https://github.com/rajarshi1902/Live-Stock-Sentiment_Treemap">Github Repo</a> </p> <p> Inspired by Damian Boh </p>"""
    f.write(title + updated + description + author)
    # Embed the treemap; include_plotlyjs='cdn' keeps the file small by
    # loading plotly.js from the CDN instead of inlining it.
    f.write(fig.to_html(full_html=False, include_plotlyjs='cdn'))