-
Notifications
You must be signed in to change notification settings - Fork 1
/
app.py
140 lines (107 loc) · 3.9 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# setting up the path to spark
import findspark
findspark.init('c:/spark')
# __pycache__ may cause reloading to the flask server so disabling the cache file
import sys
sys.dont_write_bytecode = True
# get the pyspark libraries
from pyspark.ml.recommendation import ALSModel
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, IntegerType, LongType
# other required library
import codecs
import os
# importing flask
from flask import Flask, render_template, request
# Load the item file into a movie-ID -> title lookup table.
def loadMovieNames(path="data/ml-100k/u.ITEM"):
    """Return a dict mapping integer movie IDs to movie titles.

    Every line of the item file is pipe-delimited: the first field is the
    movie ID and the second field is the title.  Lines with fewer than two
    fields (for example blank lines) are skipped instead of raising
    ``IndexError``.

    Args:
        path: Location of the item file.  The default is the path this
            application has always read, so existing zero-argument callers
            are unaffected; pass another path if your copy lives elsewhere.

    Returns:
        dict: movie ID (int) -> title (str).

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the first field of a line is not an integer.
    """
    movieNames = {}
    # Built-in open() decodes the file and only treats \n, \r and \r\n as
    # line terminators.
    with open(path, "r", encoding="ISO-8859-1", errors="ignore") as f:
        for line in f:
            fields = line.split("|")
            if len(fields) < 2:
                # Not an "id|title|..." record; nothing to store.
                continue
            movieNames[int(fields[0])] = fields[1]
    return movieNames
# Invert a dictionary: the movie-ID -> title table becomes title -> movie-ID,
# which is what is needed to look up an ID from a title.
def rev(d1):
    """Return a new dict in which the keys and values of ``d1`` are swapped.

    Args:
        d1: Mapping to invert.  Its values must be hashable because they
            become keys of the result.

    Returns:
        dict: value -> key.  When several keys share the same value, the key
        that comes last in ``d1``'s iteration order wins.  ``d1`` itself is
        not modified.
    """
    return {value: key for key, value in d1.items()}
# Remove a trailing " (YYYY)" release-year suffix from a title, if present.
def _stripYearSuffix(title):
    """Return ``title`` without trailing whitespace and without a final
    `` (YYYY)`` suffix; titles that carry no such suffix are left intact."""
    trimmed = title.rstrip()
    hasYear = (
        len(trimmed) >= 7
        and trimmed[-7:-5] == " ("
        and trimmed[-5:-1].isdigit()
        and trimmed[-1] == ")"
    )
    return trimmed[:-7] if hasYear else trimmed


# Get the movie ID for a title typed by the user.
def getMovieID(name, movieNames):
    """Look up the movie ID whose title matches ``name``.

    The comparison is case-insensitive, ignores surrounding whitespace and
    ignores the `` (YYYY)`` year suffix of the stored titles.  The suffix is
    only removed when it is actually there, so titles without a year can be
    matched as well.

    Args:
        name: Title entered by the user.  ``None`` or a blank string is
            treated as "not found".
        movieNames: dict mapping movie ID -> title.

    Returns:
        The matching movie ID, or ``0`` when no title matches.
    """
    if not name or not name.strip():
        return 0
    wanted = name.strip().lower()

    # Invert to title -> ID first so that, as before, the last ID wins when
    # the exact same title string is stored under several IDs.
    idsByTitle = {title: movieID for movieID, title in movieNames.items()}
    for title, movieID in idsByTitle.items():
        if _stripYearSuffix(title).strip().lower() == wanted:
            return movieID
    # Not found in the entire dataset.
    return 0
# Create (or reuse) the Spark session used by the rest of the module.
spark = SparkSession.builder.appName("ALSReccom").getOrCreate()

# Create the Flask application object.
app = Flask(__name__)

# Load the ALS model stored at model_path (the path is echoed to stdout).
model_path = "M:/spark project/movie recommendation/als_model"
print(model_path)
model = ALSModel.load(model_path)

# Schema applied to the tab-separated ratings file read below.
moviesSchema = StructType(
    [
        StructField("userID", IntegerType(), True),
        StructField("movieID", IntegerType(), True),
        StructField("rating", IntegerType(), True),
        StructField("timestamp", LongType(), True),
    ]
)

# Movie ID -> title lookup table.
names = loadMovieNames()

# Ratings DataFrame read with the schema above.
ratings = (
    spark.read.option("sep", "\t")
    .schema(moviesSchema)
    .csv("data/ml-100k/u.data")
)
# Serve the index page.
@app.route("/")
def home():
    """Render the ``index.html`` template and return the result."""
    page = render_template("index.html")
    return page
# Communication with the frontend chatbot.
@app.route("/get")
def get_bot_response():
    """Answer a chatbot message with a space-separated list of movie titles.

    The message is read from the ``msg`` query parameter.

    * A missing or empty message yields the "no data" reply instead of
      crashing on ``None``.
    * ``quit`` stops the Spark session and returns immediately, so nothing
      is looked up against a stopped session.
    * Any other message is treated as a movie title; when the title is
      unknown, or the model returns nothing usable, the "no data" reply is
      sent.

    Returns:
        str: the reply text shown by the chatbot.
    """
    noDataReply = "Sorry but I have no data about the movie please try another one :)"

    # Get the entry from the front end.
    entry = request.args.get('msg')
    if not entry:
        return noDataReply

    if entry == 'quit':
        # NOTE(review): any client can stop Spark through this endpoint;
        # confirm that this is intended.
        spark.stop()
        return "Spark session stopped."

    # Get the movie ID of the entered movie; 0 means it is not present.
    movieID = getMovieID(entry, names)
    if movieID == 0:
        return noDataReply

    # NOTE(review): the movie ID is placed in the "userID" column and passed
    # to recommendForUserSubset, i.e. it is used as if it were a user ID.
    # This is kept as in the original code; confirm it is the intended
    # behaviour.
    userSchema = StructType([StructField("userID", IntegerType(), True)])
    users = spark.createDataFrame([[movieID,]], userSchema)

    # Use the loaded model to get up to 10 recommendations per row of `users`.
    recommendations = model.recommendForUserSubset(users, 10).collect()

    # Each result row is (userID, [Row(movieID, rating), ...]); keep the title
    # of every recommended movie ID present in `names`, skipping unknown IDs
    # rather than failing with KeyError.
    titles = [
        names[rec[0]]
        for userRecs in recommendations
        for rec in userRecs[1]
        if rec[0] in names
    ]
    if not titles:
        return noDataReply

    # Return one string containing all the movie titles.
    return " ".join(titles)
# Script entry point: start the Flask server with debug mode turned off.
if __name__ == "__main__":
    app.run(debug=False)