-
Notifications
You must be signed in to change notification settings - Fork 0
/
mp3downloader.py
executable file
·204 lines (165 loc) · 6.86 KB
/
mp3downloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
#! /usr/bin/env python3.7
###
# File: mp3downloader.py
#
# Author: Francesco Tosello
###
##
# TODO
#
# - ...
##
from re import compile as compile_regex, match, escape, IGNORECASE
from os import remove, makedirs, listdir
from os.path import isdir, join as pjoin, isfile, dirname
from shutil import rmtree
from urllib.request import Request, urlopen
from urllib.parse import quote
from urllib.error import URLError
from pytube import YouTube
from ffmpy import FFmpeg as Converter
from mutagen.easyid3 import EasyID3
from utils import MUSIC_DOWNLOAD_DIRECTORY as SAVE_DIR, DOWNLOADED_VIDEOS_HASHES_FILEPATH as HASHES_FILEPATH, IGNORE_VIDEOS_HASHES_FILEPATH as IGNORE_HASHES, VIDEO_URL
from utils import Song
import utils
SONGS_REGEX = r"(?P<title>[\w\ \']+)\ \-\ (?P<artist>[\w\ ]+)" # this is the input format for the songs
USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11" # do not change this user-agent
YOUTUBE_SEARCH_QUERY = "https://www.youtube.com/results?search_query={}" # search videos using this query
VIDEO_ID_REGEX = r"data\-context\-item\-id\=\"(?P<id>\w+)\"" # search for videos ids
TEMP_DIR = ".tmp" # temporary working directory
def load_from_file(filename):
sf = open(filename, 'r')
lines = sf.readlines()
sf.close()
sgs = list()
for s in lines:
if not s.strip(): continue # skip empty lines
matched_desc = compile_regex(SONGS_REGEX).match(s)
if matched_desc:
song = Song( *matched_desc.groups() )
sgs.append(song)
else:
print( f"This line is not well formatted: {s}" )
return sgs
def load_from_stdin():
songs = list()
line = 1
while line:
line = input("Provide a song in this format: title - artist. Or leave blank to continue. ").strip()
matched_desc = compile_regex(SONGS_REGEX).match(line)
if matched_desc:
songs.append( Song(*matched_desc.groups()) )
elif line:
print( f"The input is not well formatted." )
return songs
def search_song_url(song, hashes_file = HASHES_FILEPATH, download_another = False):
# Make the query using the provided information
if song.artist:
query = f"{song.artist} - {song.title}"
elif song.album:
query = f"{song.title} - {song.album}"
else:
query = f"{song.title}"
print( f"Searching for: {query}" )
req = Request(YOUTUBE_SEARCH_QUERY.format( quote(query) ), None, {'User-Agent': USER_AGENT})
try:
content = str( urlopen(req).read() )
except URLError:
print("Connection error, quitting.")
exit(-3)
for video_match in compile_regex(VIDEO_ID_REGEX).finditer(content): # find video links
if video_match:
video_id = video_match.group('id')
if utils.match_hash(video_id, IGNORE_HASHES):
print( f"Video with id {video_id} is in the ignore list, searching another link." )
continue
elif hashes_file and utils.match_hash(video_id, hashes_file):
print( f"Video with id {video_id} already downloaded." )
if download_another:
print( "Searching for another link." )
continue
else: return None
link = VIDEO_URL + video_id
print( f"Found video match at {link}" )
return link
print( "No video ids found, skipping.")
return None
def download_song(url, song = None, auto = False, fold = True, overwrite = False, hashes_file = HASHES_FILEPATH):
if not url: return False, None
yt = YouTube(url)
if song and not compile_regex( escape(song.title), IGNORECASE ).search( yt.title ) and not auto:
if input( f"Are you sure you want to download this song: {yt.title} ({url})? " ).lower() not in ['s', 'si', 'y', 'yes']:
utils.add_hash(url, IGNORE_HASHES)
print("Download canceled, video added to ignore list.")
return None, None
if not song: song = Song(yt.title)
if not isdir(TEMP_DIR): makedirs(TEMP_DIR) # create a temporary directory
streams = yt.streams.filter( only_audio = True ).all() + yt.streams.filter( progressive = True ).all() # select both only audio and mixed streams
if not len(streams):
print( "No stream found for song: {song}" )
return None, None
else:
print( "Found {} streams associated with this url: {}".format(len(streams), url) )
streams[0].download( TEMP_DIR ) # download the first
if not isdir(SAVE_DIR): makedirs(SAVE_DIR)
filepath = pjoin(SAVE_DIR, "{}".format( song._filepath(fold) ))
if isfile(filepath):
if overwrite:
print( f"Overwriting {filepath}." )
remove(filepath)
else:
print( f"{filepath} already existing, skipping." )
return None, None
if not isdir( dirname(filepath) ): makedirs( dirname(filepath) )
Converter(
global_options = '-loglevel quiet', # suppress output
inputs = {pjoin(TEMP_DIR, f"{streams[0].default_filename}") : None},
outputs = {filepath : None}
).run()
if hashes_file: utils.add_hash(url, hashes_file)
return filepath, song.title
def edit_tags(filepath, song):
audio_file = EasyID3(filepath)
if not song.title:
raise RuntimeError("Please, provide at least the song title")
audio_file["title"] = song.title
if song.artist: audio_file["artist"] = song.artist
if song.album: audio_file["album"] = song.album
audio_file.save()
if __name__ == '__main__':
from argparse import ArgumentParser
# Argument parsing
parser = ArgumentParser(description = "Download songs from YouTube")
parser.add_argument('-f', '--from-file', metavar = 'FILENAME', dest = 'file', help = "Load a list of songs from a file")
parser.add_argument('--hashes-file', default = HASHES_FILEPATH, help = "Custom path for the file that contains the video hashes")
parser.add_argument('-da', '--download-another', action = 'store_true', help = "Download another version if a video has already been downloaded")
parser.add_argument('-oe', '--overwrite', '--overwrite-existing', action = 'store_true', help = "Overwrite existing files")
parser.add_argument('-nf', '--do-not-fold', dest = 'fold', action = 'store_false', help = "Don't create artist and album directories")
parser.add_argument('--auto', action = 'store_true', help = "Do not prompt anything, go straight forward.")
args = parser.parse_args()
if args.download_another: args.overwrite = True
# Program start
print("Script loaded correctly, started.")
try:
songs = list()
if args.file: songs = load_from_file(args.file)
if not songs and not args.auto: songs = load_from_stdin()
num_songs = len(songs)
if num_songs:
print( "{} songs loaded successfully.".format(num_songs) )
else:
print( "No songs provided, quitting." )
exit(-2)
for index,s in enumerate(songs):
print( f"Processing song {index+1} of {num_songs}: {s}" )
outputfile, title = download_song( search_song_url(s, args.hashes_file, args.download_another), s, args.auto, args.fold, args.overwrite, args.hashes_file )
if outputfile:
print( f"Downloaded: {s} to {outputfile}" )
edit_tags(outputfile, s)
print( f"Done. Downloaded {num_songs}" )
except KeyboardInterrupt:
print("\nInterrupted, exiting.")
finally:
if isdir(TEMP_DIR): rmtree(TEMP_DIR)
if not listdir(SAVE_DIR): remove(SAVE_DIR)
print("All cleaned.")