-
Notifications
You must be signed in to change notification settings - Fork 1
/
transcribe.py
157 lines (122 loc) · 4.84 KB
/
transcribe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# Copyright 2024 roperi. All Rights Reserved.
import httpx
import argparse
import os
import json
from dotenv import load_dotenv
from deepgram import (
DeepgramClient,
PrerecordedOptions,
)
from config import config, TIMEOUT_SECONDS
# Load credentials: populate the environment via python-dotenv, then read the
# API key that main() hands to DeepgramClient.
load_dotenv()
DG_API_KEY = os.environ.get("DG_API_KEY")
# Speaker label prefix; not referenced elsewhere in this section of the file.
TAG = 'SPEAKER '
# Function
def main(input_path, is_url, project_name):
    """
    Transcribe one audio source with Deepgram and store the results in 'output'.

    The raw response is saved as 'output/<project_name>__transcription.json'
    and then passed to process_transcript(). Any exception raised along the way
    is printed and the project name is recorded via write_to_errored_file();
    the exception is not re-raised.

    Parameters:
        input_path (str): Path to the input audio file or URL.
        is_url (bool): Flag to specify whether the input is a URL.
        project_name (str): Name of the project; used as the output file prefix.
    """
    try:
        # exist_ok=True replaces the exists()/makedirs() pair, which could fail
        # if the folder appeared between the check and the creation.
        os.makedirs('output', exist_ok=True)

        print('Initialising...')
        deepgram = DeepgramClient(DG_API_KEY, config)

        # Configure Deepgram options for audio analysis
        options = PrerecordedOptions(
            model="nova-2",
            smart_format=True,
            diarize=True,
            summarize="v2",
            detect_topics=True,
        )
        # TIMEOUT_SECONDS is the default for every timeout phase; only the
        # connect phase is overridden to 10 seconds.
        timeout = httpx.Timeout(TIMEOUT_SECONDS, connect=10.0)

        print(f'Transcribing {input_path}')
        if is_url:
            # Input is a URL
            source = {'url': input_path}
            response = deepgram.listen.prerecorded.v("1").transcribe_url(source, options, timeout=timeout)
        else:
            # Input is a local file: the whole file is read into memory
            with open(input_path, "rb") as file:
                payload = {"buffer": file.read()}
            response = deepgram.listen.prerecorded.v("1").transcribe_file(payload, options, timeout=timeout)
        response_dict = response.to_dict()

        print('Saving transcription...')
        # The file name is the same for URL and local-file inputs.
        output_json = os.path.join('output', f"{project_name}__transcription.json")
        with open(output_json, "w", encoding="utf-8") as json_file:
            json.dump(response_dict, json_file, indent=4)

        # Derive the summary, paragraphs and topics text files from the JSON
        print('Getting diarize and summary from transcript...')
        process_transcript(output_json, project_name)
    except Exception as e:
        # Best-effort boundary: report the failure and log the project name so
        # a failed run can be identified later.
        print(f"Exception: {e}")
        write_to_errored_file(project_name)
def write_to_errored_file(project_name):
    """
    Append the project name to 'output/errored.txt', one name per line.

    The 'output' directory is created when missing, so recording a failure
    cannot itself fail just because the directory was never created.

    Parameters:
        project_name (str): Name of the project to record as errored.
    """
    os.makedirs('output', exist_ok=True)
    errored_file_path = os.path.join('output', 'errored.txt')
    # Append mode keeps names recorded by earlier runs.
    with open(errored_file_path, "a") as errored_file:
        errored_file.write(project_name + "\n")
def process_transcript(output_json, project_name):
    """
    Split Deepgram's JSON transcription output into three text files.

    Reads the JSON at output_json and writes, inside the 'output' directory:
      - '<project_name>__summary.txt': results.summary.short, stripped, plus a
        newline (a lone newline when there is no summary);
      - '<project_name>__paragraphs.txt': the paragraphs transcript of the
        first alternative of the first channel, written as-is;
      - '<project_name>__topics.txt': every topic of every topics segment, one
        stripped topic per line.

    Sections that are missing, null or empty in the JSON are treated as empty
    instead of raising.

    Parameters:
        output_json (str): Path to the JSON file containing the transcription output.
        project_name (str): Name of the project; used as the output file prefix.
    """
    with open(output_json, "r", encoding="utf-8") as file:
        data = json.load(file)

    # `x.get(key) or default` is used throughout because `.get(key, default)`
    # only covers a missing key, not a key that is present with a null value.
    results = data.get("results") or {}

    os.makedirs('output', exist_ok=True)

    # Extract summary from JSON and write it to file
    summary = (results.get("summary") or {}).get("short") or ""
    with open(os.path.join('output', f'{project_name}__summary.txt'), 'w', encoding="utf-8") as f_summary:
        f_summary.write(summary.strip() + '\n')

    # Extract paragraphs from JSON; empty channel/alternative lists fall back
    # to a placeholder so indexing [0] cannot raise IndexError.
    channels = results.get("channels") or [{}]
    alternatives = (channels[0] or {}).get("alternatives") or [{}]
    paragraphs = (alternatives[0] or {}).get("paragraphs") or {}
    paragraphs_transcript = paragraphs.get("transcript") or ""

    # Write paragraphs to file
    with open(os.path.join('output', f'{project_name}__paragraphs.txt'), 'w', encoding="utf-8") as f_paragraphs:
        f_paragraphs.write(paragraphs_transcript)

    # Extract topics from JSON, flattening all segments in order
    topics_segments = (results.get("topics") or {}).get("segments") or []
    topic_values = []
    for segment in topics_segments:
        for topic_info in (segment or {}).get("topics") or []:
            topic_values.append(topic_info.get("topic") or "")

    # Write topics to file
    with open(os.path.join('output', f'{project_name}__topics.txt'), 'w', encoding="utf-8") as f_topics:
        f_topics.writelines(topic.strip() + '\n' for topic in topic_values)
if __name__ == "__main__":
    # Command-line entry point: parse the project name and the input source,
    # decide whether the source is a URL, and run the transcription.
    parser = argparse.ArgumentParser(description='Process transcripts from audio files or URLs.')
    parser.add_argument('--name', '-n', required=True, help='Name of the transcription project.')
    # required=True: without it, omitting --input left args.input as None and
    # the indexing below crashed with a TypeError instead of a usage message.
    parser.add_argument('--input', '-i', nargs=1, required=True, help='Path to audio file or URL.')
    args = parser.parse_args()

    # nargs=1 makes argparse store a one-element list
    input_path = args.input[0]

    # Detect if input path is a URL; the scheme is matched case-insensitively
    is_url = input_path.lower().startswith(('http://', 'https://'))

    main(input_path, is_url, args.name)