-
Notifications
You must be signed in to change notification settings - Fork 0
/
voice.py
43 lines (38 loc) · 1.13 KB
/
voice.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import torch
import torchaudio
import torch.nn.functional as F
import encodeAndDecode
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Used for testing a single audio file.
def test(model_path: str = "../param/voice_nnf_40_new.pth",
         filename: str = r"F:\wyyadd.wav"):
    """Transcribe one audio file with the saved model.

    Loads the model from ``model_path``, computes 40 MFCC coefficients for
    the audio in ``filename``, runs the model on them, and decodes the
    prediction with ``encodeAndDecode.decode``.

    Args:
        model_path: Path of the saved model passed to ``torch.load``.
        filename: Path of the audio file passed to ``torchaudio.load``.

    Returns:
        The ``(chinese, chinese_pinyin)`` pair returned by
        ``encodeAndDecode.decode.pinyin2chinese`` for the first decoded
        sequence.
    """
    # NOTE(security): torch.load unpickles the file, so only load checkpoints
    # from a trusted source.
    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
    myModel = torch.load(model_path, map_location=device)
    # The input is moved to `device` below, so the model must live there too.
    # Assumes the checkpoint is a whole nn.Module (it is called directly below).
    myModel = myModel.to(device)
    # Inference only: switch dropout / batch-norm layers to evaluation mode.
    myModel.eval()

    waveform, sample_rate = torchaudio.load(filename)
    # NOTE(review): flatten concatenates the channels of multi-channel audio
    # end to end; confirm the inputs are mono.
    waveform = torch.flatten(waveform)

    mfcc_transform = torchaudio.transforms.MFCC(
        sample_rate=sample_rate,
        n_mfcc=40,
        melkwargs={
            'n_fft': 2048,
            'n_mels': 128,
            'hop_length': 512,
            'mel_scale': 'htk',
        },
    )

    # batch=1, channel=1, feature, time
    voice = mfcc_transform(waveform).unsqueeze(0).unsqueeze(0)
    voice = voice.to(device)

    # No gradients are needed for decoding, so skip building the autograd graph.
    with torch.no_grad():
        # batch, time, n_class
        pred = myModel(voice)
        pred = F.log_softmax(pred, dim=2)

    decoded_preds, _ = encodeAndDecode.decode.greed_decode(pred)
    chinese, chinese_pinyin = encodeAndDecode.decode.pinyin2chinese(decoded_preds[0])
    return chinese, chinese_pinyin
# Run the single-file test and print the recognised text, then its pinyin.
chinese, chinese_pinyin = test()
for line in (chinese, chinese_pinyin):
    print(line)