From bfeb8a06442d451b146d27d5ee1e45b7f87d349a Mon Sep 17 00:00:00 2001 From: zxcd <228587199@qq.com> Date: Tue, 11 Jun 2024 11:12:58 +0800 Subject: [PATCH] adapt view behavior change, fix KeyError. (#3794) * adapt view behavior change, fix KeyError. * fix readme demo run error. * fixed opencc version --- paddlespeech/cli/asr/infer.py | 2 +- paddlespeech/s2t/models/wav2vec2/wav2vec2_ASR.py | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/paddlespeech/cli/asr/infer.py b/paddlespeech/cli/asr/infer.py index 4001f957fa..231a00f4d8 100644 --- a/paddlespeech/cli/asr/infer.py +++ b/paddlespeech/cli/asr/infer.py @@ -274,7 +274,7 @@ def preprocess(self, model_type: str, input: Union[str, os.PathLike]): # fbank audio = preprocessing(audio, **preprocess_args) - audio_len = paddle.to_tensor([audio.shape[0]]).unsqueeze(axis=0) + audio_len = paddle.to_tensor(audio.shape[0]).unsqueeze(axis=0) audio = paddle.to_tensor(audio, dtype='float32').unsqueeze(axis=0) self._inputs["audio"] = audio diff --git a/paddlespeech/s2t/models/wav2vec2/wav2vec2_ASR.py b/paddlespeech/s2t/models/wav2vec2/wav2vec2_ASR.py index a3744d3403..64195defc6 100755 --- a/paddlespeech/s2t/models/wav2vec2/wav2vec2_ASR.py +++ b/paddlespeech/s2t/models/wav2vec2/wav2vec2_ASR.py @@ -188,7 +188,7 @@ def ctc_greedy_search(self, wav) -> List[List[int]]: x_lens = x.shape[1] ctc_probs = self.ctc.log_softmax(x) # (B, maxlen, vocab_size) topk_prob, topk_index = ctc_probs.topk(1, axis=2) # (B, maxlen, 1) - topk_index = topk_index.view([batch_size, x_lens]) # (B, maxlen) + topk_index = topk_index.reshape([batch_size, x_lens]) # (B, maxlen) hyps = [hyp.tolist() for hyp in topk_index] hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] diff --git a/setup.py b/setup.py index 95069f1a97..7d3af40d78 100644 --- a/setup.py +++ b/setup.py @@ -48,7 +48,7 @@ "matplotlib", "nara_wpe", "onnxruntime>=1.11.0", - "opencc", + "opencc==1.1.6", "opencc-python-reimplemented", "pandas", "paddleaudio>=1.1.0",