diff --git a/fixed preprocess data for variance in extraction cell b/fixed preprocess data for variance in extraction cell deleted file mode 100644 index e04f440..0000000 --- a/fixed preprocess data for variance in extraction cell +++ /dev/null @@ -1,797 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [], - "gpuType": "T4", - "collapsed_sections": [ - "MP5rRkbTpnG8", - "Wv0gfI5feBSc", - "eexZl_OCDmQ3", - "0J3b18EKdzMC", - "FY40fGHEg9_i", - "4sbU1aH5kGFE" - ], - "include_colab_link": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - }, - "accelerator": "GPU" - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MP5rRkbTpnG8" - }, - "source": [ - "# _**[DiffSinger](https://github.com/openvpi/DiffSinger)**_\n", - "_Singing Voice Synthesis via Shallow Diffusion Mechanism (SVS & TTS)_\n", - "\n", - "____\n", - "\n", - "Note:\n", - "- This notebook will get update semi-frequently based from the feedback or response from users\n", - "\n", - "\\\n", - "____\n", - "\\\n", - "#### **This notebook is an edited copy of Kei's Diffsinger [colab notebook](https://colab.research.google.com/drive/1kUg9dz8PPH92NfnLZwgq0_9B9an39t1J?usp=sharing)**\n", - "####**This notebook is maintained by MLo7**\n", - "\\\n", - "___\n", - "\n", - "```Expand this cell for more details```" - ] - }, - { - "cell_type": "markdown", - "source": [ - "This notebook converts your data (.lab & .wav) to compatible format via [nnsvs-db-converter](https://github.com/UtaUtaUtau/nnsvs-db-converter)\n", - "\n", - "~~Extracted data will also make .ds files when estimate_midi is enabled, which might not be accurate by default. 
So you might want to edit it using [SlurCutter](https://github.com/openvpi/MakeDiffSinger/releases) for a refined data for your pitch model~~ (This notebook currently does not support pre-made files)\n", - "\n", - "Preferred zip file format (to avoid difficulties):\n", - "\n", - "
\n",
-        "your_zip.zip:\n",
-        "    |\n",
-        "    |\n",
-        "    data1.wav\n",
-        "    data1.lab\n",
-        "    |\n",
-        "    |\n",
-        "    data2.wav\n",
-        "    data2.lab\n",
-        "    |\n",
-        "    |\n",
-        "    data3.wav\n",
-        "    data3.lab\n",
-        "    |\n",
-        "    ...\n",
-        "
\n", - "This will be updated once multisinger training is added" - ], - "metadata": { - "id": "ZxsTaNBJLd7Y" - } - }, - { - "cell_type": "markdown", - "source": [ - "_**Credits:** _\n", - "\n", - " - [openvpi](https://openvpi.github.io/) for DiffSinger fork and more\n", - "\n", - " - [UtaUtaUtau](https://utautautau.neocities.org/) for nnsvs-db-converter\n", - "\n", - " - [Kei](https://pronouns.page/@kei.wendt06) for the original notebook\n", - "\n", - " - [MLo7](https://github.com/MLo7Ghinsan) for the notebook edit" - ], - "metadata": { - "id": "R8o4pZptA4yc" - } - }, - { - "cell_type": "markdown", - "source": [ - "# **Setup**" - ], - "metadata": { - "id": "Wv0gfI5feBSc" - } - }, - { - "cell_type": "code", - "source": [ - "#@markdown Select this if you don't like seeing warnings throughout your training since most of the time the warnings are nothing to worry about\n", - "\n", - "#@markdown ****WARNING**** this will also hides the error message\n", - "no_warn = False # @param {type:\"boolean\"}\n", - "\n", - "#@markdown you can always come back and enable or disable this cell without re-running the installation" - ], - "metadata": { - "cellView": "form", - "id": "9nZnrUAVHPZQ" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "pK8aicf8A2sj" - }, - "outputs": [], - "source": [ - "#@title # Mount Google Drive and Setup\n", - "\n", - "from IPython.display import clear_output\n", - "from IPython.display import Audio, display, HTML\n", - "import os\n", - "from google.colab import drive\n", - "drive.mount(\"/content/drive\")\n", - "\n", - "if not os.path.exists(\"/content/play_sound\"):\n", - " os.makedirs(\"/content/play_sound\")\n", - "%cd /content/play_sound\n", - "!wget -O setup_complete.wav https://github.com/MLo7Ghinsan/MLo7_Diff-SVC_models/releases/download/audio/setup_complete.wav\n", - "%cd /content\n", - "!rm -rf /content/sample_data\n", - "!apt-get install 
aria2\n", - "clear_output()\n", - "\n", - "!git clone https://github.com/UtaUtaUtau/nnsvs-db-converter\n", - "!git clone https://github.com/openvpi/MakeDiffSinger\n", - "!git clone https://github.com/openvpi/DiffSinger.git\n", - "clear_output()\n", - "!pip install torch==1.13.0 torchvision==0.14.0 torchaudio==0.13.0\n", - "clear_output()\n", - "!pip install -r /content/DiffSinger/requirements.txt\n", - "clear_output()\n", - "!pip install onnx onnxsim #onnx==1.12.0 onnxsim==0.4.10\n", - "clear_output()\n", - "!aria2c https://github.com/openvpi/vocoders/releases/download/nsf-hifigan-v1/nsf_hifigan_20221211.zip\n", - "!aria2c https://github.com/openvpi/DiffSinger/releases/download/v2.1.0/rmvpe.zip\n", - "!unzip -q /content/nsf_hifigan_20221211.zip -d /content/DiffSinger/checkpoints\n", - "!unzip -q /content/rmvpe.zip -d /content/DiffSinger/checkpoints\n", - "!rm /content/nsf_hifigan_20221211.zip\n", - "!rm /content/rmvpe.zip\n", - "clear_output()\n", - "!pip install --upgrade tensorboard\n", - "clear_output()\n", - "!pip install protobuf #protobuf==3.20\n", - "clear_output()\n", - "!pip install onnxruntime\n", - "clear_output()\n", - "#shit tons of clear output cus i dont wanna see anything <3\n", - "\n", - "print(\"setup complete!\")\n", - "print(\"|\")\n", - "print(\"|\")\n", - "print(\"|\")\n", - "\n", - "chika_dance = ''\n", - "display(HTML(chika_dance))\n", - "\n", - "with open(\"/content/play_sound/setup_complete.wav\", \"rb\") as f:\n", - " setup_complete_sound = f.read()\n", - "Audio(data=setup_complete_sound, autoplay=True)" - ] - }, - { - "cell_type": "markdown", - "source": [ - "# **Preprocess data for training**" - ], - "metadata": { - "id": "eexZl_OCDmQ3" - } - }, - { - "cell_type": "code", - "source": [ - "#@title #Extract Data\n", - "%cd /content\n", - "#@markdown this cell will create a folder name [raw_data] in the root folder and extract your data into it\n", - "\n", - "#@markdown you can go and check to make sure if it actually unzips your data or 
not\n", - "\n", - "#@markdown The path to your zip file containing .lab and .wav files\n", - "\n", - "data_zip_path = \"\" #@param {type:\"string\"}\n", - "\n", - "#@markdown Use this if your data is not in diffsinger's preferred format (data are under 30 seconds | have \"AP\" label in your lab)\n", - "\n", - "default_converter_setting = True # @param {type:\"boolean\"}\n", - "\n", - "#@markdown ___\n", - "\n", - "#@markdown this lower section is for variance training\n", - "\n", - "#@markdown Use this if you don't have .csv that is for variance dataset (skippable if you are doing acoustic)\n", - "\n", - "estimate_midi = False # @param {type:\"boolean\"}\n", - "\n", - "#@markdown Pitch extractor algorithm - for converting your data to DB-ready format (rmvpe is more accurate but has range limit)\n", - "\n", - "f0_ext = \"parselmouth\" # @param [\"parselmouth\", \"rmvpe\"]\n", - "\n", - "all_shits = \"/content/raw_data\"\n", - "\n", - "import os\n", - "import zipfile\n", - "if not os.path.exists(all_shits):\n", - " os.makedirs(all_shits)\n", - "\n", - "#changed from !unzip cus it kinda stopped working for some reason\n", - "with zipfile.ZipFile(data_zip_path, \"r\") as zip_ref:\n", - " file_names = zip_ref.namelist()\n", - "wav_files = [file_name for file_name in file_names if file_name.endswith('.wav')]\n", - "with zipfile.ZipFile(data_zip_path, \"r\") as zip_ref:\n", - " zip_ref.extractall(path=all_shits, members=wav_files)\n", - "lab_files = [file_name for file_name in file_names if file_name.endswith('.lab')]\n", - "with zipfile.ZipFile(data_zip_path, \"r\") as zip_ref:\n", - " zip_ref.extractall(path=all_shits, members=lab_files)\n", - "\n", - "if default_converter_setting:\n", - " !python /content/nnsvs-db-converter/db_converter.py -s 2 {all_shits} 2> /dev/null # -s 2 cus pix tutorial said so <3\n", - " clear_output()\n", - "else:\n", - " !python /content/nnsvs-db-converter/db_converter.py -s 50 -S 20 -l 35 {all_shits} 2> /dev/null # old param set by me lmao 
uwu\n", - " clear_output()\n", - "\n", - "#funny auto dict generator lmao\n", - "lab_folder_path = \"/content/raw_data\"\n", - "out = \"/content/DiffSinger/dictionaries/custom_dict.txt\"\n", - "phonemes = set()\n", - "for root, dirs, files in os.walk(lab_folder_path):\n", - " for file in files:\n", - " if file.endswith(\".lab\"):\n", - " fpath = os.path.join(root, file)\n", - " with open(fpath, \"r\") as lab_file:\n", - " for line in lab_file:\n", - " line = line.strip()\n", - " if line:\n", - " phoneme = line.split()[2]\n", - " if phoneme != \"pau\" and phoneme != \"AP\" and phoneme != \"SP\":\n", - " phonemes.add(phoneme + \"\t\" + phoneme)\n", - "phonemes_mess = phonemes\n", - "with open(out, \"w\") as f:\n", - " for phoneme in sorted(phonemes_mess):\n", - " f.write(phoneme + \"\\n\")\n", - "\n", - "if estimate_midi:\n", - " #prepare stuff for uhhhh variance even though it wouldnt be that good out of the box for pitch training\n", - "\n", - " dict_path = out\n", - "\n", - " vowel_types = {\"a\", \"i\", \"u\", \"e\", \"o\", \"N\", \"M\", \"NG\"}\n", - " vowel_data = []\n", - " consonant_data = []\n", - "\n", - " with open(dict_path, \"r\") as f:\n", - " for line in f:\n", - " phoneme, _ = line.strip().split(\"\\t\")\n", - " if phoneme[0] in vowel_types:\n", - " vowel_data.append(phoneme)\n", - " else:\n", - " consonant_data.append(phoneme)\n", - "\n", - " vowel_data.sort()\n", - " consonant_data.sort()\n", - " directory = os.path.dirname(dict_path)\n", - " #vowels.txt\n", - " vowel_txt_path = os.path.join(directory, \"vowels.txt\")\n", - " with open(vowel_txt_path, \"w\") as f:\n", - " f.write(\" \".join(vowel_data))\n", - " #consonants.txt\n", - " consonant_txt_path = os.path.join(directory, \"consonants.txt\")\n", - " with open(consonant_txt_path, \"w\") as f:\n", - " f.write(\" \".join(consonant_data))\n", - "\n", - " # idk i just feel like 800 is a lil low for some people\n", - " new_f0_max = 1760\n", - " og_script = 
\"/content/MakeDiffSinger/variance-temp-solution/get_pitch.py\"\n", - " with open(og_script, 'r') as file:\n", - " mate = file.read()\n", - " up_f0_val = mate.replace(\"f0_max = 800\", f\"f0_max = {new_f0_max}\")\n", - " with open(og_script, 'w') as file:\n", - " file.write(up_f0_val)\n", - "\n", - " if no_warn:\n", - " !python /content/MakeDiffSinger/variance-temp-solution/add_ph_num.py /content/raw_data/diffsinger_db/transcriptions.csv --vowels /content/DiffSinger/dictionaries/vowels.txt --consonants /content/DiffSinger/dictionaries/consonants.txt 2> /dev/null\n", - " clear_output()\n", - " !python /content/MakeDiffSinger/variance-temp-solution/estimate_midi.py /content/raw_data/diffsinger_db/transcriptions.csv /content/raw_data/diffsinger_db/wavs --pe {f0_ext} 2> /dev/null\n", - " clear_output()\n", - " !python /content/MakeDiffSinger/variance-temp-solution/convert_ds.py csv2ds /content/raw_data/diffsinger_db/transcriptions.csv /content/raw_data/diffsinger_db/wavs 2> /dev/null\n", - " clear_output()\n", - " !python /content/MakeDiffSinger/variance-temp-solution/correct_cents.py ds /content/raw_data/diffsinger_db/wavs 2> /dev/null\n", - " clear_output()\n", - " !rm -rf {all_shits}/diffsinger_db/transcriptions.csv\n", - " !rm /content/raw_data/diffsinger_db/curves.json\n", - " clear_output()\n", - " !python /content/MakeDiffSinger/variance-temp-solution/convert_ds.py ds2csv /content/raw_data/diffsinger_db/wavs /content/raw_data/diffsinger_db/transcriptions.csv 2> /dev/null\n", - " clear_output()\n", - " else:\n", - " !python /content/MakeDiffSinger/variance-temp-solution/add_ph_num.py /content/raw_data/diffsinger_db/transcriptions.csv --vowels /content/DiffSinger/dictionaries/vowels.txt --consonants /content/DiffSinger/dictionaries/consonants.txt\n", - " clear_output()\n", - " !python /content/MakeDiffSinger/variance-temp-solution/estimate_midi.py /content/raw_data/diffsinger_db/transcriptions.csv /content/raw_data/diffsinger_db/wavs --pe {f0_ext}\n", - " 
clear_output()\n", - " !python /content/MakeDiffSinger/variance-temp-solution/convert_ds.py csv2ds /content/raw_data/diffsinger_db/transcriptions.csv /content/raw_data/diffsinger_db/wavs\n", - " clear_output()\n", - " !python /content/MakeDiffSinger/variance-temp-solution/correct_cents.py ds /content/raw_data/diffsinger_db/wavs\n", - " clear_output()\n", - " !rm /content/raw_data/diffsinger_db/transcriptions.csv\n", - " !rm /content/raw_data/diffsinger_db/curves.json\n", - " clear_output()\n", - " !python /content/MakeDiffSinger/variance-temp-solution/convert_ds.py ds2csv /content/raw_data/diffsinger_db/wavs /content/raw_data/diffsinger_db/transcriptions.csv\n", - " clear_output()\n", - "else:\n", - " pass\n", - "print(\"extraction complete!\")\n", - "print(\"|\")\n", - "print(\"|\")\n", - "print(\"|\")\n", - "print(\"I'm also nice enough to convert your data and also write your dict.txt lmao. You are welcome :)\")" - ], - "metadata": { - "cellView": "form", - "id": "JsP1TGg2F1g3" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "#@title #Edit Config\n", - "\n", - "%cd /content\n", - "clear_output()\n", - "#@markdown The training type you want to do\n", - "config_type = \"acoustic\" # @param [\"acoustic\", \"variance\"]\n", - "config_cap = config_type.upper()\n", - "#@markdown The name of your speaker\n", - "spk_name = \"\" #@param{type:\"string\"}\n", - "\n", - "#@markdown Path to where you want to save your binary data for later use\n", - "binary_save_dir = \"\" #@param{type:\"string\"}\n", - "\n", - "#@markdown Pitch extractor algorithm\n", - "\n", - "f0_ext = \"parselmouth\" # @param [\"parselmouth\", \"rmvpe\"]\n", - "if f0_ext == \"rmvpe\":\n", - " pe_ckpt_pth = \"checkpoints/rmvpe/model.pt\"\n", - "else:\n", - " pe_ckpt_pth = None\n", - "\n", - "#@markdown Select this is you want to use data augmentation (default pitch shift and time stretch values)\n", - "data_aug = False #@param {type:\"boolean\"}\n", - "\n", 
- "#@markdown Step interval of when your model will be validate and save\n", - "save_interval = 2000 #@param {type:\"slider\", min:100, max:10000, step:100}\n", - "\n", - "#@markdown Your model save path\n", - "save_dir = \"\" #@param{type:\"string\"}\n", - "\n", - "import os\n", - "import yaml\n", - "import random #for the random test files lmaoz\n", - "\n", - "wav_files = [f for f in os.listdir(\"/content/raw_data/diffsinger_db/wavs\") if f.endswith(\".wav\")]\n", - "random.shuffle(wav_files)\n", - "random_ass_wavs = wav_files[:3]\n", - "random_ass_test_files = [os.path.splitext(file)[0] for file in random_ass_wavs]\n", - "\n", - "if config_type == \"acoustic\":\n", - " with open(\"/content/DiffSinger/configs/acoustic.yaml\", \"r\") as config:\n", - " bitch_ass_config = yaml.safe_load(config)\n", - " bitch_ass_config[\"speakers\"] = [spk_name]\n", - " bitch_ass_config[\"test_prefixes\"] = random_ass_test_files\n", - " bitch_ass_config[\"raw_data_dir\"] = \"/content/raw_data/diffsinger_db\"\n", - " bitch_ass_config[\"binary_data_dir\"] = binary_save_dir\n", - " bitch_ass_config[\"dictionary\"] = \"dictionaries/custom_dict.txt\"\n", - " bitch_ass_config[\"augmentation_args\"][\"random_pitch_shifting\"][\"enabled\"] = data_aug\n", - " bitch_ass_config[\"augmentation_args\"][\"random_time_stretching\"][\"enabled\"] = data_aug\n", - " bitch_ass_config[\"use_key_shift_embed\"] = data_aug\n", - " bitch_ass_config[\"use_speed_embed\"] = data_aug\n", - " bitch_ass_config[\"max_batch_size\"] = 9 #ive never tried reaching the limit so ill trust kei's setting for this\n", - " bitch_ass_config[\"val_check_interval\"] = save_interval\n", - " bitch_ass_config[\"pe\"] = f0_ext\n", - " bitch_ass_config[\"pe_ckpt\"] = pe_ckpt_pth\n", - " with open(\"/content/DiffSinger/configs/acoustic.yaml\", \"w\") as config:\n", - " yaml.dump(bitch_ass_config, config)\n", - "else:\n", - " with open(\"/content/DiffSinger/configs/variance.yaml\", \"r\") as config:\n", - " bitch_ass_config = 
yaml.safe_load(config)\n", - " bitch_ass_config[\"speakers\"] = [spk_name]\n", - " bitch_ass_config[\"test_prefixes\"] = random_ass_test_files\n", - " bitch_ass_config[\"raw_data_dir\"] = \"/content/raw_data/diffsinger_db\"\n", - " bitch_ass_config[\"binary_data_dir\"] = binary_save_dir\n", - " bitch_ass_config[\"dictionary\"] = \"dictionaries/custom_dict.txt\"\n", - " bitch_ass_config[\"max_batch_size\"] = 9 #ive never tried reaching the limit so ill trust kei's setting for this\n", - " bitch_ass_config[\"val_check_interval\"] = save_interval\n", - " bitch_ass_config[\"pe\"] = f0_ext # i think variance uses it for pitch ref as ground-truth for pitch training soooo\n", - " bitch_ass_config[\"pe_ckpt\"] = pe_ckpt_pth #same goes to this one\n", - " with open(\"/content/DiffSinger/configs/variance.yaml\", \"w\") as config:\n", - " yaml.dump(bitch_ass_config, config)\n", - "\n", - "os.makedirs(save_dir, exist_ok=True)\n", - "search_text = \" args_work_dir = os.path.join(\"\n", - "replacement = f\" args_work_dir = '{save_dir}'\"\n", - "with open(\"/content/DiffSinger/utils/hparams.py\", \"r\") as file:\n", - " lines = file.readlines()\n", - "for i, line in enumerate(lines):\n", - " if search_text in line:\n", - " lines[i] = replacement + \"\\n\"\n", - " break\n", - "with open(\"/content/DiffSinger/utils/hparams.py\", \"w\") as file:\n", - " file.writelines(lines)\n", - "#incase if anyone wanna change it lmao\n", - "search_text_alt = \" args_work_dir = '\"\n", - "replacement_alt = f\" args_work_dir = '{save_dir}'\"\n", - "with open(\"/content/DiffSinger/utils/hparams.py\", \"r\") as file:\n", - " lines = file.readlines()\n", - "for i, line in enumerate(lines):\n", - " if search_text_alt in line:\n", - " lines[i] = replacement_alt + \"\\n\"\n", - " break\n", - "with open(\"/content/DiffSinger/utils/hparams.py\", \"w\") as file:\n", - " file.writelines(lines)\n", - "\n", - "relative_p = \" relative_path = filepath.relative_to(Path('.').resolve())\"\n", - "relative_change = 
\" relative_path = filepath.relative_to(Path('/content').resolve())\"\n", - "with open(\"/content/DiffSinger/utils/training_utils.py\", \"r\") as file:\n", - " lines = file.readlines()\n", - "for i, line in enumerate(lines):\n", - " if relative_p in line:\n", - " lines[i] = relative_change + \"\\n\"\n", - " break\n", - "with open(\"/content/DiffSinger/utils/training_utils.py\", \"w\") as file:\n", - " file.writelines(lines)\n", - "relative_p_2 = \" relative_path = filepath.relative_to(Path('.').resolve())\"\n", - "relative_change_2 = \" relative_path = filepath.relative_to(Path('/content').resolve())\"\n", - "with open(\"/content/DiffSinger/utils/training_utils.py\", \"r\") as file:\n", - " lines_2 = file.readlines()\n", - "for i, line in enumerate(lines):\n", - " if relative_p_2 in line:\n", - " lines_2[i] = relative_change_2 + \"\\n\"\n", - " break\n", - "with open(\"/content/DiffSinger/utils/training_utils.py\", \"w\") as file:\n", - " file.writelines(lines_2)\n", - "\n", - "if not estimate_midi:\n", - " !python /content/DiffSinger/scripts/migrate.py txt /content/raw_data/diffsinger_db/transcriptions.txt 2> /dev/null\n", - "else:\n", - " pass\n", - "\n", - "print(\"config updated! 
see below for config's information\")\n", - "print(\"|\")\n", - "print(\"|\")\n", - "print(\"|\")\n", - "print(f\"+++---{config_cap} TRAINING---+++\")\n", - "print(\"|\")\n", - "print(\"|\")\n", - "print(\"|\")\n", - "print(\"+++---user's settings---+++\")\n", - "print(\"\\n\")\n", - "print(f\"speaker name: {spk_name}\")\n", - "print(\"\\n\")\n", - "print(f\"data augmentation: {data_aug}\")\n", - "print(\"\\n\")\n", - "print(f\"pitch extractor: {f0_ext}\")\n", - "print(\"\\n\")\n", - "print(f\"binary data save directory: {binary_save_dir}\")\n", - "print(\"\\n\")\n", - "print(f\"your model will be saved every: {save_interval} steps\")\n", - "print(\"\\n\")\n", - "print(f\"your model will be saved to: {save_dir}\")\n", - "print(\"\\n\")\n", - "print(\"==========================================================================================\")\n", - "print(\"\\n\")\n", - "print(\"+++---other auto-defined settings---+++\")\n", - "print(\"\\n\")\n", - "print(f\"test files (auto selected 3 files): {random_ass_test_files}\")\n", - "print(\"\\n\")\n", - "print(\"dictionary (auto generated): custom_dict.txt\")\n", - "print(\"\\n\")\n", - "print(\"max_sentences: 9\")\n", - "print(\"\\n\")\n", - "print(\"==========================================================================================\")\n", - "print(\"\\n\")\n", - "print(\"if you don't like or disagree with any of these options,\")\n", - "print(f\"you can go and edit the config at [/content/DiffSinger/configs/{config_type}.yaml]\")\n" - ], - "metadata": { - "cellView": "form", - "id": "nI3dzDv_Mr9Y" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "#@markdown # Preprocess data\n", - "import os\n", - "\n", - "# idk i just feel like 800 is a lil low for some people part 2\n", - "new_f0_max = 1600\n", - "og_script = \"/content/DiffSinger/utils/binarizer_utils.py\"\n", - "with open(og_script, 'r') as file:\n", - " mate = file.read()\n", - "up_f0_val = 
mate.replace(\"f0_max = 800\", f\"f0_max = {new_f0_max}\")\n", - "with open(og_script, 'w') as file:\n", - " file.write(up_f0_val)\n", - "\n", - "training_config = f\"/content/DiffSinger/configs/{config_type}.yaml\"\n", - "\n", - "%cd /content/DiffSinger\n", - "os.environ['PYTHONPATH']='.'\n", - "if no_warn:\n", - " !CUDA_VISIBLE_DEVICES=0 python /content/DiffSinger/scripts/binarize.py --config {training_config} 2> /dev/null\n", - "else:\n", - " !CUDA_VISIBLE_DEVICES=0 python /content/DiffSinger/scripts/binarize.py --config {training_config}" - ], - "metadata": { - "cellView": "form", - "id": "76NvDR1cXlDM" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# **Training**" - ], - "metadata": { - "id": "0J3b18EKdzMC" - } - }, - { - "cell_type": "code", - "source": [ - "#@markdown # Tensorboard\n", - "\n", - "#@markdown For monitoring training progress. Enter the directory to your model save location (save_dir)\n", - "\n", - "#@markdown if you are continuing from latest checkpoint, this would be the directory of a folder that you saved your model, it should have [lightning_logs] folder in it\n", - "\n", - "logs = \"\" #@param{type:\"string\"}\n", - "%reload_ext tensorboard\n", - "%tensorboard --logdir {logs}/lightning_logs" - ], - "metadata": { - "cellView": "form", - "id": "ruKNxm_teUlk" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "#@markdown #Train your model\n", - "%cd /content/DiffSinger\n", - "#@markdown Enter the name of your singer.\n", - "name = \"\" #@param{type:'string'}\n", - "\n", - "#@markdown ___\n", - "\n", - "#@markdown ###**Only use this lower section if you want to resume training**\n", - "resume_training = False #@param {type:\"boolean\"}\n", - "\n", - "#@markdown path to the config you got from training\n", - "re_config_path = \"\" #@param {type:\"string\"}\n", - "\n", - "#@markdown path to the resume model's **FOLDER** (should mostlikely be the 
path you put above minus [ /config.yaml ])\n", - "\n", - "model_dir = \"\" #@param {type:\"string\"}\n", - "\n", - "if resume_training:\n", - " config_path = re_config_path\n", - " search_text = \" args_work_dir = os.path.join(\"\n", - " replacement = f\" args_work_dir = '{model_dir}'\"\n", - " with open(\"/content/DiffSinger/utils/hparams.py\", \"r\") as file:\n", - " lines = file.readlines()\n", - " for i, line in enumerate(lines):\n", - " if search_text in line:\n", - " lines[i] = replacement + \"\\n\"\n", - " break\n", - " with open(\"/content/DiffSinger/utils/hparams.py\", \"w\") as file:\n", - " file.writelines(lines)\n", - " #incase if anyone wanna change it lmao\n", - " search_text_alt = \" args_work_dir = '\"\n", - " replacement_alt = f\" args_work_dir = '{model_dir}'\"\n", - " with open(\"/content/DiffSinger/utils/hparams.py\", \"r\") as file:\n", - " lines = file.readlines()\n", - " for i, line in enumerate(lines):\n", - " if search_text_alt in line:\n", - " lines[i] = replacement_alt + \"\\n\"\n", - " break\n", - " with open(\"/content/DiffSinger/utils/hparams.py\", \"w\") as file:\n", - " file.writelines(lines)\n", - "\n", - " relative_p = \" relative_path = filepath.relative_to(Path('.').resolve())\"\n", - " relative_change = \" relative_path = filepath.relative_to(Path('/content').resolve())\"\n", - " with open(\"/content/DiffSinger/utils/training_utils.py\", \"r\") as file:\n", - " lines = file.readlines()\n", - " for i, line in enumerate(lines):\n", - " if relative_p in line:\n", - " lines[i] = relative_change + \"\\n\"\n", - " break\n", - " with open(\"/content/DiffSinger/utils/training_utils.py\", \"w\") as file:\n", - " file.writelines(lines)\n", - " relative_p_2 = \" relative_path = filepath.relative_to(Path('.').resolve())\"\n", - " relative_change_2 = \" relative_path = filepath.relative_to(Path('/content').resolve())\"\n", - " with open(\"/content/DiffSinger/utils/training_utils.py\", \"r\") as file:\n", - " lines_2 = file.readlines()\n", - 
" for i, line in enumerate(lines):\n", - " if relative_p_2 in line:\n", - " lines_2[i] = relative_change_2 + \"\\n\"\n", - " break\n", - " with open(\"/content/DiffSinger/utils/training_utils.py\", \"w\") as file:\n", - " file.writelines(lines_2)\n", - " !cp {model_dir}/dictionary.txt /content/DiffSinger/dictionaries/custom_dict.txt\n", - "\n", - "else:\n", - " config_path = training_config\n", - "\n", - "if no_warn:\n", - " !CUDA_VISIBLE_DEVICES=0 python /content/DiffSinger/scripts/train.py --config {config_path} --exp_name {name} --reset 2> /dev/null\n", - "else:\n", - " !CUDA_VISIBLE_DEVICES=0 python /content/DiffSinger/scripts/train.py --config {config_path} --exp_name {name} --reset" - ], - "metadata": { - "cellView": "form", - "id": "Lu5w72UWgccC" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# **Convert model to ONNX format**" - ], - "metadata": { - "id": "FY40fGHEg9_i" - } - }, - { - "cell_type": "code", - "source": [ - "#@markdown # Convert to ONNX for OpenUtau\n", - "%cd /content\n", - "\n", - "#@markdown the type of the model you want to convert\n", - "model_type = \"acoustic\" # @param [\"acoustic\", \"variance\"]\n", - "\n", - "#@markdown path to your checkpoint's **FOLDER** (NOT the model itself) or path to your save_dir\n", - "checkpoints_path = \"\" #@param{type:\"string\"}\n", - "folder_name = os.path.basename(checkpoints_path)\n", - "\n", - "#@markdown path to where you want to save your converted model and it's file\n", - "exp_folder = \"\" #@param{type:\"string\"}\n", - "\n", - "!cp {checkpoints_path} -r /content/DiffSinger/checkpoints\n", - "if no_warn:\n", - " !python /content/DiffSinger/scripts/export.py {model_type} --exp {folder_name} --out {exp_folder} 2> /dev/null\n", - "else:\n", - " !python /content/DiffSinger/scripts/export.py {model_type} --exp {folder_name} --out {exp_folder}" - ], - "metadata": { - "id": "x33iZhZchEMW", - "cellView": "form" - }, - "execution_count": null, - 
"outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# **Miscellaneous**" - ], - "metadata": { - "id": "4sbU1aH5kGFE" - } - }, - { - "cell_type": "code", - "source": [ - "import os\n", - "\n", - "#@title Generate enunux.yaml\n", - "\n", - "#@markdown path to your dictionary.txt\n", - "\n", - "dict_path = \"\" #@param{type:\"string\"}\n", - "enunux = \"enunux.yaml\"\n", - "vowel_types = {\"a\", \"i\", \"u\", \"e\", \"o\", \"N\", \"M\", \"NG\"}\n", - "enunux_data = []\n", - "vowel_data = []\n", - "stop_data = []\n", - "with open(dict_path, \"r\") as f:\n", - " for line in f:\n", - " phoneme, _ = line.strip().split(\"\\t\")\n", - " phoneme_type = \"vowel\" if phoneme[0] in vowel_types else \"stop\"\n", - " entry = f\"- {{\\\"symbol\\\": \\\"{phoneme}\\\", \\\"type\\\": \\\"{phoneme_type}\\\"}}\"\n", - " if phoneme_type == \"vowel\":\n", - " vowel_data.append(entry)\n", - " else:\n", - " stop_data.append(entry)\n", - "vowel_data.sort()\n", - "stop_data.sort()\n", - "enunux_data.extend([\"# Vowel type symbols:\", *vowel_data, \"\", \"# Stop type symbols:\", *stop_data])\n", - "directory = os.path.dirname(dict_path)\n", - "enunux_path = os.path.join(directory, enunux)\n", - "with open(enunux_path, \"w\") as f:\n", - " f.write(\"symbols:\\n\")\n", - " f.write(\"\\n\".join(enunux_data))\n", - " f.write(\"\\n\")" - ], - "metadata": { - "cellView": "form", - "id": "LMHTaub-kMSw" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "#@title Make OpenUtau compatible voicebank\n", - "#@markdown not working yet lmao COMING NEXT UPDATE THO I PROMISE" - ], - "metadata": { - "cellView": "form", - "id": "HxQLlcz7k-8n" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Last Section Note\n", - "Wow you made it to the very bottom.... 
Why though lmao hahahahhshahhasdksajidhasjl\n", - "\n", - "Anyways, now that you are here i guess ill tell you my plan/todo list for this notebook \\\n", - "(feel free to suggest anything via [discord](https://discord.com/invite/wwbu2JUMjj) my user display name is MLo7 and my user name is ghin_mlo7)\n", - "\n", - "todo list:\n", - "- add support for premade/refined data\n", - "- add multi-singer training\n", - "- add OpenUtau voicebank builder\n", - "- add link to vocoder training notebook (yet to be ready) or add a vocoder training section\n", - "\n", - "If you want to add anything to this list then again, just ping or message me lmao" - ], - "metadata": { - "id": "Ljl8Yr6wM3Ma" - } - } - ] -} \ No newline at end of file