"""Get tokens using the SpeechTokenizer.

Apply the SpeechTokenizer to extract acoustic and semantic tokens.
The tokens are saved under <encoding_output>/acoustic and
<encoding_output>/semantic.

python utils/get_tokens_speech_tokenizer.py \
    --config_path ckpt/speechtokenizer/config.json \
    --ckpt_path ckpt/speechtokenizer/SpeechTokenizer.pt \
    --encoding_input datasets/example/audios \
    --encoding_output datasets/example/audios-speech-tokenizer

Copyright PolyAI Limited.
"""
import argparse
import pathlib

from modules.speech_tokenizer import SpeechTokenizer

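# Directory containing this script; the default paths below are resolved
# relative to it.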
MQTTS_ROOT_PATH = str(pathlib.Path(__file__).parent.resolve())

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--config_path",
        type=str,
        help="Path to the SpeechTokenizer config",
        default=MQTTS_ROOT_PATH + "/ckpt/speechtokenizer/config.json",
    )
    parser.add_argument(
        "--ckpt_path",
        type=str,
        help="Path to the SpeechTokenizer checkpoint",
        default=MQTTS_ROOT_PATH + "/ckpt/speechtokenizer/SpeechTokenizer.pt",
    )
    parser.add_argument(
        "--encoding_input",
        type=str,
        help="Path to the input folder for encoding",
        default=MQTTS_ROOT_PATH + "/datasets/giga-training-data/audios",
    )
    parser.add_argument(
        "--encoding_output",
        type=str,
        help="Path where to save the encoded tokens",
        default="/tmp/encoding_output",
    )
    parser.add_argument(
        "--start_percent",
        type=int,
        help="Start of the file range to encode, as a percentage",
        default=0,
    )
    parser.add_argument(
        "--end_percent",
        type=int,
        help="End of the file range to encode, as a percentage",
        default=100,
    )

    args = parser.parse_args()
    print("Parsed args")
    print(args)

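    # Load the SpeechTokenizer with the provided config and checkpoint.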
    tokenizer = SpeechTokenizer(
        config_path=args.config_path,
        ckpt_path=args.ckpt_path,
    )
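    # Encode every audio file in the input folder; start_percent and
    # end_percent bound the slice of files to process (e.g. for splitting
    # the work across parallel runs of this script).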
    tokenizer.encode_files_with_model_concurrent(
        folder_path=args.encoding_input,
        destination_folder=args.encoding_output,
        start_percent=args.start_percent,
        end_percent=args.end_percent,
    )