carlosdanielhernandezmena
committed on
Commit
•
1ea6594
1
Parent(s):
478b2d8
Changing the variable sentence
Browse files
README.md
CHANGED
@@ -148,7 +148,7 @@ ds=load_dataset("ciempiess/ciempiess_test", split="test")
|
|
148 |
import re
|
149 |
chars_to_ignore_regex = '[\\,\\?\\.\\!\\\;\\:\\"\\“\\%\\‘\\”\\�\\)\\(\\*)]'
|
150 |
def remove_special_characters(batch):
|
151 |
-
batch["sentence"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower()
|
152 |
return batch
|
153 |
ds = ds.map(remove_special_characters)
|
154 |
#Downsample to 16kHz
|
@@ -159,7 +159,7 @@ def prepare_dataset(batch):
|
|
159 |
#Batched output is "un-batched" to ensure mapping is correct
|
160 |
batch["input_values"] = processor(audio["array"], sampling_rate=audio["sampling_rate"]).input_values[0]
|
161 |
with processor.as_target_processor():
|
162 |
-
batch["labels"] = processor(batch["sentence"]).input_ids
|
163 |
return batch
|
164 |
ds = ds.map(prepare_dataset, remove_columns=ds.column_names,num_proc=1)
|
165 |
#Define the evaluation metric
|
@@ -182,11 +182,11 @@ def map_to_result(batch):
|
|
182 |
logits = model(input_values).logits
|
183 |
pred_ids = torch.argmax(logits, dim=-1)
|
184 |
batch["pred_str"] = processor.batch_decode(pred_ids)[0]
|
185 |
-
batch["sentence"] = processor.decode(batch["labels"], group_tokens=False)
|
186 |
return batch
|
187 |
results = ds.map(map_to_result,remove_columns=ds.column_names)
|
188 |
#Compute the overall WER now.
|
189 |
-
print("Test WER: {:.3f}".format(wer_metric.compute(predictions=results["pred_str"], references=results["sentence"])))
|
190 |
|
191 |
```
|
192 |
**Test Result**: 0.112
|
|
|
148 |
import re
|
149 |
chars_to_ignore_regex = '[\\,\\?\\.\\!\\\;\\:\\"\\“\\%\\‘\\”\\�\\)\\(\\*)]'
|
150 |
def remove_special_characters(batch):
|
151 |
+
batch["normalized_text"] = re.sub(chars_to_ignore_regex, '', batch["normalized_text"]).lower()
|
152 |
return batch
|
153 |
ds = ds.map(remove_special_characters)
|
154 |
#Downsample to 16kHz
|
|
|
159 |
#Batched output is "un-batched" to ensure mapping is correct
|
160 |
batch["input_values"] = processor(audio["array"], sampling_rate=audio["sampling_rate"]).input_values[0]
|
161 |
with processor.as_target_processor():
|
162 |
+
batch["labels"] = processor(batch["normalized_text"]).input_ids
|
163 |
return batch
|
164 |
ds = ds.map(prepare_dataset, remove_columns=ds.column_names,num_proc=1)
|
165 |
#Define the evaluation metric
|
|
|
182 |
logits = model(input_values).logits
|
183 |
pred_ids = torch.argmax(logits, dim=-1)
|
184 |
batch["pred_str"] = processor.batch_decode(pred_ids)[0]
|
185 |
+
batch["normalized_text"] = processor.decode(batch["labels"], group_tokens=False)
|
186 |
return batch
|
187 |
results = ds.map(map_to_result,remove_columns=ds.column_names)
|
188 |
#Compute the overall WER now.
|
189 |
+
print("Test WER: {:.3f}".format(wer_metric.compute(predictions=results["pred_str"], references=results["normalized_text"])))
|
190 |
|
191 |
```
|
192 |
**Test Result**: 0.112
|