import streamlit as st
from transformers import T5ForConditionalGeneration, T5Tokenizer

# Text box for the sentence to be corrected
sentence = st.text_area("enter some text")

if sentence:
    # Load the grammatical-error-correction model and its tokenizer
    model = T5ForConditionalGeneration.from_pretrained("Unbabel/gec-t5_small")
    tokenizer = T5Tokenizer.from_pretrained("t5-small")

    # Prefix the user input with the "gec:" task tag the model expects
    tokenized_sentence = tokenizer(
        "gec: " + sentence,
        max_length=128,
        truncation=True,
        padding="max_length",
        return_tensors="pt",
    )

    # Generate the corrected sentence with beam search and decode it back to text
    corrected_sentence = tokenizer.decode(
        model.generate(
            input_ids=tokenized_sentence.input_ids,
            attention_mask=tokenized_sentence.attention_mask,
            max_length=128,
            num_beams=5,
            early_stopping=True,
        )[0],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=True,
    )
    st.write(corrected_sentence)