File size: 5,574 Bytes
a2a09d6
 
 
 
 
 
 
 
 
9592801
 
 
8e9bdee
9592801
 
 
 
 
 
7a84cc4
9592801
 
 
 
 
 
1d46d77
9592801
9d845ba
9592801
 
 
 
 
 
31ac90d
9592801
31ac90d
 
9592801
31ac90d
9592801
 
 
 
 
31ac90d
9592801
 
a2a09d6
31ac90d
9592801
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6567f39
 
 
f674e10
9592801
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9d845ba
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
"""
πŸ—£οΈ Translator - Translate text from one language to another.

Application file made with Streamlit.

Author:
    - @ChainYo
"""

import re
import streamlit as st

from datetime import datetime
from transformers import pipeline
from available_models import MODELS


# Page chrome: browser-tab title/icon, then the on-page title and tagline.
# `st.set_page_config` is kept as the first Streamlit call of the script, as
# the Streamlit documentation asks for.
#
# The icon used to be a mis-encoded (mojibake) rendering of the "speaking
# head" emoji; `page_icon` expects a real emoji (or an image), so the literal
# is restored here and shared with the title to keep both in sync.
APP_ICON = "🗣️"

st.set_page_config(page_title="Translator", page_icon=APP_ICON)
st.title(f"{APP_ICON} Translator")
st.subheader("Translation made fast and easy.")
# Author's social badges, rendered as Markdown image links (one per line:
# GitHub, Hugging Face, LinkedIn, Discord). The Hugging Face badge carries its
# logo inline as a base64-encoded SVG data URI, hence the very long line.
_SOCIAL_BADGES_MD = """
[![GitHub](https://img.shields.io/badge/-%23121011.svg?style=for-the-badge&logo=github&logoColor=white)](https://github.com/ChainYo)
[![HuggingFace](https://img.shields.io/badge/-yellow.svg?style=for-the-badge&logo=data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBzdGFuZGFsb25lPSJubyI/Pgo8IURPQ1RZUEUgc3ZnIFBVQkxJQyAiLS8vVzNDLy9EVEQgU1ZHIDIwMDEwOTA0Ly9FTiIKICJodHRwOi8vd3d3LnczLm9yZy9UUi8yMDAxL1JFQy1TVkctMjAwMTA5MDQvRFREL3N2ZzEwLmR0ZCI+CjxzdmcgdmVyc2lvbj0iMS4wIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciCiB3aWR0aD0iMTc1LjAwMDAwMHB0IiBoZWlnaHQ9IjE3NS4wMDAwMDBwdCIgdmlld0JveD0iMCAwIDE3NS4wMDAwMDAgMTc1LjAwMDAwMCIKIHByZXNlcnZlQXNwZWN0UmF0aW89InhNaWRZTWlkIG1lZXQiPgoKPGcgdHJhbnNmb3JtPSJ0cmFuc2xhdGUoMC4wMDAwMDAsMTc1LjAwMDAwMCkgc2NhbGUoMC4xMDAwMDAsLTAuMTAwMDAwKSIKZmlsbD0iIzAwMDAwMCIgc3Ryb2tlPSJub25lIj4KPHBhdGggZD0iTTU2MyAxMjM2IGMtMjkgLTEzIC02MyAtNTkgLTYzIC04NiAwIC0yNiAzMyAtODAgNTIgLTg2IDE1IC00IDI2IDEKNDMgMjEgMjAgMjYgMjQgMjcgNTMgMTcgMjggLTkgMzMgLTggNDIgOCAxNyAzMiAxMSA2OSAtMTcgOTkgLTM0IDM3IC02OCA0NQotMTEwIDI3eiIvPgo8cGF0aCBkPSJNMTA2NCAxMjQwIGMtNTAgLTIwIC03NyAtODYgLTU0IC0xMzAgOSAtMTYgMTQgLTE3IDQyIC04IDI5IDEwIDMzIDkKNTUgLTE3IDIxIC0yNCAyNyAtMjYgNDggLTE3IDMxIDE0IDUxIDc2IDM2IDExNCAtMTcgNDYgLTg0IDc2IC0xMjcgNTh6Ii8+CjxwYXRoIGQ9Ik02MDAgODg4IGMwIC00OSAxNiAtOTggNTAgLTE1MSA4NSAtMTM0IDMyNSAtMTM0IDQxMCAwIDUxIDgwIDY5IDE4MwozMSAxODMgLTEwIDAgLTUwIC0xNSAtODcgLTMyIC02MCAtMjkgLTc5IC0zMyAtMTQ5IC0zMyAtNzAgMCAtODkgNCAtMTQ5IDMzCi0zNyAxNyAtNzcgMzIgLTg3IDMyIC0xNSAwIC0xOSAtNyAtMTkgLTMyeiIvPgo8L2c+Cjwvc3ZnPgo=)](https://huggingface.co/ChainYo)
[![LinkedIn](https://img.shields.io/badge/-%230077B5.svg?style=for-the-badge&logo=linkedin&logoColor=white)](https://www.linkedin.com/in/thomas-chaigneau-dev/)
[![Discord](https://img.shields.io/badge/Chainyo%233610-%237289DA.svg?style=for-the-badge&logo=discord&logoColor=white)](https://discord.gg/)
"""
st.markdown(_SOCIAL_BADGES_MD)

# Short call-to-action shown right under the badges it points up at.
_NEW_MODEL_HINT = "To add a new model, hit me up! ⬆️"
st.write(_NEW_MODEL_HINT)

# Help panel, open by default, explaining what the app does and how the text
# is processed. The closing emoji of the "no limit" paragraph used to be a
# mis-encoded (mojibake) hugging-face emoji and is restored here; the rest of
# the copy is unchanged.
with st.expander(label="❓ How does it work", expanded=True):
    st.markdown("""
    **Translator** is a **simple tool** that allows you to **translate text** from one language to another.

    **Translator** is powered by the [Transformers library](https://huggingface.co/transformers) and uses the 
    [Helsinki-NLP](https://huggingface.co/Helsinki-NLP) models.

    Choose the **source language**, the **target language** and add some **text to translate**.

    **Translator** will translate the text and **save the output in a text file**. It cuts the sentences by following 
    the punctuation marks.

    The output file content will also be displayed in the browser to help you understand the translation and choose 
    if you want to download it.

    There is **no limit to the number of characters** that can be translated. 
    The only limit is the time you are ready to wait! 🤗

    *P.S. I have built this tool to help me start writing blog posts in different languages. I am a French native speaker
    and I will use it to translate my potential future blog posts in English.*

    *P.P.S. I am a **Junior ML Engineer** passionate about **machine learning** and **data science**. Reach out to me by 
    clicking on the socials badges above.*
    """)

# Language pickers and model lookup.
#
# Both select boxes offer the same labels, so the list is declared once. The
# chosen labels are stored by Streamlit in `st.session_state` under the widget
# keys "input_lang" / "output_lang" and are combined as "<source>-><target>"
# to index MODELS.
#
# NOTE(review): the flag prefixes of these labels look mis-encoded (mojibake of
# flag emoji). They are deliberately left byte-for-byte as they were because
# they are part of the MODELS lookup keys - confirm against `available_models`
# before repairing them in both places at once.
LANGUAGES = ["πŸ‡¬πŸ‡§ English", "πŸ‡«πŸ‡· French", "πŸ‡©πŸ‡ͺ German", "πŸ‡ͺπŸ‡Έ Spanish", "πŸ‡·πŸ‡Ί Russian"]

lang1, lang2 = st.columns(2)
lang1.selectbox("Source Language", LANGUAGES, key="input_lang", index=1)
lang2.selectbox("Target Language", LANGUAGES, key="output_lang", index=0)

language_pair = f"{st.session_state['input_lang']}->{st.session_state['output_lang']}"

try:
    selected_model = MODELS[language_pair]
except KeyError:
    # A pair that is absent from MODELS is handled like a pair explicitly
    # mapped to "no model": the code further down reports a `None` first
    # element as "No model available for this pair." instead of crashing.
    selected_model = (None, None)


def _split_sentences(text):
    """Split *text* into the chunks that are translated one at a time.

    A cut is made after every '.', '!' or '?' that is followed by one or more
    spaces (the spaces are consumed). Line breaks are not split points, so
    they stay inside their chunk. Chunks that are empty or whitespace-only are
    dropped, which means empty input yields an empty list.
    """
    return [chunk for chunk in re.split(r"(?<=[.!?]) +", text.strip()) if chunk.strip()]


# `selected_model` is indexed as (model identifier, model URL). A first element
# of None means the pair has no model; 0 means source and target are the same.
model_name = selected_model[0]

if model_name is None:
    st.write("No model available for this pair.")
elif model_name == 0:
    st.write("No translation necessary.")
else:
    st.markdown(f"""
    **Selected model:** [{selected_model[0]}]({selected_model[1]})
    """)

    input_text = st.text_area("Enter text to translate:", height=400, key="input")
    translate_text = st.button("Translate")

    if translate_text:
        sentences = _split_sentences(input_text)

        if not sentences:
            # Nothing to translate: skip the (slow) model load and avoid
            # feeding an empty string to the pipeline.
            st.warning("Please enter some text to translate.")
        else:
            with st.spinner(text="⚙️ Model loading..."):
                task = pipeline(
                    "translation",
                    model=model_name,
                    tokenizer=model_name,
                )

            progress_bar = st.progress(0)
            total_progress = len(sentences)
            translated = []

            with st.spinner(text="🔄 Translating..."):
                for done, sentence in enumerate(sentences, start=1):
                    # The pipeline result is read as a list whose first item
                    # holds the output under "translation_text".
                    translated.append(task(sentence)[0]["translation_text"])
                    progress_bar.progress(done / total_progress)

            st.success("🗣️ Translated!")
            st.write(f"**Translation:** {' '.join(translated)}")

            # The download has one translated chunk per line; the file name is
            # "<source>-<target>-<timestamp>.txt".
            timestamp = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
            st.download_button(
                label="Download translated text",
                data="\n".join(translated),
                file_name=f"{st.session_state['input_lang']}-{st.session_state['output_lang']}-{timestamp}.txt",
                mime="text/plain",
            )