GDPR

Running

App Files Files Community

petrsovadina committed on 2 days ago

Commit

03856aa

•

1 Parent(s): 754824e

Update presidio_streamlit.py

Browse files

Files changed (1) hide show

presidio_streamlit.py +3 -70

presidio_streamlit.py CHANGED Viewed

@@ -45,18 +45,17 @@ st.sidebar.header("Anonymizace osobních údajů v českých textech s Microsoft
 # Výběr modelu
 model_help_text = "Vyberte model pro rozpoznávání pojmenovaných entit (NER) pro detekci osobních údajů."
 model_list = [
-    "spacy/cs_core_news_sm",
     "iiiorg/piiranha-v1-detect-personal-information",
     "FacebookAI/xlm-roberta-large-finetuned-conll03-english",
 ]
 if not allow_other_models:
     model_list.pop()
-st_model = st.sidebar.selectbox("NER model", model_list, index=1, help=model_help_text)
 # Zpracování výběru modelu
 st_model_package = st_model.split("/")[0]
-st_model = "/".join(st_model.split("/")[1:]) if st_model_package.lower() in ("spacy", "iiiorg") else st_model
 st_ta_key = st_ta_endpoint = ""  # Placeholder pro případné použití Text Analytics
 analyzer_params = (st_model_package, st_model, st_ta_key, st_ta_endpoint)
@@ -135,70 +134,4 @@ try:
         text=st_text,
         entities=st_entities,
         language="cs",
-        score_threshold=st_threshold,
-        return_decision_process=st_return_decision_process,
-        allow_list=st_allow_list,
-        deny_list=st_deny_list,
-    )
-    # Zobrazení výsledků
-    with col2:
-        st.subheader("Výstup")
-        if st_operator not in ("highlight", "synthesize"):
-            st_anonymize_results = anonymize(
-                text=st_text,
-                operator=st_operator,
-                mask_char=st_mask_char,
-                number_of_chars=st_number_of_chars,
-                encrypt_key=st_encrypt_key,
-                analyze_results=st_analyze_results,
-            )
-            st.text_area(label="Anonymizováno", value=st_anonymize_results.text, height=400)
-        elif st_operator == "synthesize":
-            fake_data = create_fake_data(st_text, st_analyze_results, open_ai_params)
-            st.text_area(label="Syntetická data", value=fake_data, height=400)
-        else:
-            st.subheader("Zvýrazněno")
-            annotated_tokens = annotate(text=st_text, analyze_results=st_analyze_results)
-            annotated_text(*annotated_tokens)
-    # Zobrazení tabulky s výsledky
-    st.subheader("Nálezy" if not st_return_decision_process else "Nálezy s rozhodovacími faktory")
-    if st_analyze_results:
-        df = pd.DataFrame.from_records([r.to_dict() for r in st_analyze_results])
-        df["text"] = [st_text[res.start : res.end] for res in st_analyze_results]
-        df_subset = df[["entity_type", "text", "start", "end", "score"]].rename(
-            {
-                "entity_type": "Typ entity",
-                "text": "Text",
-                "start": "Začátek",
-                "end": "Konec",
-                "score": "Důvěryhodnost",
-            },
-            axis=1,
-        )
-        if st_return_decision_process:
-            analysis_explanation_df = pd.DataFrame.from_records(
-                [r.analysis_explanation.to_dict() for r in st_analyze_results]
-            )
-            df_subset = pd.concat([df_subset, analysis_explanation_df], axis=1)
-        st.dataframe(df_subset.reset_index(drop=True), use_container_width=True)
-    else:
-        st.text("Žádné nálezy")
-except Exception as e:
-    logger.error(f"Došlo k chybě: {str(e)}", exc_info=True)
-    st.error(f"Došlo k chybě při zpracování: {str(e)}")
-# Přidání skriptu pro sledování využití (volitelné)
-components.html(
-    """
-    <script type="text/javascript">
-    (function(c,l,a,r,i,t,y){
-        c[a]=c[a]||function(){(c[a].q=c[a].q||[]).push(arguments)};
-        t=l.createElement(r);t.async=1;t.src="https://www.clarity.ms/tag/"+i;
-        y=l.getElementsByTagName(r)[0];y.parentNode.insertBefore(t,y);
-    })(window, document, "clarity", "script", "h7f8bp42n8");
-    </script>
-    """
-)

 # Výběr modelu
 model_help_text = "Vyberte model pro rozpoznávání pojmenovaných entit (NER) pro detekci osobních údajů."
 model_list = [
     "iiiorg/piiranha-v1-detect-personal-information",
     "FacebookAI/xlm-roberta-large-finetuned-conll03-english",
 ]
 if not allow_other_models:
     model_list.pop()
+st_model = st.sidebar.selectbox("NER model", model_list, index=0, help=model_help_text)
 # Zpracování výběru modelu
 st_model_package = st_model.split("/")[0]
+st_model = "/".join(st_model.split("/")[1:]) if st_model_package.lower() in ("iiiorg") else st_model
 st_ta_key = st_ta_endpoint = ""  # Placeholder pro případné použití Text Analytics
 analyzer_params = (st_model_package, st_model, st_ta_key, st_ta_endpoint)
         text=st_text,
         entities=st_entities,
         language="cs",
+        score_threshol