Spaces:
Running
Running
petrsovadina
commited on
Commit
•
03856aa
1
Parent(s):
754824e
Update presidio_streamlit.py
Browse files- presidio_streamlit.py +3 -70
presidio_streamlit.py
CHANGED
@@ -45,18 +45,17 @@ st.sidebar.header("Anonymizace osobních údajů v českých textech s Microsoft
|
|
45 |
# Výběr modelu
|
46 |
model_help_text = "Vyberte model pro rozpoznávání pojmenovaných entit (NER) pro detekci osobních údajů."
|
47 |
model_list = [
|
48 |
-
"spacy/cs_core_news_sm",
|
49 |
"iiiorg/piiranha-v1-detect-personal-information",
|
50 |
"FacebookAI/xlm-roberta-large-finetuned-conll03-english",
|
51 |
]
|
52 |
if not allow_other_models:
|
53 |
model_list.pop()
|
54 |
|
55 |
-
st_model = st.sidebar.selectbox("NER model", model_list, index=
|
56 |
|
57 |
# Zpracování výběru modelu
|
58 |
st_model_package = st_model.split("/")[0]
|
59 |
-
st_model = "/".join(st_model.split("/")[1:]) if st_model_package.lower() in ("
|
60 |
|
61 |
st_ta_key = st_ta_endpoint = "" # Placeholder pro případné použití Text Analytics
|
62 |
analyzer_params = (st_model_package, st_model, st_ta_key, st_ta_endpoint)
|
@@ -135,70 +134,4 @@ try:
|
|
135 |
text=st_text,
|
136 |
entities=st_entities,
|
137 |
language="cs",
|
138 |
-
|
139 |
-
return_decision_process=st_return_decision_process,
|
140 |
-
allow_list=st_allow_list,
|
141 |
-
deny_list=st_deny_list,
|
142 |
-
)
|
143 |
-
|
144 |
-
# Zobrazení výsledků
|
145 |
-
with col2:
|
146 |
-
st.subheader("Výstup")
|
147 |
-
if st_operator not in ("highlight", "synthesize"):
|
148 |
-
st_anonymize_results = anonymize(
|
149 |
-
text=st_text,
|
150 |
-
operator=st_operator,
|
151 |
-
mask_char=st_mask_char,
|
152 |
-
number_of_chars=st_number_of_chars,
|
153 |
-
encrypt_key=st_encrypt_key,
|
154 |
-
analyze_results=st_analyze_results,
|
155 |
-
)
|
156 |
-
st.text_area(label="Anonymizováno", value=st_anonymize_results.text, height=400)
|
157 |
-
elif st_operator == "synthesize":
|
158 |
-
fake_data = create_fake_data(st_text, st_analyze_results, open_ai_params)
|
159 |
-
st.text_area(label="Syntetická data", value=fake_data, height=400)
|
160 |
-
else:
|
161 |
-
st.subheader("Zvýrazněno")
|
162 |
-
annotated_tokens = annotate(text=st_text, analyze_results=st_analyze_results)
|
163 |
-
annotated_text(*annotated_tokens)
|
164 |
-
|
165 |
-
# Zobrazení tabulky s výsledky
|
166 |
-
st.subheader("Nálezy" if not st_return_decision_process else "Nálezy s rozhodovacími faktory")
|
167 |
-
if st_analyze_results:
|
168 |
-
df = pd.DataFrame.from_records([r.to_dict() for r in st_analyze_results])
|
169 |
-
df["text"] = [st_text[res.start : res.end] for res in st_analyze_results]
|
170 |
-
df_subset = df[["entity_type", "text", "start", "end", "score"]].rename(
|
171 |
-
{
|
172 |
-
"entity_type": "Typ entity",
|
173 |
-
"text": "Text",
|
174 |
-
"start": "Začátek",
|
175 |
-
"end": "Konec",
|
176 |
-
"score": "Důvěryhodnost",
|
177 |
-
},
|
178 |
-
axis=1,
|
179 |
-
)
|
180 |
-
if st_return_decision_process:
|
181 |
-
analysis_explanation_df = pd.DataFrame.from_records(
|
182 |
-
[r.analysis_explanation.to_dict() for r in st_analyze_results]
|
183 |
-
)
|
184 |
-
df_subset = pd.concat([df_subset, analysis_explanation_df], axis=1)
|
185 |
-
st.dataframe(df_subset.reset_index(drop=True), use_container_width=True)
|
186 |
-
else:
|
187 |
-
st.text("Žádné nálezy")
|
188 |
-
|
189 |
-
except Exception as e:
|
190 |
-
logger.error(f"Došlo k chybě: {str(e)}", exc_info=True)
|
191 |
-
st.error(f"Došlo k chybě při zpracování: {str(e)}")
|
192 |
-
|
193 |
-
# Přidání skriptu pro sledování využití (volitelné)
|
194 |
-
components.html(
|
195 |
-
"""
|
196 |
-
<script type="text/javascript">
|
197 |
-
(function(c,l,a,r,i,t,y){
|
198 |
-
c[a]=c[a]||function(){(c[a].q=c[a].q||[]).push(arguments)};
|
199 |
-
t=l.createElement(r);t.async=1;t.src="https://www.clarity.ms/tag/"+i;
|
200 |
-
y=l.getElementsByTagName(r)[0];y.parentNode.insertBefore(t,y);
|
201 |
-
})(window, document, "clarity", "script", "h7f8bp42n8");
|
202 |
-
</script>
|
203 |
-
"""
|
204 |
-
)
|
|
|
45 |
# Výběr modelu
|
46 |
model_help_text = "Vyberte model pro rozpoznávání pojmenovaných entit (NER) pro detekci osobních údajů."
|
47 |
model_list = [
|
|
|
48 |
"iiiorg/piiranha-v1-detect-personal-information",
|
49 |
"FacebookAI/xlm-roberta-large-finetuned-conll03-english",
|
50 |
]
|
51 |
if not allow_other_models:
|
52 |
model_list.pop()
|
53 |
|
54 |
+
st_model = st.sidebar.selectbox("NER model", model_list, index=0, help=model_help_text)
|
55 |
|
56 |
# Zpracování výběru modelu
|
57 |
st_model_package = st_model.split("/")[0]
|
58 |
+
st_model = "/".join(st_model.split("/")[1:]) if st_model_package.lower() in ("iiiorg") else st_model
|
59 |
|
60 |
st_ta_key = st_ta_endpoint = "" # Placeholder pro případné použití Text Analytics
|
61 |
analyzer_params = (st_model_package, st_model, st_ta_key, st_ta_endpoint)
|
|
|
134 |
text=st_text,
|
135 |
entities=st_entities,
|
136 |
language="cs",
|
137 |
+
score_threshol
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|