from typing import List, Optional, Tuple
import logging

import streamlit as st
from presidio_analyzer import (
    AnalyzerEngine,
    RecognizerResult,
    RecognizerRegistry,
    PatternRecognizer,
    Pattern,
)
from presidio_analyzer.nlp_engine import NlpEngine
from presidio_anonymizer import AnonymizerEngine
from presidio_anonymizer.entities import OperatorConfig

from openai_fake_data_generator import (
    call_completion_model,
    OpenAIParams,
    create_prompt,
)
from presidio_nlp_engine_config import (
    create_nlp_engine_with_spacy,
    create_nlp_engine_with_transformers,
)

logger = logging.getLogger("presidio-streamlit")


@st.cache_resource
def nlp_engine_and_registry(
    model_family: str,
    model_path: str,
    ta_key: Optional[str] = None,
    ta_endpoint: Optional[str] = None,
) -> Tuple[NlpEngine, RecognizerRegistry]:
    """Create the NLP engine and recognizer registry for a model family.

    The family name is matched case-insensitively by substring:
    "spacy" is checked first, then "transformers" or "iiiorg".

    :param model_family: Name of the model family to dispatch on.
    :param model_path: Passed unchanged to the selected factory helper.
    :param ta_key: Accepted for signature compatibility; not read here.
    :param ta_endpoint: Accepted for signature compatibility; not read here.
    :return: Whatever the selected ``create_nlp_engine_with_*`` helper
        returns (annotated as an ``(NlpEngine, RecognizerRegistry)`` pair).
    :raises ValueError: If no known family name occurs in ``model_family``.
    """
    family = model_family.lower()

    # spaCy takes precedence when several family names appear in the string.
    if "spacy" in family:
        return create_nlp_engine_with_spacy(model_path)

    if any(marker in family for marker in ("transformers", "iiiorg")):
        return create_nlp_engine_with_transformers(model_path)

    raise ValueError(f"Model family {model_family} not supported")


@st.cache_resource
def analyzer_engine(
    model_family: str,
    model_path: str,
    ta_key: Optional[str] = None,
    ta_endpoint: Optional[str] = None,
) -> AnalyzerEngine:
    """Build an ``AnalyzerEngine`` for the requested model.

    All four arguments are forwarded positionally to
    ``nlp_engine_and_registry``; the resulting engine and registry are
    handed to the ``AnalyzerEngine`` constructor.

    :param model_family: Name of the model family.
    :param model_path: Model path or identifier for that family.
    :param ta_key: Forwarded as-is to ``nlp_engine_and_registry``.
    :param ta_endpoint: Forwarded as-is to ``nlp_engine_and_registry``.
    :return: The newly constructed ``AnalyzerEngine``.
    """
    engine, recognizers = nlp_engine_and_registry(
        model_family, model_path, ta_key, ta_endpoint
    )
    return AnalyzerEngine(nlp_engine=engine, registry=recognizers)


# ... (rest of the helper functions)