GDPR / presidio_helpers.py
petrsovadina's picture
Update presidio_helpers.py
10ab3e3 verified
raw
history blame
1.6 kB
from typing import List, Optional, Tuple
import logging
import streamlit as st
from presidio_analyzer import (
AnalyzerEngine,
RecognizerResult,
RecognizerRegistry,
PatternRecognizer,
Pattern,
)
from presidio_analyzer.nlp_engine import NlpEngine
from presidio_anonymizer import AnonymizerEngine
from presidio_anonymizer.entities import OperatorConfig
from openai_fake_data_generator import (
call_completion_model,
OpenAIParams,
create_prompt,
)
from presidio_nlp_engine_config import (
create_nlp_engine_with_spacy,
create_nlp_engine_with_transformers,
)
# Module-level logger, registered under the fixed name "presidio-streamlit".
logger = logging.getLogger("presidio-streamlit")
@st.cache_resource
def nlp_engine_and_registry(
    model_family: str,
    model_path: str,
    ta_key: Optional[str] = None,
    ta_endpoint: Optional[str] = None,
) -> Tuple[NlpEngine, RecognizerRegistry]:
    """Build the NLP engine and recognizer registry for a model family.

    The result is wrapped in ``st.cache_resource``, so Streamlit reuses it
    for repeated calls with the same arguments.

    Args:
        model_family: Name of the model family. Matching is a
            case-insensitive substring test: ``"spacy"`` selects the spaCy
            factory; ``"transformers"`` or ``"iiiorg"`` selects the
            transformers factory.
        model_path: Model identifier passed unchanged to the chosen factory.
        ta_key: Accepted for interface compatibility; not read here.
        ta_endpoint: Accepted for interface compatibility; not read here.

    Returns:
        Whatever the selected factory returns; annotated as an
        ``(NlpEngine, RecognizerRegistry)`` pair.

    Raises:
        ValueError: If ``model_family`` matches none of the known families.
    """
    # Normalise once so every substring check below is case-insensitive.
    family = model_family.lower()

    if "spacy" in family:
        return create_nlp_engine_with_spacy(model_path)

    if any(marker in family for marker in ("transformers", "iiiorg")):
        return create_nlp_engine_with_transformers(model_path)

    raise ValueError(f"Model family {model_family} not supported")
@st.cache_resource
def analyzer_engine(
    model_family: str,
    model_path: str,
    ta_key: Optional[str] = None,
    ta_endpoint: Optional[str] = None,
) -> AnalyzerEngine:
    """Create an ``AnalyzerEngine`` for the requested model.

    The result is wrapped in ``st.cache_resource``, so Streamlit reuses it
    for repeated calls with the same arguments.

    Args:
        model_family: Model family name, forwarded to
            ``nlp_engine_and_registry``.
        model_path: Model identifier, forwarded to
            ``nlp_engine_and_registry``.
        ta_key: Optional value forwarded unchanged to
            ``nlp_engine_and_registry``.
        ta_endpoint: Optional value forwarded unchanged to
            ``nlp_engine_and_registry``.

    Returns:
        An ``AnalyzerEngine`` built from the NLP engine and registry that
        ``nlp_engine_and_registry`` returns.
    """
    engine_and_registry = nlp_engine_and_registry(
        model_family, model_path, ta_key, ta_endpoint
    )
    nlp_engine, registry = engine_and_registry
    return AnalyzerEngine(nlp_engine=nlp_engine, registry=registry)
# ... (rest of the helper functions)