Spaces:
Running
Running
from typing import List, Optional, Tuple | |
import logging | |
import streamlit as st | |
from presidio_analyzer import ( | |
AnalyzerEngine, | |
RecognizerResult, | |
RecognizerRegistry, | |
PatternRecognizer, | |
Pattern, | |
) | |
from presidio_analyzer.nlp_engine import NlpEngine | |
from presidio_anonymizer import AnonymizerEngine | |
from presidio_anonymizer.entities import OperatorConfig | |
from openai_fake_data_generator import ( | |
call_completion_model, | |
OpenAIParams, | |
create_prompt, | |
) | |
from presidio_nlp_engine_config import ( | |
create_nlp_engine_with_spacy, | |
create_nlp_engine_with_transformers, | |
) | |
# Module-level logger registered under the name "presidio-streamlit".
logger = logging.getLogger("presidio-streamlit")
def nlp_engine_and_registry(
    model_family: str,
    model_path: str,
    ta_key: Optional[str] = None,
    ta_endpoint: Optional[str] = None,
) -> Tuple[NlpEngine, RecognizerRegistry]:
    """Create the NLP engine and recognizer registry for a model family.

    The family is matched case-insensitively by substring:

    * contains ``"spacy"`` -> ``create_nlp_engine_with_spacy(model_path)``
    * contains ``"transformers"`` or ``"iiiorg"`` ->
      ``create_nlp_engine_with_transformers(model_path)``

    Args:
        model_family: Name of the model family used to pick the factory.
        model_path: Model identifier forwarded unchanged to the factory.
        ta_key: Accepted for interface compatibility; not used by any
            branch in this function.
        ta_endpoint: Accepted for interface compatibility; not used by any
            branch in this function.

    Returns:
        Whatever the selected factory returns, annotated as an
        ``(NlpEngine, RecognizerRegistry)`` pair.

    Raises:
        ValueError: If ``model_family`` matches none of the supported
            substrings.
    """
    # Normalize once so every branch compares against the same value.
    family = model_family.lower()

    # "spacy" is checked first, so it wins if several substrings are present.
    if "spacy" in family:
        return create_nlp_engine_with_spacy(model_path)

    if "transformers" in family or "iiiorg" in family:
        return create_nlp_engine_with_transformers(model_path)

    # The error message reports the caller's original (non-lowercased) value.
    raise ValueError(f"Model family {model_family} not supported")
def analyzer_engine(
    model_family: str,
    model_path: str,
    ta_key: Optional[str] = None,
    ta_endpoint: Optional[str] = None,
) -> AnalyzerEngine:
    """Create an ``AnalyzerEngine`` for the requested model.

    All four arguments are forwarded positionally to
    ``nlp_engine_and_registry``; the NLP engine and registry it returns are
    then used to construct the analyzer.

    Args:
        model_family: Name of the model family.
        model_path: Model identifier.
        ta_key: Optional value passed through to ``nlp_engine_and_registry``.
        ta_endpoint: Optional value passed through to
            ``nlp_engine_and_registry``.

    Returns:
        An ``AnalyzerEngine`` built from the resolved NLP engine and registry.

    Raises:
        ValueError: Propagated from ``nlp_engine_and_registry`` when the
            model family is not supported.
    """
    nlp_engine, registry = nlp_engine_and_registry(
        model_family, model_path, ta_key, ta_endpoint
    )
    return AnalyzerEngine(nlp_engine=nlp_engine, registry=registry)
# ... (rest of the helper functions)