import logging
import os
from typing import Tuple

from presidio_analyzer import RecognizerRegistry
from presidio_analyzer.nlp_engine import NlpEngine, TransformersNlpEngine
from transformers import AutoModelForTokenClassification, AutoTokenizer

logger = logging.getLogger("presidio-streamlit")


def create_nlp_engine_with_transformers(
    model_path: str,
) -> Tuple[NlpEngine, RecognizerRegistry]:
    """
    Instantiate an NlpEngine backed by a HuggingFace token-classification model.

    The tokenizer and model are loaded from ``model_path``. The value of the
    ``HUGGING_FACE_TOKEN`` environment variable (``None`` when unset) is
    forwarded as the authentication token for both loads.

    :param model_path: HuggingFace model path.
    :return: A tuple of the NLP engine and a recognizer registry populated
        with the predefined recognizers for that engine.
    """
    # Report through the module logger rather than a bare print so the
    # hosting application decides where (and whether) the message appears.
    logger.info("Loading Transformers model: %s", model_path)

    # NOTE(review): newer transformers releases deprecate `use_auth_token`
    # in favour of `token` -- confirm against the pinned version before
    # switching the keyword.
    auth_kwargs = {"use_auth_token": os.getenv("HUGGING_FACE_TOKEN")}
    tokenizer = AutoTokenizer.from_pretrained(model_path, **auth_kwargs)
    model = AutoModelForTokenClassification.from_pretrained(
        model_path, **auth_kwargs
    )

    # NOTE(review): verify that the installed presidio-analyzer's
    # TransformersNlpEngine accepts tokenizer/model/device keywords; the
    # published constructor is documented as taking `models` and
    # `ner_model_configuration` -- TODO confirm.
    nlp_engine = TransformersNlpEngine(
        tokenizer=tokenizer, model=model, device="cpu"
    )

    registry = RecognizerRegistry()
    registry.load_predefined_recognizers(nlp_engine=nlp_engine)

    return nlp_engine, registry