Spaces:
Running
Running
import logging | |
from typing import Tuple | |
import os | |
from presidio_analyzer import RecognizerRegistry | |
from presidio_analyzer.nlp_engine import NlpEngine | |
from transformers import AutoTokenizer, AutoModelForTokenClassification | |
from presidio_analyzer.nlp_engine import TransformersNlpEngine | |
logger = logging.getLogger("presidio-streamlit") | |
def create_nlp_engine_with_transformers(
    model_path: str,
) -> Tuple[NlpEngine, RecognizerRegistry]:
    """
    Build a TransformersNlpEngine on CPU together with a RecognizerRegistry.

    The tokenizer and token-classification model are both loaded from
    ``model_path``. The access token is read from the ``HUGGING_FACE_TOKEN``
    environment variable; ``None`` is passed when the variable is unset.

    :param model_path: HuggingFace model path.
    :return: Tuple of the NLP engine and a registry whose predefined
        recognizers were loaded with that engine.
    """
    print(f"Loading Transformers model: {model_path}")

    # One kwargs dict so the tokenizer and the model share the same credentials.
    # NOTE(review): newer transformers releases deprecate `use_auth_token` in
    # favour of `token` -- confirm against the pinned version before changing.
    auth_kwargs = {"use_auth_token": os.getenv("HUGGING_FACE_TOKEN")}
    hf_tokenizer = AutoTokenizer.from_pretrained(model_path, **auth_kwargs)
    hf_model = AutoModelForTokenClassification.from_pretrained(
        model_path, **auth_kwargs
    )

    # NOTE(review): verify that the installed presidio-analyzer's
    # TransformersNlpEngine accepts these keyword arguments.
    engine = TransformersNlpEngine(
        tokenizer=hf_tokenizer,
        model=hf_model,
        device="cpu",
    )

    recognizers = RecognizerRegistry()
    recognizers.load_predefined_recognizers(nlp_engine=engine)
    return engine, recognizers