Spaces:
Running
Running
File size: 1,089 Bytes
3477655 57594ac bd67f9b 3477655 3152804 d996ef6 3477655 3152804 3477655 3152804 d7e23a0 bd67f9b d7e23a0 1dc40ea d996ef6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
import logging
from typing import Tuple
import os
from presidio_analyzer import RecognizerRegistry
from presidio_analyzer.nlp_engine import NlpEngine
from transformers import AutoTokenizer, AutoModelForTokenClassification
from presidio_analyzer.nlp_engine import TransformersNlpEngine
logger = logging.getLogger("presidio-streamlit")
def create_nlp_engine_with_transformers(
model_path: str,
) -> Tuple[NlpEngine, RecognizerRegistry]:
"""
Instantiate an NlpEngine with a TransformersRecognizer.
:param model_path: HuggingFace model path.
"""
print(f"Loading Transformers model: {model_path}")
hf_token = os.getenv("HUGGING_FACE_TOKEN")
tokenizer = AutoTokenizer.from_pretrained(model_path, use_auth_token=hf_token)
model = AutoModelForTokenClassification.from_pretrained(model_path, use_auth_token=hf_token)
nlp_engine = TransformersNlpEngine(tokenizer=tokenizer, model=model, device="cpu")
registry = RecognizerRegistry()
registry.load_predefined_recognizers(nlp_engine=nlp_engine)
return nlp_engine, registry |