File size: 1,089 Bytes
3477655
57594ac
bd67f9b
3477655
3152804
d996ef6
 
3477655
 
 
 
 
 
 
3152804
3477655
 
3152804
d7e23a0
bd67f9b
 
 
 
d7e23a0
 
 
 
 
1dc40ea
d996ef6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import logging
from typing import Tuple
import os
from presidio_analyzer import RecognizerRegistry
from presidio_analyzer.nlp_engine import NlpEngine
from transformers import AutoTokenizer, AutoModelForTokenClassification
from presidio_analyzer.nlp_engine import TransformersNlpEngine

# Module-level logger registered under the fixed name "presidio-streamlit".
logger = logging.getLogger("presidio-streamlit")

def create_nlp_engine_with_transformers(
    model_path: str,
) -> Tuple[NlpEngine, RecognizerRegistry]:
    """
    Instantiate a TransformersNlpEngine and a matching RecognizerRegistry.

    The tokenizer and the token-classification model are both loaded from
    ``model_path``. The value of the ``HUGGING_FACE_TOKEN`` environment
    variable (``None`` when unset) is forwarded as the auth token to both
    loads.

    :param model_path: HuggingFace model path.
    :return: Tuple of the NLP engine and a registry that has the predefined
        recognizers loaded for that engine.
    """
    # Report through the module logger rather than printing to stdout.
    logger.info("Loading Transformers model: %s", model_path)

    hf_token = os.getenv("HUGGING_FACE_TOKEN")

    # NOTE(review): recent transformers releases deprecate `use_auth_token`
    # in favour of `token` — confirm against the pinned version before
    # switching the keyword.
    tokenizer = AutoTokenizer.from_pretrained(
        model_path, use_auth_token=hf_token
    )
    model = AutoModelForTokenClassification.from_pretrained(
        model_path, use_auth_token=hf_token
    )

    # NOTE(review): confirm the installed presidio-analyzer accepts these
    # keyword arguments; its documented TransformersNlpEngine constructor
    # takes a `models` configuration rather than pre-loaded tokenizer/model
    # objects, in which case this call would raise TypeError.
    nlp_engine = TransformersNlpEngine(
        tokenizer=tokenizer, model=model, device="cpu"
    )

    registry = RecognizerRegistry()
    registry.load_predefined_recognizers(nlp_engine=nlp_engine)

    return nlp_engine, registry