GDPR

Running

App Files Files Community

petrsovadina committed 1 day ago

Commit

3152804

•

1 Parent(s): 7b02c7c

Update presidio_nlp_engine_config.py

Browse files

Files changed (1) hide show

presidio_nlp_engine_config.py +3 -49

presidio_nlp_engine_config.py CHANGED Viewed

@@ -1,66 +1,20 @@
 import logging
 from typing import Tuple
-import os
-import spacy
 from presidio_analyzer import RecognizerRegistry
-from presidio_analyzer.nlp_engine import (
-    NlpEngine,
-    NlpEngineProvider,
-)
 from transformers import AutoTokenizer, AutoModelForTokenClassification
 from presidio_analyzer.nlp_engine import TransformersNlpEngine
-from huggingface_hub import login
 logger = logging.getLogger("presidio-streamlit")
-def create_nlp_engine_with_spacy(
-    model_path: str,
-) -> Tuple[NlpEngine, RecognizerRegistry]:
-    """
-    Instantiate an NlpEngine with a spaCy model
-    :param model_path: path to model / model name.
-    """
-    nlp = spacy.load(model_path)
-    nlp_configuration = {
-        "nlp_engine_name": "spacy",
-        "models": [{"lang_code": "cs", "model_name": model_path}],
-        "ner_model_configuration": {
-            "model_to_presidio_entity_mapping": {
-                "PER": "PERSON",
-                "PERSON": "PERSON",
-                "NORP": "NRP",
-                "FAC": "FACILITY",
-                "LOC": "LOCATION",
-                "GPE": "LOCATION",
-                "LOCATION": "LOCATION",
-                "ORG": "ORGANIZATION",
-                "ORGANIZATION": "ORGANIZATION",
-                "DATE": "DATE_TIME",
-                "TIME": "DATE_TIME",
-            },
-            "low_confidence_score_multiplier": 0.4,
-            "low_score_entity_names": ["ORG", "ORGANIZATION"],
-        },
-    }
-    nlp_engine = NlpEngineProvider(nlp_configuration=nlp_configuration).create_engine()
-    registry = RecognizerRegistry()
-    registry.load_predefined_recognizers(nlp_engine=nlp_engine)
-    return nlp_engine, registry
 def create_nlp_engine_with_transformers(
     model_path: str,
 ) -> Tuple[NlpEngine, RecognizerRegistry]:
     """
-    Instantiate an NlpEngine with a TransformersRecognizer and a small spaCy model.
-    The TransformersRecognizer would return results from Transformers models, the spaCy model
-    would return NlpArtifacts such as POS and lemmas.
     :param model_path: HuggingFace model path.
     """
-    print(f"Loading Transformers model: {model_path} of type {type(model_path)}")
-    hf_token = os.getenv("HUGGING_FACE_TOKEN")
-    if hf_token:
-        login(hf_token)
     tokenizer = AutoTokenizer.from_pretrained(model_path)
     model = AutoModelForTokenClassification.from_pretrained(model_path)

 import logging
 from typing import Tuple
 from presidio_analyzer import RecognizerRegistry
+from presidio_analyzer.nlp_engine import NlpEngine
 from transformers import AutoTokenizer, AutoModelForTokenClassification
 from presidio_analyzer.nlp_engine import TransformersNlpEngine
 logger = logging.getLogger("presidio-streamlit")
 def create_nlp_engine_with_transformers(
     model_path: str,
 ) -> Tuple[NlpEngine, RecognizerRegistry]:
     """
+    Instantiate an NlpEngine with a TransformersRecognizer.
     :param model_path: HuggingFace model path.
     """
+    print(f"Loading Transformers model: {model_path}")
     tokenizer = AutoTokenizer.from_pretrained(model_path)
     model = AutoModelForTokenClassification.from_pretrained(model_path)