petrsovadina committed on
Commit
3152804
1 Parent(s): 7b02c7c

Update presidio_nlp_engine_config.py

Browse files
Files changed (1) hide show
  1. presidio_nlp_engine_config.py +3 -49
presidio_nlp_engine_config.py CHANGED
@@ -1,66 +1,20 @@
1
  import logging
2
  from typing import Tuple
3
- import os
4
- import spacy
5
  from presidio_analyzer import RecognizerRegistry
6
- from presidio_analyzer.nlp_engine import (
7
- NlpEngine,
8
- NlpEngineProvider,
9
- )
10
  from transformers import AutoTokenizer, AutoModelForTokenClassification
11
  from presidio_analyzer.nlp_engine import TransformersNlpEngine
12
- from huggingface_hub import login
13
 
14
  logger = logging.getLogger("presidio-streamlit")
15
 
16
- def create_nlp_engine_with_spacy(
17
- model_path: str,
18
- ) -> Tuple[NlpEngine, RecognizerRegistry]:
19
- """
20
- Instantiate an NlpEngine with a spaCy model
21
- :param model_path: path to model / model name.
22
- """
23
- nlp = spacy.load(model_path)
24
- nlp_configuration = {
25
- "nlp_engine_name": "spacy",
26
- "models": [{"lang_code": "cs", "model_name": model_path}],
27
- "ner_model_configuration": {
28
- "model_to_presidio_entity_mapping": {
29
- "PER": "PERSON",
30
- "PERSON": "PERSON",
31
- "NORP": "NRP",
32
- "FAC": "FACILITY",
33
- "LOC": "LOCATION",
34
- "GPE": "LOCATION",
35
- "LOCATION": "LOCATION",
36
- "ORG": "ORGANIZATION",
37
- "ORGANIZATION": "ORGANIZATION",
38
- "DATE": "DATE_TIME",
39
- "TIME": "DATE_TIME",
40
- },
41
- "low_confidence_score_multiplier": 0.4,
42
- "low_score_entity_names": ["ORG", "ORGANIZATION"],
43
- },
44
- }
45
- nlp_engine = NlpEngineProvider(nlp_configuration=nlp_configuration).create_engine()
46
- registry = RecognizerRegistry()
47
- registry.load_predefined_recognizers(nlp_engine=nlp_engine)
48
- return nlp_engine, registry
49
-
50
  def create_nlp_engine_with_transformers(
51
  model_path: str,
52
  ) -> Tuple[NlpEngine, RecognizerRegistry]:
53
  """
54
- Instantiate an NlpEngine with a TransformersRecognizer and a small spaCy model.
55
- The TransformersRecognizer would return results from Transformers models, the spaCy model
56
- would return NlpArtifacts such as POS and lemmas.
57
  :param model_path: HuggingFace model path.
58
  """
59
- print(f"Loading Transformers model: {model_path} of type {type(model_path)}")
60
-
61
- hf_token = os.getenv("HUGGING_FACE_TOKEN")
62
- if hf_token:
63
- login(hf_token)
64
 
65
  tokenizer = AutoTokenizer.from_pretrained(model_path)
66
  model = AutoModelForTokenClassification.from_pretrained(model_path)
 
1
  import logging
2
  from typing import Tuple
 
 
3
  from presidio_analyzer import RecognizerRegistry
4
+ from presidio_analyzer.nlp_engine import NlpEngine
 
 
 
5
  from transformers import AutoTokenizer, AutoModelForTokenClassification
6
  from presidio_analyzer.nlp_engine import TransformersNlpEngine
 
7
 
8
  logger = logging.getLogger("presidio-streamlit")
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  def create_nlp_engine_with_transformers(
11
  model_path: str,
12
  ) -> Tuple[NlpEngine, RecognizerRegistry]:
13
  """
14
+ Instantiate an NlpEngine with a TransformersRecognizer.
 
 
15
  :param model_path: HuggingFace model path.
16
  """
17
+ print(f"Loading Transformers model: {model_path}")
 
 
 
 
18
 
19
  tokenizer = AutoTokenizer.from_pretrained(model_path)
20
  model = AutoModelForTokenClassification.from_pretrained(model_path)