Spaces:
Running
Running
petrsovadina
committed on
Commit
•
3152804
1
Parent(s):
7b02c7c
Update presidio_nlp_engine_config.py
Browse files
presidio_nlp_engine_config.py
CHANGED
@@ -1,66 +1,20 @@
|
|
1 |
import logging
|
2 |
from typing import Tuple
|
3 |
-
import os
|
4 |
-
import spacy
|
5 |
from presidio_analyzer import RecognizerRegistry
|
6 |
-
from presidio_analyzer.nlp_engine import (
|
7 |
-
NlpEngine,
|
8 |
-
NlpEngineProvider,
|
9 |
-
)
|
10 |
from transformers import AutoTokenizer, AutoModelForTokenClassification
|
11 |
from presidio_analyzer.nlp_engine import TransformersNlpEngine
|
12 |
-
from huggingface_hub import login
|
13 |
|
14 |
logger = logging.getLogger("presidio-streamlit")
|
15 |
|
16 |
-
def create_nlp_engine_with_spacy(
|
17 |
-
model_path: str,
|
18 |
-
) -> Tuple[NlpEngine, RecognizerRegistry]:
|
19 |
-
"""
|
20 |
-
Instantiate an NlpEngine with a spaCy model
|
21 |
-
:param model_path: path to model / model name.
|
22 |
-
"""
|
23 |
-
nlp = spacy.load(model_path)
|
24 |
-
nlp_configuration = {
|
25 |
-
"nlp_engine_name": "spacy",
|
26 |
-
"models": [{"lang_code": "cs", "model_name": model_path}],
|
27 |
-
"ner_model_configuration": {
|
28 |
-
"model_to_presidio_entity_mapping": {
|
29 |
-
"PER": "PERSON",
|
30 |
-
"PERSON": "PERSON",
|
31 |
-
"NORP": "NRP",
|
32 |
-
"FAC": "FACILITY",
|
33 |
-
"LOC": "LOCATION",
|
34 |
-
"GPE": "LOCATION",
|
35 |
-
"LOCATION": "LOCATION",
|
36 |
-
"ORG": "ORGANIZATION",
|
37 |
-
"ORGANIZATION": "ORGANIZATION",
|
38 |
-
"DATE": "DATE_TIME",
|
39 |
-
"TIME": "DATE_TIME",
|
40 |
-
},
|
41 |
-
"low_confidence_score_multiplier": 0.4,
|
42 |
-
"low_score_entity_names": ["ORG", "ORGANIZATION"],
|
43 |
-
},
|
44 |
-
}
|
45 |
-
nlp_engine = NlpEngineProvider(nlp_configuration=nlp_configuration).create_engine()
|
46 |
-
registry = RecognizerRegistry()
|
47 |
-
registry.load_predefined_recognizers(nlp_engine=nlp_engine)
|
48 |
-
return nlp_engine, registry
|
49 |
-
|
50 |
def create_nlp_engine_with_transformers(
|
51 |
model_path: str,
|
52 |
) -> Tuple[NlpEngine, RecognizerRegistry]:
|
53 |
"""
|
54 |
-
Instantiate an NlpEngine with a TransformersRecognizer
|
55 |
-
The TransformersRecognizer would return results from Transformers models, the spaCy model
|
56 |
-
would return NlpArtifacts such as POS and lemmas.
|
57 |
:param model_path: HuggingFace model path.
|
58 |
"""
|
59 |
-
print(f"Loading Transformers model: {model_path}
|
60 |
-
|
61 |
-
hf_token = os.getenv("HUGGING_FACE_TOKEN")
|
62 |
-
if hf_token:
|
63 |
-
login(hf_token)
|
64 |
|
65 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
66 |
model = AutoModelForTokenClassification.from_pretrained(model_path)
|
|
|
1 |
import logging
|
2 |
from typing import Tuple
|
|
|
|
|
3 |
from presidio_analyzer import RecognizerRegistry
|
4 |
+
from presidio_analyzer.nlp_engine import NlpEngine
|
|
|
|
|
|
|
5 |
from transformers import AutoTokenizer, AutoModelForTokenClassification
|
6 |
from presidio_analyzer.nlp_engine import TransformersNlpEngine
|
|
|
7 |
|
8 |
logger = logging.getLogger("presidio-streamlit")
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
def create_nlp_engine_with_transformers(
|
11 |
model_path: str,
|
12 |
) -> Tuple[NlpEngine, RecognizerRegistry]:
|
13 |
"""
|
14 |
+
Instantiate an NlpEngine with a TransformersRecognizer.
|
|
|
|
|
15 |
:param model_path: HuggingFace model path.
|
16 |
"""
|
17 |
+
print(f"Loading Transformers model: {model_path}")
|
|
|
|
|
|
|
|
|
18 |
|
19 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
20 |
model = AutoModelForTokenClassification.from_pretrained(model_path)
|