petrsovadina committed on
Commit
d7e23a0
1 Parent(s): aa4eed8

Update presidio_nlp_engine_config.py

Browse files
Files changed (1) hide show
  1. presidio_nlp_engine_config.py +14 -33
presidio_nlp_engine_config.py CHANGED
@@ -1,5 +1,6 @@
1
  import logging
2
  from typing import Tuple
 
3
  import spacy
4
  from presidio_analyzer import RecognizerRegistry
5
  from presidio_analyzer.nlp_engine import (
@@ -8,6 +9,7 @@ from presidio_analyzer.nlp_engine import (
8
  )
9
  from transformers import AutoTokenizer, AutoModelForTokenClassification
10
  from presidio_analyzer.nlp_engine import TransformersNlpEngine
 
11
 
12
  logger = logging.getLogger("presidio-streamlit")
13
 
@@ -21,7 +23,7 @@ def create_nlp_engine_with_spacy(
21
  nlp = spacy.load(model_path)
22
  nlp_configuration = {
23
  "nlp_engine_name": "spacy",
24
- "models": [{"lang_code": "en", "model_name": model_path}],
25
  "ner_model_configuration": {
26
  "model_to_presidio_entity_mapping": {
27
  "PER": "PERSON",
@@ -56,37 +58,16 @@ def create_nlp_engine_with_transformers(
56
  """
57
  print(f"Loading Transformers model: {model_path} of type {type(model_path)}")
58
 
59
- if model_path == "iiiorg/piiranha-v1-detect-personal-information":
60
- # Specific configuration for your model
61
- tokenizer = AutoTokenizer.from_pretrained(model_path)
62
- model = AutoModelForTokenClassification.from_pretrained(model_path)
63
-
64
- nlp_engine = TransformersNlpEngine(tokenizer=tokenizer, model=model, device="cpu")
65
-
66
- # You might want to add specific entity mappings for this model
67
- entity_mapping = {
68
- "PERSON": "PERSON",
69
- "LOCATION": "LOCATION",
70
- "ORGANIZATION": "ORGANIZATION",
71
- # Add more mappings as needed
72
- }
73
-
74
- registry = RecognizerRegistry()
75
- registry.load_predefined_recognizers(nlp_engine=nlp_engine)
76
-
77
- # You might want to add custom recognizers for this model
78
- # For example:
79
- # from presidio_analyzer import EntityRecognizer
80
- # custom_recognizer = EntityRecognizer(supported_entities=["PERSON", "LOCATION", "ORGANIZATION"])
81
- # registry.add_recognizer(custom_recognizer)
82
-
83
- else:
84
- # Default configuration for other transformer models
85
- tokenizer = AutoTokenizer.from_pretrained(model_path)
86
- model = AutoModelForTokenClassification.from_pretrained(model_path)
87
-
88
- nlp_engine = TransformersNlpEngine(tokenizer=tokenizer, model=model, device="cpu")
89
- registry = RecognizerRegistry()
90
- registry.load_predefined_recognizers(nlp_engine=nlp_engine)
91
 
92
  return nlp_engine, registry
 
1
  import logging
2
  from typing import Tuple
3
+ import os
4
  import spacy
5
  from presidio_analyzer import RecognizerRegistry
6
  from presidio_analyzer.nlp_engine import (
 
9
  )
10
  from transformers import AutoTokenizer, AutoModelForTokenClassification
11
  from presidio_analyzer.nlp_engine import TransformersNlpEngine
12
+ from huggingface_hub import login
13
 
14
  logger = logging.getLogger("presidio-streamlit")
15
 
 
23
  nlp = spacy.load(model_path)
24
  nlp_configuration = {
25
  "nlp_engine_name": "spacy",
26
+ "models": [{"lang_code": "cs", "model_name": model_path}],
27
  "ner_model_configuration": {
28
  "model_to_presidio_entity_mapping": {
29
  "PER": "PERSON",
 
58
  """
59
  print(f"Loading Transformers model: {model_path} of type {type(model_path)}")
60
 
61
+ hf_token = os.getenv("HUGGING_FACE_TOKEN")
62
+ if hf_token:
63
+ login(hf_token)
64
+
65
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
66
+ model = AutoModelForTokenClassification.from_pretrained(model_path)
67
+
68
+ nlp_engine = TransformersNlpEngine(tokenizer=tokenizer, model=model, device="cpu")
69
+
70
+ registry = RecognizerRegistry()
71
+ registry.load_predefined_recognizers(nlp_engine=nlp_engine)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  return nlp_engine, registry