petrsovadina committed on
Commit
fdf8839
1 Parent(s): e15afd8

Update openai_fake_data_generator.py

Browse files
Files changed (1) hide show
  1. openai_fake_data_generator.py +16 -33
openai_fake_data_generator.py CHANGED
@@ -1,8 +1,6 @@
1
  from collections import namedtuple
2
  from typing import Optional
3
-
4
  import openai
5
- from openai import OpenAI, AzureOpenAI
6
  import logging
7
 
8
  logger = logging.getLogger("presidio-streamlit")
@@ -12,50 +10,42 @@ OpenAIParams = namedtuple(
12
  ["openai_key", "model", "api_base", "deployment_id", "api_version", "api_type"],
13
  )
14
 
15
-
16
  def call_completion_model(
17
  prompt: str,
18
  openai_params: OpenAIParams,
19
  max_tokens: Optional[int] = 256,
20
  ) -> str:
21
- """Creates a request for the OpenAI Completion service and returns the response.
22
-
23
- :param prompt: The prompt for the completion model
24
- :param openai_params: OpenAI parameters for the completion model
25
- :param max_tokens: The maximum number of tokens to generate.
26
- """
27
  if openai_params.api_type.lower() == "azure":
28
- client = AzureOpenAI(
29
- api_version=openai_params.api_version,
30
- api_key=openai_params.openai_key,
31
- azure_endpoint=openai_params.api_base,
32
- azure_deployment=openai_params.deployment_id,
 
 
33
  )
34
  else:
35
- client = OpenAI(api_key=openai_params.openai_key)
36
-
37
- response = client.completions.create(
38
- model=openai_params.model,
39
- prompt=prompt,
40
- max_tokens=max_tokens,
41
- )
42
 
43
  return response.choices[0].text.strip()
44
 
45
-
46
  def create_prompt(anonymized_text: str) -> str:
47
  """
48
  Create the prompt with instructions to GPT-3.
49
-
50
- :param anonymized_text: Text with placeholders instead of PII values, e.g. My name is <PERSON>.
51
  """
52
-
53
  prompt = f"""
54
  Your role is to create synthetic text based on de-identified text with placeholders instead of Personally Identifiable Information (PII).
55
  Replace the placeholders (e.g. ,<PERSON>, {{DATE}}, {{ip_address}}) with fake values.
56
 
57
  Instructions:
58
-
59
  a. Use completely random numbers, so every digit is drawn between 0 and 9.
60
  b. Use realistic names that come from diverse genders, ethnicities and countries.
61
  c. If there are no placeholders, return the text as is.
@@ -63,13 +53,6 @@ def create_prompt(anonymized_text: str) -> str:
63
  e. If PII exists in the input, replace it with fake values in the output.
64
  f. Remove whitespace before and after the generated text
65
 
66
- input: [[TEXT STARTS]] How do I change the limit on my credit card {{credit_card_number}}?[[TEXT ENDS]]
67
- output: How do I change the limit on my credit card 2539 3519 2345 1555?
68
- input: [[TEXT STARTS]]<PERSON> was the chief science officer at <ORGANIZATION>.[[TEXT ENDS]]
69
- output: Katherine Buckjov was the chief science officer at NASA.
70
- input: [[TEXT STARTS]]Cameroon lives in <LOCATION>.[[TEXT ENDS]]
71
- output: Vladimir lives in Moscow.
72
-
73
  input: [[TEXT STARTS]]{anonymized_text}[[TEXT ENDS]]
74
  output:"""
75
  return prompt
 
1
  from collections import namedtuple
2
  from typing import Optional
 
3
  import openai
 
4
  import logging
5
 
6
  logger = logging.getLogger("presidio-streamlit")
 
10
  ["openai_key", "model", "api_base", "deployment_id", "api_version", "api_type"],
11
  )
12
 
 
13
  def call_completion_model(
14
  prompt: str,
15
  openai_params: OpenAIParams,
16
  max_tokens: Optional[int] = 256,
17
  ) -> str:
18
+ """Creates a request for the OpenAI Completion service and returns the response."""
19
+ openai.api_key = openai_params.openai_key
20
+
 
 
 
21
  if openai_params.api_type.lower() == "azure":
22
+ openai.api_base = openai_params.api_base
23
+ openai.api_version = openai_params.api_version
24
+ openai.api_type = "azure"
25
+ response = openai.Completion.create(
26
+ engine=openai_params.deployment_id,
27
+ prompt=prompt,
28
+ max_tokens=max_tokens,
29
  )
30
  else:
31
+ response = openai.Completion.create(
32
+ model=openai_params.model,
33
+ prompt=prompt,
34
+ max_tokens=max_tokens,
35
+ )
 
 
36
 
37
  return response.choices[0].text.strip()
38
 
 
39
  def create_prompt(anonymized_text: str) -> str:
40
  """
41
  Create the prompt with instructions to GPT-3.
42
+ :param anonymized_text: Text with placeholders instead of PII values.
 
43
  """
 
44
  prompt = f"""
45
  Your role is to create synthetic text based on de-identified text with placeholders instead of Personally Identifiable Information (PII).
46
  Replace the placeholders (e.g. ,<PERSON>, {{DATE}}, {{ip_address}}) with fake values.
47
 
48
  Instructions:
 
49
  a. Use completely random numbers, so every digit is drawn between 0 and 9.
50
  b. Use realistic names that come from diverse genders, ethnicities and countries.
51
  c. If there are no placeholders, return the text as is.
 
53
  e. If PII exists in the input, replace it with fake values in the output.
54
  f. Remove whitespace before and after the generated text
55
 
 
 
 
 
 
 
 
56
  input: [[TEXT STARTS]]{anonymized_text}[[TEXT ENDS]]
57
  output:"""
58
  return prompt