"""Helpers that ask an OpenAI completion model to fill de-identified text with fake values."""

import logging
from collections import namedtuple
from typing import Optional

import openai

logger = logging.getLogger("presidio-streamlit")

# Connection settings for the completion request. ``api_base``,
# ``deployment_id`` and ``api_version`` are only read when ``api_type`` is
# "azure"; ``model`` is only read otherwise.
OpenAIParams = namedtuple(
    "open_ai_params",
    ["openai_key", "model", "api_base", "deployment_id", "api_version", "api_type"],
)


def call_completion_model(
    prompt: str,
    openai_params: OpenAIParams,
    max_tokens: Optional[int] = 256,
) -> str:
    """
    Create a request for the OpenAI Completion service and return the response.

    :param prompt: Prompt text sent to the completion model.
    :param openai_params: Key, model/deployment and endpoint settings.
    :param max_tokens: Value forwarded as ``max_tokens`` to the request.
    :return: Text of the first returned choice, stripped of surrounding
        whitespace.
    """
    # NOTE: these assignments set attributes on the ``openai`` module itself,
    # so they stay in effect after this function returns.
    openai.api_key = openai_params.openai_key

    # ``api_type`` may be left unset (None); anything other than "azure"
    # (case-insensitive) takes the regular OpenAI path instead of raising.
    api_type = (openai_params.api_type or "").lower()

    if api_type == "azure":
        openai.api_base = openai_params.api_base
        openai.api_version = openai_params.api_version
        openai.api_type = "azure"
        # The Azure path selects the deployment through ``engine``.
        target = {"engine": openai_params.deployment_id}
    else:
        # NOTE(review): this branch does not reset ``openai.api_type`` /
        # ``api_base`` / ``api_version``; values set by an earlier Azure call
        # remain in place - confirm callers never mix both in one process.
        target = {"model": openai_params.model}

    response = openai.Completion.create(
        prompt=prompt,
        max_tokens=max_tokens,
        **target,
    )
    return response.choices[0].text.strip()


def create_prompt(anonymized_text: str) -> str:
    """
    Create the prompt with instructions to GPT-3.

    :param anonymized_text: Text with placeholders instead of PII values.
    :return: The instruction text with ``anonymized_text`` embedded between
        the ``[[TEXT STARTS]]`` and ``[[TEXT ENDS]]`` markers.
    """
    # Doubled braces are f-string escapes: they render as the literal
    # placeholders {DATE} and {ip_address} in the final prompt.
    prompt = f"""
    Your role is to create synthetic text based on de-identified text with placeholders instead of Personally Identifiable Information (PII).
    Replace the placeholders (e.g. <PERSON>, {{DATE}}, {{ip_address}}) with fake values.

    Instructions:

    a. Use completely random numbers, so every digit is drawn between 0 and 9.
    b. Use realistic names that come from diverse genders, ethnicities and countries.
    c. If there are no placeholders, return the text as is.
    d. Keep the formatting as close to the original as possible.
    e. If PII exists in the input, replace it with fake values in the output.
    f. Remove whitespace before and after the generated text

    input: [[TEXT STARTS]]{anonymized_text}[[TEXT ENDS]]
    output:"""
    return prompt