File size: 3,561 Bytes
2d3888b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import re

# Matches either tag type so that blocks can be walked in document order.
_TAG_PATTERN = re.compile(r'<(response|clarification)>(.*?)</\1>', re.DOTALL)

# A list bullet: a hyphen that opens a line (after optional indentation) or a
# free-standing hyphen surrounded by whitespace (inline lists). Hyphens inside
# a word, as in "Self-help", are deliberately not treated as bullets.
_BULLET_PATTERN = re.compile(r'^[ \t]*-|(?<=\s)-(?=\s)', re.MULTILINE)


def _parse_clarification(clarification_text):
    """Return the question dicts found in the body of one clarification tag.

    Each question starts with a "- text:" marker and must be followed by an
    "options:" section; questions without that section are skipped.
    """
    questions = []
    # Everything before the first "- text:" marker is preamble
    # (e.g. "questions:") and carries no question.
    for block in clarification_text.split("- text:")[1:]:
        question, marker, options_text = block.partition("options:")
        if not marker:
            continue
        options = [
            option.strip()
            for option in _BULLET_PATTERN.split(options_text)
            if option.strip()
        ]
        questions.append({'question': question.strip(), 'options': options})
    return questions


def parse_text(input_text):
    """Split tagged text into a plain response and structured clarifications.

    The input may contain ``<response>...</response>`` and
    ``<clarification>...</clarification>`` blocks (not nested). Text outside
    the tags and the content of response tags are concatenated, one piece per
    line, in the order they appear. Clarification blocks are removed from the
    response and parsed into questions instead.

    Args:
        input_text: The raw text to parse.

    Returns:
        A ``(combined_response, parsed_clarifications)`` tuple, where
        ``combined_response`` is the stripped response string and
        ``parsed_clarifications`` is a list of
        ``{'question': str, 'options': list[str]}`` dicts.
    """
    parts = []
    parsed_clarifications = []
    last_end = 0

    # Iterating both tag types together keeps a single, monotonically
    # advancing cursor, so interleaved tags never duplicate or leak text.
    for match in _TAG_PATTERN.finditer(input_text):
        # Free text between the previous tag and this one.
        parts.append(input_text[last_end:match.start()].strip())
        content = match.group(2).strip()
        if match.group(1) == "response":
            parts.append(content)
        else:
            parsed_clarifications.extend(_parse_clarification(content))
        last_end = match.end()

    # Whatever follows the last tag.
    parts.append(input_text[last_end:].strip())

    return "\n".join(parts).strip(), parsed_clarifications

# Example usage: sample input mixing free text, a response tag and a
# clarification block with two multiple-choice questions.
input_text = """
Some introductory text that should be included in the response.

<response>response to previous question is provided here</response>

Some more text that should also be included in the response.

<clarification> 
questions: 
- text: What topic should the article cover? 
  options: 
    - Technology 
    - Health and Wellness 
    - Travel 
    - Other 
- text: What is the target audience for the article? 
  options: 
    - General public 
    - Professionals in a specific field 
    - Students 
    - Other 
</clarification>

Final notes that should be part of the response.
"""

if __name__ == "__main__":
    # parse_text returns a (response, clarifications) tuple, not a dict,
    # so unpack it instead of indexing by string key.
    response_text, clarifications = parse_text(input_text)
    print(f"Response: {response_text}")
    print("Clarifications:")
    for item in clarifications:
        print(f"  Question: {item['question']}")
        print("  Options:", ", ".join(item['options']))