"""Split tagged text into a combined response and clarification questions."""

import re

# Matches either tag kind with a single pattern so that blocks are visited in
# document order. The back-reference forces the closing tag to match the
# opening one; DOTALL lets a block span several lines.
_TAG_PATTERN = re.compile(r"<(response|clarification)>(.*?)</\1>", re.DOTALL)

# A list-item marker: a hyphen with whitespace (or a string boundary) on both
# sides. Hyphens inside words such as "Self-driving" are left alone.
_OPTION_MARKER = re.compile(r"(?<!\S)-(?!\S)")

_QUESTION_MARKER = "- text:"
_OPTIONS_KEY = "options:"


def _parse_clarification(clarification_text):
    """Return the question dicts found in the body of one clarification block.

    Each question starts at a "- text:" marker and is followed by an
    "options:" key whose value is a hyphen-marked list. Question blocks
    without an "options:" key are skipped.
    """
    parsed = []
    # Everything before the first "- text:" marker is a header; drop it.
    for block in clarification_text.split(_QUESTION_MARKER)[1:]:
        question, key, options_text = block.partition(_OPTIONS_KEY)
        if not key:
            continue
        options = [
            option.strip()
            for option in _OPTION_MARKER.split(options_text)
            if option.strip()
        ]
        parsed.append({"question": question.strip(), "options": options})
    return parsed


def parse_text(input_text):
    """Combine free text and <response> bodies; extract clarification questions.

    The input is scanned once, in document order. Text outside any tag and the
    bodies of <response>...</response> blocks are stripped and joined with
    newlines (empty pieces are dropped). The bodies of
    <clarification>...</clarification> blocks are not copied into the response;
    they are parsed into questions instead. Tags without a matching closing
    tag are not recognised and stay in the response text unchanged.

    Args:
        input_text: The raw text to parse.

    Returns:
        A tuple ``(combined_response, parsed_clarifications)`` where
        ``combined_response`` is a string and ``parsed_clarifications`` is a
        list of ``{'question': str, 'options': list[str]}`` dicts.
    """
    segments = []
    parsed_clarifications = []
    last_end = 0

    for match in _TAG_PATTERN.finditer(input_text):
        # Free text between the previous block and this one.
        segments.append(input_text[last_end:match.start()])
        tag_name, body = match.group(1), match.group(2)
        if tag_name == "response":
            segments.append(body)
        else:
            parsed_clarifications.extend(_parse_clarification(body))
        last_end = match.end()

    # Whatever follows the last block.
    segments.append(input_text[last_end:])

    stripped = (segment.strip() for segment in segments)
    combined_response = "\n".join(piece for piece in stripped if piece)
    return combined_response, parsed_clarifications


def main():
    """Run the parser on a small example and print the result."""
    input_text = """
Some introductory text that should be included in the response.
<response>response to previous question is provided here</response>
Some more text that should also be included in the response.
<clarification>
questions:
  - text: What topic should the article cover?
    options:
      - Technology
      - Health and Wellness
      - Travel
      - Other
  - text: What is the target audience for the article?
    options:
      - General public
      - Professionals in a specific field
      - Students
      - Other
</clarification>
Final notes that should be part of the response.
"""
    # parse_text returns a tuple, so unpack it rather than indexing by key.
    response, clarifications = parse_text(input_text)
    print(f"Response: {response}")
    print("Clarifications:")
    for item in clarifications:
        print(f" Question: {item['question']}")
        print(" Options:", ", ".join(item['options']))


if __name__ == "__main__":
    main()