import io IGNORE_TOKEN_ID = -100 from typing import Dict import torch import torchvision.transforms as T import transformers from .conversation import get_conv_template from PIL import Image from torch.utils.data import ConcatDataset, WeightedRandomSampler import sys def preprocess_qwen( template_name, sources, tokenizer: transformers.PreTrainedTokenizer, special_prefixs, text_only: bool = False, group_by_length: bool = False, ds_name: str = None ) -> Dict: conv = get_conv_template(template_name) roles = {'human': conv.roles[0], 'gpt': conv.roles[1]} assert len(sources) == len(special_prefixs) # Apply prompt templates conversations = [] for i, source in enumerate(sources): if roles[source[0]['from']] != conv.roles[0]: # Skip the first one if it is not from human source = source[1:] per_prefix = special_prefixs[i] conv.messages = [] for j, sentence in enumerate(source): role = roles[sentence['from']] assert role == conv.roles[j % 2], f'{i}' sentence['value'] = sentence['value'].replace("", "").strip() # llava-1.5 add to the begin of the question, remove here sentence['value'] = sentence['value'].replace("