# Khmer_SeaLLM / app.py
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr
# Constants for the Model
MODEL_PATH = "SeaLLMs/SeaLLMs-v3-7B-Chat"
MODEL_TITLE = "SeaLLMs Chat Model"
MODEL_DESC = "A demo for the SeaLLMs-v3-7B-Chat language model."
# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
# Load the model with CPU offloading to reduce memory usage
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    device_map="auto",
    torch_dtype=torch.float32,   # Use float32 to avoid numerical issues
    offload_folder="./offload",  # Folder for offloaded weights to manage memory
    low_cpu_mem_usage=True,
)
# Put the model in evaluation mode; gradient checkpointing only saves memory during
# training, so it is not needed for inference.
model.eval()
def generate_response(prompt):
    # Limit the input length to prevent excessive memory usage
    prompt = prompt[:512]
    inputs = tokenizer(prompt, return_tensors="pt")
    # Move the inputs to the same device as the model
    inputs = {key: value.to(model.device) for key, value in inputs.items()}
    # Generate the response
    try:
        with torch.no_grad():  # Disable gradient tracking to save memory during inference
            outputs = model.generate(
                **inputs,
                max_new_tokens=128,  # Cap newly generated tokens to keep memory usage low
                num_return_sequences=1,
                no_repeat_ngram_size=2,
                do_sample=True,   # Sampling must be enabled for temperature to take effect
                temperature=0.7,  # Temperature scaling to control output randomness
            )
        # Decode only the newly generated tokens so the prompt is not echoed back
        response = tokenizer.decode(
            outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
        )
    except RuntimeError as e:
        # Handle numerical instability or out-of-memory errors gracefully
        response = "An error occurred during generation. Please try again with a different prompt."
        print(f"RuntimeError: {e}")
    return response
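# Optional sketch (an assumption, not part of the original app): SeaLLMs-v3-7B-Chat is an
# instruction-tuned model, so wrapping the user message in the tokenizer's chat template
# (which Qwen2-style chat models normally ship) tends to give better answers than raw text.
# `generate_chat_response` is a hypothetical helper name; wire it into the interface only if
# the tokenizer actually provides a chat template.
def generate_chat_response(message):
    messages = [{"role": "user", "content": message}]
    # Render the conversation with the model's own chat template and append the
    # assistant-turn marker so generation starts at the reply.
    chat_prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    return generate_response(chat_prompt)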
# Create the Gradio interface
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=5, label="Enter your message:"),
    outputs=gr.Textbox(label="Model's response:"),
    title=MODEL_TITLE,
    description=MODEL_DESC,
    theme="default",  # Specify a custom theme here, or remove this line
)
if __name__ == "__main__":
    iface.launch()