Spaces:

TabPFN
/

TabPFNPrediction

Runtime error

App Files Files Community

TabPFNPrediction / app.py

TabPFN

Update app.py

129c838 about 2 years ago

raw

history blame

3.45 kB

	import sys
	tabpfn_path = 'TabPFN'
	sys.path.insert(0, tabpfn_path) # our submodule of the TabPFN repo (at 045c8400203ebd062346970b4f2c0ccda5a40618)
	from TabPFN.scripts.transformer_prediction_interface import TabPFNClassifier

	import numpy as np
	import pandas as pd
	import torch
	import gradio as gr
	import openml


	def _cell_to_text(cell):
	    """Return *cell* as text, mapping ``None`` to the empty string."""
	    return '' if cell is None else str(cell)


	def compute(table: np.ndarray):
	    """Predict the blank cells of the label column of *table*.

	    Rows that are entirely empty are dropped. The remaining blank cells must
	    all sit in one column; that column is used as the label column. Rows with
	    a filled label are used for fitting, rows with a blank label are predicted.

	    Args:
	        table: 2-D table of cells. Cells may be strings or numbers; they are
	            converted to text first so that numeric cells do not break the
	            blank-cell detection.

	    Returns:
	        A ``(message, out_table)`` pair. On invalid input ``message`` is a
	        user-facing string and ``out_table`` is ``None``. On success
	        ``message`` is ``None`` and ``out_table`` is a copy of the (non-empty
	        rows of the) table in which each blank label cell is replaced by
	        ``"<label> (p=<probability>)"``.
	    """
	    no_blank_msg = "Please leave at least one field blank for prediction."

	    raw = np.asarray(table, dtype=object)
	    if raw.ndim != 2 or raw.size == 0:
	        # Nothing to inspect; also avoids indexing errors further down.
	        return no_blank_msg, None

	    # Work on a pure string array: ``len`` / ``== ''`` are only meaningful
	    # for text, and the incoming array may hold ints or floats.
	    text = np.array([[_cell_to_text(cell) for cell in row] for row in raw], dtype=str)

	    # Drop rows in which every cell is empty.
	    non_empty_row_mask = np.char.str_len(text).sum(axis=1) != 0
	    text = text[non_empty_row_mask]

	    blank_rows, blank_cols = np.where(text == '')
	    if blank_rows.size == 0:
	        return no_blank_msg, None
	    if not np.all(blank_cols == blank_cols[0]):
	        return "Please only leave fields of one column blank for prediction.", None
	    y_column = blank_cols[0]
	    eval_lines = blank_rows

	    train_table = np.delete(text, eval_lines, axis=0)
	    eval_table = text[eval_lines]
	    if train_table.shape[0] == 0:
	        # Every remaining row has a blank label, so there is nothing to fit on.
	        return "Please fill in the label column for at least one row to train on.", None

	    # Only the float conversion can raise ValueError, so keep the try minimal.
	    try:
	        x_train_values = np.delete(train_table, y_column, axis=1).astype(np.float32)
	        x_eval_values = np.delete(eval_table, y_column, axis=1).astype(np.float32)
	    except ValueError:
	        return "Please only add numbers (to the inputs) or leave fields empty.", None

	    x_train = torch.tensor(x_train_values)
	    x_eval = torch.tensor(x_eval_values)
	    y_train = train_table[:, y_column]

	    classifier = TabPFNClassifier(base_path=tabpfn_path, device='cpu')
	    classifier.fit(x_train, y_train)
	    y_eval, p_eval = classifier.predict(x_eval, return_winning_probability=True)

	    # Use object dtype for the output: a fixed-width unicode array would
	    # silently truncate the longer "label (p=..)" strings written below.
	    out_table = text.astype(object)
	    out_table[eval_lines, y_column] = [f"{y_e} (p={p_e:.2f})" for y_e, p_e in zip(y_eval, p_eval)]
	    return None, out_table


	def upload_file(file):
	    """Load an uploaded dataset into a DataFrame for the input table.

	    Args:
	        file: Either a path string or an object exposing the path as ``.name``
	            (the code below only relies on that attribute). ``None`` is
	            accepted and yields ``None``.

	    Returns:
	        A ``pandas.DataFrame`` for ``.arff`` files (parsed through
	        ``openml``) and for ``.csv`` / ``.data`` files (read without a header
	        row, columns renamed to ``0..n-1``). ``None`` when no file is given or
	        the extension is not one of these.
	    """
	    if file is None:
	        return None

	    # Accept both plain path strings and objects carrying the path in `.name`.
	    path = str(getattr(file, 'name', file))
	    lowered = path.lower()  # extension match should not depend on letter case

	    if lowered.endswith('.arff'):
	        dataset = openml.datasets.OpenMLDataset('t', 'test', data_file=path)
	        features, _, _, attribute_names = dataset.get_data(dataset_format="array")
	        return pd.DataFrame(features, columns=attribute_names)

	    if lowered.endswith(('.csv', '.data')):
	        df = pd.read_csv(path, header=None)
	        df.columns = np.arange(len(df.columns))
	        return df

	    # Unsupported extension: keep the original "no result" behaviour, explicitly.
	    return None


	# Initial content of the input table: two feature columns followed by a
	# label column; the last row's label is left blank.
	example = [
	    [1, 2, 1],
	    [2, 1, 1],
	    [1, 1, 1],
	    [2, 2, 2],
	    [3, 4, 2],
	    [3, 2, 2],
	    [2, 3, ''],
	]

	# Build the Gradio UI: an editable input table, a file upload that fills the
	# table, a button that runs `compute`, and outputs for the message / result.
	with gr.Blocks() as demo:
	    gr.Markdown("""This demo allows you to play with the TabPFN.
	The TabPFN will classify the values for all empty cells in the label column.
	Please, provide everything but the label column as numeric values.
	You can also upload datasets to fill the table automatically.
	""")
	    inp_table = gr.DataFrame(type='numpy', value=example, headers=[''] * 3)
	    inp_file = gr.File(
	        label='Drop either a .csv (without header, only numeric values for all but the labels) or a .arff file.')

	    # The examples must be declared after the components they reference;
	    # referencing `inp_file` / `inp_table` before assignment is a NameError.
	    examples = gr.Examples(examples=['iris.csv', 'balance-scale.arff'],
	                           inputs=[inp_file],
	                           outputs=[inp_table],
	                           fn=upload_file,
	                           cache_examples=True)
	    btn = gr.Button("Predict Empty Table Cells")

	    # Uploading (or picking an example) file replaces the table content.
	    inp_file.change(fn=upload_file, inputs=inp_file, outputs=inp_table)

	    out_text = gr.Markdown()
	    out_table = gr.DataFrame()

	    btn.click(fn=compute, inputs=inp_table, outputs=[out_text, out_table])

	demo.launch()