import os
import tempfile

import gradio as gr
from autodistill_fastvit import FASTVIT_IMAGENET_1K_CLASSES, FastViT
from PIL import Image

# Build the FastViT model once at import time; infer() reuses this single
# instance for every request instead of constructing a model per call.
# NOTE(review): the None argument is presumably the ontology (none supplied)
# — confirm against the autodistill_fastvit API.
base_model = FastViT(None)


def infer(image):
    """Classify an image with FastViT and return per-label scores.

    The pixels are written to a temporary JPEG because the model's
    ``predict`` method is called with a file path.

    Args:
        image: Pixel data exposing ``astype`` (the array handed over by the
            Gradio ``"image"`` input), interpreted as RGB. ``None`` means no
            image was submitted.

    Returns:
        A dict mapping each predicted ImageNet-1k class name to its
        confidence divided by 100.

    Raises:
        gr.Error: If ``image`` is ``None``.
    """
    if image is None:
        # Surface a readable message in the UI instead of an AttributeError
        # from calling ``.astype`` on None.
        raise gr.Error("Please provide an image to classify.")

    pil_image = Image.fromarray(image.astype("uint8"), "RGB")

    # Use a private temporary directory rather than NamedTemporaryFile: the
    # file is reopened by name (by ``save`` and by ``predict``), and reopening
    # a still-open NamedTemporaryFile is not permitted on Windows.
    with tempfile.TemporaryDirectory() as temp_dir:
        image_path = os.path.join(temp_dir, "input.jpg")
        pil_image.save(image_path)
        predictions = base_model.predict(image_path, confidence=0.1)

    class_ids = predictions.class_id.tolist()
    scores = predictions.confidence.tolist()

    # Scale the scores down by 100. NOTE(review): presumably predict() reports
    # percentages (0-100) while the label output expects 0-1 — confirm.
    return {
        FASTVIT_IMAGENET_1K_CLASSES[class_id]: score / 100
        for class_id, score in zip(class_ids, scores)
    }


# Web UI: a single image input wired to infer(), with the result rendered by
# the "label" output component. Flagging is turned off using the string form
# "never" rather than the deprecated boolean.
iface = gr.Interface(
    fn=infer,
    inputs="image",
    outputs="label",
    allow_flagging="never",
    title="FastViT",
    description="[FastViT](https://github.com/apple/ml-fastvit) is a fast Vision Transformer developed by Apple. FastViT was trained on the ImageNet-1k dataset.\n\nUse the space below to test FastViT on your own images.\n\nThis space uses [Autodistill FastViT](https://github.com/autodistill/autodistill-fastvit) for inference.",
)
# Start the server as soon as the script runs.
iface.launch()