File size: 5,320 Bytes
f4de44c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cbfb11f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import subprocess

import sys

# Install the pinned Gradio release before it is imported below.
# pip is invoked as "<this interpreter> -m pip" rather than through whatever
# "pip" executable is first on PATH, which may belong to a different Python
# installation than the one running this script.
# The exit status is deliberately not checked here (best effort).
subprocess.run([sys.executable, "-m", "pip", "install", "gradio==1.7.7"])
import gradio as gr
import fitz
import tkinter as tk
from tkinter import filedialog
from PIL import Image, ImageTk

class PDFViewer:
    """Tkinter window that shows a PDF one page at a time and saves a dragged region.

    Turning the mouse wheel over the canvas changes the displayed page.
    Dragging with the left mouse button draws a red rectangle; releasing the
    button renders that region of the current page, asks where to save it as
    a PNG, and then closes the window.

    Attributes:
        doc: Document returned by ``fitz.open`` for the given path.
        page: Page currently displayed.
        page_num: Zero-based index of ``page`` within ``doc``.
        page_width, page_height: Size of the current page, truncated to int.
        root: Top-level Tk window; the caller is expected to run its mainloop.
        canvas: Canvas on which the page image and the selection are drawn.
        scrollbar: Vertical scrollbar attached to the canvas.
        photo: The ``ImageTk.PhotoImage`` currently shown on the canvas.
        start_x, start_y: Canvas coordinates where the current drag began,
            or ``None`` when no drag is in progress.
        rect: Canvas item id of the selection rectangle, or ``None``.
    """

    def __init__(self, pdf_path):
        """Open *pdf_path*, build the window and display the first page.

        Args:
            pdf_path: Path of the PDF file, passed to ``fitz.open``.
        """
        self.doc = fitz.open(pdf_path)
        self.page_num = 0
        self.page = self.doc[self.page_num]
        self.page_width = int(self.page.rect.width)
        self.page_height = int(self.page.rect.height)

        # Selection state; must exist before any event handler can run.
        self.start_x = None
        self.start_y = None
        self.rect = None

        self.root = tk.Tk()
        self.root.title("PDF Viewer")
        self.root.attributes("-topmost", True)  # Keep the window above others
        # Closing the window by hand must release the document as well.
        self.root.protocol("WM_DELETE_WINDOW", self._close)

        self.canvas = tk.Canvas(self.root, width=self.page_width, height=self.page_height)
        self.canvas.pack()

        self.scrollbar = tk.Scrollbar(self.root, orient="vertical", command=self.on_scroll)
        self.scrollbar.pack(side="right", fill="y")
        self.canvas.configure(yscrollcommand=self.scrollbar.set)

        # Wheel events arrive as <MouseWheel> on Windows/macOS and as
        # Button-4 / Button-5 presses on X11.
        self.canvas.bind("<MouseWheel>", self.on_mousewheel)
        self.canvas.bind("<Button-4>", self.on_mousewheel)
        self.canvas.bind("<Button-5>", self.on_mousewheel)

        # Left-button drag selects the region to export.
        self.canvas.bind("<ButtonPress-1>", self.on_button_press)
        self.canvas.bind("<B1-Motion>", self.on_move_press)
        self.canvas.bind("<ButtonRelease-1>", self.on_button_release)

        # Render exactly once; display_page() is the single rendering path.
        self.display_page()

    def display_page(self):
        """Render page ``self.page_num`` on the canvas, replacing its contents."""
        self.canvas.delete("all")
        # delete("all") also removed any selection rectangle, so the stored
        # item id and any drag in progress are no longer valid.
        self.rect = None
        self.start_x = None
        self.start_y = None

        self.page = self.doc[self.page_num]
        self.page_width = int(self.page.rect.width)
        self.page_height = int(self.page.rect.height)

        # Pages can differ in size: keep the canvas and its scroll region in
        # step with the page being shown.
        self.canvas.config(
            width=self.page_width,
            height=self.page_height,
            scrollregion=(0, 0, self.page_width, self.page_height),
        )

        pix = self.page.get_pixmap()
        img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
        # The PhotoImage is stored on self so it stays referenced while shown.
        self.photo = ImageTk.PhotoImage(image=img)
        self.canvas.create_image(0, 0, anchor=tk.NW, image=self.photo)

    def on_scroll(self, *args):
        """Scrollbar command: forward the scroll request to the canvas."""
        self.canvas.yview(*args)

    def on_mousewheel(self, event):
        """Show the next page on wheel-down and the previous page on wheel-up.

        Args:
            event: Tk event; ``delta`` is negative for wheel-down on
                ``<MouseWheel>``, ``num`` is 5 for wheel-down on X11.
        """
        wheel_down = event.delta < 0 or getattr(event, "num", None) == 5
        step = 1 if wheel_down else -1
        target = max(0, min(self.page_num + step, len(self.doc) - 1))
        if target == self.page_num:
            # Already at the first/last page: nothing to redraw.
            return
        self.page_num = target
        self.display_page()

    def on_button_press(self, event):
        """Start a new selection at the clicked canvas position."""
        self.start_x = self.canvas.canvasx(event.x)
        self.start_y = self.canvas.canvasy(event.y)

        # Only one selection rectangle exists at a time.
        if self.rect is not None:
            self.canvas.delete(self.rect)

        self.rect = self.canvas.create_rectangle(
            self.start_x, self.start_y, self.start_x, self.start_y, outline='red'
        )

    def on_move_press(self, event):
        """Stretch the selection rectangle to the current pointer position."""
        if self.rect is None or self.start_x is None or self.start_y is None:
            # Motion without an active selection (e.g. page changed mid-drag).
            return
        cur_x = self.canvas.canvasx(event.x)
        cur_y = self.canvas.canvasy(event.y)
        self.canvas.coords(self.rect, self.start_x, self.start_y, cur_x, cur_y)

    def on_button_release(self, event):
        """Export the selected region of the current page as a PNG and close.

        A release without a drag in progress is ignored. A selection less than
        one unit wide or high (a plain click) is discarded and the window
        stays open. Otherwise the region is rendered, the user is asked for a
        file name, the image is saved if a name was given, and the window is
        closed.
        """
        if self.start_x is None or self.start_y is None:
            return

        end_x = self.canvas.canvasx(event.x)
        end_y = self.canvas.canvasy(event.y)
        x1, x2 = sorted((self.start_x, end_x))
        y1, y2 = sorted((self.start_y, end_y))
        self.start_x = None
        self.start_y = None

        if x2 - x1 < 1 or y2 - y1 < 1:
            # Nothing to render; drop the degenerate rectangle.
            if self.rect is not None:
                self.canvas.delete(self.rect)
                self.rect = None
            return

        selected_area = fitz.Rect(x1, y1, x2, y2)
        selected_pixmap = self.page.get_pixmap(matrix=fitz.Matrix(1, 1), clip=selected_area)

        # Convert Pixmap to PIL Image
        img = Image.frombytes(
            "RGB", [selected_pixmap.width, selected_pixmap.height], selected_pixmap.samples
        )

        # The dialog is parented to the viewer window, which is kept topmost.
        save_path = filedialog.asksaveasfilename(
            parent=self.root,
            defaultextension=".png",
            filetypes=[("PNG files", "*.png")],
        )
        if save_path:
            img.save(save_path)

        self._close()

    def _close(self):
        """Destroy the window and close the PDF document."""
        self.root.destroy()
        self.doc.close()

# Define the function to be called when the PDF file path is provided
def main(pdf_file):
    """Open the uploaded PDF in a ``PDFViewer`` window and wait until it closes.

    Args:
        pdf_file: The upload received from the file input: either an object
            whose ``name`` attribute holds the file path, or the path itself
            as a string. ``None`` (nothing uploaded) is accepted.

    Returns:
        ``"File Saved"`` once the viewer window has been closed, or
        ``"No PDF file provided"`` when there was no file to open.
    """
    # Accept both file-like uploads (path in .name) and plain path strings.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    if not pdf_path:
        return "No PDF file provided"

    # mainloop() blocks until the viewer window is destroyed.
    PDFViewer(pdf_path).root.mainloop()
    return "File Saved"

# File-upload input handed to main() as its single argument.
pdf_file = gr.inputs.File(label="Select a PDF file")

# Build the Gradio interface from a settings mapping, then start serving it.
_interface_settings = {
    "fn": main,
    "inputs": pdf_file,
    "outputs": "text",
    "title": "PDF Region Extraction",
    "description": "Select a region from a PDF file to extract.",
}
interface = gr.Interface(**_interface_settings)
interface.launch()