import gradio as gr from transformers import DPTFeatureExtractor, DPTForDepthEstimation import torch import numpy as np from PIL import Image import open3d as o3d from pathlib import Path import os feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large") model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large") def process_image(image_path): image_path = Path(image_path) image_raw = Image.open(image_path) image = image_raw.resize( (800, int(800 * image_raw.size[1] / image_raw.size[0])), Image.Resampling.LANCZOS) # prepare image for the model encoding = feature_extractor(image, return_tensors="pt") # forward pass with torch.no_grad(): outputs = model(**encoding) predicted_depth = outputs.predicted_depth # interpolate to original size prediction = torch.nn.functional.interpolate( predicted_depth.unsqueeze(1), size=image.size[::-1], mode="bicubic", align_corners=False, ).squeeze() output = prediction.cpu().numpy() depth_image = (output * 255 / np.max(output)).astype('uint8') try: gltf_path = create_3d_obj(np.array(image), depth_image, image_path) img = Image.fromarray(depth_image) return [img, gltf_path, gltf_path] except Exception as e: gltf_path = create_3d_obj( np.array(image), depth_image, image_path, depth=8) img = Image.fromarray(depth_image) return [img, gltf_path, gltf_path] except: print("Error reconstructing 3D model") raise Exception("Error reconstructing 3D model") def create_3d_obj(rgb_image, depth_image, image_path, depth=10): depth_o3d = o3d.geometry.Image(depth_image) image_o3d = o3d.geometry.Image(rgb_image) rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth( image_o3d, depth_o3d, convert_rgb_to_intensity=False) w = int(depth_image.shape[1]) h = int(depth_image.shape[0]) camera_intrinsic = o3d.camera.PinholeCameraIntrinsic() camera_intrinsic.set_intrinsics(w, h, 500, 500, w/2, h/2) pcd = o3d.geometry.PointCloud.create_from_rgbd_image( rgbd_image, camera_intrinsic) print('normals') pcd.normals = o3d.utility.Vector3dVector( np.zeros((1, 3))) # invalidate existing normals pcd.estimate_normals( search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.01, max_nn=30)) pcd.orient_normals_towards_camera_location( camera_location=np.array([0., 0., 1000.])) pcd.transform([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]) pcd.transform([[-1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]]) print('run Poisson surface reconstruction') with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Debug) as cm: mesh_raw, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson( pcd, depth=depth, width=0, scale=1.1, linear_fit=True) voxel_size = max(mesh_raw.get_max_bound() - mesh_raw.get_min_bound()) / 256 print(f'voxel_size = {voxel_size:e}') mesh = mesh_raw.simplify_vertex_clustering( voxel_size=voxel_size, contraction=o3d.geometry.SimplificationContraction.Average) # vertices_to_remove = densities < np.quantile(densities, 0.001) # mesh.remove_vertices_by_mask(vertices_to_remove) bbox = pcd.get_axis_aligned_bounding_box() mesh_crop = mesh.crop(bbox) gltf_path = f'./{image_path.stem}.gltf' o3d.io.write_triangle_mesh( gltf_path, mesh_crop, write_triangle_uvs=True) return gltf_path current_directory = os.path.dirname(__file__) title = "Demo: zero-shot depth estimation with DPT + 3D Point Cloud" description = "This demo is a variation from the original DPT Demo. It uses the DPT model to predict the depth of an image and then uses 3D Point Cloud to create a 3D object." #examples = [["examples/" + img] for img in os.listdir("examples/")] # result_image_path = os.path.join(current_directory, '..', 'result.png') # image_path = Path(result_image_path) # Load the image # rawimage = Image.open(image_path) # image_r = gr.Image(value=rawimage, type="pil", label="Input Image") #image_r.change(create_visual_demo, [],[]) theme = gr.themes.Soft( primary_hue="teal", secondary_hue="gray", ).set( body_text_color_dark='*neutral_800', background_fill_primary_dark='*neutral_50', background_fill_secondary_dark='*neutral_50', border_color_accent_dark='*primary_300', border_color_primary_dark='*neutral_200', color_accent_soft_dark='*neutral_50', link_text_color_dark='*secondary_600', link_text_color_active_dark='*secondary_600', link_text_color_hover_dark='*secondary_700', link_text_color_visited_dark='*secondary_500', code_background_fill_dark='*neutral_100', shadow_spread_dark='6px', block_background_fill_dark='white', block_label_background_fill_dark='*primary_100', block_label_text_color_dark='*primary_500', block_title_text_color_dark='*primary_500', checkbox_background_color_dark='*background_fill_primary', checkbox_background_color_selected_dark='*primary_600', checkbox_border_color_dark='*neutral_100', checkbox_border_color_focus_dark='*primary_500', checkbox_border_color_hover_dark='*neutral_300', checkbox_border_color_selected_dark='*primary_600', checkbox_label_background_fill_selected_dark='*primary_500', checkbox_label_text_color_selected_dark='white', error_background_fill_dark='#fef2f2', error_border_color_dark='#b91c1c', error_text_color_dark='#b91c1c', error_icon_color_dark='#b91c1c', input_background_fill_dark='white', input_background_fill_focus_dark='*secondary_500', input_border_color_dark='*neutral_50', input_border_color_focus_dark='*secondary_300', input_placeholder_color_dark='*neutral_400', slider_color_dark='*primary_500', stat_background_fill_dark='*primary_300', table_border_color_dark='*neutral_300', table_even_background_fill_dark='white', table_odd_background_fill_dark='*neutral_50', button_primary_background_fill_dark='*primary_500', button_primary_background_fill_hover_dark='*primary_400', button_primary_border_color_dark='*primary_00', button_secondary_background_fill_dark='whiite', button_secondary_background_fill_hover_dark='*neutral_100', button_secondary_border_color_dark='*neutral_200', button_secondary_text_color_dark='*neutral_800' ) def create_visual_demo(): iface = gr.Interface(fn=process_image, inputs=[gr.Image( type="filepath", label="Input Image")], outputs=[gr.Image(label="predicted depth", type="pil"), gr.Model3D(label="3d mesh reconstruction", clear_color=[ 1.0, 1.0, 1.0, 1.0]), gr.File(label="3d gLTF")], title=title, theme=theme, description=description, #examples=examples, live=True, allow_flagging="never", cache_examples=False) #iface.launch(debug=True, enable_queue=False, share=True)