aarishshahmohsin committed on
Commit
aafffbc
1 Parent(s): 8559818

added aarish model

Browse files
Files changed (7) hide show
  1. app copy.py +9 -94
  2. app.py +7 -4
  3. image.png +0 -0
  4. my_model/config.json +39 -0
  5. my_model/generation_config.json +6 -0
  6. new_app.py +9 -0
  7. temp_app.py +119 -0
app copy.py CHANGED
@@ -1,97 +1,12 @@
1
- import streamlit as st
2
- from PIL import Image
3
- from surya.ocr import run_ocr
4
- from surya.model.detection.model import load_model as load_det_model, load_processor as load_det_processor
5
- from surya.model.recognition.model import load_model as load_rec_model
6
- from surya.model.recognition.processor import load_processor as load_rec_processor
7
- import re
8
- from transformers import AutoModel, AutoTokenizer
9
- import torch
10
- import tempfile
11
- import os
12
 
13
- st.set_page_config(page_title="OCR Application", page_icon="🖼️", layout="wide")
14
 
15
- device = "cuda" if torch.cuda.is_available() else "cpu"
16
- # device = "cpu"
 
17
 
18
- @st.cache_resource
19
- def load_surya_models():
20
- det_processor, det_model = load_det_processor(), load_det_model()
21
- det_model.to(device)
22
- rec_model, rec_processor = load_rec_model(), load_rec_processor()
23
- rec_model.to(device)
24
- return det_processor, det_model, rec_model, rec_processor
25
-
26
- @st.cache_resource
27
- def load_got_ocr_model():
28
- tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
29
- model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, device_map=device, use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
30
- model.eval().to(device)
31
- return tokenizer, model
32
-
33
- det_processor, det_model, rec_model, rec_processor = load_surya_models()
34
- tokenizer, got_model = load_got_ocr_model()
35
-
36
- st.title("OCR Application (Aarish Shah Mohsin)")
37
- st.write("Upload an image for OCR processing. Using GOT-OCR for English translations, Picked Surya OCR Model for English+Hindi Translations")
38
-
39
- st.sidebar.header("Configuration")
40
- model_choice = st.sidebar.selectbox("Select OCR Model:", ("For English + Hindi", "For English (GOT-OCR)"))
41
-
42
- # Store the uploaded image in session state
43
- if 'uploaded_image' not in st.session_state:
44
- st.session_state.uploaded_image = None
45
-
46
- uploaded_file = st.sidebar.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])
47
-
48
- # Update the session state if a new file is uploaded
49
- if uploaded_file is not None:
50
- st.session_state.uploaded_image = uploaded_file
51
-
52
- predict_button = st.sidebar.button("Predict", key="predict")
53
-
54
- col1, col2 = st.columns([2, 1])
55
-
56
- # Display the image preview if it's already uploaded
57
- if st.session_state.uploaded_image:
58
- image = Image.open(st.session_state.uploaded_image)
59
-
60
- with col1:
61
- # Display a smaller preview of the uploaded image (set width to 300px)
62
- col1.image(image, caption='Uploaded Image', use_column_width=False, width=300)
63
-
64
- if predict_button and st.session_state.uploaded_image:
65
- with col2:
66
- with st.spinner("Processing..."):
67
- # Save the uploaded file temporarily
68
- with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
69
- temp_file.write(st.session_state.uploaded_image.getvalue())
70
- temp_file_path = temp_file.name
71
-
72
- image = Image.open(temp_file_path)
73
- image = image.convert("RGB")
74
-
75
- if model_choice == "For English + Hindi":
76
- langs = ["en", "hi"]
77
- predictions = run_ocr([image], [langs], det_model, det_processor, rec_model, rec_processor)
78
- text_list = re.findall(r"text='(.*?)'", str(predictions[0]))
79
- extracted_text = ' '.join(text_list)
80
-
81
- with col2:
82
- st.subheader("Extracted Text (Surya):")
83
- st.write(extracted_text)
84
-
85
- elif model_choice == "For English (GOT-OCR)":
86
- image_file = temp_file_path
87
- res = got_model.chat(tokenizer, image_file, ocr_type='ocr')
88
-
89
- with col2:
90
- st.subheader("Extracted Text (GOT-OCR):")
91
- st.write(res)
92
-
93
- # Delete the temporary file after processing
94
- if os.path.exists(temp_file_path):
95
- os.remove(temp_file_path)
96
- # else:
97
- # st.sidebar.warning("Please upload an image before predicting.")
 
1
+ from transformers import AutoTokenizer, AutoModel
2
+ # tokenizer = AutoTokenizer.from_pretrained('RufusRubin777/GOT-OCR2_0_CPU', trust_remote_code=True, device_map='cpu')
3
+ # model = AutoModel.from_pretrained('RufusRubin777/GOT-OCR2_0_CPU', trust_remote_code=True, low_cpu_mem_usage=True, device_map='cpu', use_safetensors=True)
4
+ tokenizer = AutoTokenizer.from_pretrained('RufusRubin777/GOT-OCR2_0_CPU', trust_remote_code=True, device_map='cpu')
5
+ model = AutoModel.from_pretrained('RufusRubin777/GOT-OCR2_0_CPU', trust_remote_code=True, low_cpu_mem_usage=True, device_map='cpu', use_safetensors=True)
 
 
 
 
 
 
6
 
7
+ model = model.eval().cpu()
8
 
9
+ image_path = './image.png'
10
+ english_extraction = model.chat(tokenizer, image_path, ocr_type='ocr')
11
+ print(english_extraction)
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -10,9 +10,12 @@ import torch
10
  import tempfile
11
  import os
12
 
 
 
13
  st.set_page_config(page_title="OCR Application", page_icon="🖼️", layout="wide")
14
 
15
- device = "cuda" if torch.cuda.is_available() else "cpu"
 
16
 
17
  @st.cache_resource
18
  def load_surya_models():
@@ -24,9 +27,9 @@ def load_surya_models():
24
 
25
  @st.cache_resource
26
  def load_got_ocr_model():
27
- tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
28
- model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, device_map=device, use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
29
- model.eval().to(device)
30
  return tokenizer, model
31
 
32
  det_processor, det_model, rec_model, rec_processor = load_surya_models()
 
10
  import tempfile
11
  import os
12
 
13
+ os.environ["CUDA_VISIBLE_DEVICES"] = ""
14
+
15
  st.set_page_config(page_title="OCR Application", page_icon="🖼️", layout="wide")
16
 
17
+ # device = "cuda" if torch.cuda.is_available() else "cpu"
18
+ device="cpu"
19
 
20
  @st.cache_resource
21
  def load_surya_models():
 
27
 
28
  @st.cache_resource
29
  def load_got_ocr_model():
30
+ tokenizer = AutoTokenizer.from_pretrained('aarishshahmohsin/got_ocr_cpu', trust_remote_code=True, device_map='cpu')
31
+ model = AutoModel.from_pretrained('aarishshahmohsin/got_ocr_cpu', trust_remote_code=True, low_cpu_mem_usage=True, device_map='cpu', use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
32
+ model = model.eval().to(device)
33
  return tokenizer, model
34
 
35
  det_processor, det_model, rec_model, rec_processor = load_surya_models()
image.png ADDED
my_model/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ucaslcl/GOT-OCR2_0",
3
+ "architectures": [
4
+ "GOTQwenForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "auto_map": {
8
+ "AutoConfig": "ucaslcl/GOT-OCR2_0--modeling_GOT.GOTConfig",
9
+ "AutoModel": "ucaslcl/GOT-OCR2_0--modeling_GOT.GOTQwenForCausalLM"
10
+ },
11
+ "bos_token_id": 151643,
12
+ "eos_token_id": 151643,
13
+ "freeze_vision_tower": false,
14
+ "hidden_act": "silu",
15
+ "hidden_size": 1024,
16
+ "im_end_token": 151858,
17
+ "im_patch_token": 151859,
18
+ "im_start_token": 151857,
19
+ "image_token_len": 256,
20
+ "initializer_range": 0.02,
21
+ "intermediate_size": 2816,
22
+ "max_position_embeddings": 32768,
23
+ "max_window_layers": 21,
24
+ "model_type": "GOT",
25
+ "num_attention_heads": 16,
26
+ "num_hidden_layers": 24,
27
+ "num_key_value_heads": 16,
28
+ "rms_norm_eps": 1e-06,
29
+ "rope_scaling": null,
30
+ "rope_theta": 1000000.0,
31
+ "sliding_window": null,
32
+ "tie_word_embeddings": true,
33
+ "torch_dtype": "float32",
34
+ "transformers_version": "4.45.1",
35
+ "use_cache": true,
36
+ "use_im_start_end": true,
37
+ "use_sliding_window": false,
38
+ "vocab_size": 151860
39
+ }
my_model/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "eos_token_id": 151643,
4
+ "max_new_tokens": 2048,
5
+ "transformers_version": "4.45.1"
6
+ }
new_app.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModel, AutoTokenizer
2
+
3
+ model_name = "ucaslcl/GOT-OCR2_0"
4
+
5
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
6
+ model = AutoModel.from_pretrained(model_name, device_map="auto")
7
+
8
+ model.to("cpu")
9
+ model.save_pretrained("./my_model")
temp_app.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+ from surya.ocr import run_ocr
4
+ from surya.model.detection.model import load_model as load_det_model, load_processor as load_det_processor
5
+ from surya.model.recognition.model import load_model as load_rec_model
6
+ from surya.model.recognition.processor import load_processor as load_rec_processor
7
+ import re
8
+ from transformers import AutoModel, AutoTokenizer
9
+ import torch
10
+ import tempfile
11
+ import os
12
+
13
+ os.environ["CUDA_VISIBLE_DEVICES"] = ""
14
+
15
+ st.set_page_config(page_title="OCR Application", page_icon="🖼️", layout="wide")
16
+
17
+ # Force CPU if CUDA is not available
18
+ device = "cuda" if torch.cuda.is_available() else "cpu"
19
+
20
+ @st.cache_resource
21
+ def load_surya_models():
22
+ det_processor, det_model = load_det_processor(), load_det_model()
23
+ det_model.to(device)
24
+ rec_model, rec_processor = load_rec_model(), load_rec_processor()
25
+ rec_model.to(device)
26
+ return det_processor, det_model, rec_model, rec_processor
27
+
28
+ @st.cache_resource
29
+ def load_got_ocr_model():
30
+ tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
31
+ model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, device_map=device, use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
32
+ model.eval().to(device)
33
+
34
+ # Override .half() and .cuda() to ensure everything runs in float32 and on CPU
35
+ torch.Tensor.half = lambda x: x.float()
36
+ torch.Tensor.cuda = lambda x, **kwargs: x.cpu()
37
+
38
+ return tokenizer, model
39
+
40
+ det_processor, det_model, rec_model, rec_processor = load_surya_models()
41
+ tokenizer, got_model = load_got_ocr_model()
42
+
43
+ st.title("OCR Application (Aarish Shah Mohsin)")
44
+ st.write("Upload an image for OCR processing. Using GOT-OCR for English translations, Picked Surya OCR Model for English+Hindi Translations")
45
+
46
+ st.sidebar.header("Configuration")
47
+ model_choice = st.sidebar.selectbox("Select OCR Model:", ("For English + Hindi", "For English (GOT-OCR)"))
48
+
49
+ # Store the uploaded image and extracted text in session state
50
+ if 'uploaded_image' not in st.session_state:
51
+ st.session_state.uploaded_image = None
52
+ if 'extracted_text' not in st.session_state:
53
+ st.session_state.extracted_text = ""
54
+
55
+ uploaded_file = st.sidebar.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])
56
+
57
+ # Update the session state if a new file is uploaded
58
+ if uploaded_file is not None:
59
+ st.session_state.uploaded_image = uploaded_file
60
+
61
+ predict_button = st.sidebar.button("Predict", key="predict")
62
+
63
+ col1, col2 = st.columns([2, 1])
64
+
65
+ # Display the image preview if it's already uploaded
66
+ if st.session_state.uploaded_image:
67
+ image = Image.open(st.session_state.uploaded_image)
68
+
69
+ with col1:
70
+ # Display a smaller preview of the uploaded image (set width to 300px)
71
+ col1.image(image, caption='Uploaded Image', use_column_width=False, width=300)
72
+
73
+ # Handle predictions
74
+ if predict_button and st.session_state.uploaded_image:
75
+ with st.spinner("Processing..."):
76
+ # Save the uploaded file temporarily
77
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
78
+ temp_file.write(st.session_state.uploaded_image.getvalue())
79
+ temp_file_path = temp_file.name
80
+
81
+ image = Image.open(temp_file_path)
82
+ image = image.convert("RGB")
83
+
84
+ if model_choice == "For English + Hindi":
85
+ langs = ["en", "hi"]
86
+ predictions = run_ocr([image], [langs], det_model, det_processor, rec_model, rec_processor)
87
+ text_list = re.findall(r"text='(.*?)'", str(predictions[0]))
88
+ extracted_text = ' '.join(text_list)
89
+
90
+ st.session_state.extracted_text = extracted_text # Save extracted text in session state
91
+
92
+ elif model_choice == "For English (GOT-OCR)":
93
+ image_file = temp_file_path
94
+ res = got_model.chat(tokenizer, image_file, ocr_type='ocr')
95
+
96
+ st.session_state.extracted_text = res # Save extracted text in session state
97
+
98
+ # Delete the temporary file after processing
99
+ if os.path.exists(temp_file_path):
100
+ os.remove(temp_file_path)
101
+
102
+ # Search functionality
103
+ if st.session_state.extracted_text:
104
+ search_query = st.text_input("Search in extracted text:", key="search_query", placeholder="Type to search...")
105
+
106
+ # Create a pattern to find the search query in a case-insensitive way
107
+ if search_query:
108
+ pattern = re.compile(re.escape(search_query), re.IGNORECASE)
109
+ highlighted_text = st.session_state.extracted_text
110
+
111
+ # Replace matching text with highlighted version (bright green)
112
+ highlighted_text = pattern.sub(lambda m: f"<span style='background-color: limegreen;'>{m.group(0)}</span>", highlighted_text)
113
+
114
+ st.markdown("### Highlighted Search Results:")
115
+ st.markdown(highlighted_text, unsafe_allow_html=True)
116
+ else:
117
+ # If no search query, show the original extracted text
118
+ st.markdown("### Extracted Text:")
119
+ st.markdown(st.session_state.extracted_text, unsafe_allow_html=True)