aarishshahmohsin committed on
Commit
aafffbc
1 Parent(s): 8559818

added aarish model

Browse files
Files changed (7) hide show
  1. app copy.py +9 -94
  2. app.py +7 -4
  3. image.png +0 -0
  4. my_model/config.json +39 -0
  5. my_model/generation_config.json +6 -0
  6. new_app.py +9 -0
  7. temp_app.py +119 -0
app copy.py CHANGED
@@ -1,97 +1,12 @@
1
- import streamlit as st
2
- from PIL import Image
3
- from surya.ocr import run_ocr
4
- from surya.model.detection.model import load_model as load_det_model, load_processor as load_det_processor
5
- from surya.model.recognition.model import load_model as load_rec_model
6
- from surya.model.recognition.processor import load_processor as load_rec_processor
7
- import re
8
- from transformers import AutoModel, AutoTokenizer
9
- import torch
10
- import tempfile
11
- import os
12
 
13
- st.set_page_config(page_title="OCR Application", page_icon="🖼️", layout="wide")
14
 
15
- device = "cuda" if torch.cuda.is_available() else "cpu"
16
- # device = "cpu"
 
17
 
18
- @st.cache_resource
19
- def load_surya_models():
20
- det_processor, det_model = load_det_processor(), load_det_model()
21
- det_model.to(device)
22
- rec_model, rec_processor = load_rec_model(), load_rec_processor()
23
- rec_model.to(device)
24
- return det_processor, det_model, rec_model, rec_processor
25
-
26
- @st.cache_resource
27
- def load_got_ocr_model():
28
- tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
29
- model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, device_map=device, use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
30
- model.eval().to(device)
31
- return tokenizer, model
32
-
33
- det_processor, det_model, rec_model, rec_processor = load_surya_models()
34
- tokenizer, got_model = load_got_ocr_model()
35
-
36
- st.title("OCR Application (Aarish Shah Mohsin)")
37
- st.write("Upload an image for OCR processing. Using GOT-OCR for English translations, Picked Surya OCR Model for English+Hindi Translations")
38
-
39
- st.sidebar.header("Configuration")
40
- model_choice = st.sidebar.selectbox("Select OCR Model:", ("For English + Hindi", "For English (GOT-OCR)"))
41
-
42
- # Store the uploaded image in session state
43
- if 'uploaded_image' not in st.session_state:
44
- st.session_state.uploaded_image = None
45
-
46
- uploaded_file = st.sidebar.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])
47
-
48
- # Update the session state if a new file is uploaded
49
- if uploaded_file is not None:
50
- st.session_state.uploaded_image = uploaded_file
51
-
52
- predict_button = st.sidebar.button("Predict", key="predict")
53
-
54
- col1, col2 = st.columns([2, 1])
55
-
56
- # Display the image preview if it's already uploaded
57
- if st.session_state.uploaded_image:
58
- image = Image.open(st.session_state.uploaded_image)
59
-
60
- with col1:
61
- # Display a smaller preview of the uploaded image (set width to 300px)
62
- col1.image(image, caption='Uploaded Image', use_column_width=False, width=300)
63
-
64
- if predict_button and st.session_state.uploaded_image:
65
- with col2:
66
- with st.spinner("Processing..."):
67
- # Save the uploaded file temporarily
68
- with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
69
- temp_file.write(st.session_state.uploaded_image.getvalue())
70
- temp_file_path = temp_file.name
71
-
72
- image = Image.open(temp_file_path)
73
- image = image.convert("RGB")
74
-
75
- if model_choice == "For English + Hindi":
76
- langs = ["en", "hi"]
77
- predictions = run_ocr([image], [langs], det_model, det_processor, rec_model, rec_processor)
78
- text_list = re.findall(r"text='(.*?)'", str(predictions[0]))
79
- extracted_text = ' '.join(text_list)
80
-
81
- with col2:
82
- st.subheader("Extracted Text (Surya):")
83
- st.write(extracted_text)
84
-
85
- elif model_choice == "For English (GOT-OCR)":
86
- image_file = temp_file_path
87
- res = got_model.chat(tokenizer, image_file, ocr_type='ocr')
88
-
89
- with col2:
90
- st.subheader("Extracted Text (GOT-OCR):")
91
- st.write(res)
92
-
93
- # Delete the temporary file after processing
94
- if os.path.exists(temp_file_path):
95
- os.remove(temp_file_path)
96
- # else:
97
- # st.sidebar.warning("Please upload an image before predicting.")
 
1
+ from transformers import AutoTokenizer, AutoModel
2
+ # tokenizer = AutoTokenizer.from_pretrained('RufusRubin777/GOT-OCR2_0_CPU', trust_remote_code=True, device_map='cpu')
3
+ # model = AutoModel.from_pretrained('RufusRubin777/GOT-OCR2_0_CPU', trust_remote_code=True, low_cpu_mem_usage=True, device_map='cpu', use_safetensors=True)
4
+ tokenizer = AutoTokenizer.from_pretrained('RufusRubin777/GOT-OCR2_0_CPU', trust_remote_code=True, device_map='cpu')
5
+ model = AutoModel.from_pretrained('RufusRubin777/GOT-OCR2_0_CPU', trust_remote_code=True, low_cpu_mem_usage=True, device_map='cpu', use_safetensors=True)
 
 
 
 
 
 
6
 
7
+ model = model.eval().cpu()
8
 
9
+ image_path = './image.png'
10
+ english_extraction = model.chat(tokenizer, image_path, ocr_type='ocr')
11
+ print(english_extraction)
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -10,9 +10,12 @@ import torch
10
  import tempfile
11
  import os
12
 
 
 
13
  st.set_page_config(page_title="OCR Application", page_icon="🖼️", layout="wide")
14
 
15
- device = "cuda" if torch.cuda.is_available() else "cpu"
 
16
 
17
  @st.cache_resource
18
  def load_surya_models():
@@ -24,9 +27,9 @@ def load_surya_models():
24
 
25
  @st.cache_resource
26
  def load_got_ocr_model():
27
- tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
28
- model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, device_map=device, use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
29
- model.eval().to(device)
30
  return tokenizer, model
31
 
32
  det_processor, det_model, rec_model, rec_processor = load_surya_models()
 
10
  import tempfile
11
  import os
12
 
13
+ os.environ["CUDA_VISIBLE_DEVICES"] = ""
14
+
15
  st.set_page_config(page_title="OCR Application", page_icon="🖼️", layout="wide")
16
 
17
+ # device = "cuda" if torch.cuda.is_available() else "cpu"
18
+ device="cpu"
19
 
20
  @st.cache_resource
21
  def load_surya_models():
 
27
 
28
  @st.cache_resource
29
  def load_got_ocr_model():
30
+ tokenizer = AutoTokenizer.from_pretrained('aarishshahmohsin/got_ocr_cpu', trust_remote_code=True, device_map='cpu')
31
+ model = AutoModel.from_pretrained('aarishshahmohsin/got_ocr_cpu', trust_remote_code=True, low_cpu_mem_usage=True, device_map='cpu', use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
32
+ model = model.eval().to(device)
33
  return tokenizer, model
34
 
35
  det_processor, det_model, rec_model, rec_processor = load_surya_models()
image.png ADDED
my_model/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ucaslcl/GOT-OCR2_0",
3
+ "architectures": [
4
+ "GOTQwenForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "auto_map": {
8
+ "AutoConfig": "ucaslcl/GOT-OCR2_0--modeling_GOT.GOTConfig",
9
+ "AutoModel": "ucaslcl/GOT-OCR2_0--modeling_GOT.GOTQwenForCausalLM"
10
+ },
11
+ "bos_token_id": 151643,
12
+ "eos_token_id": 151643,
13
+ "freeze_vision_tower": false,
14
+ "hidden_act": "silu",
15
+ "hidden_size": 1024,
16
+ "im_end_token": 151858,
17
+ "im_patch_token": 151859,
18
+ "im_start_token": 151857,
19
+ "image_token_len": 256,
20
+ "initializer_range": 0.02,
21
+ "intermediate_size": 2816,
22
+ "max_position_embeddings": 32768,
23
+ "max_window_layers": 21,
24
+ "model_type": "GOT",
25
+ "num_attention_heads": 16,
26
+ "num_hidden_layers": 24,
27
+ "num_key_value_heads": 16,
28
+ "rms_norm_eps": 1e-06,
29
+ "rope_scaling": null,
30
+ "rope_theta": 1000000.0,
31
+ "sliding_window": null,
32
+ "tie_word_embeddings": true,
33
+ "torch_dtype": "float32",
34
+ "transformers_version": "4.45.1",
35
+ "use_cache": true,
36
+ "use_im_start_end": true,
37
+ "use_sliding_window": false,
38
+ "vocab_size": 151860
39
+ }
my_model/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "eos_token_id": 151643,
4
+ "max_new_tokens": 2048,
5
+ "transformers_version": "4.45.1"
6
+ }
new_app.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModel, AutoTokenizer
2
+
3
+ model_name = "ucaslcl/GOT-OCR2_0"
4
+
5
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
6
+ model = AutoModel.from_pretrained(model_name, device_map="auto")
7
+
8
+ model.to("cpu")
9
+ model.save_pretrained("./my_model")
temp_app.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+ from surya.ocr import run_ocr
4
+ from surya.model.detection.model import load_model as load_det_model, load_processor as load_det_processor
5
+ from surya.model.recognition.model import load_model as load_rec_model
6
+ from surya.model.recognition.processor import load_processor as load_rec_processor
7
+ import re
8
+ from transformers import AutoModel, AutoTokenizer
9
+ import torch
10
+ import tempfile
11
+ import os
12
+
13
+ os.environ["CUDA_VISIBLE_DEVICES"] = ""
14
+
15
+ st.set_page_config(page_title="OCR Application", page_icon="🖼️", layout="wide")
16
+
17
+ # Force CPU if CUDA is not available
18
+ device = "cuda" if torch.cuda.is_available() else "cpu"
19
+
20
+ @st.cache_resource
21
+ def load_surya_models():
22
+ det_processor, det_model = load_det_processor(), load_det_model()
23
+ det_model.to(device)
24
+ rec_model, rec_processor = load_rec_model(), load_rec_processor()
25
+ rec_model.to(device)
26
+ return det_processor, det_model, rec_model, rec_processor
27
+
28
+ @st.cache_resource
29
+ def load_got_ocr_model():
30
+ tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
31
+ model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, device_map=device, use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
32
+ model.eval().to(device)
33
+
34
+ # Override .half() and .cuda() to ensure everything runs in float32 and on CPU
35
+ torch.Tensor.half = lambda x: x.float()
36
+ torch.Tensor.cuda = lambda x, **kwargs: x.cpu()
37
+
38
+ return tokenizer, model
39
+
40
+ det_processor, det_model, rec_model, rec_processor = load_surya_models()
41
+ tokenizer, got_model = load_got_ocr_model()
42
+
43
+ st.title("OCR Application (Aarish Shah Mohsin)")
44
+ st.write("Upload an image for OCR processing. Using GOT-OCR for English translations, Picked Surya OCR Model for English+Hindi Translations")
45
+
46
+ st.sidebar.header("Configuration")
47
+ model_choice = st.sidebar.selectbox("Select OCR Model:", ("For English + Hindi", "For English (GOT-OCR)"))
48
+
49
+ # Store the uploaded image and extracted text in session state
50
+ if 'uploaded_image' not in st.session_state:
51
+ st.session_state.uploaded_image = None
52
+ if 'extracted_text' not in st.session_state:
53
+ st.session_state.extracted_text = ""
54
+
55
+ uploaded_file = st.sidebar.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])
56
+
57
+ # Update the session state if a new file is uploaded
58
+ if uploaded_file is not None:
59
+ st.session_state.uploaded_image = uploaded_file
60
+
61
+ predict_button = st.sidebar.button("Predict", key="predict")
62
+
63
+ col1, col2 = st.columns([2, 1])
64
+
65
+ # Display the image preview if it's already uploaded
66
+ if st.session_state.uploaded_image:
67
+ image = Image.open(st.session_state.uploaded_image)
68
+
69
+ with col1:
70
+ # Display a smaller preview of the uploaded image (set width to 300px)
71
+ col1.image(image, caption='Uploaded Image', use_column_width=False, width=300)
72
+
73
+ # Handle predictions
74
+ if predict_button and st.session_state.uploaded_image:
75
+ with st.spinner("Processing..."):
76
+ # Save the uploaded file temporarily
77
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
78
+ temp_file.write(st.session_state.uploaded_image.getvalue())
79
+ temp_file_path = temp_file.name
80
+
81
+ image = Image.open(temp_file_path)
82
+ image = image.convert("RGB")
83
+
84
+ if model_choice == "For English + Hindi":
85
+ langs = ["en", "hi"]
86
+ predictions = run_ocr([image], [langs], det_model, det_processor, rec_model, rec_processor)
87
+ text_list = re.findall(r"text='(.*?)'", str(predictions[0]))
88
+ extracted_text = ' '.join(text_list)
89
+
90
+ st.session_state.extracted_text = extracted_text # Save extracted text in session state
91
+
92
+ elif model_choice == "For English (GOT-OCR)":
93
+ image_file = temp_file_path
94
+ res = got_model.chat(tokenizer, image_file, ocr_type='ocr')
95
+
96
+ st.session_state.extracted_text = res # Save extracted text in session state
97
+
98
+ # Delete the temporary file after processing
99
+ if os.path.exists(temp_file_path):
100
+ os.remove(temp_file_path)
101
+
102
+ # Search functionality
103
+ if st.session_state.extracted_text:
104
+ search_query = st.text_input("Search in extracted text:", key="search_query", placeholder="Type to search...")
105
+
106
+ # Create a pattern to find the search query in a case-insensitive way
107
+ if search_query:
108
+ pattern = re.compile(re.escape(search_query), re.IGNORECASE)
109
+ highlighted_text = st.session_state.extracted_text
110
+
111
+ # Replace matching text with highlighted version (bright green)
112
+ highlighted_text = pattern.sub(lambda m: f"<span style='background-color: limegreen;'>{m.group(0)}</span>", highlighted_text)
113
+
114
+ st.markdown("### Highlighted Search Results:")
115
+ st.markdown(highlighted_text, unsafe_allow_html=True)
116
+ else:
117
+ # If no search query, show the original extracted text
118
+ st.markdown("### Extracted Text:")
119
+ st.markdown(st.session_state.extracted_text, unsafe_allow_html=True)