Mr-Vicky-01 committed on
Commit
57c6e94
1 Parent(s): aab8f2d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -0
app.py CHANGED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pickle
3
+ import numpy as np
4
+ import tensorflow as tf
5
+ from tensorflow.keras.applications import EfficientNetB7
6
+ from tensorflow.keras.applications.efficientnet import preprocess_input
7
+ from tensorflow.keras.preprocessing.image import load_img, img_to_array
8
+ from tensorflow.keras.preprocessing.text import Tokenizer
9
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
10
+ from tensorflow.keras.models import Model
11
+
12
+
13
# Image encoder: EfficientNetB7 with ImageNet weights and without its
# classification head, configured for 224x224 three-channel inputs.
base_model = EfficientNetB7(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
# Freeze the base model; it is only used for feature extraction here.
base_model.trainable = False

# Global average pooling collapses the spatial feature map so that each image
# yields a single flat feature vector. The Sequential model is used directly
# for prediction; no further re-wrapping is needed.
pre_trained_model = tf.keras.Sequential([
    base_model,
    tf.keras.layers.GlobalAveragePooling2D(),
])

# Trained captioning model, loaded from a file next to this script.
model = tf.keras.models.load_model("Image_Captioner_model.h5")

# NOTE(security): pickle.load can execute arbitrary code while unpickling.
# Only ship a tokenizer file that comes from a trusted source.
with open("Image_Captioner_tokenizer.pkl", "rb") as f:
    tokenizer = pickle.load(f)
30
+
31
def idx_to_word(integer, tokenizer):
    """Return the vocabulary word whose index is ``integer``.

    Args:
        integer: Word index to look up (a plain or NumPy integer).
        tokenizer: Object exposing a ``word_index`` mapping of word -> index
            and, optionally, an ``index_word`` mapping of index -> word.

    Returns:
        The matching word, or ``None`` when no word has that index.
    """
    # Prefer the reverse mapping when the tokenizer provides one: a dict
    # lookup instead of scanning the whole vocabulary for every token.
    index_word = getattr(tokenizer, "index_word", None)
    if index_word:
        return index_word.get(integer)
    # Fallback for tokenizers that only carry the forward mapping.
    return next(
        (word for word, index in tokenizer.word_index.items() if index == integer),
        None,
    )
36
+
37
# generate caption for an image
def predict_caption(model, image, tokenizer, max_length):
    """Greedily decode a caption, one word per step.

    Starting from the ``'startseq'`` tag, the text generated so far is
    encoded, padded to ``max_length`` and passed to ``model`` together with
    ``image``; the highest-scoring index is mapped back to a word and
    appended. Decoding stops at ``'endseq'``, at an index with no matching
    word, or after ``max_length`` steps.

    Args:
        model: Object whose ``predict([image, sequence], verbose=0)`` returns
            scores for the next word.
        image: Image input forwarded unchanged to ``model.predict``.
        tokenizer: Tokenizer providing ``texts_to_sequences`` and the
            vocabulary used by ``idx_to_word``.
        max_length: Maximum number of decoding steps, also used as the padded
            sequence length.

    Returns:
        The generated caption with the ``'startseq'`` / ``'endseq'`` tags
        removed.
    """
    start_tag = 'startseq'
    end_tag = 'endseq'
    words = [start_tag]
    for _ in range(max_length):
        # Encode and pad everything generated so far.
        sequence = tokenizer.texts_to_sequences([' '.join(words)])[0]
        sequence = pad_sequences([sequence], maxlen=max_length)
        # Greedy choice: take the index with the highest score.
        yhat = model.predict([image, sequence], verbose=0)
        word = idx_to_word(np.argmax(yhat), tokenizer)
        # Stop if the predicted index has no word.
        if word is None:
            break
        words.append(word)
        if word == end_tag:
            break
    # Drop the start tag, and the end tag only when it was actually produced,
    # so a caption cut off by the length limit keeps its last real word.
    caption = words[1:]
    if caption and caption[-1] == end_tag:
        caption.pop()
    return ' '.join(caption)
63
+
64
def google_image_testing(inp):
    """Caption an image received from the Gradio interface.

    Args:
        inp: Image as a NumPy array, or ``None`` when nothing was submitted.
            Assumed to be height x width x 3 (RGB) to match the encoder's
            ``(224, 224, 3)`` input -- TODO confirm against the Gradio
            image component configuration.

    Returns:
        The predicted caption, or an empty string when ``inp`` is ``None``.
    """
    if inp is None:
        return ""
    # Resize in memory rather than writing a file with a fixed name: this
    # needs no extra imaging import and concurrent requests cannot overwrite
    # each other's image.
    image = tf.image.resize(inp, (224, 224), method='nearest')
    # convert to a float array with a leading batch dimension for the model
    image = np.asarray(image, dtype='float32')
    image = np.expand_dims(image, axis=0)
    # apply the EfficientNet input preprocessing
    image = preprocess_input(image)
    # extract features
    img_feature = pre_trained_model.predict(image, verbose=0)
    # predict the caption
    return predict_caption(model, img_feature, tokenizer, max_length=35)
82
+
83
# Web UI: a single image input mapped to a single text output.
demo = gr.Interface(
    fn=google_image_testing,
    inputs='image',
    outputs='text',
    title='Image Captioner',
)
demo.launch(debug=True, share=True)