Spaces:
Build error
Build error
Prepare for multiple indices
Browse files
- apb2022.json +42 -0
- app.py +10 -7
- videohash.py +2 -1
- videomatch.py +19 -26
apb2022.json
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"url": "https://debatgemist.tweedekamer.nl/debatten/algemene-politieke-beschouwingen-1e-termijn-kamer-0",
|
4 |
+
"mp4": "https://amcpwetkms-euwe.streaming.media.azure.net/0010e470-ce9d-4424-9462-8e8e10efc5af/download-20220921082013Z.mp4"
|
5 |
+
},
|
6 |
+
{
|
7 |
+
"url": "https://debatgemist.tweedekamer.nl/debatten/mededelingen-1690",
|
8 |
+
"mp4": "https://amcpwetkms-euwe.streaming.media.azure.net/0010e470-ce9d-4424-9462-8e8e10efc5af/download-20220921133023Z.mp4"
|
9 |
+
},
|
10 |
+
{
|
11 |
+
"url": "https://debatgemist.tweedekamer.nl/debatten/regeling-van-werkzaamheden-1227",
|
12 |
+
"mp4": "https://amcpwetkms-euwe.streaming.media.azure.net/0010e470-ce9d-4424-9462-8e8e10efc5af/download-20220921133038Z.mp4"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"url": "https://debatgemist.tweedekamer.nl/debatten/algemene-politieke-beschouwingen-voortzetting-1e-termijn-kamer",
|
16 |
+
"mp4": "https://amcpwetkms-euwe.streaming.media.azure.net/a702d77f-49c5-4a69-a6f7-28e29ae4d1fe/20220921_pz_155828-010143_v2.mp4"
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"url": "https://debatgemist.tweedekamer.nl/debatten/mededelingen-1691",
|
20 |
+
"mp4": "https://amcpwetkms-euwe.streaming.media.azure.net/74c2e164-212d-4eda-a3ef-2dc406eea19b/download-20220922081620Z.mp4"
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"url": "https://debatgemist.tweedekamer.nl/debatten/algemene-politieke-beschouwingen-antwoord-1e-termijn-rest-deel-1",
|
24 |
+
"mp4": "https://amcpwetkms-euwe.streaming.media.azure.net/74c2e164-212d-4eda-a3ef-2dc406eea19b/download-20220922081653Z.mp4"
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"url": "https://debatgemist.tweedekamer.nl/debatten/mededelingen-1692",
|
28 |
+
"mp4": "https://amcpwetkms-euwe.streaming.media.azure.net/74c2e164-212d-4eda-a3ef-2dc406eea19b/download-20220922125211Z.mp4"
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"url": "https://debatgemist.tweedekamer.nl/debatten/regeling-van-werkzaamheden-1228",
|
32 |
+
"mp4": "https://amcpwetkms-euwe.streaming.media.azure.net/74c2e164-212d-4eda-a3ef-2dc406eea19b/download-20220922125220Z.mp4"
|
33 |
+
},
|
34 |
+
{
|
35 |
+
"url": "https://debatgemist.tweedekamer.nl/debatten/algemene-politieke-beschouwingen-antwoord-1e-termijn-rest-deel-2",
|
36 |
+
"mp4": "https://amcpwetkms-euwe.streaming.media.azure.net/74c2e164-212d-4eda-a3ef-2dc406eea19b/download-20220922125338Z.mp4"
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"url": "https://debatgemist.tweedekamer.nl/debatten/stemmingen-852",
|
40 |
+
"mp4": "https://amcpwetkms-euwe.streaming.media.azure.net/74c2e164-212d-4eda-a3ef-2dc406eea19b/download-20220922213719Z.mp4"
|
41 |
+
}
|
42 |
+
]
|
app.py
CHANGED
@@ -4,7 +4,7 @@ import gradio as gr
|
|
4 |
|
5 |
from config import *
|
6 |
from videomatch import index_hashes_for_video, get_decent_distance, \
|
7 |
-
|
8 |
from plot import plot_comparison, plot_multi_comparison
|
9 |
|
10 |
logging.basicConfig()
|
@@ -13,8 +13,9 @@ logging.getLogger().setLevel(logging.INFO)
|
|
13 |
|
14 |
def get_comparison(url, target, MIN_DISTANCE = 4):
|
15 |
""" Function for Gradio to combine all helper functions"""
|
16 |
-
video_index, hash_vectors
|
17 |
-
|
|
|
18 |
fig = plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = MIN_DISTANCE)
|
19 |
return fig
|
20 |
|
@@ -24,8 +25,9 @@ def get_auto_comparison(url, target, smoothing_window_size=10, method="CUSUM"):
|
|
24 |
if distance == None:
|
25 |
return None
|
26 |
raise gr.Error("No matches found!")
|
27 |
-
video_index, hash_vectors
|
28 |
-
|
|
|
29 |
# fig = plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = distance)
|
30 |
df = get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False)
|
31 |
change_points = get_change_points(df, smoothing_window_size=smoothing_window_size, method=method)
|
@@ -38,8 +40,9 @@ def get_auto_edit_decision(url, target, smoothing_window_size=10):
|
|
38 |
if distance == None:
|
39 |
return None
|
40 |
raise gr.Error("No matches found!")
|
41 |
-
video_index, hash_vectors
|
42 |
-
|
|
|
43 |
|
44 |
df = get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False)
|
45 |
change_points = get_change_points(df, smoothing_window_size=smoothing_window_size, method="ROBUST")
|
|
|
4 |
|
5 |
from config import *
|
6 |
from videomatch import index_hashes_for_video, get_decent_distance, \
|
7 |
+
get_video_index, compare_videos, get_change_points, get_videomatch_df
|
8 |
from plot import plot_comparison, plot_multi_comparison
|
9 |
|
10 |
logging.basicConfig()
|
|
|
13 |
|
14 |
def get_comparison(url, target, MIN_DISTANCE = 4):
|
15 |
""" Function for Gradio to combine all helper functions"""
|
16 |
+
video_index, hash_vectors = get_video_index(url)
|
17 |
+
target_index, _ = get_video_index(target)
|
18 |
+
lims, D, I, hash_vectors = compare_videos(hash_vectors, target_index, MIN_DISTANCE = MIN_DISTANCE)
|
19 |
fig = plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = MIN_DISTANCE)
|
20 |
return fig
|
21 |
|
|
|
25 |
if distance == None:
|
26 |
return None
|
27 |
raise gr.Error("No matches found!")
|
28 |
+
video_index, hash_vectors = get_video_index(url)
|
29 |
+
target_index, _ = get_video_index(target)
|
30 |
+
lims, D, I, hash_vectors = compare_videos(hash_vectors, target_index, MIN_DISTANCE = distance)
|
31 |
# fig = plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = distance)
|
32 |
df = get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False)
|
33 |
change_points = get_change_points(df, smoothing_window_size=smoothing_window_size, method=method)
|
|
|
40 |
if distance == None:
|
41 |
return None
|
42 |
raise gr.Error("No matches found!")
|
43 |
+
video_index, hash_vectors = get_video_index(url)
|
44 |
+
target_index, _ = get_video_index(target)
|
45 |
+
lims, D, I, hash_vectors = compare_videos(hash_vectors, target_index, MIN_DISTANCE = distance)
|
46 |
|
47 |
df = get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False)
|
48 |
change_points = get_change_points(df, smoothing_window_size=smoothing_window_size, method="ROBUST")
|
videohash.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import os
|
2 |
import urllib.request
|
|
|
3 |
import logging
|
4 |
import hashlib
|
5 |
|
@@ -20,7 +21,7 @@ def download_video_from_url(url):
|
|
20 |
filepath = filepath_from_url(url)
|
21 |
if not os.path.exists(filepath):
|
22 |
with (urllib.request.urlopen(url)) as f, open(filepath, 'wb') as fileout:
|
23 |
-
|
24 |
logging.info(f"Downloaded video from {url} to {filepath}.")
|
25 |
else:
|
26 |
logging.info(f"Skipping downloading from {url} because {filepath} already exists.")
|
|
|
1 |
import os
|
2 |
import urllib.request
|
3 |
+
import shutil
|
4 |
import logging
|
5 |
import hashlib
|
6 |
|
|
|
21 |
filepath = filepath_from_url(url)
|
22 |
if not os.path.exists(filepath):
|
23 |
with (urllib.request.urlopen(url)) as f, open(filepath, 'wb') as fileout:
|
24 |
+
shutil.copyfileobj(f, fileout, length=16*1024)
|
25 |
logging.info(f"Downloaded video from {url} to {filepath}.")
|
26 |
else:
|
27 |
logging.info(f"Skipping downloading from {url} because {filepath} already exists.")
|
videomatch.py
CHANGED
@@ -38,45 +38,37 @@ def index_hashes_for_video(url: str) -> faiss.IndexBinaryIVF:
|
|
38 |
logging.info(f"Indexed hashes for {index.ntotal} frames to {filepath}.index.")
|
39 |
return index
|
40 |
|
41 |
-
def
|
42 |
-
""""
|
43 |
-
on the matches between the target and the original video over time. The matches are determined
|
44 |
-
based on the minimum distance between hashes (as computed by faiss-vectors) before they're considered a match.
|
45 |
-
|
46 |
args:
|
47 |
-
-
|
48 |
-
- target: url of the target video (longer video which is a superset of the source video)
|
49 |
-
- MIN_DISTANCE: integer representing the minimum distance between hashes on bit-level before it's considered a match
|
50 |
"""
|
51 |
-
# TODO: Fix crash if no matches are found
|
52 |
-
|
53 |
# Url (short video)
|
54 |
-
video_index = index_hashes_for_video(
|
55 |
video_index.make_direct_map() # Make sure the index is indexable
|
56 |
hash_vectors = np.array([video_index.reconstruct(i) for i in range(video_index.ntotal)]) # Retrieve original indices
|
57 |
|
58 |
-
|
59 |
-
target_indices = [index_hashes_for_video(x) for x in [target]]
|
60 |
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
# The results are returned as a triplet of 1D arrays
|
67 |
# lims, D, I, where result for query i is in I[lims[i]:lims[i+1]]
|
68 |
# (indices of neighbors), D[lims[i]:lims[i+1]] (distances).
|
69 |
-
|
70 |
-
|
71 |
-
return lims, D, I, hash_vectors
|
72 |
|
73 |
-
def get_decent_distance(
|
74 |
""" To get a decent heuristic for a base distance, check every distance from MIN_DISTANCE to MAX_DISTANCE
|
75 |
until the number of matches found is equal to or higher than the number of frames in the source video"""
|
76 |
for distance in np.arange(start = MIN_DISTANCE - 2, stop = MAX_DISTANCE + 2, step = 2, dtype=int):
|
77 |
distance = int(distance)
|
78 |
-
video_index, hash_vectors
|
79 |
-
|
|
|
80 |
nr_source_frames = video_index.ntotal
|
81 |
nr_matches = len(D)
|
82 |
logging.info(f"{(nr_matches/nr_source_frames) * 100.0:.1f}% of frames have a match for distance '{distance}' ({nr_matches} matches for {nr_source_frames} frames)")
|
@@ -103,8 +95,9 @@ def get_change_points(df, smoothing_window_size=10, method='CUSUM'):
|
|
103 |
|
104 |
def get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False):
|
105 |
distance = get_decent_distance(url, target, MIN_DISTANCE, MAX_DISTANCE)
|
106 |
-
|
107 |
-
|
|
|
108 |
|
109 |
target = [(lims[i+1]-lims[i]) * [i] for i in range(hash_vectors.shape[0])]
|
110 |
target_s = [i/FPS for j in target for i in j]
|
|
|
38 |
logging.info(f"Indexed hashes for {index.ntotal} frames to {filepath}.index.")
|
39 |
return index
|
40 |
|
41 |
+
def get_video_index(url: str):
|
42 |
+
""" Builds up a FAISS index for a video.
|
|
|
|
|
|
|
43 |
args:
|
44 |
+
- url: location of the source video
|
|
|
|
|
45 |
"""
|
|
|
|
|
46 |
# Url (short video)
|
47 |
+
video_index = index_hashes_for_video(url)
|
48 |
video_index.make_direct_map() # Make sure the index is indexable
|
49 |
hash_vectors = np.array([video_index.reconstruct(i) for i in range(video_index.ntotal)]) # Retrieve original indices
|
50 |
|
51 |
+
return video_index, hash_vectors
|
|
|
52 |
|
53 |
+
def compare_videos(hash_vectors, target_index, MIN_DISTANCE = 3):
|
54 |
+
""" The comparison between the target and the original video will be plotted based
|
55 |
+
on the matches between the target and the original video over time. The matches are determined
|
56 |
+
based on the minimum distance between hashes (as computed by faiss-vectors) before they're considered a match.
|
57 |
+
"""
|
58 |
# The results are returned as a triplet of 1D arrays
|
59 |
# lims, D, I, where result for query i is in I[lims[i]:lims[i+1]]
|
60 |
# (indices of neighbors), D[lims[i]:lims[i+1]] (distances).
|
61 |
+
lims, D, I = target_index.range_search(hash_vectors, MIN_DISTANCE)
|
62 |
+
return lims, D, I, hash_vectors
|
|
|
63 |
|
64 |
+
def get_decent_distance(filepath, target, MIN_DISTANCE, MAX_DISTANCE):
|
65 |
""" To get a decent heuristic for a base distance, check every distance from MIN_DISTANCE to MAX_DISTANCE
|
66 |
until the number of matches found is equal to or higher than the number of frames in the source video"""
|
67 |
for distance in np.arange(start = MIN_DISTANCE - 2, stop = MAX_DISTANCE + 2, step = 2, dtype=int):
|
68 |
distance = int(distance)
|
69 |
+
video_index, hash_vectors = get_video_index(filepath)
|
70 |
+
target_index, _ = get_video_index(target)
|
71 |
+
lims, D, I, hash_vectors = compare_videos(hash_vectors, target_index, MIN_DISTANCE = distance)
|
72 |
nr_source_frames = video_index.ntotal
|
73 |
nr_matches = len(D)
|
74 |
logging.info(f"{(nr_matches/nr_source_frames) * 100.0:.1f}% of frames have a match for distance '{distance}' ({nr_matches} matches for {nr_source_frames} frames)")
|
|
|
95 |
|
96 |
def get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False):
|
97 |
distance = get_decent_distance(url, target, MIN_DISTANCE, MAX_DISTANCE)
|
98 |
+
_, hash_vectors = get_video_index(url)
|
99 |
+
target_index, _ = get_video_index(target)
|
100 |
+
lims, D, I, hash_vectors = compare_videos(hash_vectors, target_index, MIN_DISTANCE = distance)
|
101 |
|
102 |
target = [(lims[i+1]-lims[i]) * [i] for i in range(hash_vectors.shape[0])]
|
103 |
target_s = [i/FPS for j in target for i in j]
|