File size: 3,552 Bytes
0b149d1
 
 
 
b76cf08
0b149d1
 
 
 
 
 
b76cf08
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import contextlib
import hashlib
import os
import shutil
import sqlite3
import uuid
from datetime import datetime

import gradio as gr
import huggingface_hub
import pandas as pd
import pytz
from apscheduler.schedulers.background import BackgroundScheduler


class TrafficDataHandler:
    """Log hashed-IP traffic events to SQLite and mirror them to a Hugging Face dataset repo.

    Events are inserted into the ``ip_data`` table of the working database at
    ``_DB_FILE_PATH``. After every insert the database file is copied into the
    local clone of the dataset repo (``./data``), the table is exported to
    ``./data/ip_data.csv`` and the clone is pushed to the Hub.
    """

    _DB_FILE_PATH = "./traffic_data.db"  # working copy that receives the inserts
    _DB_TEMP_PATH = "./data/traffic_data.db"  # copy inside the cloned dataset repo
    _TOKEN = os.environ.get("HUB_TOKEN")  # read once, when the class body is executed
    _TZ = "Europe/Stockholm"
    _INTERVAL_MIN_UPDATE = 30  # minutes between scheduled backup runs
    # Default for code paths that write a row before any request handler has
    # assigned a UUID (e.g. the scheduled backup); the row then stores NULL
    # instead of failing with AttributeError.
    _session_uuid = None

    def __init__(self, dataset_repo="Riksarkivet/traffic_demo_data"):
        """Clone ``dataset_repo`` into ``./data``, restore its database and ensure the table exists.

        Args:
            dataset_repo: Id of the Hugging Face dataset repository to clone from and push to.
        """
        self._repo = huggingface_hub.Repository(
            local_dir="data", repo_type="dataset", clone_from=dataset_repo, use_auth_token=self._TOKEN
        )
        self._pull_repo_data()
        self._setup_database()

    def _pull_repo_data(self):
        """Pull the dataset repo and copy its database file over the working database."""
        self._repo.git_pull()
        # A repo that does not contain a database yet must not abort start-up:
        # sqlite3.connect() creates the working file and _setup_database()
        # creates the table.
        if os.path.exists(self._DB_TEMP_PATH):
            shutil.copyfile(self._DB_TEMP_PATH, self._DB_FILE_PATH)

    def _hash_ip(self, ip_address):
        """Return the SHA-256 hex digest of ``ip_address`` (a ``str``)."""
        return hashlib.sha256(ip_address.encode()).hexdigest()

    def _current_time_in_sweden(self):
        """Return the current time in the ``_TZ`` zone formatted as ``YYYY-MM-DD HH:MM:SS``."""
        swedish_tz = pytz.timezone(self._TZ)
        return datetime.now(swedish_tz).strftime("%Y-%m-%d %H:%M:%S")

    def onload_store_metric_data(self, request: gr.Request):
        """Record a ``"load"`` event for the client that issued ``request``."""
        self.store_metric_data("load", request)

    def store_metric_data(self, action, request: gr.Request):
        """Record ``action`` for the client that issued ``request``.

        A fresh UUID is generated for the event and the client host is hashed
        before anything is written.

        Args:
            action: Label stored in the ``action`` column.
            request: Gradio request; ``request.client.host`` is read from it.
        """
        self._session_uuid = str(uuid.uuid1())
        hashed_host = self._hash_ip(request.client.host)
        self._backup_and_update_database(hashed_host, action)

    def _commit_host_to_database(self, hashed_host, action):
        """Insert one ``ip_data`` row for ``hashed_host`` / ``action`` and commit it."""
        # `with connection` only commits or rolls back; closing() is what
        # actually releases the connection afterwards.
        with contextlib.closing(sqlite3.connect(self._DB_FILE_PATH)) as db, db:
            db.execute(
                "INSERT INTO ip_data(current_time, hashed_ip, session_uuid, action) VALUES(?,?,?,?)",
                [self._current_time_in_sweden(), hashed_host, self._session_uuid, action],
            )

    def _setup_database(self):
        """Create the ``ip_data`` table in the working database if it does not exist yet."""
        # IF NOT EXISTS avoids loading the whole table just to probe for it.
        with contextlib.closing(sqlite3.connect(self._DB_FILE_PATH)) as db, db:
            db.execute(
                """
                CREATE TABLE IF NOT EXISTS ip_data (id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
                                                    current_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
                                                    hashed_ip TEXT,
                                                    session_uuid TEXT,
                                                    action TEXT)
                """
            )

    def _backup_and_update_database(self, hashed_host, action):
        """Insert an event, then refresh the repo copies of the data and push them.

        Steps: insert the row, copy the working database into the repo clone,
        export the full table to ``./data/ip_data.csv`` and start a
        non-blocking push to the Hub.
        """
        self._commit_host_to_database(hashed_host, action)
        shutil.copyfile(self._DB_FILE_PATH, self._DB_TEMP_PATH)

        with contextlib.closing(sqlite3.connect(self._DB_FILE_PATH)) as db:
            ip_data = db.execute("SELECT * FROM ip_data").fetchall()

        # The rows are already in memory, so the export runs without holding the connection.
        pd.DataFrame(ip_data, columns=["id", "current_time", "hashed_ip", "session_uuid", "action"]).to_csv(
            "./data/ip_data.csv", index=False
        )

        self._repo.push_to_hub(blocking=False, commit_message=f"Updating data at {datetime.now()}")

    def _initialize_and_schedule_backup(self, hashed_host, action):
        """Run one backup immediately and repeat it every ``_INTERVAL_MIN_UPDATE`` minutes.

        Every run, including the scheduled ones, inserts a new row for the same
        ``hashed_host`` / ``action`` pair before pushing. Each call starts its
        own ``BackgroundScheduler``.
        """
        self._backup_and_update_database(hashed_host, action)
        scheduler = BackgroundScheduler()
        scheduler.add_job(
            self._backup_and_update_database, "interval", minutes=self._INTERVAL_MIN_UPDATE, args=(hashed_host, action)
        )
        scheduler.start()