from collections import OrderedDict

import torch
import torch.nn as nn

from torch.utils import model_zoo

from .features.alexnet import RGBalexnet
from .modules import FeatureExtractor, Finalizer, DeepGazeII as TorchDeepGazeII


class DeepGazeI(TorchDeepGazeII):
    """DeepGaze I model

    Please note that this version of DeepGaze I is not exactly the one from the original paper.
    The original model used Caffe for AlexNet and Theano for the linear readout, and was trained using the SFO optimizer.
    Here, we use the torch implementation of AlexNet (without any adaptations), which doesn't use the two-stream architecture,
    and the DeepGaze II torch implementation with a simple linear readout network.
    The model has been retrained with Adam, but still on the same dataset (all images of MIT1003 that are of size 1024x768).
    Also, we no longer use the sparsity penalty.

    Reference:
    Kümmerer, M., Theis, L., & Bethge, M. (2015). Deep Gaze I: Boosting Saliency Prediction with Feature Maps Trained on ImageNet. ICLR Workshop Track. http://arxiv.org/abs/1411.1045
    """
    def __init__(self, pretrained=True):
        # torch AlexNet backbone; features are read out at layer '1.features.10'
        # (a 256-channel convolutional layer, matching the readout below)
        features = RGBalexnet()
        feature_extractor = FeatureExtractor(features, ['1.features.10'])

        # linear readout: a single 1x1 convolution mapping the 256 feature channels
        # to one saliency channel
        readout_network = nn.Sequential(OrderedDict([
            ('conv0', nn.Conv2d(256, 1, (1, 1), bias=False)),
        ]))

        super().__init__(
            features=feature_extractor,
            readout_network=readout_network,
            downsample=2,
            readout_factor=4,
            saliency_map_factor=4,
        )

        if pretrained:
            self.load_state_dict(model_zoo.load_url('https://github.com/matthias-k/DeepGaze/releases/download/v1.01/deepgaze1.pth', map_location=torch.device('cpu')))
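

# Usage sketch (added for illustration, not part of the original module). It assumes the
# calling convention of the other DeepGaze torch models: the forward pass takes an image
# tensor of shape (B, 3, H, W) and a centerbias log-density tensor of shape (B, H, W),
# and returns a log density over pixels.
if __name__ == "__main__":
    model = DeepGazeI(pretrained=False)  # set pretrained=True to download the released weights
    model.eval()

    # dummy RGB image at the training resolution (1024x768) and a uniform (zero) centerbias
    image = torch.rand(1, 3, 768, 1024)
    centerbias = torch.zeros(1, 768, 1024)

    with torch.no_grad():
        log_density = model(image, centerbias)

    # log probability map with the same spatial size as the input
    print(log_density.shape)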