# fuse_validate_model.py

import numpy as np
from keras.applications.inception_v3 import InceptionV3
from keras.models import Sequential, load_model, Model
from keras.layers import Input, average, concatenate, GlobalAveragePooling2D
from keras.layers import TimeDistributed, GlobalAveragePooling1D
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.optimizers import SGD, Adam
from keras.layers.normalization import BatchNormalization

class ResearchModels():
    """Two-stream (spatial + temporal) classifier assembled for validation.

    Each stream is a per-snippet CNN applied to `n_snip` snippets through
    `TimeDistributed`; the per-snippet class scores are averaged over the
    snippet axis, and the two streams are then fused by averaging their
    outputs. All layers of both stream CNNs are marked non-trainable.
    The resulting compiled Keras model is exposed as `self.model`.
    """

    def __init__(self, nb_classes, n_snip, opt_flow_len, image_shape=(224, 224),
                 saved_model=None, saved_temporal_weights=None,
                 saved_spatial_weights=None):
        """
        `nb_classes` = the number of classes to predict
        `n_snip` = the number of snippets per sample (leading axis of each
            stream's input)
        `opt_flow_len` = the length of optical flow frames; the temporal
            stream takes `opt_flow_len * 2` input channels
        `image_shape` = shape of image frame (its first two entries are used
            as the spatial dimensions)
        `saved_model` = the path to a saved Keras model to load; when given,
            the two streams are not rebuilt and the weight paths are unused
        `saved_temporal_weights` = path to weights for the temporal-stream CNN
        `saved_spatial_weights` = path to weights for the spatial-stream CNN
        """
        self.nb_classes = nb_classes
        self.n_snip = n_snip
        self.opt_flow_len = opt_flow_len
        # Kept as an attribute because the original class exposed it.
        self.load_model = load_model
        self.saved_model = saved_model
        self.saved_temporal_weights = saved_temporal_weights
        self.saved_spatial_weights = saved_spatial_weights

        # Per-frame shapes are channels-last; the "_multi" variants prepend
        # the snippet axis consumed by TimeDistributed.
        frame_dims = (image_shape[0], image_shape[1])
        self.input_shape_spatial = frame_dims + (3,)
        self.input_shape_temporal = frame_dims + (opt_flow_len * 2,)
        self.input_shape_spatial_multi = (self.n_snip,) + self.input_shape_spatial
        self.input_shape_temporal_multi = (self.n_snip,) + self.input_shape_temporal

        # Set the metrics. Only use top k if there's a need.
        metrics = ['accuracy']
        if self.nb_classes >= 10:
            metrics.append('top_k_categorical_accuracy')

        if self.saved_model is not None:
            # A saved fused model exists: load it directly.
            print("\nLoading model %s" % self.saved_model)
            self.model = load_model(self.saved_model)
        else:
            # Otherwise build both streams and load their weights.
            print("\nLoading the two-stream model...")
            self.model = self.two_stream_fuse()

        optimizer = Adam()
        self.model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=metrics)

    # Two-stream fused model
    def two_stream_fuse(self):
        """Return a two-input model averaging the spatial and temporal outputs.

        Inputs are ordered `[spatial, temporal]`.
        """
        # spatial stream (frozen)
        spatial_stream = self.cnn_spatial_multi()

        # temporal stream (frozen)
        temporal_stream = self.cnn_temporal_multi()

        # fused by taking the element-wise average of the two outputs
        fused = average([spatial_stream.output, temporal_stream.output])

        return Model([spatial_stream.input, temporal_stream.input], fused)

    def _frozen_snippet_average(self, cnn, weights_path, stream_name, input_shape):
        """Load weights into `cnn`, freeze it, and apply it to every snippet.

        `cnn` = the per-snippet model to wrap
        `weights_path` = weights file to load, or None
        `stream_name` = 'spatial' or 'temporal'; only used in the message
            printed when `weights_path` is None
        `input_shape` = shape of one sample, snippet axis first

        Returns a Sequential model: TimeDistributed(cnn) followed by an
        average over the snippet axis.
        """
        if weights_path is None:
            # Reported but not fatal: the stream then runs with whatever
            # weights `cnn` was constructed with.
            print("[ERROR] No saved_%s_weights weights file!" % stream_name)
        else:
            cnn.load_weights(weights_path)

        for layer in cnn.layers:
            layer.trainable = False

        # building inputs and output
        model = Sequential()
        model.add(TimeDistributed(cnn, input_shape=input_shape))
        model.add(GlobalAveragePooling1D())
        return model

    # CNN model for the spatial stream with multiple inputs
    def cnn_spatial_multi(self):
        """Return the frozen spatial CNN applied over `n_snip` RGB frames."""
        # When saved weights will be loaded they replace every layer's
        # weights, so skip fetching the ImageNet initialisation.
        initial = 'imagenet' if self.saved_spatial_weights is None else None
        cnn_spatial = self.cnn_spatial(weights=initial)
        return self._frozen_snippet_average(
            cnn_spatial,
            self.saved_spatial_weights,
            'spatial',
            self.input_shape_spatial_multi,
        )

    # CNN model for the temporal stream with multiple inputs
    def cnn_temporal_multi(self):
        """Return the frozen temporal CNN applied over `n_snip` flow stacks."""
        cnn_temporal = self.cnn_temporal()
        return self._frozen_snippet_average(
            cnn_temporal,
            self.saved_temporal_weights,
            'temporal',
            self.input_shape_temporal_multi,
        )

    # CNN model for the spatial stream
    def cnn_spatial(self, weights='imagenet'):
        """Return InceptionV3 (no top) with a new `nb_classes` softmax head.

        `weights` = forwarded to `InceptionV3`; the default 'imagenet'
            preserves the original behaviour, None gives random
            initialisation (useful when saved weights are loaded afterwards).
        """
        base_model = InceptionV3(weights=weights, include_top=False)

        # global spatial average pooling over the final feature map
        features = GlobalAveragePooling2D()(base_model.output)
        # fully-connected layer
        features = Dense(1024, activation='relu')(features)
        # softmax classification layer
        predictions = Dense(self.nb_classes, activation='softmax')(features)

        return Model(inputs=base_model.input, outputs=predictions)

    # CNN model for the temporal stream
    def cnn_temporal(self):
        """Return the five-conv / two-FC temporal CNN over stacked flow.

        The layer sequence must stay exactly as is: saved temporal weights
        are loaded into this architecture layer by layer.
        """
        model = Sequential()

        #conv1
        model.add(Conv2D(96, (7, 7), strides=2, padding='same', input_shape=self.input_shape_temporal))
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))

        #conv2 (no batch normalization here, unlike conv1)
        model.add(Conv2D(256, (5, 5), strides=2, padding='same'))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))

        #conv3
        model.add(Conv2D(512, (3, 3), strides=1, activation='relu', padding='same'))

        #conv4
        model.add(Conv2D(512, (3, 3), strides=1, activation='relu', padding='same'))

        #conv5
        model.add(Conv2D(512, (3, 3), strides=1, activation='relu', padding='same'))
        model.add(MaxPooling2D(pool_size=(2, 2)))

        #full6
        model.add(Flatten())
        model.add(Dense(4096, activation='relu'))
        # Dropout rate is the fraction of units dropped during training.
        model.add(Dropout(0.9))

        #full7
        model.add(Dense(2048, activation='relu'))
        model.add(Dropout(0.9))

        #softmax
        model.add(Dense(self.nb_classes, activation='softmax'))

        return model