example/gluon/embedding_learning/data.py

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import os
import random

import numpy as np

import mxnet as mx
from mxnet import nd

def transform(data, target_wd, target_ht, is_train, box):
    """Crop and normnalize an image nd array."""
    if box is not None:
        x, y, w, h = box
        data = data[y:min(y+h, data.shape[0]), x:min(x+w, data.shape[1])]

    # Resize to target_wd * target_ht.
    data = mx.image.imresize(data, target_wd, target_ht)

    # Normalize in the same way as the pre-trained model.
    data = data.astype(np.float32) / 255.0
    data = (data - mx.nd.array([0.485, 0.456, 0.406])) / mx.nd.array([0.229, 0.224, 0.225])

    if is_train:
        if random.random() < 0.5:
            data = nd.flip(data, axis=1)
        data, _ = mx.image.random_crop(data, (224, 224))
    else:
        data, _ = mx.image.center_crop(data, (224, 224))

    # Transpose from (target_wd, target_ht, 3)
    # to (3, target_wd, target_ht).
    data = nd.transpose(data, (2, 0, 1))

    # If image is greyscale, repeat 3 times to get RGB image.
    if data.shape[0] == 1:
        data = nd.tile(data, (3, 1, 1))
    return data.reshape((1,) + data.shape)


class CUB200Iter(mx.io.DataIter):
    """Iterator for the CUB200-2011 dataset.
    Parameters
    ----------
    data_path : str,
        The path to dataset directory.
    batch_k : int,
        Number of images per class in a batch.
    batch_size : int,
        Batch size.
    batch_size : tupple,
        Data shape. E.g. (3, 224, 224).
    is_train : bool,
        Training data or testig data. Training batches are randomly sampled.
        Testing batches are loaded sequentially until reaching the end.
    """
    def __init__(self, data_path, batch_k, batch_size, data_shape, is_train):
        super(CUB200Iter, self).__init__(batch_size)
        self.data_shape = (batch_size,) + data_shape
        self.batch_size = batch_size
        self.provide_data = [('data', self.data_shape)]
        self.batch_k = batch_k
        self.is_train = is_train

        self.train_image_files = [[] for _ in range(100)]
        self.test_image_files = []
        self.test_labels = []
        self.boxes = {}
        self.test_count = 0

        with open(os.path.join(data_path, 'images.txt'), 'r') as f_img, \
             open(os.path.join(data_path, 'image_class_labels.txt'), 'r') as f_label, \
             open(os.path.join(data_path, 'bounding_boxes.txt'), 'r') as f_box:
            for line_img, line_label, line_box in zip(f_img, f_label, f_box):
                fname = os.path.join(data_path, 'images', line_img.strip().split()[-1])
                label = int(line_label.strip().split()[-1]) - 1
                box = [int(float(v)) for v in line_box.split()[-4:]]
                self.boxes[fname] = box

                # Following "Deep Metric Learning via Lifted Structured Feature Embedding" paper,
                # we use the first 100 classes for training, and the remaining for testing.
                if label < 100:
                    self.train_image_files[label].append(fname)
                else:
                    self.test_labels.append(label)
                    self.test_image_files.append(fname)

        self.n_test = len(self.test_image_files)

    def get_image(self, img, is_train):
        """Load and transform an image."""
        img_arr = mx.image.imread(img)
        img_arr = transform(img_arr, 256, 256, is_train, self.boxes[img])
        return img_arr

    def sample_train_batch(self):
        """Sample a training batch (data and label)."""
        batch = []
        labels = []
        num_groups = self.batch_size // self.batch_k

        # For CUB200, we use the first 100 classes for training.
        sampled_classes = np.random.choice(100, num_groups, replace=False)
        for i in range(num_groups):
            img_fnames = np.random.choice(self.train_image_files[sampled_classes[i]],
                                          self.batch_k, replace=False)
            batch += [self.get_image(img_fname, is_train=True) for img_fname in img_fnames]
            labels += [sampled_classes[i] for _ in range(self.batch_k)]

        return nd.concatenate(batch, axis=0), labels

    def get_test_batch(self):
        """Sample a testing batch (data and label)."""

        batch_size = self.batch_size
        batch = [self.get_image(self.test_image_files[(self.test_count*batch_size + i)
                                                      % len(self.test_image_files)],
                                is_train=False) for i in range(batch_size)]
        labels = [self.test_labels[(self.test_count*batch_size + i)
                                   % len(self.test_image_files)] for i in range(batch_size)]
        return nd.concatenate(batch, axis=0), labels

    def reset(self):
        """Reset an iterator."""
        self.test_count = 0

    def next(self):
        """Return a batch."""
        if self.is_train:
            data, labels = self.sample_train_batch()
        else:
            if self.test_count * self.batch_size < len(self.test_image_files):
                data, labels = self.get_test_batch()
                self.test_count += 1
            else:
                self.test_count = 0
                raise StopIteration
        return mx.io.DataBatch(data=[data], label=[labels])

def cub200_iterator(data_path, batch_k, batch_size, data_shape):
    """Return training and testing iterator for the CUB200-2011 dataset."""
    return (CUB200Iter(data_path, batch_k, batch_size, data_shape, is_train=True),
            CUB200Iter(data_path, batch_k, batch_size, data_shape, is_train=False))