Global Data Science Forum

Train a cnn model on a large dataset of images

  • 1.  Train a cnn model on a large dataset of images

    Posted Wed November 11, 2020 10:49 AM
    I have a dataset of 10K images and masks, 384x384 pixels each. I cannot load all of the data into my computer's memory. When I load it in chunks, the model seems to forget its weights in between chunks.

    import numpy as np
    import glob
    import os
    from keras_unet.models import custom_unet
    from os import path
    from keras.callbacks import ModelCheckpoint
    from keras.optimizers import Adam
    from keras_unet.metrics import iou, iou_thresholded
    import cv2
    import tensorflow as tf
    from keras_unet.utils import get_augmented
    from sklearn.model_selection import train_test_split

    # Dataset name; used below to build the mask sub-folder name and the
    # model / log file names.
    D_TYPE = 'my_dataset'
    # Number of outer passes over all chunk folders.
    EPOCHS = 5
    # Batch size handed to the augmentation generator.
    BATCH_SIZE = 9
    # Steps-per-epoch setting for the per-chunk training calls.
    STEPS_PER_EPOCH = 3
    # Number of epochs each per-chunk training call runs.
    INNER_EPOCHS = 3
    # Number of chunk folders visited per outer epoch (batches/0 .. batches/28).
    folder_range = 29
    # Side length used for the model input shape (ocs, ocs, 3).
    ocs = 384


    def init_model(model_filename):
        """Build and compile the U-Net, restoring saved weights when present.

        The network takes (ocs, ocs, 3) inputs and is compiled with the Adam
        optimizer, binary cross-entropy loss and the iou / iou_thresholded
        metrics.

        Args:
            model_filename: Path of a weights file. If a file exists at this
                path, its weights are loaded into the freshly built model.

        Returns:
            The compiled model.
        """
        unet_kwargs = dict(
            filters=32,
            use_batch_norm=True,
            dropout=0.3,
            dropout_change_per_layer=0.0,
            num_layers=4,
        )
        net = custom_unet((ocs, ocs, 3), **unet_kwargs)
        net.compile(
            optimizer=Adam(),
            loss='binary_crossentropy',
            metrics=[iou, iou_thresholded],
        )

        # Resume from an earlier run when a weights file is already on disk.
        weights_present = os.path.isfile(model_filename)
        if weights_present:
            net.load_weights(model_filename)
        return net


    def init_callback_checkpoint(a_model_filename):
        """Create the checkpoint callback used during training.

        Args:
            a_model_filename: Path the checkpoint is written to.

        Returns:
            A ModelCheckpoint configured to monitor 'val_loss' with
            save_best_only=True and verbose output.
        """
        return ModelCheckpoint(
            a_model_filename,
            monitor='val_loss',
            save_best_only=True,
            verbose=1,
        )


    F_NAME = f'{D_TYPE}'
    model_filename = path.join('models', f'{F_NAME}.h5')
    log_path = path.join('logs', f'log_{F_NAME}.csv')
    hist_path = path.join('logs', f'checkpoint_{F_NAME}')

    # The checkpoint, per-epoch weights and CSV log are written into these
    # directories, so make sure they exist before training starts.
    os.makedirs('models', exist_ok=True)
    os.makedirs('logs', exist_ok=True)

    # The model is created exactly once, outside both loops. Every
    # fit_generator call below continues training this same object, so its
    # weights carry over from one chunk to the next.
    model = init_model(model_filename)
    callback_checkpoint = init_callback_checkpoint(model_filename)
    # NOTE: saves the model (not the history); currently not passed to training.
    model_cb = tf.keras.callbacks.ModelCheckpoint(filepath=hist_path)
    # NOTE: SAVES HISTORY (appends one CSV row per inner epoch).
    history_cb = tf.keras.callbacks.CSVLogger(log_path, separator=",", append=True)


    def _load_chunk(batch):
        """Read one chunk folder from disk and return normalised arrays.

        Args:
            batch: Index of the chunk folder under 'batches'.

        Returns:
            A tuple (x, y): x holds the images and y the first channel of the
            masks, both float16 and divided by their chunk-wide maximum.
            Returns None when fewer than two image/mask pairs could be read,
            because such a chunk cannot be split into train and validation.
        """
        # glob returns files in arbitrary order; sorting keeps the seeded
        # train/validation split reproducible between runs.
        masks = sorted(glob.glob(path.join('batches', f'{batch}', 'masks', F_NAME, '*.png')))
        imgs = [
            m.replace('masks', 'imgs').replace(F_NAME, 'im').replace('.png', '.jpg')
            for m in masks
        ]

        imgs_list = []
        masks_list = []
        for image_path, mask_path in zip(imgs, masks):
            image = cv2.imread(image_path)
            mask = cv2.imread(mask_path)
            # cv2.imread returns None instead of raising when a file is
            # missing or unreadable; skip the pair rather than crash later.
            if image is None or mask is None:
                print(f'batch: {batch}, skipping unreadable pair: {image_path} / {mask_path}')
                continue
            imgs_list.append(image)
            masks_list.append(mask[:, :, 0])

        print(f'batch: {batch}, imgs_list.shape: {len(imgs_list)}')
        if len(imgs_list) < 2:
            return None

        imgs_np = np.array(imgs_list)
        masks_np = np.array(masks_list)
        # Guard against a zero maximum (e.g. a chunk of all-black masks):
        # dividing by zero would fill the arrays with NaN.
        i_max = max(int(imgs_np.max()), 1)
        m_max = max(int(masks_np.max()), 1)
        x = np.asarray(imgs_np, dtype=np.float16) / i_max
        y = np.asarray(masks_np, dtype=np.float16) / m_max
        return x, y


    for epoch in range(EPOCHS):
        print(f'epoch {epoch} started')
        for batch in range(0, folder_range):
            chunk = _load_chunk(batch)
            if chunk is None:
                print(f'batch: {batch}, not enough readable image/mask pairs, skipping')
                continue
            x, y = chunk

            if batch == 0:
                print(x.max(), y.max())
                print(x.shape, y.shape)
            # Add the trailing channel axis to the masks.
            y = y.reshape(y.shape[0], y.shape[1], y.shape[2], 1)
            if batch == 0:
                print(x.shape, y.shape)

            x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.1, random_state=0)

            train_gen = get_augmented(
                x_train, y_train, batch_size=BATCH_SIZE,
                data_gen_args=dict(
                    rotation_range=5.,
                    width_shift_range=0.05,
                    height_shift_range=0.05,
                    shear_range=40,
                    zoom_range=0.2,
                    horizontal_flip=True,
                    vertical_flip=False,
                    fill_mode='constant'
                ))

            # With a fixed 3 steps of 9 samples, only 27 samples were drawn
            # from each chunk per inner epoch, so most of the data was never
            # seen. Run enough steps to cover the whole chunk once per inner
            # epoch, keeping STEPS_PER_EPOCH as a lower bound.
            steps = max(STEPS_PER_EPOCH, -(-len(x_train) // BATCH_SIZE))

            history = model.fit_generator(
                train_gen,
                steps_per_epoch=steps,
                epochs=INNER_EPOCHS,
                validation_data=(x_val, y_val),
                callbacks=[callback_checkpoint, history_cb]
            )

        # Save once per outer epoch, after all chunks have been visited. Doing
        # this after the loop means a skipped last chunk cannot suppress it.
        print(f'EPOCH {epoch} ended. saving weights')
        new_model_filename = path.join('models', f'{F_NAME}_epoch_{epoch}.h5')
        model.save_weights(new_model_filename)

    print('done')

    How can I tell the U-Net model to read chunks of files from disk without losing the weights in between chunks?

    ------------------------------
    Stepan Len
    ------------------------------