I have a dataset of 10K images and masks, 384x384 pixels each. I cannot load all of the data into my computer's memory at once. When I load it in chunks, the model seems to forget its weights between chunks.
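For what it's worth, my understanding is that repeated fit() calls on the same Keras model object should continue from the current weights rather than reinitializing, as in this toy sketch (illustrative model and random data only, nothing to do with the U-Net below):

# Toy sketch: weights carry over between successive fit() calls on one model object.
import numpy as np
import tensorflow as tf

toy = tf.keras.Sequential([tf.keras.Input(shape=(4,)), tf.keras.layers.Dense(1)])
toy.compile(optimizer='adam', loss='mse')

x_chunk1, y_chunk1 = np.random.rand(32, 4), np.random.rand(32, 1)
x_chunk2, y_chunk2 = np.random.rand(32, 4), np.random.rand(32, 1)

w_start = toy.get_weights()
toy.fit(x_chunk1, y_chunk1, epochs=1, verbose=0)   # updates the weights from w_start
w_after_chunk1 = toy.get_weights()
toy.fit(x_chunk2, y_chunk2, epochs=1, verbose=0)   # continues from w_after_chunk1, not from scratch
print('chunk 1 changed the weights:',
      any(not np.array_equal(a, b) for a, b in zip(w_start, w_after_chunk1)))

My full training script is below.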
import numpy as np
import glob
import os
from keras_unet.models import custom_unet
from os import path
from keras.callbacks import ModelCheckpoint
from keras.optimizers import Adam
from keras_unet.metrics import iou, iou_thresholded
import cv2
import tensorflow as tf
from keras_unet.utils import get_augmented
from sklearn.model_selection import train_test_split
D_TYPE = 'my_dataset'   # name of the mask subfolder for this dataset
EPOCHS = 5              # outer passes over all chunk folders
BATCH_SIZE = 9
STEPS_PER_EPOCH = 3     # augmented batches drawn per inner epoch
INNER_EPOCHS = 3        # fit() epochs run on each chunk
folder_range = 29       # number of chunk folders under 'batches'
ocs = 384               # image side length in pixels (images and masks are 384x384)
def init_model(model_filename):
    # Build the U-Net and, if a checkpoint file already exists on disk, resume from it.
    a_model = custom_unet(
        (ocs, ocs, 3),
        filters=32,
        use_batch_norm=True,
        dropout=0.3,
        dropout_change_per_layer=0.0,
        num_layers=4
    )
    a_model.compile(
        optimizer=Adam(),
        loss='binary_crossentropy',
        metrics=[iou, iou_thresholded]
    )
    if os.path.isfile(model_filename):
        a_model.load_weights(model_filename)
    return a_model
def init_callback_checkpoint(a_model_filename):
    a_callback_checkpoint = ModelCheckpoint(
        a_model_filename,
        verbose=1,
        monitor='val_loss',
        save_best_only=True,
    )
    return a_callback_checkpoint
F_NAME = f'{D_TYPE}'
model_filename = path.join('models', f'{F_NAME}.h5')
log_path = path.join('logs', f'log_{F_NAME}.csv')
model = init_model(model_filename)
callback_checkpoint = init_callback_checkpoint(model_filename)
hist_path = path.join('logs', f'checkpoint_{F_NAME}')
model_cb = tf.keras.callbacks.ModelCheckpoint(filepath=hist_path)  # saves a model checkpoint each epoch (currently unused below)
history_cb = tf.keras.callbacks.CSVLogger(log_path, separator=",", append=True)  # appends per-epoch metrics to the CSV log
for epoch in range(EPOCHS):
    print(f'epoch {epoch} started')
    for batch in range(0, folder_range):
        # Load one chunk (folder) of mask/image pairs from disk.
        masks = glob.glob(path.join('batches', f'{batch}', 'masks', F_NAME, '*.png'))
        imgs = list(map(lambda x: x.replace('masks', 'imgs').replace(F_NAME, 'im').replace('.png', '.jpg'), masks))
        imgs_list = []
        masks_list = []
        for image_path, mask_path in zip(imgs, masks):
            imgs_list.append(cv2.imread(image_path))
            masks_list.append(cv2.imread(mask_path)[:, :, 0])
        print(f'batch: {batch}, len(imgs_list): {len(imgs_list)}')
        imgs_np = np.array(imgs_list)
        masks_np = np.array(masks_list)
        # del(imgs_list)  # This did not help
        # del(masks_list)  # This did not help
        # Normalize images and masks to [0, 1].
        i_max = imgs_np.max()
        m_max = masks_np.max()
        x = np.asarray(imgs_np, dtype=np.float16) / i_max
        y = np.asarray(masks_np, dtype=np.float16) / m_max  # float32
        if batch == 0:
            print(x.max(), y.max())
            print(x.shape, y.shape)
        y = y.reshape(y.shape[0], y.shape[1], y.shape[2], 1)
        if batch == 0:
            print(x.shape, y.shape)
        x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.1, random_state=0)
        # del(x)  # This did not help
        # del(y)  # This did not help
        train_gen = get_augmented(
            x_train, y_train, batch_size=BATCH_SIZE,  # TODO: this was working, but little improvement
            # x_train, y_train, batch_size=x_train.shape[0],  # TODO: this might run out of memory
            data_gen_args=dict(
                rotation_range=5.,
                width_shift_range=0.05,
                height_shift_range=0.05,
                shear_range=40,
                zoom_range=0.2,
                horizontal_flip=True,
                vertical_flip=False,
                fill_mode='constant'
            ))
        # Train the same model object on this chunk.
        history = model.fit_generator(
            train_gen,
            steps_per_epoch=STEPS_PER_EPOCH,
            epochs=INNER_EPOCHS,
            validation_data=(x_val, y_val),
            # callbacks=[callback_checkpoint, model_cb, history_cb]
            callbacks=[callback_checkpoint, history_cb]
            # callbacks=[callback_checkpoint]
        )
        # del(x_train)  # This did not help
        # del(y_train)  # This did not help
        # del(x_val)  # This did not help
        # del(y_val)  # This did not help
        if batch == folder_range - 1:
            print(f'EPOCH {epoch} ended. saving weights')
            new_model_filename = path.join('models', f'{F_NAME}_epoch_{epoch}.h5')
            model.save_weights(new_model_filename)
print('done')
How can I tell the U-Net model to read chunks of files from disk without losing the weights between chunks?
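Is something like the following the right direction? This is only a rough sketch, not a drop-in replacement: the class and variable names are illustrative, the 255.0 normalization assumes 8-bit images and masks, the masks-to-imgs path mapping mirrors the one in my script, and it assumes tf.keras. The idea is to give model.fit a tf.keras.utils.Sequence that keeps only file paths in memory and reads each batch from disk on demand, so a single model object sees the whole dataset and never loses its weights between batches or epochs.

import glob
import os
import cv2
import numpy as np
import tensorflow as tf

class ChunkedMaskSequence(tf.keras.utils.Sequence):
    """Streams one batch of image/mask pairs from disk per __getitem__ call."""
    def __init__(self, mask_paths, batch_size):
        self.mask_paths = mask_paths
        self.batch_size = batch_size

    def __len__(self):
        # Number of batches that make up one pass over all mask files.
        return int(np.ceil(len(self.mask_paths) / self.batch_size))

    def __getitem__(self, idx):
        batch_masks = self.mask_paths[idx * self.batch_size:(idx + 1) * self.batch_size]
        xs, ys = [], []
        for mask_path in batch_masks:
            img_path = mask_path.replace('masks', 'imgs').replace('my_dataset', 'im').replace('.png', '.jpg')
            img = cv2.imread(img_path).astype(np.float32) / 255.0            # assumes 8-bit JPEGs
            mask = cv2.imread(mask_path)[:, :, 0].astype(np.float32) / 255.0  # assumes 8-bit masks
            xs.append(img)
            ys.append(mask[..., None])  # add the single channel dimension
        return np.array(xs), np.array(ys)

# Only the file paths live in memory; pixel data is read one batch at a time.
all_mask_paths = glob.glob(os.path.join('batches', '*', 'masks', 'my_dataset', '*.png'))
train_seq = ChunkedMaskSequence(all_mask_paths, batch_size=BATCH_SIZE)
# model.fit(train_seq, epochs=EPOCHS, callbacks=[callback_checkpoint, history_cb])
# would then cover the entire dataset every epoch without resetting any weights.

If that is the right approach, I suppose I could drop the outer chunk loop and the per-chunk train/validation re-splitting entirely.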