Super-resolution using MNIST does not work correctly

Question

I am new to Deep Learning and I made a model that is meant to upscale a 14x14 image to 28x28. To that end, I trained the network on the MNIST dataset as a first attempt at solving this problem.

To build the model architecture, I followed this paper: https://arxiv.org/pdf/1608.00367.pdf

import numpy as np
from tensorflow.keras import optimizers
from tensorflow.keras import layers
from tensorflow.keras import models
import os
import cv2
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras import initializers
import matplotlib.pyplot as plt
import pickle
import time

# TensorBoard setup:
NAME = "MNIST_FSRCNN_test -{}".format(int(time.time()))  # Name of this run; change it for a new run.
tensorboard = TensorBoard(log_dir='logs/{}'.format(NAME))  # TensorBoard log directory.

# Path of the data
train_small_path = "D:/MNIST/training/small_train"
train_normal_path = "D:/MNIST/training/normal_train"

test_small_path = "D:/MNIST/testing/small_test"
test_normal_path = "D:/MNIST/testing/normal_test"

# Read the images from the directories. MNIST is grayscale, so we read it that way.
# Note: cv2.imread returns None instead of raising when a file cannot be read,
# so we check for None explicitly; a try/except around it would never trigger.
def read_gray_images(path):
    images = []
    for img in os.listdir(path):
        data = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)
        if data is None:
            print("problem with image reading in", path)
            continue
        images.append(data)
    return images

train_small_array = read_gray_images(train_small_path)
train_normal_array = read_gray_images(train_normal_path)
test_small_array = read_gray_images(test_small_path)
test_normal_array = read_gray_images(test_normal_path)

train_small_array = np.array(train_small_array).reshape((60000, 14, 14, 1))
train_normal_array = np.array(train_normal_array).reshape((60000, 28, 28, 1))

test_small_array = np.array(test_small_array).reshape((10000, 14, 14, 1))
test_normal_array = np.array(test_normal_array).reshape((10000, 28, 28, 1))




training_data = []
training_data.append([train_small_array, train_normal_array])

testing_data = []
testing_data.append([test_small_array, test_normal_array])


# ---SAVE DATA---
# Save the dataset so it does not have to be rebuilt on every run.
pickle_out = open("training_data.pickle", "wb")
pickle.dump(training_data, pickle_out)
pickle_out.close()
# For reading it back:
pickle_in = open("training_data.pickle", "rb")
training_data = pickle.load(pickle_in)
# -----------


# MAKING THE NETWORK
# FSRCNN hyperparameters from the paper: d = LR feature dimension,
# s = number of shrinking filters, m = number of mapping layers.
d = 56
s = 12
m = 4
upscaling = 2

model = models.Sequential()
bias = True

# Feature extraction:
model.add(layers.Conv2D(filters=d,
                        kernel_size=5,
                        padding='same',
                        data_format="channels_last",
                        use_bias=bias,
                        kernel_initializer=initializers.he_normal(),
                        input_shape=(None, None, 1),
                        activation='relu'))

# Shrinking:
model.add(layers.Conv2D(filters=s,
                        kernel_size=1,
                        padding='same',
                        use_bias=bias,
                        kernel_initializer=initializers.he_normal(),
                        activation='relu'))

for i in range(m):
    model.add(layers.Conv2D(filters=s,
                            kernel_size=3,
                            padding="same",
                            use_bias=bias,
                            kernel_initializer=initializers.he_normal(),
                            activation='relu'),
              )

# Expanding
model.add(layers.Conv2D(filters=d,
                        kernel_size=1,
                        padding='same',
                        use_bias=bias,
                        kernel_initializer=initializers.he_normal(),
                        activation='relu'))

# Deconvolution
model.add(layers.Conv2DTranspose(filters=1,
                                 kernel_size=9,
                                 strides=(upscaling, upscaling),
                                 padding='same',
                                 use_bias=bias,
                                 kernel_initializer=initializers.random_normal(mean=0.0, stddev=0.001),
                                 activation='relu'))

# MODEL COMPILATION
model.compile(loss='mse',
              optimizer=optimizers.RMSprop(learning_rate=1e-3),  
              metrics=['acc'])




model.fit(x=train_small_array, y=train_normal_array,
          epochs=10,
          batch_size=1500,
          validation_split=0.2,
          callbacks=[tensorboard])


print(model.evaluate(test_small_array, test_normal_array))



# -DEMO-----------------------------------------------------------------
from PIL import Image
import PIL.ImageOps
import os

dir = 'C:/Users/marcc/OneDrive/Escritorio'
os.chdir(dir)

myImage = Image.open("ImageTest.PNG").convert('L')  # convert to grayscale
myImage = myImage.resize((14, 14))


myImage_array = np.array(myImage)

plt.imshow(myImage_array, cmap=plt.cm.binary)
plt.show()

myImage_array = myImage_array.astype('float32') / 255
myImage_array = myImage_array.reshape(1, 14, 14, 1)



newImage = model.predict(myImage_array)
newImage = newImage.reshape(28,28)
plt.imshow(newImage, cmap=plt.cm.binary)
plt.show()

The problem I have is that with 10 epochs it seems to work; it turns this image: [14x14 MNIST input]

into this one: [28x28 output after 10 epochs]

But when I train for 20 epochs I get: [28x28 output after 20 epochs]

I want to know what is going on. At first I thought the model might be overfitting, but when I check the training and validation losses it does not look like overfitting: [training and validation loss plot]
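
The check itself can be done from the History object that model.fit returns; a minimal sketch (same fit call as above, run for 20 epochs to reproduce the failure case):

history = model.fit(x=train_small_array, y=train_normal_array,
                    epochs=20,
                    batch_size=1500,
                    validation_split=0.2,
                    callbacks=[tensorboard])

# Plot training vs. validation loss to look for divergence (overfitting).
plt.plot(history.history['loss'], label='training loss')
plt.plot(history.history['val_loss'], label='validation loss')
plt.xlabel('epoch')
plt.ylabel('MSE loss')
plt.legend()
plt.show()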

Best answer

I used your code and tried to reproduce the error, but it worked fine for me. I loaded the MNIST images and resized them to (14, 14) using skimage.transform.resize (see the dataset-building code below). Training for 200 epochs gives:

Epoch 1/200 32/32 - 6s 91ms/step - loss: 4380.9126 - acc: 0.1659 - val_loss: 3406.4109 - val_acc: 0.3661
Epoch 2/200 32/32 - 3s 80ms/step - loss: 2827.0591 - acc: 0.5598 - val_loss: 2255.1472 - val_acc: 0.6366
...
Epoch 199/200 32/32 - 3s 86ms/step - loss: 149.0597 - acc: 0.8035 - val_loss: 191.1202 - val_acc: 0.8072
Epoch 200/200 32/32 - 3s 85ms/step - loss: 145.8007 - acc: 0.8035 - val_loss: 207.3333 - val_acc: 0.8072

val_loss tends to fluctuate between epochs, but it decreases overall.
Some results: here

Here is the code to plot the figures:

    from skimage.transform import resize  # the resizing used below

    def plot_images(num_img):
        fig, axs = plt.subplots(2, 2)
        my_normal_image = test_normal_array[num_img, :, :, 0]
        axs[0, 0].set(title='input normal image')
        axs[0, 0].imshow(my_normal_image, cmap=plt.cm.binary)
        axs[1, 0].set(title='small img')
        my_resized_image = resize(my_normal_image, output_shape=(14, 14), anti_aliasing=True)
        axs[1, 0].imshow(my_resized_image, cmap=plt.cm.binary)
        axs[0, 1].set(title='super resolution')
        my_super_res_image = model.predict(my_resized_image[np.newaxis, :, :, np.newaxis])[0, :, :, 0]
        axs[0, 1].imshow(my_super_res_image, cmap=plt.cm.binary)
        axs[1, 1].set(title='small resized')
        my_rr_image = resize(my_resized_image, output_shape=(28, 28), anti_aliasing=True)
        axs[1, 1].imshow(my_rr_image, cmap=plt.cm.binary)
        plt.show()

    index = 8
    plot_images(np.argwhere(y_test == index)[0][0])
    index = 4
    plot_images(np.argwhere(y_test == index)[0][0])

Also, here is how to build the dataset:

    import numpy as np
    import tensorflow as tf
    import tqdm
    from skimage.transform import resize

    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
    train_normal_array = np.expand_dims(x_train, axis=3)
    test_normal_array = np.expand_dims(x_test, axis=3)
    train_small_array = np.zeros((train_normal_array.shape[0], 14, 14, 1))
    for i in tqdm.tqdm(range(train_normal_array.shape[0])):
        train_small_array[i, :, :] = resize(train_normal_array[i], (14, 14), anti_aliasing=True)
    test_small_array = np.zeros((test_normal_array.shape[0], 14, 14, 1))
    for i in tqdm.tqdm(range(test_normal_array.shape[0])):
        test_small_array[i, :, :] = resize(test_normal_array[i], (14, 14), anti_aliasing=True)
    training_data = []
    training_data.append([train_small_array.astype('float32'), train_normal_array.astype('float32') / 255])

    testing_data = []
    testing_data.append([test_small_array.astype('float32'), test_normal_array.astype('float32') / 255])

Note that I do not divide train_small_array and test_small_array by 255, since the resizing already does that job.
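
A quick way to confirm this (a minimal sketch, relying on scikit-image's default preserve_range=False, which converts uint8 input to floats in [0, 1]):

    import numpy as np
    from skimage.transform import resize

    img = np.random.randint(0, 256, size=(28, 28), dtype=np.uint8)  # stand-in for a uint8 MNIST digit
    small = resize(img, (14, 14), anti_aliasing=True)
    print(small.dtype, small.min(), small.max())  # float64, values within [0, 1]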

2021-11-20 15:58:35

Thanks a lot, I might be doing something wrong with my tests. Can you post the code you use? – Noether
