Lung-and-colon-cancer-Classification2 / mobilenet-paper-code.ipynb
mobilenet-paper-code.ipynb
Raw

import os
import time
import shutil
import pathlib
import itertools
from PIL import Image

# Import data handling tools
import cv2
import numpy as np
import pandas as pd
import seaborn as sns
sns.set_style('darkgrid')
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score

# Import deep learning libraries
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam, Adamax
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Activation, Dropout, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.applications import DenseNet121,InceptionV3,MobileNetV2
from tensorflow.keras.models import Model

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

print('Modules loaded')

# Generate data paths with labels
data_dir = '/kaggle/input/lung-and-colon-cancer-histopathological-images/lung_colon_image_set'
filepaths = []
labels = []
folds = os.listdir(data_dir)

# Generate paths and labels
for fold in folds:
    foldpath = os.path.join(data_dir, fold)
    flist = os.listdir(foldpath)
    for f in flist:
        f_path = os.path.join(foldpath, f)
        filelist = os.listdir(f_path)
        for file in filelist:
            fpath = os.path.join(f_path, file)
            filepaths.append(fpath)
            if f == 'colon_aca':
                labels.append('Colon Adenocarcinoma')
            elif f == 'colon_n':
                labels.append('Colon Benign Tissue')
            elif f == 'lung_aca':
                labels.append('Lung Adenocarcinoma')
            elif f == 'lung_n':
                labels.append('Lung Benign Tissue')
            elif f == 'lung_scc':
                labels.append('Lung Squamous Cell Carcinoma')

# Concatenate data paths with labels into a DataFrame
df = pd.DataFrame({'filepaths': filepaths, 'labels': labels})

# Split dataset into train, validation, and test sets
train_df, temp_df = train_test_split(df, train_size=0.8, stratify=df['labels'], random_state=42)
valid_df, test_df = train_test_split(temp_df, train_size=0.5, stratify=temp_df['labels'], random_state=42)

# Define image size, channels, and batch size
batch_size = 64
img_size = (224, 224)
channels = 3
img_shape = (img_size[0], img_size[1], channels)

# Create ImageDataGenerator for training and validation
train_datagen = ImageDataGenerator()
valid_datagen = ImageDataGenerator()

train_gen = train_datagen.flow_from_dataframe(train_df, x_col='filepaths', y_col='labels',
                                              target_size=img_size, class_mode='categorical',
                                              batch_size=batch_size, shuffle=True)

valid_gen = valid_datagen.flow_from_dataframe(valid_df, x_col='filepaths', y_col='labels',
                                              target_size=img_size, class_mode='categorical',
                                              batch_size=batch_size, shuffle=True)

test_gen = valid_datagen.flow_from_dataframe(test_df, x_col='filepaths', y_col='labels',
                                             target_size=img_size, class_mode='categorical',
                                             batch_size=batch_size, shuffle=False)

# Get class names
num_classes = len(train_gen.class_indices)

# Define the model
#base_model = DenseNet121(input_shape=img_shape, include_top=False, weights='imagenet')
#base_model = InceptionV3(input_shape=img_shape, include_top=False, weights='imagenet')
#base_model = DenseNet121(input_shape=img_shape, include_top=False, weights='imagenet')
base_model = MobileNetV2(input_shape=img_shape, include_top=False, weights='imagenet')
base_model.trainable = True

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(256, activation='relu')(x)
predictions = Dense(num_classes, activation='softmax')(x)

model_DenseNet = Model(inputs=base_model.input, outputs=predictions)

# Compile the model
model_DenseNet.compile(optimizer=Adamax(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# Define callbacks
callbacks = [
    ModelCheckpoint(filepath='best_model.keras', monitor='val_loss', save_best_only=True, verbose=1),
    EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=1),
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6, verbose=1)
]

# Helper function to calculate metrics
def calculate_metrics(generator, model):
    preds = model.predict(generator)
    y_true = generator.classes
    y_pred = np.argmax(preds, axis=1)
    
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    
    return precision, recall, f1

# Train the model and calculate metrics for each epoch
class MetricsCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        # Training metrics
        train_precision, train_recall, train_f1 = calculate_metrics(train_gen, self.model)
        print(f'Epoch {epoch+1} Training Precision: {train_precision:.4f}, Recall: {train_recall:.4f}, F1 Score: {train_f1:.4f}')
        
        # Validation metrics
        val_precision, val_recall, val_f1 = calculate_metrics(valid_gen, self.model)
        print(f'Epoch {epoch+1} Validation Precision: {val_precision:.4f}, Recall: {val_recall:.4f}, F1 Score: {val_f1:.4f}')

# Measure training time
start_time = time.time()

# Train the model with the custom metrics callback
history = model_DenseNet.fit(train_gen, validation_data=valid_gen, epochs=20, callbacks=[MetricsCallback()] + callbacks)

end_time = time.time()
training_time = end_time - start_time
print(f'Total Training Time: {training_time:.2f} seconds')

# Plot training history (accuracy and loss)
plt.figure(figsize=(12, 5))

# Plot accuracy
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Val Accuracy')
plt.title('Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

# Plot loss
plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.title('Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.tight_layout()
plt.show()

# Measure testing time
start_time = time.time()

# Evaluate on the test set
test_loss, test_acc = model_DenseNet.evaluate(test_gen)

end_time = time.time()
testing_time = end_time - start_time

print(f'Test Accuracy: {test_acc:.4f}')
print(f'Total Testing Time: {testing_time:.2f} seconds')

# Final metrics on the test set
test_precision, test_recall, test_f1 = calculate_metrics(test_gen, model_DenseNet)
print(f'Test Precision: {test_precision:.4f}, Recall: {test_recall:.4f}, F1 Score: {test_f1:.4f}')

Modules loaded
Found 20000 validated image filenames belonging to 5 classes.
Found 2500 validated image filenames belonging to 5 classes.
Found 2500 validated image filenames belonging to 5 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
9406464/9406464 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
Epoch 1/20


WARNING: All log messages before absl::InitializeLog() is called are written to STDERR
I0000 00:00:1727871783.847757      65 service.cc:145] XLA service 0x7d57b4003da0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1727871783.847818      65 service.cc:153]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0


  1/313 ━━━━━━━━━━━━━━━━━━━━ 4:18:54 50s/step - accuracy: 0.1719 - loss: 2.0469

I0000 00:00:1727871805.732449      65 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


313/313 ━━━━━━━━━━━━━━━━━━━━ 97s 302ms/step
Epoch 1 Training Precision: 0.2057, Recall: 0.1981, F1 Score: 0.1714
40/40 ━━━━━━━━━━━━━━━━━━━━ 13s 326ms/step
Epoch 1 Validation Precision: 0.1902, Recall: 0.1996, F1 Score: 0.1713

Epoch 1: val_loss improved from inf to 1.60402, saving model to best_model.keras
313/313 ━━━━━━━━━━━━━━━━━━━━ 441s 1s/step - accuracy: 0.9419 - loss: 0.1607 - val_accuracy: 0.5972 - val_loss: 1.6040 - learning_rate: 0.0010
Epoch 2/20
313/313 ━━━━━━━━━━━━━━━━━━━━ 97s 310ms/step
Epoch 2 Training Precision: 0.2205, Recall: 0.2004, F1 Score: 0.1462
40/40 ━━━━━━━━━━━━━━━━━━━━ 12s 290ms/step
Epoch 2 Validation Precision: 0.1512, Recall: 0.1956, F1 Score: 0.1424

Epoch 2: val_loss did not improve from 1.60402
313/313 ━━━━━━━━━━━━━━━━━━━━ 221s 699ms/step - accuracy: 0.9943 - loss: 0.0188 - val_accuracy: 0.5292 - val_loss: 4.7299 - learning_rate: 0.0010
Epoch 3/20
313/313 ━━━━━━━━━━━━━━━━━━━━ 92s 294ms/step
Epoch 3 Training Precision: 0.2253, Recall: 0.1987, F1 Score: 0.1502
40/40 ━━━━━━━━━━━━━━━━━━━━ 12s 295ms/step
Epoch 3 Validation Precision: 0.2495, Recall: 0.1952, F1 Score: 0.1457

Epoch 3: val_loss did not improve from 1.60402
313/313 ━━━━━━━━━━━━━━━━━━━━ 254s 673ms/step - accuracy: 0.9974 - loss: 0.0083 - val_accuracy: 0.5976 - val_loss: 5.3271 - learning_rate: 0.0010
Epoch 4/20
313/313 ━━━━━━━━━━━━━━━━━━━━ 95s 304ms/step
Epoch 4 Training Precision: 0.1962, Recall: 0.1968, F1 Score: 0.1750
40/40 ━━━━━━━━━━━━━━━━━━━━ 12s 291ms/step
Epoch 4 Validation Precision: 0.1914, Recall: 0.1996, F1 Score: 0.1751

Epoch 4: val_loss did not improve from 1.60402

Epoch 4: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
313/313 ━━━━━━━━━━━━━━━━━━━━ 214s 679ms/step - accuracy: 0.9978 - loss: 0.0066 - val_accuracy: 0.7340 - val_loss: 2.1683 - learning_rate: 0.0010
Epoch 5/20
313/313 ━━━━━━━━━━━━━━━━━━━━ 95s 303ms/step
Epoch 5 Training Precision: 0.1973, Recall: 0.1976, F1 Score: 0.1940
40/40 ━━━━━━━━━━━━━━━━━━━━ 12s 295ms/step
Epoch 5 Validation Precision: 0.1991, Recall: 0.1936, F1 Score: 0.1921

Epoch 5: val_loss improved from 1.60402 to 0.60021, saving model to best_model.keras
313/313 ━━━━━━━━━━━━━━━━━━━━ 215s 682ms/step - accuracy: 0.9999 - loss: 7.2275e-04 - val_accuracy: 0.8860 - val_loss: 0.6002 - learning_rate: 2.0000e-04
Epoch 6/20
313/313 ━━━━━━━━━━━━━━━━━━━━ 91s 291ms/step
Epoch 6 Training Precision: 0.2059, Recall: 0.2058, F1 Score: 0.2058
40/40 ━━━━━━━━━━━━━━━━━━━━ 12s 288ms/step
Epoch 6 Validation Precision: 0.1971, Recall: 0.1972, F1 Score: 0.1971

Epoch 6: val_loss improved from 0.60021 to 0.05372, saving model to best_model.keras
313/313 ━━━━━━━━━━━━━━━━━━━━ 261s 680ms/step - accuracy: 0.9996 - loss: 0.0010 - val_accuracy: 0.9856 - val_loss: 0.0537 - learning_rate: 2.0000e-04
Epoch 7/20
313/313 ━━━━━━━━━━━━━━━━━━━━ 91s 289ms/step
Epoch 7 Training Precision: 0.1979, Recall: 0.1979, F1 Score: 0.1979
40/40 ━━━━━━━━━━━━━━━━━━━━ 12s 293ms/step
Epoch 7 Validation Precision: 0.2047, Recall: 0.2048, F1 Score: 0.2048

Epoch 7: val_loss improved from 0.05372 to 0.01571, saving model to best_model.keras
313/313 ━━━━━━━━━━━━━━━━━━━━ 213s 674ms/step - accuracy: 0.9999 - loss: 4.3082e-04 - val_accuracy: 0.9960 - val_loss: 0.0157 - learning_rate: 2.0000e-04
Epoch 8/20
313/313 ━━━━━━━━━━━━━━━━━━━━ 91s 290ms/step
Epoch 8 Training Precision: 0.1968, Recall: 0.1968, F1 Score: 0.1968
40/40 ━━━━━━━━━━━━━━━━━━━━ 11s 275ms/step
Epoch 8 Validation Precision: 0.1907, Recall: 0.1908, F1 Score: 0.1908

Epoch 8: val_loss improved from 0.01571 to 0.01212, saving model to best_model.keras
313/313 ━━━━━━━━━━━━━━━━━━━━ 210s 664ms/step - accuracy: 0.9998 - loss: 4.0760e-04 - val_accuracy: 0.9968 - val_loss: 0.0121 - learning_rate: 2.0000e-04
Epoch 9/20
313/313 ━━━━━━━━━━━━━━━━━━━━ 93s 298ms/step
Epoch 9 Training Precision: 0.1988, Recall: 0.1988, F1 Score: 0.1988
40/40 ━━━━━━━━━━━━━━━━━━━━ 13s 316ms/step
Epoch 9 Validation Precision: 0.1944, Recall: 0.1944, F1 Score: 0.1944

Epoch 9: val_loss improved from 0.01212 to 0.00903, saving model to best_model.keras
313/313 ━━━━━━━━━━━━━━━━━━━━ 213s 676ms/step - accuracy: 0.9999 - loss: 7.3630e-04 - val_accuracy: 0.9976 - val_loss: 0.0090 - learning_rate: 2.0000e-04
Epoch 10/20
313/313 ━━━━━━━━━━━━━━━━━━━━ 90s 287ms/step
Epoch 10 Training Precision: 0.2003, Recall: 0.2003, F1 Score: 0.2003
40/40 ━━━━━━━━━━━━━━━━━━━━ 11s 275ms/step
Epoch 10 Validation Precision: 0.2016, Recall: 0.2016, F1 Score: 0.2016

Epoch 10: val_loss improved from 0.00903 to 0.00608, saving model to best_model.keras
313/313 ━━━━━━━━━━━━━━━━━━━━ 259s 664ms/step - accuracy: 0.9998 - loss: 5.2550e-04 - val_accuracy: 0.9976 - val_loss: 0.0061 - learning_rate: 2.0000e-04
Epoch 11/20
313/313 ━━━━━━━━━━━━━━━━━━━━ 91s 292ms/step
Epoch 11 Training Precision: 0.1986, Recall: 0.1986, F1 Score: 0.1986
40/40 ━━━━━━━━━━━━━━━━━━━━ 12s 299ms/step
Epoch 11 Validation Precision: 0.2073, Recall: 0.2072, F1 Score: 0.2072

Epoch 11: val_loss did not improve from 0.00608
313/313 ━━━━━━━━━━━━━━━━━━━━ 214s 678ms/step - accuracy: 0.9999 - loss: 1.8689e-04 - val_accuracy: 0.9976 - val_loss: 0.0062 - learning_rate: 2.0000e-04
Epoch 12/20
313/313 ━━━━━━━━━━━━━━━━━━━━ 92s 293ms/step
Epoch 12 Training Precision: 0.2001, Recall: 0.2001, F1 Score: 0.2001
40/40 ━━━━━━━━━━━━━━━━━━━━ 12s 289ms/step
Epoch 12 Validation Precision: 0.2096, Recall: 0.2096, F1 Score: 0.2096

Epoch 12: val_loss improved from 0.00608 to 0.00523, saving model to best_model.keras
313/313 ━━━━━━━━━━━━━━━━━━━━ 213s 673ms/step - accuracy: 1.0000 - loss: 9.4459e-05 - val_accuracy: 0.9980 - val_loss: 0.0052 - learning_rate: 2.0000e-04
Epoch 13/20
313/313 ━━━━━━━━━━━━━━━━━━━━ 90s 288ms/step
Epoch 13 Training Precision: 0.2013, Recall: 0.2013, F1 Score: 0.2013
40/40 ━━━━━━━━━━━━━━━━━━━━ 12s 297ms/step
Epoch 13 Validation Precision: 0.1921, Recall: 0.1920, F1 Score: 0.1920

Epoch 13: val_loss did not improve from 0.00523
313/313 ━━━━━━━━━━━━━━━━━━━━ 211s 667ms/step - accuracy: 1.0000 - loss: 4.1392e-04 - val_accuracy: 0.9972 - val_loss: 0.0068 - learning_rate: 2.0000e-04
Epoch 14/20
313/313 ━━━━━━━━━━━━━━━━━━━━ 89s 283ms/step
Epoch 14 Training Precision: 0.2014, Recall: 0.2014, F1 Score: 0.2014
40/40 ━━━━━━━━━━━━━━━━━━━━ 11s 279ms/step
Epoch 14 Validation Precision: 0.1985, Recall: 0.1984, F1 Score: 0.1984

Epoch 14: val_loss did not improve from 0.00523
313/313 ━━━━━━━━━━━━━━━━━━━━ 261s 663ms/step - accuracy: 1.0000 - loss: 1.4714e-04 - val_accuracy: 0.9972 - val_loss: 0.0084 - learning_rate: 2.0000e-04
Epoch 15/20
313/313 ━━━━━━━━━━━━━━━━━━━━ 88s 281ms/step
Epoch 15 Training Precision: 0.1979, Recall: 0.1979, F1 Score: 0.1979
40/40 ━━━━━━━━━━━━━━━━━━━━ 11s 285ms/step
Epoch 15 Validation Precision: 0.2007, Recall: 0.2008, F1 Score: 0.2007

Epoch 15: val_loss did not improve from 0.00523

Epoch 15: ReduceLROnPlateau reducing learning rate to 4.0000001899898055e-05.
313/313 ━━━━━━━━━━━━━━━━━━━━ 207s 654ms/step - accuracy: 0.9997 - loss: 5.8473e-04 - val_accuracy: 0.9972 - val_loss: 0.0067 - learning_rate: 2.0000e-04
Epoch 16/20
313/313 ━━━━━━━━━━━━━━━━━━━━ 91s 292ms/step
Epoch 16 Training Precision: 0.1999, Recall: 0.1999, F1 Score: 0.1999
40/40 ━━━━━━━━━━━━━━━━━━━━ 12s 294ms/step
Epoch 16 Validation Precision: 0.1864, Recall: 0.1864, F1 Score: 0.1864

Epoch 16: val_loss improved from 0.00523 to 0.00509, saving model to best_model.keras
313/313 ━━━━━━━━━━━━━━━━━━━━ 212s 670ms/step - accuracy: 1.0000 - loss: 1.7205e-04 - val_accuracy: 0.9972 - val_loss: 0.0051 - learning_rate: 4.0000e-05
Epoch 17/20
313/313 ━━━━━━━━━━━━━━━━━━━━ 87s 279ms/step
Epoch 17 Training Precision: 0.1956, Recall: 0.1956, F1 Score: 0.1956
40/40 ━━━━━━━━━━━━━━━━━━━━ 12s 300ms/step
Epoch 17 Validation Precision: 0.2056, Recall: 0.2056, F1 Score: 0.2056

Epoch 17: val_loss improved from 0.00509 to 0.00495, saving model to best_model.keras
313/313 ━━━━━━━━━━━━━━━━━━━━ 212s 670ms/step - accuracy: 1.0000 - loss: 1.1527e-04 - val_accuracy: 0.9972 - val_loss: 0.0050 - learning_rate: 4.0000e-05
Epoch 18/20
313/313 ━━━━━━━━━━━━━━━━━━━━ 89s 283ms/step
Epoch 18 Training Precision: 0.1981, Recall: 0.1981, F1 Score: 0.1981
40/40 ━━━━━━━━━━━━━━━━━━━━ 12s 294ms/step
Epoch 18 Validation Precision: 0.1992, Recall: 0.1992, F1 Score: 0.1992

Epoch 18: val_loss improved from 0.00495 to 0.00400, saving model to best_model.keras
313/313 ━━━━━━━━━━━━━━━━━━━━ 208s 658ms/step - accuracy: 1.0000 - loss: 7.9588e-05 - val_accuracy: 0.9980 - val_loss: 0.0040 - learning_rate: 4.0000e-05
Epoch 19/20
313/313 ━━━━━━━━━━━━━━━━━━━━ 92s 294ms/step
Epoch 19 Training Precision: 0.1988, Recall: 0.1988, F1 Score: 0.1988
40/40 ━━━━━━━━━━━━━━━━━━━━ 11s 286ms/step
Epoch 19 Validation Precision: 0.2100, Recall: 0.2100, F1 Score: 0.2100

Epoch 19: val_loss improved from 0.00400 to 0.00298, saving model to best_model.keras
313/313 ━━━━━━━━━━━━━━━━━━━━ 210s 666ms/step - accuracy: 1.0000 - loss: 7.2323e-05 - val_accuracy: 0.9988 - val_loss: 0.0030 - learning_rate: 4.0000e-05
Epoch 20/20
313/313 ━━━━━━━━━━━━━━━━━━━━ 89s 283ms/step
Epoch 20 Training Precision: 0.2003, Recall: 0.2003, F1 Score: 0.2003
40/40 ━━━━━━━━━━━━━━━━━━━━ 12s 288ms/step
Epoch 20 Validation Precision: 0.1832, Recall: 0.1832, F1 Score: 0.1832

Epoch 20: val_loss improved from 0.00298 to 0.00246, saving model to best_model.keras
313/313 ━━━━━━━━━━━━━━━━━━━━ 209s 663ms/step - accuracy: 1.0000 - loss: 4.7209e-05 - val_accuracy: 0.9992 - val_loss: 0.0025 - learning_rate: 4.0000e-05
Restoring model weights from the end of the best epoch: 20.
Total Training Time: 4658.02 seconds

png

40/40 ━━━━━━━━━━━━━━━━━━━━ 19s 468ms/step - accuracy: 1.0000 - loss: 1.4570e-04
Test Accuracy: 1.0000
Total Testing Time: 19.66 seconds
40/40 ━━━━━━━━━━━━━━━━━━━━ 12s 297ms/step
Test Precision: 1.0000, Recall: 1.0000, F1 Score: 1.0000