Convlstm-SM / ConvLSTM-Inclusive and Exclusive.ipynb
ConvLSTM-Inclusive and Exclusive.ipynb
Raw
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "5i4umWRLgpRk"
   },
   "outputs": [],
   "source": [
    "#Input Data\n",
    "#Uncompress the input variables .tar archieve \n",
    "#!tar xf var5.tar #Use this instead for five-variables models\n",
    "!tar xf var3.tar\n",
    "#!tar xf var2.tar #Use this instead for two-variables models\n",
    "\n",
    "#Uncompress the Land Use/Land Cover .tar archieve (Contains one file)\n",
    "!tar xf LULC.tar\n",
    "#Uncompress the Rainfall  .tar archieve\n",
    "!tar xf Rainfall.tar\n",
    "\n",
    "\n",
    "#Output Data\n",
    "#Uncompress the Soil Moisture  .tar archieve\n",
    "!tar xf NWM_OUT_SM.tar"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "W18FIggwwsT-"
   },
   "outputs": [],
   "source": [
    "#Runthis\n",
    "from osgeo import gdal\n",
    "import os\n",
    "import numpy as np\n",
    "\n",
    "#num_of_variables = 5  #Use this instead for five-variables models\n",
    "num_of_variables = 3    \n",
    "#num_of_variables = 2  #Use this instead for two-variables models\n",
    "\n",
    "number_of_sequences = (365 + 153)*8 - 2\n",
    "\n",
    "channels = num_of_variables + 2\n",
    "\n",
    "#Dimensions of the inut\n",
    "rows =65 \n",
    "columns = 55\n",
    "\n",
    "#Populate a list of input variables files\n",
    "filesList = sorted(os.listdir('./var'+ str(num_of_variables)))\n",
    "\n",
    "\n",
    "#filesList = filesList[0:-(num_of_variables*4)]                   #Use this instead for exclusive convLSTM sequence = 5 \n",
    "filesList = filesList[num_of_variables*2:-(num_of_variables*4)]   #Use this instead for exclusive convLSTM sequence = 3 \n",
    "#filesList = filesList[num_of_variables*1:-(num_of_variables*3)]  #Use this instead for Inclusive convLSTM sequence = 5 \n",
    "#filesList = filesList[num_of_variables*3:-(num_of_variables*3)]  #Use this instead for Inclusive convLSTM sequence = 3\n",
    "\n",
    "#Populate a list of rainfall files\n",
    "RFFilesList = sorted(os.listdir('./Rainfall'))\n",
    "\n",
    "#RFFilesList = RFFilesList[1:-3] #Use this instead for exclusive convLSTM sequence = 5 \n",
    "RFFilesList = RFFilesList[3:-3]  #Use this instead for exclusive convLSTM sequence = 3 \n",
    "#RFFilesList = RFFilesList[2:-2] #Use this instead for inclusive convLSTM sequence = 5 \n",
    "#RFFilesList = RFFilesList[4:-2] #Use this instead for inclusive convLSTM sequence = 3\n",
    "\n",
    "\n",
    "#Generate a complete list of inputs including Land use / Land Cover\n",
    "NewCompletelist = []\n",
    "for i in range(0,len(filesList),num_of_variables):\n",
    "    for j in range(0,num_of_variables):\n",
    "        NewCompletelist.append(filesList[i+j])\n",
    "    NewCompletelist.append(RFFilesList[i//num_of_variables])\n",
    "    NewCompletelist.append('LULC.tif_1km.tif')\n",
    "\n",
    "\n",
    "#A function to load input data\n",
    "def read_input_files(mylist):\n",
    "    finallist=[]\n",
    "    count =0\n",
    "    for fn in (mylist):\n",
    "        count =count +1\n",
    "        if 'wrfsfcf' in fn:\n",
    "            raster = gdal.Open(os.path.join('./var' + str(num_of_variables), fn))\n",
    "        elif 'GaugeCorr' in fn:\n",
    "            raster = gdal.Open(os.path.join('./Rainfall', fn))\n",
    "        else:\n",
    "            raster = gdal.Open(os.path.join('./LULC', fn))\n",
    "        if raster is None:\n",
    "            print ('Unable to open %s')\n",
    "            break\n",
    "        band = raster.GetRasterBand(1)\n",
    "        array = band.ReadAsArray()\n",
    "        finallist.append(array)   \n",
    "    return finallist\n",
    "\n",
    "#Allmerged contains all input data \n",
    "Allmerged = np.array(read_input_files(NewCompletelist))\n",
    "\n",
    "#Allmerged reshaped in the form: (number_of_entries, channels, 65, 55)\n",
    "length =(Allmerged.shape[0])//(channels)\n",
    "Allmerged = Allmerged.reshape((length,(channels ),*Allmerged.shape[-2:]))\n",
    "\n",
    "#Reshaping the input to be in the form: (number_of_entries, 65, 55, channels)\n",
    "Allmerged= np.moveaxis(Allmerged, 1, -1)\n",
    "print(Allmerged.shape)\n",
    "\n",
    "#Generate a list of input sequences\n",
    "new_final_sequences_list =[]  \n",
    "\n",
    "#Use the following loop for sequence = 3\n",
    "for i in range(0,number_of_sequences*3,3): \n",
    "    newarray=np.stack((Allmerged [i],Allmerged [i+1],Allmerged [i+2]),axis =0)\n",
    "    new_final_sequences_list.append(newarray)\n",
    "\n",
    "#Instead Use the following loop for sequence = 5\n",
    "# for i in range(0,number_of_sequences*3,3): \n",
    "#     newarray=np.stack((Allmerged [i],Allmerged [i+1],Allmerged[i+2],Allmerged[i+3],Allmerged[i+4]),axis =0)\n",
    "#     new_final_sequences_list.append(newarray)\n",
    "\n",
    "#Convert the input sequences list into array\n",
    "X = np.array(new_final_sequences_list)\n",
    "\n",
    "#Performing Min-Max scalling for the input\n",
    "for index in range(0, channels):\n",
    "    max = np.max(X[:,:,:,:,index])\n",
    "    min = np.min(X[:,:,:,:,index])\n",
    "    X[:,:,:,:,index] = (X[:,:,:,:,index] - min)/(max-min)\n",
    "\n",
    "print(X.shape) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "pejYK_emyl1i"
   },
   "outputs": [],
   "source": [
    "#A function to load output filenames and data\n",
    "def read_output_files(foldername):\n",
    "    finallistnames = []  #A list to store output file names\n",
    "    finallistdata=[]  #A list to store output data\n",
    "\n",
    "    path = './' + foldername\n",
    "    for fn in sorted(os.listdir(path)):\n",
    "        raster = gdal.Open(os.path.join(path, fn))\n",
    "        if raster is None:\n",
    "            print ('Unable to open %s')\n",
    "            break\n",
    "        band = raster.GetRasterBand(1)\n",
    "        array = band.ReadAsArray()\n",
    "        finallistnames.append(fn)\n",
    "        finallistdata.append(array)\n",
    "    return [finallistnames, finallistdata]\n",
    "\n",
    "SM_output_List = read_output_files('NWM_OUT_SM')\n",
    "\n",
    "#SMFileNames contains a list of all outputs filenames\n",
    "SMFileNames = SM_output_List[0][2:]\n",
    "\n",
    "#SMDataset contains an array of output data \n",
    "SMDataset = np.array(SM_output_List[1])[2:]\n",
    "\n",
    "#Removing negative values\n",
    "y = SMDataset\n",
    "y[y<0] =0\n",
    "\n",
    "#Reshaping the output as (number_of_entries, 65, 55, 1)\n",
    "y = np.expand_dims(y, axis=3)\n",
    "y.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "XnKa8yYKJycB"
   },
   "outputs": [],
   "source": [
    "#Defining the model\n",
    "from keras.models import Sequential\n",
    "from keras.layers.convolutional import Conv2D\n",
    "from keras.layers.convolutional_recurrent import ConvLSTM2D\n",
    "from keras.layers.normalization import BatchNormalization\n",
    "from keras.layers import Dense\n",
    "from keras import callbacks\n",
    "import numpy as np\n",
    "import pylab as plt  \n",
    "\n",
    "\n",
    "model = Sequential()\n",
    "\n",
    "#First Layer\n",
    "model.add(ConvLSTM2D(filters=64, kernel_size=(3, 3), padding='same',\n",
    "                     return_sequences=True,\n",
    "                     input_shape=(None, rows , columns,channels)\n",
    "                     ))\n",
    "#batch-norm layer\n",
    "model.add(BatchNormalization())\n",
    "\n",
    "\n",
    "#Second Layer\n",
    "model.add(ConvLSTM2D(filters=64, kernel_size=(3, 3), padding='same',\n",
    "                     return_sequences=True\n",
    "                     )) \n",
    "#batch-norm layer\n",
    "model.add(BatchNormalization())\n",
    "\n",
    "#Third Layer\n",
    "model.add(ConvLSTM2D(filters=50, kernel_size=(3, 3), padding='same',\n",
    "                     return_sequences=True\n",
    "                     ))\n",
    "#batch-norm layer\n",
    "model.add(BatchNormalization())\n",
    "\n",
    "#Fourth Layer\n",
    "model.add(ConvLSTM2D(filters=32, kernel_size=(3, 3), padding='same',\n",
    "                     return_sequences=False\n",
    "                    ))\n",
    "#batch-norm layer\n",
    "model.add(BatchNormalization())\n",
    "\n",
    "#Final layer\n",
    "model.add(Conv2D(filters=1, kernel_size=(1, 1), \n",
    "                   activation='sigmoid',\n",
    "                   padding='same', \n",
    "                ))\n",
    "#Defining loss function,optimizer and model compilation\n",
    "model.compile(loss='binary_crossentropy', optimizer='rmsprop')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "quiTV1SoJ7BE"
   },
   "outputs": [],
   "source": [
    "#Preparing training, validation and testing datasets\n",
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "#Training+validation dataset\n",
    "X_reduced = X[0:3038]\n",
    "y_reduced = y[0:3038]\n",
    "\n",
    "#splitting into training dataset and  validation dataset\n",
    "X_train, X_validate, y_train, y_validate = train_test_split(X_reduced, y_reduced,  test_size=0.1, random_state=3)\n",
    "\n",
    "#Testing dataset\n",
    "X_test = X[3038:]\n",
    "y_test = y[3038:]\n",
    "\n",
    "#Defining\n",
    "callback = callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)\n",
    "\n",
    "#Model fitting\n",
    "history=model.fit(X_train, y_train, batch_size=5, epochs=50,verbose=2, validation_data=(X_validate, y_validate),callbacks=[callback])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "0wECPa2TVKfE"
   },
   "outputs": [],
   "source": [
    "#Saving the trained model and its weights\n",
    "from keras.models import load_model\n",
    "\n",
    "model.save('ConvLSTM_model.h5')  \n",
    "\n",
    "model.save_weights('ConvLSTM_model_weights.h5')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "ppQmUxdVUAas"
   },
   "outputs": [],
   "source": [
    "#Generate predictions to the whole dataset (training + validation + test)\n",
    "\n",
    "predictions = model.predict(X)\n",
    "\n",
    "#Defining a function to convert a numpy array to raster format\n",
    "\n",
    "import  osr\n",
    "\n",
    "def array2raster(newRasterfn,rasterfn,array):\n",
    "    raster = gdal.Open(rasterfn)\n",
    "    geotransform = raster.GetGeoTransform()\n",
    "    originX = geotransform[0]\n",
    "    originY = geotransform[3]\n",
    "    pixelWidth = geotransform[1]\n",
    "    pixelHeight = geotransform[5]\n",
    "    cols = array.shape[1]\n",
    "    rows = array.shape[0]\n",
    "\n",
    "    driver = gdal.GetDriverByName('GTiff')\n",
    "    outRaster = driver.Create(newRasterfn, cols, rows, 1, gdal.GDT_Float32)\n",
    "    outRaster.SetGeoTransform((originX, pixelWidth, 0, originY, 0, pixelHeight))\n",
    "    outband = outRaster.GetRasterBand(1)\n",
    "    outband.WriteArray(array)\n",
    "    outRasterSRS = osr.SpatialReference()\n",
    "    outRasterSRS.ImportFromWkt(raster.GetProjectionRef())\n",
    "    outRaster.SetProjection(outRasterSRS.ExportToWkt())\n",
    "    outband.FlushCache()\n",
    "\n",
    "#create a folder to save the predictions in raster format\n",
    "!mkdir predicted\n",
    "\n",
    "for i in range (0,len(SMFileNames)):\n",
    "  #read original output filename\n",
    "  originalfilename = SMFileNames[i]\n",
    "\n",
    "  sample_prediction = np.squeeze(predictions[i]) \n",
    "\n",
    "  #Generate predicted output filename\n",
    "  filename = originalfilename [0:-3] + \"predicted\"+ \".tif\"\n",
    "  predictedpath = \"predicted/\"+ filename\n",
    "  #save theoutput prediction in tif format\n",
    "  array2raster(predictedpath,'/content/LULC/LULC.tif_1km.tif',sample_prediction)\n",
    "\n",
    "#Archieve all predictions in one .tar file\n",
    "!tar cf  predicted_convLSTM.tar predicted"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "collapsed_sections": [],
   "name": "ConvLSTM-Inclusive and Exclusive.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}