{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "id": "5i4umWRLgpRk" }, "outputs": [], "source": [ "#Input Data\n", "#Uncompress the input variables .tar archieve \n", "#!tar xf var5.tar #Use this instead for five-variables models\n", "!tar xf var3.tar\n", "#!tar xf var2.tar #Use this instead for two-variables models\n", "\n", "#Uncompress the Land Use/Land Cover .tar archieve (Contains one file)\n", "!tar xf LULC.tar\n", "#Uncompress the Rainfall .tar archieve\n", "!tar xf Rainfall.tar\n", "\n", "\n", "#Output Data\n", "#Uncompress the Soil Moisture .tar archieve\n", "!tar xf NWM_OUT_SM.tar" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "W18FIggwwsT-" }, "outputs": [], "source": [ "#Runthis\n", "from osgeo import gdal\n", "import os\n", "import numpy as np\n", "\n", "#num_of_variables = 5 #Use this instead for five-variables models\n", "num_of_variables = 3 \n", "#num_of_variables = 2 #Use this instead for two-variables models\n", "\n", "number_of_sequences = (365 + 153)*8 - 2\n", "\n", "channels = num_of_variables + 2\n", "\n", "#Dimensions of the inut\n", "rows =65 \n", "columns = 55\n", "\n", "#Populate a list of input variables files\n", "filesList = sorted(os.listdir('./var'+ str(num_of_variables)))\n", "\n", "\n", "#filesList = filesList[0:-(num_of_variables*4)] #Use this instead for exclusive convLSTM sequence = 5 \n", "filesList = filesList[num_of_variables*2:-(num_of_variables*4)] #Use this instead for exclusive convLSTM sequence = 3 \n", "#filesList = filesList[num_of_variables*1:-(num_of_variables*3)] #Use this instead for Inclusive convLSTM sequence = 5 \n", "#filesList = filesList[num_of_variables*3:-(num_of_variables*3)] #Use this instead for Inclusive convLSTM sequence = 3\n", "\n", "#Populate a list of rainfall files\n", "RFFilesList = sorted(os.listdir('./Rainfall'))\n", "\n", "#RFFilesList = RFFilesList[1:-3] #Use this instead for exclusive convLSTM sequence = 5 \n", "RFFilesList = RFFilesList[3:-3] #Use this instead for exclusive convLSTM sequence = 3 \n", "#RFFilesList = RFFilesList[2:-2] #Use this instead for inclusive convLSTM sequence = 5 \n", "#RFFilesList = RFFilesList[4:-2] #Use this instead for inclusive convLSTM sequence = 3\n", "\n", "\n", "#Generate a complete list of inputs including Land use / Land Cover\n", "NewCompletelist = []\n", "for i in range(0,len(filesList),num_of_variables):\n", " for j in range(0,num_of_variables):\n", " NewCompletelist.append(filesList[i+j])\n", " NewCompletelist.append(RFFilesList[i//num_of_variables])\n", " NewCompletelist.append('LULC.tif_1km.tif')\n", "\n", "\n", "#A function to load input data\n", "def read_input_files(mylist):\n", " finallist=[]\n", " count =0\n", " for fn in (mylist):\n", " count =count +1\n", " if 'wrfsfcf' in fn:\n", " raster = gdal.Open(os.path.join('./var' + str(num_of_variables), fn))\n", " elif 'GaugeCorr' in fn:\n", " raster = gdal.Open(os.path.join('./Rainfall', fn))\n", " else:\n", " raster = gdal.Open(os.path.join('./LULC', fn))\n", " if raster is None:\n", " print ('Unable to open %s')\n", " break\n", " band = raster.GetRasterBand(1)\n", " array = band.ReadAsArray()\n", " finallist.append(array) \n", " return finallist\n", "\n", "#Allmerged contains all input data \n", "Allmerged = np.array(read_input_files(NewCompletelist))\n", "\n", "#Allmerged reshaped in the form: (number_of_entries, channels, 65, 55)\n", "length =(Allmerged.shape[0])//(channels)\n", "Allmerged = Allmerged.reshape((length,(channels ),*Allmerged.shape[-2:]))\n", "\n", "#Reshaping the input to be in the form: (number_of_entries, 65, 55, channels)\n", "Allmerged= np.moveaxis(Allmerged, 1, -1)\n", "print(Allmerged.shape)\n", "\n", "#Generate a list of input sequences\n", "new_final_sequences_list =[] \n", "\n", "#Use the following loop for sequence = 3\n", "for i in range(0,number_of_sequences*3,3): \n", " newarray=np.stack((Allmerged [i],Allmerged [i+1],Allmerged [i+2]),axis =0)\n", " new_final_sequences_list.append(newarray)\n", "\n", "#Instead Use the following loop for sequence = 5\n", "# for i in range(0,number_of_sequences*3,3): \n", "# newarray=np.stack((Allmerged [i],Allmerged [i+1],Allmerged[i+2],Allmerged[i+3],Allmerged[i+4]),axis =0)\n", "# new_final_sequences_list.append(newarray)\n", "\n", "#Convert the input sequences list into array\n", "X = np.array(new_final_sequences_list)\n", "\n", "#Performing Min-Max scalling for the input\n", "for index in range(0, channels):\n", " max = np.max(X[:,:,:,:,index])\n", " min = np.min(X[:,:,:,:,index])\n", " X[:,:,:,:,index] = (X[:,:,:,:,index] - min)/(max-min)\n", "\n", "print(X.shape) " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "pejYK_emyl1i" }, "outputs": [], "source": [ "#A function to load output filenames and data\n", "def read_output_files(foldername):\n", " finallistnames = [] #A list to store output file names\n", " finallistdata=[] #A list to store output data\n", "\n", " path = './' + foldername\n", " for fn in sorted(os.listdir(path)):\n", " raster = gdal.Open(os.path.join(path, fn))\n", " if raster is None:\n", " print ('Unable to open %s')\n", " break\n", " band = raster.GetRasterBand(1)\n", " array = band.ReadAsArray()\n", " finallistnames.append(fn)\n", " finallistdata.append(array)\n", " return [finallistnames, finallistdata]\n", "\n", "SM_output_List = read_output_files('NWM_OUT_SM')\n", "\n", "#SMFileNames contains a list of all outputs filenames\n", "SMFileNames = SM_output_List[0][2:]\n", "\n", "#SMDataset contains an array of output data \n", "SMDataset = np.array(SM_output_List[1])[2:]\n", "\n", "#Removing negative values\n", "y = SMDataset\n", "y[y<0] =0\n", "\n", "#Reshaping the output as (number_of_entries, 65, 55, 1)\n", "y = np.expand_dims(y, axis=3)\n", "y.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "XnKa8yYKJycB" }, "outputs": [], "source": [ "#Defining the model\n", "from keras.models import Sequential\n", "from keras.layers.convolutional import Conv2D\n", "from keras.layers.convolutional_recurrent import ConvLSTM2D\n", "from keras.layers.normalization import BatchNormalization\n", "from keras.layers import Dense\n", "from keras import callbacks\n", "import numpy as np\n", "import pylab as plt \n", "\n", "\n", "model = Sequential()\n", "\n", "#First Layer\n", "model.add(ConvLSTM2D(filters=64, kernel_size=(3, 3), padding='same',\n", " return_sequences=True,\n", " input_shape=(None, rows , columns,channels)\n", " ))\n", "#batch-norm layer\n", "model.add(BatchNormalization())\n", "\n", "\n", "#Second Layer\n", "model.add(ConvLSTM2D(filters=64, kernel_size=(3, 3), padding='same',\n", " return_sequences=True\n", " )) \n", "#batch-norm layer\n", "model.add(BatchNormalization())\n", "\n", "#Third Layer\n", "model.add(ConvLSTM2D(filters=50, kernel_size=(3, 3), padding='same',\n", " return_sequences=True\n", " ))\n", "#batch-norm layer\n", "model.add(BatchNormalization())\n", "\n", "#Fourth Layer\n", "model.add(ConvLSTM2D(filters=32, kernel_size=(3, 3), padding='same',\n", " return_sequences=False\n", " ))\n", "#batch-norm layer\n", "model.add(BatchNormalization())\n", "\n", "#Final layer\n", "model.add(Conv2D(filters=1, kernel_size=(1, 1), \n", " activation='sigmoid',\n", " padding='same', \n", " ))\n", "#Defining loss function,optimizer and model compilation\n", "model.compile(loss='binary_crossentropy', optimizer='rmsprop')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "quiTV1SoJ7BE" }, "outputs": [], "source": [ "#Preparing training, validation and testing datasets\n", "\n", "from sklearn.model_selection import train_test_split\n", "\n", "#Training+validation dataset\n", "X_reduced = X[0:3038]\n", "y_reduced = y[0:3038]\n", "\n", "#splitting into training dataset and validation dataset\n", "X_train, X_validate, y_train, y_validate = train_test_split(X_reduced, y_reduced, test_size=0.1, random_state=3)\n", "\n", "#Testing dataset\n", "X_test = X[3038:]\n", "y_test = y[3038:]\n", "\n", "#Defining\n", "callback = callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)\n", "\n", "#Model fitting\n", "history=model.fit(X_train, y_train, batch_size=5, epochs=50,verbose=2, validation_data=(X_validate, y_validate),callbacks=[callback])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "0wECPa2TVKfE" }, "outputs": [], "source": [ "#Saving the trained model and its weights\n", "from keras.models import load_model\n", "\n", "model.save('ConvLSTM_model.h5') \n", "\n", "model.save_weights('ConvLSTM_model_weights.h5')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "ppQmUxdVUAas" }, "outputs": [], "source": [ "#Generate predictions to the whole dataset (training + validation + test)\n", "\n", "predictions = model.predict(X)\n", "\n", "#Defining a function to convert a numpy array to raster format\n", "\n", "import osr\n", "\n", "def array2raster(newRasterfn,rasterfn,array):\n", " raster = gdal.Open(rasterfn)\n", " geotransform = raster.GetGeoTransform()\n", " originX = geotransform[0]\n", " originY = geotransform[3]\n", " pixelWidth = geotransform[1]\n", " pixelHeight = geotransform[5]\n", " cols = array.shape[1]\n", " rows = array.shape[0]\n", "\n", " driver = gdal.GetDriverByName('GTiff')\n", " outRaster = driver.Create(newRasterfn, cols, rows, 1, gdal.GDT_Float32)\n", " outRaster.SetGeoTransform((originX, pixelWidth, 0, originY, 0, pixelHeight))\n", " outband = outRaster.GetRasterBand(1)\n", " outband.WriteArray(array)\n", " outRasterSRS = osr.SpatialReference()\n", " outRasterSRS.ImportFromWkt(raster.GetProjectionRef())\n", " outRaster.SetProjection(outRasterSRS.ExportToWkt())\n", " outband.FlushCache()\n", "\n", "#create a folder to save the predictions in raster format\n", "!mkdir predicted\n", "\n", "for i in range (0,len(SMFileNames)):\n", " #read original output filename\n", " originalfilename = SMFileNames[i]\n", "\n", " sample_prediction = np.squeeze(predictions[i]) \n", "\n", " #Generate predicted output filename\n", " filename = originalfilename [0:-3] + \"predicted\"+ \".tif\"\n", " predictedpath = \"predicted/\"+ filename\n", " #save theoutput prediction in tif format\n", " array2raster(predictedpath,'/content/LULC/LULC.tif_1km.tif',sample_prediction)\n", "\n", "#Archieve all predictions in one .tar file\n", "!tar cf predicted_convLSTM.tar predicted" ] } ], "metadata": { "accelerator": "GPU", "colab": { "collapsed_sections": [], "name": "ConvLSTM-Inclusive and Exclusive.ipynb", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 1 }