{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "5i4umWRLgpRk"
},
"outputs": [],
"source": [
"#Input Data\n",
"#Uncompress the input variables .tar archieve \n",
"#!tar xf var5.tar #Use this instead for five-variables models\n",
"!tar xf var3.tar\n",
"#!tar xf var2.tar #Use this instead for two-variables models\n",
"\n",
"#Uncompress the Land Use/Land Cover .tar archieve (Contains one file)\n",
"!tar xf LULC.tar\n",
"#Uncompress the Rainfall .tar archieve\n",
"!tar xf Rainfall.tar\n",
"\n",
"\n",
"#Output Data\n",
"#Uncompress the Soil Moisture .tar archieve\n",
"!tar xf NWM_OUT_SM.tar"
]
},
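    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "#Optional check (sketch): confirm the archives above were extracted into the folders the\n",
        "#rest of the notebook reads from (substitute var5/var2 when using those models).\n",
        "import os\n",
        "for folder in ['var3', 'LULC', 'Rainfall', 'NWM_OUT_SM']:\n",
        "    print(folder, 'found' if os.path.isdir(folder) else 'missing')"
      ]
    },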
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "W18FIggwwsT-"
},
"outputs": [],
"source": [
"#Runthis\n",
"from osgeo import gdal\n",
"import os\n",
"import numpy as np\n",
"\n",
"#num_of_variables = 5 #Use this instead for five-variables models\n",
"num_of_variables = 3 \n",
"#num_of_variables = 2 #Use this instead for two-variables models\n",
"\n",
"number_of_sequences = (365 + 153)*8 - 2\n",
"\n",
"number_of_all_possible_sequences = (365 + 153)*24 -8\n",
"\n",
"channels = num_of_variables + 2\n",
"\n",
"#Dimensions of the inut\n",
"rows =65 \n",
"columns = 55\n",
"\n",
"#Populate a list of input variables files\n",
"filesList = sorted(os.listdir('./var'+ str(num_of_variables)))\n",
"\n",
"\n",
"#filesList = filesList[0:-(num_of_variables*4)] #Use this instead for exclusive convLSTM sequence = 5 \n",
"filesList = filesList[num_of_variables*2:-(num_of_variables*4)] #Use this instead for exclusive convLSTM sequence = 3 \n",
"#filesList = filesList[num_of_variables*1:-(num_of_variables*3)] #Use this instead for Inclusive convLSTM sequence = 5 \n",
"#filesList = filesList[num_of_variables*3:-(num_of_variables*3)] #Use this instead for Inclusive convLSTM sequence = 3\n",
"\n",
"#Populate a list of rainfall files\n",
"RFFilesList = sorted(os.listdir('./Rainfall'))\n",
"\n",
"#RFFilesList = RFFilesList[1:-3] #Use this instead for exclusive convLSTM sequence = 5 \n",
"RFFilesList = RFFilesList[3:-3] #Use this instead for exclusive convLSTM sequence = 3 \n",
"#RFFilesList = RFFilesList[2:-2] #Use this instead for inclusive convLSTM sequence = 5 \n",
"#RFFilesList = RFFilesList[4:-2] #Use this instead for inclusive convLSTM sequence = 3\n",
"\n",
"\n",
"#Generate a complete list of inputs including Land use / Land Cover\n",
"NewCompletelist = []\n",
"for i in range(0,len(filesList),num_of_variables):\n",
" for j in range(0,num_of_variables):\n",
" NewCompletelist.append(filesList[i+j])\n",
" NewCompletelist.append(RFFilesList[i//num_of_variables])\n",
" NewCompletelist.append('LULC.tif_1km.tif')\n",
"\n",
"\n",
"#A function to load input data\n",
"def read_input_files(mylist):\n",
" finallist=[]\n",
" count =0\n",
" for fn in (mylist):\n",
" count =count +1\n",
" if 'wrfsfcf' in fn:\n",
" raster = gdal.Open(os.path.join('./var' + str(num_of_variables), fn))\n",
" elif 'GaugeCorr' in fn:\n",
" raster = gdal.Open(os.path.join('./Rainfall', fn))\n",
" else:\n",
" raster = gdal.Open(os.path.join('./LULC', fn))\n",
" if raster is None:\n",
" print ('Unable to open %s')\n",
" break\n",
" band = raster.GetRasterBand(1)\n",
" array = band.ReadAsArray()\n",
" finallist.append(array) \n",
" return finallist\n",
"\n",
"#Allmerged contains all input data \n",
"Allmerged = np.array(read_input_files(NewCompletelist))\n",
"\n",
"#Allmerged reshaped in the form: (number_of_entries, channels, 65, 55)\n",
"length =(Allmerged.shape[0])//(channels)\n",
"Allmerged = Allmerged.reshape((length,(channels ),*Allmerged.shape[-2:]))\n",
"\n",
"#Reshaping the input to be in the form: (number_of_entries, 65, 55, channels)\n",
"Allmerged= np.moveaxis(Allmerged, 1, -1)\n",
"print(Allmerged.shape)\n",
"\n",
"#Generate a list of input sequences\n",
"new_final_sequences_list =[] \n",
"\n",
"#Use the following loop for sequence = 3\n",
"for i in range(0,number_of_all_sequences): \n",
" newarray=np.stack((Allmerged [i],Allmerged [i+1],Allmerged [i+2]),axis =0)\n",
" new_final_sequences_list.append(newarray)\n",
"\n",
"#Instead Use the following loop for sequence = 5\n",
"# for i in range(0,number_of_all_sequences): \n",
"# newarray=np.stack((Allmerged [i],Allmerged [i+1],Allmerged[i+2],Allmerged[i+3],Allmerged[i+4]),axis =0)\n",
"# new_final_sequences_list.append(newarray)\n",
"\n",
"\n",
"#Convert the input sequences list into array\n",
"X = np.array(new_final_sequences_list)\n",
"\n",
"#Performing Min-Max scalling for the input\n",
"for index in range(0, channels):\n",
" max = np.max(X[:,:,:,:,index])\n",
" min = np.min(X[:,:,:,:,index])\n",
" X[:,:,:,:,index] = (X[:,:,:,:,index] - min)/(max-min)\n",
"\n",
"print(X.shape) "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "pejYK_emyl1i"
},
"outputs": [],
"source": [
"#A function to read and generate output filenames \"following the same existing naming conventions\"\n",
"def read_generate_output_files(foldername):\n",
" finallistnames = []\n",
" \n",
" path = './' + foldername\n",
" \n",
" for fn in sorted(os.listdir(path)):\n",
" splitted = fn.split('.',1)\n",
" first_split = splitted[0]\n",
" second_split = splitted[1]\n",
" \n",
" file_name_first_part = first_split [0:8]\n",
" file_name_second_part = first_split [8:10]\n",
" \n",
" finallistnames.append(fn)\n",
" \n",
" base = int(file_name_second_part)\n",
" base_next = base + 1\n",
" base_next_next = base + 2\n",
" if base_next < 10:\n",
" base_next = \"0\"+ str(base_next)\n",
" \n",
" if base_next_next < 10:\n",
" base_next_next = \"0\"+ str(base_next_next)\n",
" \n",
" file_2 = file_name_first_part + str(base_next) + \"00.\" + second_split\n",
" finallistnames.append(file_2)\n",
" \n",
" file_3 = file_name_first_part + str(base_next_next) + \"00.\" + second_split\n",
" finallistnames.append(file_3) \n",
" return finallistnames\n",
"\n",
"#Read and generate soil moisture output file names\n",
"SMFileNames = read_generate_output_files('NWM_OUT_SM')\n",
"SMFileNames = SMFileNames[6:-2]"
]
},
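    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "#Optional check (sketch): each input sequence in X is paired with one generated output file\n",
        "#name above, so the two lengths are expected to match; the trim indices are taken as-is\n",
        "#from the cell above.\n",
        "print(len(SMFileNames), len(X))"
      ]
    },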
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "fsyTJkD5mpN3"
},
"outputs": [],
"source": [
"#Load a previously saved model\n",
"from keras.models import load_model\n",
"\n",
"model = load_model('ConvLSTM_model.h5')\n",
"\n",
"print(model.summary())"
]
},
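    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "#Optional check (sketch): the loaded model should accept sequences shaped like X\n",
        "#(sequence length, 65, 55, channels); a mismatch usually means a different archive or\n",
        "#sequence length was selected in the cells above.\n",
        "print('Model expects :', model.input_shape)\n",
        "print('Data provides :', X.shape)"
      ]
    },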
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "ppQmUxdVUAas"
},
"outputs": [],
"source": [
"#Generate predictions to the whole dataset \n",
"\n",
"predictions = model.predict(X)\n",
"\n",
"#Defining a function to convert a numpy array to raster format\n",
"\n",
"import osr\n",
"\n",
"def array2raster(newRasterfn,rasterfn,array):\n",
" raster = gdal.Open(rasterfn)\n",
" geotransform = raster.GetGeoTransform()\n",
" originX = geotransform[0]\n",
" originY = geotransform[3]\n",
" pixelWidth = geotransform[1]\n",
" pixelHeight = geotransform[5]\n",
" cols = array.shape[1]\n",
" rows = array.shape[0]\n",
"\n",
" driver = gdal.GetDriverByName('GTiff')\n",
" outRaster = driver.Create(newRasterfn, cols, rows, 1, gdal.GDT_Float32)\n",
" outRaster.SetGeoTransform((originX, pixelWidth, 0, originY, 0, pixelHeight))\n",
" outband = outRaster.GetRasterBand(1)\n",
" outband.WriteArray(array)\n",
" outRasterSRS = osr.SpatialReference()\n",
" outRasterSRS.ImportFromWkt(raster.GetProjectionRef())\n",
" outRaster.SetProjection(outRasterSRS.ExportToWkt())\n",
" outband.FlushCache()\n",
"\n",
"#create a folder to save the predictions in raster format\n",
"!mkdir predicted\n",
"\n",
"for i in range (0,len(SMFileNames)):\n",
" #read original output filename\n",
" originalfilename = SMFileNames[i]\n",
"\n",
" sample_prediction = np.squeeze(predictions[i]) \n",
"\n",
" #Generate predicted output filename\n",
" filename = originalfilename [0:-3] + \"predicted\"+ \".tif\"\n",
" predictedpath = \"predicted/\"+ filename\n",
" #save theoutput prediction in tif format\n",
" array2raster(predictedpath,'/content/LULC/LULC.tif_1km.tif',sample_prediction)\n",
"\n",
"#Archieve all predictions in one .tar file\n",
"!tar cf All_sequences_predicted_convLSTM.tar predicted"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"collapsed_sections": [],
"name": "Generate-Sequences-ConvLSTM-Inclusive and Exclusive.ipynb",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 1
}