We have to build a model that translates Italian to English.
1. Download the Italian-to-English translation dataset from here. 2. Preprocess the data. 3. Build an encoder-decoder architecture with attention: the encoder is a 1-layer LSTM and the decoder is a 1-layer LSTM. 4. Global attention has 3 types of scoring functions. As a part of this assignment you need to create a model for each scoring function: in model 1 you need to implement the "dot" score function, and in model 3 you need to implement the "concat" score function (the score functions are sketched in the reference cell below). 5. Using the attention weights, plot the attention maps. 6. Use the BLEU score as the metric to evaluate the model and SparseCategoricalCrossentropy as the loss.
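For reference, a minimal sketch of the three Luong global-attention score functions (dot, general, concat). This is only an illustration, not part of the graded code: the helper name luong_scores and the layers Wa, W and v are made up for this sketch, and the graded Attention class further below is the implementation that matters.
import tensorflow as tf

def luong_scores(s, h, kind, units):
    """Illustrative Luong global-attention scores.
    s: decoder hidden state, shape (batch, units)
    h: encoder outputs, shape (batch, T, units)"""
    if kind == 'dot':
        # score_t = s . h_t
        score = tf.matmul(h, tf.expand_dims(s, -1))                  # (batch, T, 1)
    elif kind == 'general':
        # score_t = s . (Wa h_t)
        Wa = tf.keras.layers.Dense(units, use_bias=False)            # layer created here only for the sketch
        score = tf.matmul(Wa(h), tf.expand_dims(s, -1))              # (batch, T, 1)
    else:  # 'concat'
        # score_t = v . tanh(W [h_t ; s])
        W = tf.keras.layers.Dense(units)
        v = tf.keras.layers.Dense(1)
        s_rep = tf.repeat(tf.expand_dims(s, 1), repeats=tf.shape(h)[1], axis=1)
        score = v(tf.nn.tanh(W(tf.concat([h, s_rep], axis=-1))))     # (batch, T, 1)
    weights = tf.nn.softmax(score, axis=1)                           # attention weights over the T encoder steps
    context = tf.reduce_sum(weights * h, axis=1)                     # context vector, shape (batch, units)
    return context, weights

# toy shapes: batch of 4 sentences, 7 encoder timesteps, 16 units
s = tf.random.uniform((4, 16))
h = tf.random.uniform((4, 7, 16))
for kind in ('dot', 'general', 'concat'):
    ctx, w = luong_scores(s, h, kind, 16)
    print(kind, ctx.shape, w.shape)    # (4, 16) (4, 7, 1)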
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
import re
import tensorflow as tf
from tqdm import tqdm
import math
import os
import time
import matplotlib.ticker as ticker
import random
import nltk.translate.bleu_score as bleu
from sklearn.model_selection import train_test_split
import joblib
import pickle
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input, Embedding,Flatten,Dense,Concatenate,BatchNormalization,Dropout,Conv2D,Conv1D,MaxPooling1D,LSTM,Softmax,GRU
from tensorflow.keras.models import Model
%load_ext tensorboard
from google.colab import drive
drive.mount('/content/drive/')
txt=open('/content/drive/My Drive/seq2seq/ita-eng/Ita.txt','r',encoding='utf-8')
d=txt.readlines()
def pre_txt(data):
    eng = []
    ita = []
    for i in tqdm(data):
        u = i.lower()
        # expand common English contractions
        u = re.sub(r"'m", ' am', u)
        u = re.sub(r"'ll", ' will', u)
        u = re.sub(r"'d", ' had', u)
        u = re.sub(r"'s", ' is', u)
        u = re.sub(r"'ve", ' have', u)
        u = re.sub(r"'re", ' are', u)
        u = re.sub(r"won't", 'will not', u)
        u = re.sub(r"can't", 'can not', u)
        u = re.sub(r"o'clock", '', u)
        u = re.sub(r"n't", ' not ', u)  # e.g. "haven't" -> "have not"
        u = re.sub(r"([?.!,¿])", r" \1 ", u)
        u = u.split('\t')
        p = re.sub(r"[^a-zA-Z?.!,¿]+", " ", u[0])  # English sentence
        q = re.sub(r"[^a-zA-Z?.!,¿]+", " ", u[1])  # Italian sentence
        eng_inp = '<sos> ' + p + '<eos>'
        ita_inp = '<sos> ' + q + '<eos>'
        # make sure <eos> is separated from the last Italian word by a space
        if ita_inp.split('<eos>')[0][-1].isalpha() == True:
            ita_inp = ita_inp.replace('<eos>', ' <eos>')
        eng.append(eng_inp)
        ita.append(ita_inp)
    return eng, ita
eng_txt,ita_txt=np.array(pre_txt(d))
eng_txt.shape,ita_txt.shape
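To make the preprocessing concrete, here is a small illustration on one made-up raw line; the sample string follows the tab-separated "english<TAB>italian" layout that pre_txt assumes when it splits on '\t', and the printed outputs are the expected results, not taken from the dataset.
sample = "She won't answer.\tLei non risponderà.\n"
eng_demo, ita_demo = pre_txt([sample])
print(eng_demo[0])   # e.g. "<sos> she will not answer . <eos>"
print(ita_demo[0])   # e.g. "<sos> lei non risponder . <eos>"  (accented characters are stripped by the regex)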
counts, bin_edges = np.histogram([len(i.split(' ')) for i in ita_txt], bins=18,density = True,)
pdf = counts/(sum(counts))
print('pdf : ',pdf,'\n');
print('bin edge : ',bin_edges,'\n')
cdf = np.cumsum(pdf)
plt.figure(figsize=(8,6))
plt.plot(bin_edges[1:],pdf,label='Histogram of Italian Text')
plt.plot(bin_edges[1:], cdf,label='Cumulative distribution of Italian Text')
plt.title('histogram and cumulative distribution of Italian Text')
plt.legend()
plt.grid()
c=0
q=[]
for i in pdf:
    c = c + i
    q.append(c)
print('cumulative distribution : ', q)
counts, bin_edges = np.histogram([len(i.split(' ')) for i in eng_txt], bins=18,density = True,)
pdf = counts/(sum(counts))
print('pdf : ',pdf,'\n');
print('bin edge : ',bin_edges,'\n')
cdf = np.cumsum(pdf)
plt.figure(figsize=(8,6))
plt.plot(bin_edges[1:], pdf, label='Histogram of English Text')
plt.plot(bin_edges[1:], cdf, label='Cumulative distribution of English Text')
plt.title('histogram and cumulative distribution of English Text')
plt.legend()
plt.grid()
c=0
q=[]
for i in pdf:
    c = c + i
    q.append(c)
print('cumulative distribution : ', q)
def tokenize(lang):
    lang_tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='')
    lang_tokenizer.fit_on_texts(lang)
    tensor = lang_tokenizer.texts_to_sequences(lang)
    tensor = tf.keras.preprocessing.sequence.pad_sequences(tensor,
                                                           padding='post')
    print('vocab size : ', len(lang_tokenizer.word_index) + 1)
    return tensor, lang_tokenizer
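As a quick sanity check of what tokenize returns, a small illustration on a made-up toy corpus. Because <sos> and <eos> occur in every sentence, they end up with the lowest indexes (1 and 2 here), which is what the decoder-input/decoder-output construction further below relies on.
demo_texts = ['<sos> i love apples <eos>', '<sos> tom drinks tea <eos>']
demo_tensor, demo_tok = tokenize(demo_texts)
print(demo_tok.word_index)   # e.g. {'<sos>': 1, '<eos>': 2, 'i': 3, ...}
print(demo_tensor)           # padded id sequences; here both rows already have length 5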
ita_txt_new=[]
eng_txt_new=[]
for t, e in zip(ita_txt, eng_txt):
    # keep only pairs where both sentences have at most 16 tokens
    if len(t.split(' ')) <= 16 and len(e.split(' ')) <= 16:
        eng_txt_new.append(e)
        ita_txt_new.append(t)
eng_txt=eng_txt_new
ita_txt=ita_txt_new
input_tensor, inp_lang_tokenizer = tokenize(ita_txt)
target_tensor, targ_lang_tokenizer = tokenize(eng_txt)
print('input shape : ',input_tensor.shape)
print('target shape : ',target_tensor.shape)
pickle.dump(input_tensor, open('/content/drive/My Drive/seq2seq/input_tensor', 'wb'))
pickle.dump(target_tensor, open('/content/drive/My Drive/seq2seq/target_tensor', 'wb'))
pickle.dump(inp_lang_tokenizer, open('/content/drive/My Drive/seq2seq/inp_lang_tokenizer', 'wb'))
pickle.dump(targ_lang_tokenizer, open('/content/drive/My Drive/seq2seq/targ_lang_tokenizer', 'wb'))
input_tensor=pickle.load(open('/content/drive/My Drive/seq2seq/input_tensor', 'rb'))
target_tensor=pickle.load(open('/content/drive/My Drive/seq2seq/target_tensor', 'rb'))
inp_lang_tokenizer=pickle.load(open('/content/drive/My Drive/seq2seq/inp_lang_tokenizer', 'rb'))
targ_lang_tokenizer=pickle.load(open('/content/drive/My Drive/seq2seq/targ_lang_tokenizer', 'rb'))
print('input shape : ',input_tensor.shape)
print('target shape : ',target_tensor.shape)
decoder_input_target_tensor = []
for i in target_tensor:
    i = list(i)
    # decoder input: drop the <eos> token (index 2) and pad with a trailing 0
    if 2 in i:
        i.remove(2)
        i.append(0)
    decoder_input_target_tensor.append(i)
decoder_input_target_tensor = np.array(decoder_input_target_tensor)
decoder_output_target_tensor = []
for i in target_tensor:
    i = list(i)
    # decoder output: drop the <sos> token (index 1) and pad with a trailing 0
    if 1 in i:
        i.remove(1)
        i.append(0)
    decoder_output_target_tensor.append(i)
decoder_output_target_tensor = np.array(decoder_output_target_tensor)
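The two loops above implement the usual teacher-forcing shift: the decoder input keeps <sos> and drops <eos>, while the decoder target drops <sos> and keeps <eos>. A small illustration with made-up ids (1 = <sos>, 2 = <eos>, 0 = padding, as assumed above); the comprehensions mirror, but are not identical to, the loops above.
row = [1, 45, 7, 89, 2, 0, 0]                  # "<sos> w1 w2 w3 <eos>" plus padding
dec_in  = [t for t in row if t != 2] + [0]     # -> [1, 45, 7, 89, 0, 0, 0]
dec_out = [t for t in row if t != 1] + [0]     # -> [45, 7, 89, 2, 0, 0, 0]
print(dec_in, dec_out)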
(input_tensor_train, input_tensor_val,
 decoder_input_target_tensor_train, decoder_input_target_tensor_val,
 decoder_output_target_tensor_train, decoder_output_target_tensor_val) = train_test_split(
    input_tensor, decoder_input_target_tensor, decoder_output_target_tensor,
    test_size=0.18, random_state=42)
print('train input size : ',input_tensor_train.shape)
print('train input-output size : ',decoder_input_target_tensor_train.shape)
print('train output-output size : ',decoder_output_target_tensor_train.shape)
print('val input size : ',input_tensor_val.shape)
print('val input-output size : ',decoder_input_target_tensor_val.shape)
print('val output-output size : ',decoder_output_target_tensor_val.shape)
def convert(lang, tensor):
    for t in tensor:
        if t != 0:
            print("%d ----> %s" % (t, lang.index_word[t]))
print ("Input Language; index to word mapping")
convert(inp_lang_tokenizer, input_tensor[10])
print()
print ("Target input Language; index to word mapping")
convert(targ_lang_tokenizer, decoder_input_target_tensor[10])
print()
print ("Target output Language; index to word mapping")
convert(targ_lang_tokenizer, decoder_output_target_tensor[10])
s=273552
input_tensor_train=input_tensor_train[:s]#269280
decoder_input_target_tensor_train=decoder_input_target_tensor_train[:s]
decoder_output_target_tensor_train=decoder_output_target_tensor_train[:s]
print('train input size : ',input_tensor_train.shape)
print('train input-output size : ',decoder_input_target_tensor_train.shape)
print('train output-output size : ',decoder_output_target_tensor_train.shape)
p=60048
input_tensor_val=input_tensor_val[:p]#67296
decoder_input_target_tensor_val=decoder_input_target_tensor_val[:p]
decoder_output_target_tensor_val=decoder_output_target_tensor_val[:p]
print('val input size : ',input_tensor_val.shape)
print('val input-output size : ',decoder_input_target_tensor_val.shape)
print('val output-output size : ',decoder_output_target_tensor_val.shape)
class Encoder(tf.keras.Model):
    def __init__(self, vocab_size, embedding_size, lstm_size, input_length):
        super().__init__()
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.input_length = input_length
        self.lstm_size = lstm_size
        self.lstm_output = 0
        self.state_h = 0
        self.state_c = 0
        self.embedding = Embedding(input_dim=self.vocab_size, output_dim=self.embedding_size, input_length=self.input_length)
        self.lstm = LSTM(self.lstm_size, return_state=True, return_sequences=True, name="Encoder_LSTM")

    def call(self, input_sequence, states):
        input_embedd = self.embedding(input_sequence)
        self.lstm_output, self.lstm_state_h, self.lstm_state_c = self.lstm(input_embedd, initial_state=states)
        return self.lstm_output, self.lstm_state_h, self.lstm_state_c

    def initialize_states(self, batch_size):
        return tf.zeros((batch_size, self.lstm_size)), tf.zeros((batch_size, self.lstm_size))
def grader_check_encoder():
    vocab_size = 12
    embedding_size = 20
    lstm_size = 32
    input_length = 8
    batch_size = 16
    encoder = Encoder(vocab_size, embedding_size, lstm_size, input_length)
    input_sequence = tf.random.uniform(shape=[batch_size, input_length], maxval=vocab_size, minval=0, dtype=tf.int32)
    initial_state = encoder.initialize_states(batch_size)
    encoder_output, state_h, state_c = encoder(input_sequence, initial_state)
    assert(encoder_output.shape == (batch_size, input_length, lstm_size) and state_h.shape == (batch_size, lstm_size) and state_c.shape == (batch_size, lstm_size))
    return True
print(grader_check_encoder())
class Attention(tf.keras.Model):
    def __init__(self, scoring_function, att_units):
        super().__init__()
        self.scoring_function = scoring_function
        self.att_units = att_units
        self.softmax = Softmax()
        if self.scoring_function == 'dot':
            pass
        elif scoring_function == 'concat':
            self.W = tf.keras.layers.Dense(att_units, activation='relu', kernel_initializer='he_uniform')
            self.V = tf.keras.layers.Dense(1)

    def call(self, decoder_hidden_state, encoder_output):
        '''
        The attention mechanism takes two inputs: the current decoder_hidden_state and all the encoder_outputs.
        * Based on the scoring function, compute the score (similarity) between decoder_hidden_state and encoder_output.
        * Multiply the softmax of the scores with encoder_outputs to get the context vector.
        The function returns the context vector and the attention weights (softmax of the scores).
        '''
        if self.scoring_function == 'dot':
            state_h = decoder_hidden_state
            state = tf.expand_dims(state_h, 1)
            prob = []
            for i in range(encoder_output.shape[0]):
                eo = tf.transpose(encoder_output[i])
                dot = tf.matmul(state[i], eo)
                soft_out = self.softmax(dot[0])
                prob.append(soft_out)
            attention_weights = tf.reshape(tf.convert_to_tensor(prob), (encoder_output.shape[0], encoder_output.shape[1], 1))
            context_vector = attention_weights * encoder_output
            context_vector = tf.reduce_sum(context_vector, axis=1)
            return context_vector, attention_weights
        elif self.scoring_function == 'concat':
            state = tf.expand_dims(decoder_hidden_state, 1)
            state = tf.tile(state, [1, encoder_output.shape[1], 1])
            score = self.V(tf.nn.tanh(self.W(tf.concat([encoder_output, state], axis=-1))))
            score = tf.transpose(score, [0, 2, 1])
            attention_weights = tf.nn.softmax(score, axis=2)
            context_vector = tf.matmul(attention_weights, encoder_output)
            context_vector = tf.reshape(context_vector, shape=(context_vector.shape[0], context_vector.shape[2]))
            attention_weights = tf.reshape(attention_weights, shape=(attention_weights.shape[0], attention_weights.shape[2], attention_weights.shape[1]))
            return context_vector, attention_weights
def grader_check_attention(scoring_fun):
    input_length = 10
    batch_size = 16
    att_units = 32
    state_h = tf.random.uniform(shape=[batch_size, att_units])
    encoder_output = tf.random.uniform(shape=[batch_size, input_length, att_units])
    attention = Attention(scoring_fun, att_units)
    context_vector, attention_weights = attention(state_h, encoder_output)
    assert(context_vector.shape == (batch_size, att_units) and attention_weights.shape == (batch_size, input_length, 1))
    return True
print(grader_check_attention('dot'))
print(grader_check_attention('concat'))
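The dot branch above loops over the batch in Python; a batched formulation using a single matmul is sketched below. This is illustrative only (the helper name dot_attention_vectorized is made up and it is not a replacement for the graded class), but it should match the loop-based dot branch above.
def dot_attention_vectorized(decoder_hidden_state, encoder_output):
    # scores: (batch, T, 1) = (batch, T, units) @ (batch, units, 1)
    score = tf.matmul(encoder_output, tf.expand_dims(decoder_hidden_state, -1))
    attention_weights = tf.nn.softmax(score, axis=1)                              # softmax over the T encoder steps
    context_vector = tf.reduce_sum(attention_weights * encoder_output, axis=1)    # (batch, units)
    return context_vector, attention_weights

# quick shape check against the grader's expectations
state_h_demo = tf.random.uniform(shape=[16, 32])
enc_out_demo = tf.random.uniform(shape=[16, 10, 32])
ctx_demo, w_demo = dot_attention_vectorized(state_h_demo, enc_out_demo)
print(ctx_demo.shape, w_demo.shape)   # (16, 32) (16, 10, 1)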
class One_Step_Decoder(tf.keras.Model):
    def __init__(self, tar_vocab_size, embedding_dim, input_length, dec_units, score_fun, att_units):
        # Initialize decoder embedding layer, LSTM and any other objects needed
        super().__init__()
        self.tar_vocab_size = tar_vocab_size
        self.embedding_dim = embedding_dim
        self.input_length = input_length
        self.dec_units = dec_units
        self.score_fun = score_fun
        self.att_units = att_units
        self.attention = Attention(score_fun, att_units)
        self.softmax = Softmax()
        self.dense = Dense(self.tar_vocab_size)
        self.embedding = Embedding(input_dim=self.tar_vocab_size, output_dim=self.embedding_dim,
                                   input_length=1)
        self.lstm = LSTM(self.dec_units, return_state=True, return_sequences=True, name="Decoder_LSTM")

    def call(self, input_to_decoder, encoder_output, state_h, state_c):
        # A. Embed the single decoder input token: (batch, 1) -> (batch, 1, embedding_dim)
        emb = self.embedding(input_to_decoder)
        # B. Compute the context vector from the encoder outputs and the previous decoder hidden state
        context_vector, attention_weights = self.attention(state_h, encoder_output)
        context_vector = tf.expand_dims(context_vector, 1)
        # C. Concatenate the context vector with the embedded input
        con = Concatenate()([emb, context_vector])
        # D. Pass the concatenation through the LSTM to get the output and the new states
        decoder_out, hidden_state, cell_state = self.lstm(con, initial_state=[state_h, state_c])
        # E. Project the LSTM output onto the target vocabulary
        dense_out = self.dense(decoder_out)
        return tf.reshape(dense_out, (dense_out.shape[0], dense_out.shape[2])), hidden_state, cell_state, attention_weights, tf.reshape(context_vector, (context_vector.shape[0], context_vector.shape[2]))
#One step decoder mechanism, step by step:
#A. Pass the input_to_decoder to the embedding layer and then get the output(1,1,embedding_dim)
#B. Using the encoder_output and decoder hidden state, compute the context vector.
#C. Concat the context vector with the step A output
#D. Pass the Step-C output to LSTM/GRU and get the decoder output and states(hidden and cell state)
#E. Pass the decoder output to dense layer(vocab size) and store the result into output.
#F. Return the states from step D, output from Step E, attention weights from Step -B
def grader_onestepdecoder(score_fun):
    vocab_size = 13
    embedding_dim = 12
    input_length = 10
    dec_units = 16
    att_units = 16
    batch_size = 32
    onestepdecoder = One_Step_Decoder(vocab_size, embedding_dim, input_length, dec_units, score_fun, att_units)
    input_to_decoder = tf.random.uniform(shape=(batch_size, 1), maxval=10, minval=0, dtype=tf.int32)
    encoder_output = tf.random.uniform(shape=[batch_size, input_length, dec_units])
    state_h = tf.random.uniform(shape=[batch_size, dec_units])
    state_c = tf.random.uniform(shape=[batch_size, dec_units])
    output, state_h, state_c, attention_weights, context_vector = onestepdecoder(input_to_decoder, encoder_output, state_h, state_c)
    assert(output.shape == (batch_size, vocab_size))
    assert(state_h.shape == (batch_size, dec_units))
    assert(state_c.shape == (batch_size, dec_units))
    assert(attention_weights.shape == (batch_size, input_length, 1))
    assert(context_vector.shape == (batch_size, dec_units))
    return True
print(grader_onestepdecoder('dot'))
print(grader_onestepdecoder('concat'))
class Decoder(tf.keras.Model):
    def __init__(self, out_vocab_size, embedding_dim, output_length, dec_units, score_fun, att_units, input_length):
        super().__init__()
        self.onestepDecoder = One_Step_Decoder(out_vocab_size, embedding_dim, input_length, dec_units, score_fun, att_units)

    def call(self, input_to_decoder, encoder_output, decoder_hidden_state, decoder_cell_state):
        # Initialize an empty TensorArray that will store the output at each and every time step
        all_outputs = tf.TensorArray(tf.float32, size=tf.shape(input_to_decoder)[1], name='output_array')
        for timestep in range(0, tf.shape(input_to_decoder)[1]):
            output, decoder_hidden_state, decoder_cell_state, _, _ = self.onestepDecoder(input_to_decoder[:, timestep:timestep+1], encoder_output, decoder_hidden_state, decoder_cell_state)
            # storing the one step decoder outputs in the TensorArray
            all_outputs = all_outputs.write(timestep, output)
        all_outputs = tf.transpose(all_outputs.stack(), [1, 0, 2])
        return all_outputs
def grader_decoder(score_fun):
    out_vocab_size = 13
    embedding_dim = 12
    input_length = 10
    output_length = 11
    dec_units = 16
    att_units = 16
    batch_size = 32
    target_sentences = tf.random.uniform(shape=(batch_size, output_length), maxval=10, minval=0, dtype=tf.int32)
    encoder_output = tf.random.uniform(shape=[batch_size, input_length, dec_units])
    state_h = tf.random.uniform(shape=[batch_size, dec_units])
    state_c = tf.random.uniform(shape=[batch_size, dec_units])
    decoder = Decoder(out_vocab_size, embedding_dim, output_length, dec_units, score_fun, att_units, input_length)
    output = decoder(target_sentences, encoder_output, state_h, state_c)
    assert(output.shape == (batch_size, output_length, out_vocab_size))  # (32, 11, 13)
    return True
print(grader_decoder('dot'))
print(grader_decoder('concat'))
optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
from_logits=True, reduction='none')
def loss_function(real, pred):
    # mask out the padding tokens (index 0) so they do not contribute to the loss
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)
    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask
    return tf.reduce_mean(loss_)
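To see how the padding mask works, a small illustration with made-up labels and random logits: the per-token loss at the padded position (label 0) is zeroed out before averaging. Note that reduce_mean still divides by the full number of positions, masked ones included, which is exactly how the function above behaves.
real_demo = tf.constant([[3, 2, 0]])                      # last position is padding
pred_demo = tf.random.uniform((1, 3, 5))                  # logits over a toy vocabulary of 5
per_token = loss_object(real_demo, pred_demo)             # (1, 3) per-token losses
mask_demo = tf.cast(tf.math.logical_not(tf.math.equal(real_demo, 0)), per_token.dtype)
print(per_token * mask_demo)                              # loss at the padded position is 0
print(loss_function(real_demo, pred_demo))                # mean over all 3 positions; the padded one contributes 0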
class encoder_decoder(tf.keras.Model):
    def __init__(self, vocab_inp_size, embedding_size, lstm_units, input_length, batch_size, vocab_tar_size, output_length, scoring_fun):
        super().__init__()
        # Initialize the encoder and decoder objects
        # 1. Encoder with its initial (zero) LSTM states
        self.encoder = Encoder(vocab_inp_size, embedding_size, lstm_units, input_length)
        self.initial_state = self.encoder.initialize_states(batch_size)
        # 2. Decoder with the chosen attention scoring function
        self.decoder = Decoder(vocab_tar_size, embedding_size, output_length, lstm_units, scoring_fun, lstm_units, input_length)

    def call(self, data):
        input_sequence = data[0]
        target_sentences = data[1]
        encoder_output, state_h, state_c = self.encoder(input_sequence, self.initial_state)
        output = self.decoder(target_sentences, encoder_output, state_h, state_c)
        return output
max_length_inp=input_tensor.shape[1]
max_length_targ=target_tensor.shape[1]
def preprocess_sentence(data):
    u = data.lower()
    u = re.sub(r"'m", ' am', u)
    u = re.sub(r"'ll", ' will', u)
    u = re.sub(r"'d", ' had', u)
    u = re.sub(r"'s", ' is', u)
    u = re.sub(r"'ve", ' have', u)
    u = re.sub(r"'re", ' are', u)
    u = re.sub(r"won't", 'will not', u)
    u = re.sub(r"can't", 'can not', u)
    u = re.sub(r"o'clock", '', u)
    u = re.sub(r"n't", ' not ', u)  # e.g. "haven't" -> "have not"
    u = re.sub(r"([?.!,¿])", r" \1 ", u)
    # u = re.sub(r'[^a-zA_Z0-9]',' ',u)
    q = re.sub(r"[^a-zA-Z?.!,¿]+", " ", u)
    sen = '<sos> ' + q + '<eos>'
    return sen
def evaluate(sentence):
    attention_plot = np.zeros((max_length_targ, max_length_inp))
    sentence = preprocess_sentence(sentence)
    inputs = [inp_lang_tokenizer.word_index[i] for i in sentence.split(' ')]
    inputs = tf.keras.preprocessing.sequence.pad_sequences([inputs],
                                                           maxlen=max_length_inp,
                                                           padding='post')
    inputs = tf.convert_to_tensor(inputs)
    result = ''
    enc_hidden = encoder.initialize_states(1)
    enc_output, enc_hidden, enc_cell = model.layers[0](inputs, enc_hidden)
    dec_hidden = enc_hidden
    dec_cell = enc_cell
    dec_input = tf.expand_dims([targ_lang_tokenizer.word_index['<sos>']], 0)
    for t in range(max_length_targ):
        predictions, dec_hidden, dec_cell, attention_weights, context_vector = model.layers[1].onestepDecoder(dec_input, enc_output, dec_hidden, dec_cell, training=False)
        # storing the attention weights to plot later on
        attention_weights = tf.reshape(attention_weights, (-1,))
        attention_plot[t] = attention_weights.numpy()
        predicted_id = tf.argmax(predictions[0]).numpy()
        result += targ_lang_tokenizer.index_word[predicted_id] + ' '
        if targ_lang_tokenizer.index_word[predicted_id] == '<eos>':
            return result, sentence, attention_plot
        # the predicted ID is fed back into the model
        dec_input = tf.expand_dims([predicted_id], 0)
    return result, sentence, attention_plot
def plot_attention(attention, sentence, predicted_sentence):
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(1, 1, 1)
    ax.matshow(attention, cmap='gray')
    fontdict = {'fontsize': 14}
    ax.set_xticklabels([''] + sentence, fontdict=fontdict, rotation=90)
    ax.set_yticklabels([''] + predicted_sentence, fontdict=fontdict)
    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(1))
    plt.show()
def translate(sentence):
    result, sentence, attention_plot = evaluate(sentence)
    print('Input: %s' % (sentence))
    print('Predicted translation: {}'.format(result))
    attention_plot = attention_plot[:len(result.split(' ')), :len(sentence.split(' '))]
    plot_attention(attention_plot, sentence.split(' '), result.split(' '))
def tok2word1(data, tokenizer):
    # convert a padded id sequence back to text, dropping the leading <sos> and stopping at <eos>
    a = ''
    for i in data:
        if tokenizer.index_word[i] == '<eos>':
            break
        a = a + ' ' + tokenizer.index_word[i]
    a = a.split('<sos>')[1][1:] + ' '
    return a
def tok2word2(data, tokenizer):
    # convert a padded id sequence back to text, stopping at <eos>
    a = ''
    for i in data:
        if tokenizer.index_word[i] == '<eos>':
            break
        a = a + ' ' + tokenizer.index_word[i]
    return a
index=random.sample(range(0,input_tensor_val.shape[0]),1000)
def bleu_score(input_val, target_val):
    score = 0
    for i in index:
        inn = input_val[i]
        out = target_val[i]
        in_sen = tok2word1(inn, inp_lang_tokenizer)
        out_sen = tok2word2(out, targ_lang_tokenizer)
        ref = [out_sen.split()]
        translation, _, _ = evaluate(in_sen)
        trans = translation.split()[:-1]
        res = bleu.sentence_bleu(ref, trans)
        score = score + res
    score = score / len(index)
    print('avg. bleu score : ', score)
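sentence_bleu with the default settings can return a near-zero score and emit a warning when there is no higher-order n-gram overlap, which is common for short sentences; NLTK's SmoothingFunction is one way to handle that. A small illustration of the call with made-up token lists (the smoothed variant is optional and is not used by bleu_score above).
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

reference = [['i', 'love', 'the', 'apple']]
hypothesis = ['i', 'love', 'apple']
print(sentence_bleu(reference, hypothesis))                                                   # default 4-gram BLEU
print(sentence_bleu(reference, hypothesis, smoothing_function=SmoothingFunction().method1))   # smoothed variant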
vocab_inp_size = len(inp_lang_tokenizer.word_index)+1
vocab_tar_size = len(targ_lang_tokenizer.word_index)+1
embedding_size=378
lstm_units=470
input_length=input_tensor.shape[1]
output_length=decoder_input_target_tensor_train.shape[1]
batch_size=48
encoder=Encoder(vocab_inp_size,embedding_size,lstm_units,input_length)
initial_state=encoder.initialize_states(batch_size)
scoring_fun='dot'
model = encoder_decoder(vocab_inp_size,embedding_size,lstm_units,input_length,batch_size,vocab_tar_size,output_length,scoring_fun)
optimizer = tf.keras.optimizers.Adam()
model.compile(optimizer=optimizer,loss=loss_function)
model.fit([input_tensor_train,decoder_input_target_tensor_train], decoder_output_target_tensor_train,epochs=6,batch_size=48,validation_data=([input_tensor_val,decoder_input_target_tensor_val], decoder_output_target_tensor_val))
model.save_weights("/content/drive/My Drive/model_dot_1/dot_pos2.hdf5")
model.load_weights("/content/drive/My Drive/model_dot_1/dot_pos2.hdf5")
translate(u'amo la mela ')
print('Actual eng sentence : i love apple')
translate('non posso rispondere alla tua domanda ')
print('Actual eng sentence : i can not answer your question ')
translate(u'tom non sembrava essere molto interessato alla scuola ')
print('Actual eng sentence : tom did not seem to be very interested in school ')
bleu_score(input_tensor_val,decoder_output_target_tensor_val)
vocab_inp_size = len(inp_lang_tokenizer.word_index)+1
vocab_tar_size = len(targ_lang_tokenizer.word_index)+1
embedding_size=378
lstm_units=470
input_length=input_tensor.shape[1]
output_length=decoder_input_target_tensor_train.shape[1]
batch_size=48
steps_per_epoch = ((len(input_tensor_train)+1)//batch_size)+1
scoring_fun='concat'
encoder=Encoder(vocab_inp_size,embedding_size,lstm_units,input_length)
initial_state=encoder.initialize_states(batch_size)
model = encoder_decoder(vocab_inp_size,embedding_size,lstm_units,input_length,batch_size,vocab_tar_size,output_length,scoring_fun)
optimizer = tf.keras.optimizers.Adam()
model.compile(optimizer=optimizer,loss=loss_function)
model.fit([input_tensor_train,decoder_input_target_tensor_train], decoder_output_target_tensor_train,epochs=6,batch_size=48,validation_data=([input_tensor_val,decoder_input_target_tensor_val], decoder_output_target_tensor_val))
model.save_weights("/content/drive/My Drive/model_concat_1/con_pos2.hdf5")
model.load_weights("/content/drive/My Drive/model_concat_1/con_pos2.hdf5")
translate(u'amo la mela .')
print('Actual eng sentence : i love apple')
translate('non posso rispondere alla tua domanda .')
print('Actual eng sentence : i can not answer your question ')
translate(u'tom non sembrava essere molto interessato alla scuola .')
print('Actual eng sentence : tom did not seem to be very interested in school .')
bleu_score(input_tensor_val,decoder_output_target_tensor_val)