prediction_Validation_Insertion.py · wafer-detection

from app_logs.logger import App_logger
from Predict_raw_data_validation.predictionRawValidation import prediction_data_validation
from DataTransform_prediction.DataTransformation import dataTransform
from Data_Insertion_prediction.DataInsertion import dbOperations

class Pred_validation:
	"""
	Drives the prediction-batch pipeline end to end: raw file validation,
	data transformation, database load, clean-up and export to csv.

	Each step is delegated to a collaborator created in ``__init__``
	(``prediction_data_validation``, ``dataTransform``, ``dbOperations``);
	progress and errors are written through ``App_logger`` to the main
	prediction log file.
	"""

	# Main prediction log, opened in 'a+' mode so existing content is kept.
	LOG_FILE_PATH = "Prediction_Logs/Prediction_Main_Log.txt"

	def __init__(self, path):
		"""
		Build the collaborators and open the main prediction log.

		:param path: passed unchanged to ``prediction_data_validation``
			(presumably the directory holding the prediction batch files --
			confirm against that class).
		"""
		self.raw_validation = prediction_data_validation(path)
		self.dataTransform = dataTransform()
		self.log_file = open(self.LOG_FILE_PATH, 'a+')
		self.dbOperations = dbOperations()
		self.logger = App_logger()

	def _log(self, message):
		"""Write one message to the main prediction log via the app logger."""
		self.logger.log(self.log_file, message)

	def prediction_validation(self):
		"""
		Run validation, transformation, DB insertion, clean-up and csv export.

		The steps run strictly in order; the first failure aborts the run.
		The log file is closed when this method finishes, whether it returns
		normally or raises. If the log file is already closed on entry (for
		example after a previous call), it is reopened in append mode so the
		instance can be reused.

		:raises Exception: whatever a pipeline step raised, after an
			"Error occurred" entry has been written to the log.
		"""
		if self.log_file.closed:
			# A previous run closed the handle; reopen instead of failing on write.
			self.log_file = open(self.LOG_FILE_PATH, 'a+')

		try:
			self._log("Starting Prediction Data validation ...")
			# Extract the expected file-name and column layout from the schema
			LengthOfDateStampInFile, LengthOfTimeStampInFile, NumberofColumns, ColName = self.raw_validation.valuesFromSchema()
			# Get regex pattern for filename validation
			regex = self.raw_validation.schemaRegex()
			# Validate file names in prediction data and move to Good/Bad directories
			self.raw_validation.validateFileNameRaw(regex, LengthOfDateStampInFile, LengthOfTimeStampInFile)
			# Validate no of columns for files in Good directory and otherwise move to Bad directory
			self.raw_validation.validateColumnLength(NumberofColumns)
			# Validate if any column has all missing values
			self.raw_validation.validateAllMissingValuesInColumns()
			# Get information on good/bad files
			total_files, good_files, bad_files = self.raw_validation.goodBadFileCount()
			self._log(f'Total files: {total_files}, Good files: {good_files}, Bad files: {bad_files}')
			self._log("Prediction Data validation completed !!")

			self._log("Starting Prediction Data Transformation ...")
			# Replace missing values with Null
			self.dataTransform.replaceMissingWithNull()
			self._log("Prediction Data Transformation completed !!")

			self._log("Creating Prediction Database and tables per given schema ...")
			# Create database, table and add columns
			self.dbOperations.createTableDb('Prediction', ColName)
			self._log("Table creation completed!!!")
			self._log("Insertion of data into table started ...")
			# Insert csv data into the table
			self.dbOperations.insertDataIntoTable('Prediction')
			self._log("Insertion in table completed!!!")
			self._log("Deleting Good Data Folder ...")
			# Delete good data folder
			self.raw_validation.deleteExistingGoodRawFolder()
			self._log("Good data folder deleted !!!")
			self._log("Moving Bad Files to Archive and deleting Bad Data Folder ...")
			self.raw_validation.moveBadDataToArchive()
			self._log("Bad data archived successfully !!!")
			self._log("Extracting csv file from table ...")
			# Export data from DB table into a csv
			self.dbOperations.exportFromDbtoCsv('Prediction')
			self._log("Data exported to csv successfully !!!")
		except Exception as e:
			self._log(f'Error occurred: {e}')
			# Bare raise re-raises the active exception as-is.
			raise
		finally:
			# Runs on success, on failure, and even if logging the error itself
			# failed, so the handle is never leaked.
			self.log_file.close()

#### Test ###
# path = "Prediction_Batch_Files"
# val_data = Pred_validation(path)
# val_data.prediction_validation()