# wafer-detection / training_Validation_Insertion.py
from app_logs.logger import App_logger
from Train_raw_data_validation.rawValidation import raw_data_validation
from DataTransform_training.DataTransformation import dataTransform
from Data_Insertion_training.DataInsertion import dbOperations

class train_validation:
	"""
	Run the training-data pipeline: validate raw files, transform them,
	load them into the 'Training' database table and export the table to csv.

	Each stage is delegated to a collaborator created in ``__init__``
	(``raw_data_validation``, ``dataTransform``, ``dbOperations``); progress
	and errors are written to the main training log through ``App_logger``.
	"""

	# Main log file that every stage of the pipeline appends to.
	LOG_FILE_PATH = "Training_logs/Training_Main_Log.txt"

	def __init__(self, path):
		"""
		Create the collaborators and open the main training log in append mode.

		:param path: forwarded unchanged to ``raw_data_validation``
			(presumably the directory holding the raw training batch files --
			confirm against ``raw_data_validation``).
		"""
		self.raw_validation = raw_data_validation(path)
		self.dataTransform = dataTransform()
		self.log_file = open(self.LOG_FILE_PATH, 'a+')
		self.dbOperations = dbOperations()
		self.logger = App_logger()

	def train_validation(self):
		"""
		Execute every pipeline stage in order and close the log file afterwards.

		Order: raw data validation -> data transformation -> table creation and
		insertion -> removal of the Good folder and archiving of Bad files ->
		export of the table to csv.

		:return: None
		:raises Exception: any exception raised by a stage is logged as
			``Error occurred: <exc>`` and then re-raised unchanged.
		"""
		# A previous run closes the log on exit; reopen it so the same object
		# can be run again instead of failing on a closed handle.
		if self.log_file.closed:
			self.log_file = open(self.LOG_FILE_PATH, 'a+')

		try:
			column_names = self._validate_raw_data()
			self._transform_data()
			self._load_into_database(column_names)
			self._clean_up_folders()
			self._export_to_csv()
		except Exception as e:
			self._log(f'Error occurred: {e}')
			# Bare raise keeps the original exception and traceback intact.
			raise
		finally:
			# Single close point: runs on success, on failure, and even when
			# writing the error message itself fails.
			self.log_file.close()

	def _log(self, message):
		"""Write one message to the main training log."""
		self.logger.log(self.log_file, message)

	def _validate_raw_data(self):
		"""Validate the raw files against the schema and return the schema's column names."""
		self._log("Starting Raw Data validation ...")
		# Extract values from training schema
		date_stamp_len, time_stamp_len, column_count, column_names = self.raw_validation.valuesFromSchema()
		# Get regex pattern for filename validation
		regex = self.raw_validation.schemaRegex()
		# Validate file names in training data and move to Good/Bad directories
		self.raw_validation.validateFileNameRaw(regex, date_stamp_len, time_stamp_len)
		# Validate no of columns for files in Good directory and otherwise move to Bad directory
		self.raw_validation.validateColumnLength(column_count)
		# Validate if any column has all missing values
		self.raw_validation.validateAllMissingValuesInColumns()
		# Get information on good/bad files
		total_files, good_files, bad_files = self.raw_validation.goodBadFileCount()
		self._log(f'Total files: {total_files}, Good files: {good_files}, Bad files: {bad_files}')
		self._log("Raw Data validation completed !!")
		return column_names

	def _transform_data(self):
		"""Replace missing values with Null in the validated files."""
		self._log("Starting Data Transformation ...")
		self.dataTransform.replaceMissingWithNull()
		self._log("Data Transformation completed !!")

	def _load_into_database(self, column_names):
		"""Create the 'Training' database/table from the schema columns and insert the csv data."""
		self._log("Creating Training Database and tables per given schema ...")
		# Create database, table and add columns
		self.dbOperations.createTableDb('Training', column_names)
		self._log("Table creation completed!!!")
		self._log("Insertion of data into table started ...")
		# Insert csv data into the table
		self.dbOperations.insertDataIntoTable('Training')
		self._log("Insertion in table completed!!!")

	def _clean_up_folders(self):
		"""Delete the Good data folder and move Bad files to the archive."""
		self._log("Deleting Good Data Folder ...")
		self.raw_validation.deleteExistingGoodRawFolder()
		self._log("Good data folder deleted !!!")
		self._log("Moving Bad Files to Archive and deleting Bad Data Folder ...")
		self.raw_validation.moveBadDataToArchive()
		self._log("Bad data archived successfully !!!")

	def _export_to_csv(self):
		"""Export the 'Training' table from the database into a csv file."""
		self._log("Extracting csv file from table ...")
		self.dbOperations.exportFromDbtoCsv('Training')
		self._log("Data exported to csv successfully !!!")

#### Test ###
# path = "Training_Batch_Files"
# val_data = train_validation(path)
# val_data.train_validation()