from app_logs.logger import App_logger
from Predict_raw_data_validation.predictionRawValidation import prediction_data_validation
from DataTransform_prediction.DataTransformation import dataTransform
from Data_Insertion_prediction.DataInsertion import dbOperations


class Pred_validation:
    """Run the prediction-data pipeline: validate, transform, load, export.

    The class wires together the raw-file validator, the data transformer and
    the database helper, and writes progress messages to
    ``Prediction_Logs/Prediction_Main_Log.txt`` through ``App_logger``.

    NOTE: the log file is opened in ``__init__`` and closed at the end of
    ``prediction_validation`` (on success or failure), so an instance is
    meant to run the pipeline once.
    """

    def __init__(self, path):
        """Create the pipeline collaborators and open the main log file.

        Args:
            path: Passed unchanged to ``prediction_data_validation``
                (presumably the directory holding the raw prediction batch
                files -- confirm against that class).
        """
        self.raw_validation = prediction_data_validation(path)
        self.dataTransform = dataTransform()
        # Opened in append mode so successive runs accumulate in one log.
        self.log_file = open("Prediction_Logs/Prediction_Main_Log.txt", 'a+')
        self.dbOperations = dbOperations()
        self.logger = App_logger()

    def prediction_validation(self):
        """Execute every pipeline stage in order, logging each step.

        Stages, in order: schema lookup, file-name validation, column-count
        validation, all-missing-column validation, good/bad file counting,
        missing-value replacement, table creation, data insertion, deletion
        of the good-data folder, archiving of bad data, and export of the
        table to csv.

        Raises:
            Exception: Any exception raised by a stage is logged to the main
                log file and then re-raised unchanged.
        """
        try:
            self.logger.log(self.log_file, "Starting Prediction Data validation ...")

            # Extract the expected file/column layout from the schema.
            (LengthOfDateStampInFile,
             LengthOfTimeStampInFile,
             NumberofColumns,
             ColName) = self.raw_validation.valuesFromSchema()

            # Get regex pattern for filename validation.
            regex = self.raw_validation.schemaRegex()

            # Validate file names and sort the files into Good/Bad directories.
            self.raw_validation.validateFileNameRaw(
                regex, LengthOfDateStampInFile, LengthOfTimeStampInFile)

            # Validate the number of columns for files in the Good directory;
            # files that fail are moved to the Bad directory.
            self.raw_validation.validateColumnLength(NumberofColumns)

            # Validate whether any column has all values missing.
            self.raw_validation.validateAllMissingValuesInColumns()

            # Report how many files ended up good/bad.
            total_files, good_files, bad_files = self.raw_validation.goodBadFileCount()
            msg = f'Total files: {total_files}, Good files: {good_files}, Bad files: {bad_files}'
            self.logger.log(self.log_file, msg)
            self.logger.log(self.log_file, "Prediction Data validation completed !!")

            self.logger.log(self.log_file, "Starting Prediction Data Transformation ...")
            # Replace missing values with Null.
            self.dataTransform.replaceMissingWithNull()
            self.logger.log(self.log_file, "Prediction Data Transformation completed !!")

            self.logger.log(self.log_file, "Creating Prediction Database and tables per given schema ...")
            # Create database, table and add columns.
            self.dbOperations.createTableDb('Prediction', ColName)
            self.logger.log(self.log_file, "Table creation completed!!!")

            self.logger.log(self.log_file, "Insertion of data into table started ...")
            # Insert csv data into the table.
            self.dbOperations.insertDataIntoTable('Prediction')
            self.logger.log(self.log_file, "Insertion in table completed!!!")

            self.logger.log(self.log_file, "Deleting Good Data Folder ...")
            # Delete good data folder.
            self.raw_validation.deleteExistingGoodRawFolder()
            self.logger.log(self.log_file, "Good data folder deleted !!!")

            self.logger.log(self.log_file, "Moving Bad Files to Archive and deleting Bad Data Folder ...")
            self.raw_validation.moveBadDataToArchive()
            self.logger.log(self.log_file, "Bad data archived successfully !!!")

            self.logger.log(self.log_file, "Extracting csv file from table ...")
            # Export data from DB table into a csv.
            self.dbOperations.exportFromDbtoCsv('Prediction')
            self.logger.log(self.log_file, "Data exported to csv successfully !!!")
        except Exception as e:
            msg = f'Error occurred: {e}'
            self.logger.log(self.log_file, msg)
            # Bare raise re-raises the same exception object.
            raise
        finally:
            # Close the log on every exit path, including the case where
            # writing the error message itself fails.
            self.log_file.close()


#### Test ###
# path = "Prediction_Batch_Files"
# val_data = Pred_validation(path)
# val_data.prediction_validation()