"""Fit and plot a Kaplan-Meier survival curve from a patient CSV export."""

from datetime import date
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
from lifelines import KaplanMeierFitter


def plotThecurve(kmf):
    """Plot the Kaplan-Meier survival curve, save it as a PNG and display it.

    Args:
        kmf: A fitted estimator exposing ``plot_survival_function()``.
    """
    plt.figure(figsize=(10, 6))
    kmf.plot_survival_function()
    plt.title("Kaplan-Meier Survival Curve")
    plt.xlabel("Time (days)")
    plt.ylabel("Survival Probability")
    plt.grid()

    # Save the plot. The output directory is created on demand so that
    # savefig does not fail on a fresh checkout where it is missing.
    output_path = Path('plots/Kaplan_Meier_Survival_Curve.png')
    output_path.parent.mkdir(parents=True, exist_ok=True)
    plt.savefig(output_path)
    plt.show()


def calculate_survival_time(row):
    """Return the survival time in days for a single record.

    Args:
        row: Mapping-like record providing "Event", "Date of Diagnosis",
            "Todays_date" and, when the event occurred, "Date of Death".

    Returns:
        Days from diagnosis to death when ``row["Event"]`` is truthy,
        otherwise days from diagnosis to ``row["Todays_date"]``.
    """
    if row["Event"]:
        # The event (death) occurred: measure up to the date of death.
        return (row["Date of Death"] - row["Date of Diagnosis"]).days
    # The event did not occur: measure up to today's date.
    # NOTE(review): censoring at "Date of Last Follow-Up" may be more
    # appropriate than today's date -- confirm with the study owner.
    return (row["Todays_date"] - row["Date of Diagnosis"]).days


def main():
    """Load the dataset, derive survival times, fit the estimator and plot.

    Raises:
        ValueError: If no record survives the date filtering step.
    """
    df = pd.read_csv("KaplanMeierDataset/Book1CSV.csv")
    # Today's date is the end point for records with no death recorded.
    df["Todays_date"] = pd.to_datetime(date.today())
    print(df.head(5))  # Printing first 5 records

    # Data preprocessing: convert date columns to datetime. Values that do
    # not match the day.month.year format are coerced to NaT.
    date_columns = [
        "Date of Birth",
        "Date of Diagnosis",
        "Date of Death",
        "Date of Last Follow-Up",
        "Date of Admission",
        "Date of Discharge",
    ]
    for col in date_columns:
        df[col] = pd.to_datetime(df[col], format="%d.%m.%Y", errors="coerce")

    # Keep only rows whose diagnosis date is not after the last follow-up
    # and not in the future; otherwise the survival time comes out negative.
    # Rows with a missing diagnosis or follow-up date are dropped as well,
    # because comparisons against NaT evaluate to False.
    valid_dates = (
        (df['Date of Diagnosis'] <= df['Date of Last Follow-Up'])
        & (df['Date of Diagnosis'] <= df["Todays_date"])
    )
    # Copy the selection so the column assignments below operate on an
    # independent frame instead of a slice of the original one.
    df = df[valid_dates].copy()
    print(df.shape)

    if df.empty:
        raise ValueError(
            "No records remain after date filtering; "
            "cannot fit a survival curve."
        )

    # Event indicator: 1 if a date of death is present, otherwise 0.
    df["Event"] = df["Date of Death"].notnull().astype(int)
    print(df.head(5))

    # Apply the function to each row to compute the survival time.
    df["Survival Time days"] = df.apply(calculate_survival_time, axis=1)
    print(df.head(5))

    # Fit the Kaplan-Meier estimator.
    kmf = KaplanMeierFitter()
    kmf.fit(durations=df["Survival Time days"], event_observed=df["Event"])

    # Plot the curve.
    plotThecurve(kmf)


# Main program starts from here
if __name__ == "__main__":
    main()