import copy
import json
import os

import pandas as pd

try:
    import matplotlib.pyplot as plt
except ImportError:
    # Plotting is optional: the best-checkpoint lookup only needs pandas.
    plt = None

# Module-level example path. ``load_progress`` does not read it; the function
# works on its own ``checkpoint_path`` argument.
checkpoint_path = "foo/bar/checkpoint_1500"

_ENV_INFO_PREFIX = "evaluation/env_infos/"
_MEAN_SUFFIX = "-mean-mean"
_FIGURE_SIZE = (18, 6)

# Imitation terms whose normalisation weight is read from the reward
# dictionary, mapped to the dictionary key that holds the weight.
_IMITATION_WEIGHT_KEYS = {
    "imitation_contact_reward": "imit_eef_contact_reward",
    "imitation_foot_orientation_reward": "imit_eef_orientation_reward",
    "imitation_foot_pos_reward": "imit_eef_pos_reward",
    "imitation_joint_pos_reward": "imit_joint_pos_reward",
    "imitation_contact_term": "imit_eef_contact_reward",
}

# Imitation terms that are normalised by a fixed constant instead.
_IMITATION_FIXED_WEIGHTS = {
    "imitation_joint_vel_reward": 1,
    "reward_imitation": 10,
}

# One (left panel, right panel) pair of column filters per figure, used when
# no imitation/goal weights are configured.  Each filter receives the raw
# column name and the stripped term name.
_PLAIN_TERM_FIGURES = (
    (
        lambda key, name: (
            "evaluation" in key
            and name.startswith("l")
            and "pos" in name
            and "joint" not in name
        ),
        lambda key, name: (
            "evaluation" in key
            and "r" in key
            and "pos" in key
            and "joint" not in key
            and name.startswith("r")
            and "pos" in name
            and "joint" not in name
        ),
    ),
    (
        lambda key, name: (
            "evaluation" in key and "vel" in key and "joint" not in key
        ),
        lambda key, name: "evaluation" in key and "joint" in key,
    ),
    (
        lambda key, name: (
            "evaluation" in key
            and "vel" not in key
            and "joint" not in key
            and "pos" not in key
        ),
        lambda key, name: "evaluation" in key and "box_pos" in key,
    ),
)


def _term_name(key):
    """Strip the env-info prefix and the mean suffix from a column name."""
    return key.replace(_ENV_INFO_PREFIX, "").replace(_MEAN_SUFFIX, "")


def _smooth(series, window):
    """Return the rolling mean of ``series`` over ``window`` rows."""
    return series.rolling(window=window).mean()


def _running_best(epochs, returns):
    """Return the epochs/values at which ``returns`` reached a new maximum.

    The last best value is repeated at the final epoch so that the plotted
    line extends to the end of training.  Both lists are empty when no value
    exceeds the initial sentinel (for example an all-NaN column).
    """
    best = -99999
    best_epochs = []
    best_returns = []
    for epoch, value in zip(epochs, returns):
        if value > best:
            best = value
            best_epochs.append(epoch)
            best_returns.append(value)
    if best_returns:
        best_epochs.append(epochs.iloc[-1])
        best_returns.append(best_returns[-1])
    return best_epochs, best_returns


def _best_checkpoint(returns, checkpoint_increment):
    """Return ``(best_return, row_index)`` over checkpoint-aligned rows.

    Only rows whose index is a multiple of ``checkpoint_increment`` are
    considered.
    """
    best_return = -999999
    best_idx = 0
    for idx, value in enumerate(returns):
        if idx % checkpoint_increment == 0 and value > best_return:
            best_return = value
            # Row 0 is reported as the first increment; presumably no
            # checkpoint is saved at index 0 -- TODO confirm.
            best_idx = idx if idx != 0 else checkpoint_increment
    return best_return, best_idx


def _imitation_weight(name, reward_dict):
    """Return the normalisation weight for the imitation term ``name``."""
    if name in _IMITATION_FIXED_WEIGHTS:
        return _IMITATION_FIXED_WEIGHTS[name]
    if name in _IMITATION_WEIGHT_KEYS:
        return reward_dict[_IMITATION_WEIGHT_KEYS[name]]
    # Raised explicitly (not asserted) so the check survives ``python -O``.
    raise ValueError("Term %s does not exist" % name)


def _plot_return_panels(progress, epochs, prefix, label, window, column):
    """Draw the return and episode-length panels for one data split.

    ``prefix`` selects the columns (``"training"`` or ``"evaluation"``),
    ``label`` is the capitalised name used in titles, and ``column`` is the
    column (1 or 2) of the 2x2 grid to draw into.
    """
    average = _smooth(progress[prefix + "/return-average"], window)
    spread = _smooth(progress[prefix + "/return-std"], window)
    best_epochs, best_returns = _running_best(
        epochs, progress[prefix + "/return-average"])

    plt.subplot(2, 2, column)
    plt.title("%s returns" % label)
    plt.plot(epochs, average, label='Average return', alpha=1.0)
    plt.plot(epochs, _smooth(progress[prefix + "/return-min"], window),
             label='Min return', alpha=0.3)
    plt.plot(epochs, _smooth(progress[prefix + "/return-max"], window),
             label='Max Return', alpha=0.3)
    plt.fill_between(epochs, average + spread, average - spread,
                     facecolor='blue', alpha=0.1)
    plt.plot(best_epochs, best_returns,
             label="Best %s returns" % label.lower())
    plt.legend(loc='best', bbox_to_anchor=(1, 1))

    plt.subplot(2, 2, column + 2)
    plt.title("%s episode length" % label)
    plt.plot(epochs,
             _smooth(progress[prefix + "/episode-length-avg"], window))


def _plot_scaled_term(progress, epochs, key, name, weight, window, summary):
    """Plot one smoothed term divided by ``weight``.

    The term called ``summary`` is drawn with a thicker line.
    """
    style = {"linewidth": 3} if name == summary else {}
    plt.plot(epochs, _smooth(progress[key], window) / weight,
             label=name, **style)


def _plot_unscaled_terms(progress, epochs, reward_keys, reward_dict, window,
                         keep):
    """Plot every term accepted by ``keep`` that has a non-zero weight."""
    for key in reward_keys:
        name = _term_name(key)
        if not keep(key, name):
            continue
        if reward_dict["weight_" + name]:
            plt.plot(epochs, _smooth(progress[key], window), label=name)
    plt.legend()


def _plot_reward_terms(progress, epochs, reward_keys, reward_dict,
                       weight_imit, weight_goal, window):
    """Draw the per-term evaluation reward figures."""
    if weight_imit or weight_goal:
        # One shared figure: imitation terms on the left, goal terms on the
        # right.  It is opened when either side is drawn so the goal panel
        # never ends up on a previously created figure.
        plt.figure(figsize=_FIGURE_SIZE)

    if weight_imit:
        plt.subplot(1, 2, 1)
        for key in reward_keys:
            if "evaluation" in key and "imitation" in key:
                name = _term_name(key)
                weight = _imitation_weight(name, reward_dict)
                _plot_scaled_term(progress, epochs, key, name, weight,
                                  window, "reward_imitation")
        plt.legend()

    if weight_goal:
        plt.subplot(1, 2, 2)
        for key in reward_keys:
            if "evaluation" in key and "imitation" not in key:
                name = _term_name(key)
                if name == "reward_goal":
                    weight = 10
                else:
                    weight = reward_dict["weight_" + name]
                # Terms with a zero weight are skipped.
                if weight:
                    _plot_scaled_term(progress, epochs, key, name, weight,
                                      window, "reward_goal")
        plt.legend()

    if weight_goal is None and weight_imit is None:
        for left, right in _PLAIN_TERM_FIGURES:
            plt.figure(figsize=_FIGURE_SIZE)
            plt.subplot(1, 2, 1)
            _plot_unscaled_terms(progress, epochs, reward_keys, reward_dict,
                                 window, left)
            plt.subplot(1, 2, 2)
            _plot_unscaled_terms(progress, epochs, reward_keys, reward_dict,
                                 window, right)


def load_progress(checkpoint_path, show_progress=True,
                  checkpoint_increment=500, training_window=8,
                  evaluation_window=40):
    """Find the best checkpoint of an experiment and optionally plot it.

    ``params.json`` and ``progress.csv`` are read from the directory that
    contains ``checkpoint_path``.

    Args:
        checkpoint_path: Path of a checkpoint inside the experiment
            directory; a trailing ``/`` is ignored.
        show_progress: When true, plot training/evaluation returns, episode
            lengths and the per-term evaluation rewards, then call
            ``plt.show()``.
        checkpoint_increment: Only rows of ``progress.csv`` whose index is a
            multiple of this value are candidates for the best checkpoint.
        training_window: Rolling-mean window for the training curves.
        evaluation_window: Rolling-mean window for the evaluation curves and
            the per-term reward curves.

    Returns:
        ``(best_checkpoint_return, best_checkpoint_idx)``: the highest
        ``evaluation/return-average`` among the candidate rows and the index
        of that row.  Row 0 is reported as ``checkpoint_increment``.

    Raises:
        ImportError: ``show_progress`` is true but matplotlib is missing.
        ValueError: ``progress.csv`` has no rows, or an imitation reward
            column has no known normalisation weight.
        KeyError: A required entry is missing from ``params.json`` or a
            required column is missing from ``progress.csv``.
    """
    if show_progress and plt is None:
        raise ImportError("matplotlib is required when show_progress=True")

    print("If the learning curves look weird, check the rolling average window size (may be too large depending on the size of epochs)")
    checkpoint_path = checkpoint_path.rstrip('/')
    experiment_path = os.path.dirname(checkpoint_path)

    with open(os.path.join(experiment_path, 'params.json'), 'r') as f:
        variant = json.load(f)
    eval_kwargs = variant["environment_params"]["evaluation"]["kwargs"]
    reward_dict = eval_kwargs["weight_dic"]
    try:
        weight_imit = eval_kwargs["imit_weights"]["imitation"]
        weight_goal = eval_kwargs["imit_weights"]["goal"]
    except (KeyError, TypeError):
        # No (complete) imitation configuration: use the plain term plots.
        weight_imit = None
        weight_goal = None

    # os.path.join keeps a directory-less checkpoint path relative instead
    # of resolving to "/progress.csv" at the filesystem root.
    progress = pd.read_csv(
        os.path.join(experiment_path, "progress.csv"), sep=",")
    if progress.empty:
        raise ValueError("progress.csv contains no rows")
    epochs = progress["epoch"]

    best_checkpoint_return, best_checkpoint_idx = _best_checkpoint(
        progress["evaluation/return-average"], checkpoint_increment)

    if show_progress:
        reward_keys = [key for key in progress.columns if _MEAN_SUFFIX in key]

        plt.figure(figsize=_FIGURE_SIZE)
        _plot_return_panels(progress, epochs, "training", "Training",
                            training_window, 1)
        _plot_return_panels(progress, epochs, "evaluation", "Test",
                            evaluation_window, 2)

        _plot_reward_terms(progress, epochs, reward_keys, reward_dict,
                           weight_imit, weight_goal, evaluation_window)
        plt.show()

    return best_checkpoint_return, best_checkpoint_idx