import argparse
import csv  # used by the (currently commented-out) per-instance results writer

import ecole as ec
import neptune

# set up the proper agent, environment and reward for the task
from agents.dual import Policy, ObservationFunction
from environments import Branching as Environment
from environments import DefaultInformationFunction as Information
from environments import Configuring as BenchmarkEnv
from rewards import TimeLimitDualIntegral as BoundIntegral


def simulate(args, env, policy, instance, initial_primal_bound):
    # reset the environment
    observation, action_set, reward, done, info = env.reset(
        instance, objective_limit=initial_primal_bound)

    if args.debug:
        print(f"  info: {info}")
        print(f"  reward: {reward}")
        print(f"  action_set: {action_set}")

    # cumulated_reward = 0  # discard initial reward

    # loop over the environment until the episode terminates
    while not done:
        # a policy maps (action_set, observation) to a branching decision;
        # without one, pass an empty action and let SCIP use its defaults
        if policy is not None:
            action = policy(action_set, observation)
        else:
            action = {}
        observation, action_set, reward, done, info = env.step(action)

        if args.debug:
            print(f"  info: {info}")
            print(f"  reward: {reward}")
            print(f"  action_set: {action_set}")

        # cumulated_reward += reward

    # print(f"  cumulated reward (to be maximized): {cumulated_reward}")
    # print(f"  time to solve: {info['solvingtime']}")
    # print(f"  number of nodes: {info['nnodes']}")
    # print(f"  dual bound: {info['dual_bound']}")
    # print(f"  primal bound: {info['primal_bound']}")
    # print(f"  status: {info['status']}")

    return info

    # save instance results
    # with open(results_file, mode='a') as csv_file:
    #     writer = csv.DictWriter(csv_file, fieldnames=results_fieldnames)
    #     writer.writerow({
    #         'instance': str(instance),
    #         'seed': seed,
    #         'initial_primal_bound': initial_primal_bound,
    #         'initial_dual_bound': initial_dual_bound,
    #         'objective_offset': objective_offset,
    #         'cumulated_reward': cumulated_reward,
    #     })
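
# A minimal stand-in policy, shown for illustration only (the class name is
# hypothetical and not part of this project). It satisfies the interface that
# simulate() expects from a policy: a seed() hook plus a callable taking
# (action_set, observation) and returning an action from action_set. Swapping
# it in for the trained agent gives a trivial first-candidate branching
# baseline.
class FirstCandidatePolicy:

    def seed(self, seed):
        pass  # no internal randomness to seed

    def __call__(self, action_set, observation):
        # branch on the first candidate variable proposed by the solver
        return action_set[0]

# usage sketch:
#   info = simulate(args, env, FirstCandidatePolicy(), instance, initial_primal_bound)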
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # parser.add_argument(
    #     'problem',
    #     help='Problem benchmark to process.',
    #     choices=['item_placement', 'load_balancing', 'anonymous'],
    # )
    parser.add_argument(
        '-t', '--timelimit',
        help='Episode time limit (in seconds).',
        default=argparse.SUPPRESS,
        type=float,
    )
    parser.add_argument(
        '-d', '--debug',
        help='Print debug traces.',
        action='store_true',
    )
    args = parser.parse_args()
    args.problem = 'set_cover'

    # assumes Neptune credentials are available (NEPTUNE_API_TOKEN in the
    # environment, or pass api_token= explicitly)
    run = neptune.init_run(
        project="571-project/learn-to-branch",
    )

    # print(f"Evaluating the {args.task} task agent on the {args.problem} problem.")

    # set up the instance generator
    if args.problem == 'set_cover':
        gen = ec.instance.SetCoverGenerator(
            n_rows=100,
            n_cols=125,
            density=0.05,
        )
    elif args.problem == 'auction':
        gen = ec.instance.CombinatorialAuctionGenerator()

    time_limit = 15 * 60  # 15 minutes per episode
    memory_limit = 8796093022207  # maximum value accepted by SCIP

    # override from command-line argument if provided
    time_limit = getattr(args, "timelimit", time_limit)

    # evaluation loop
    for seed, instance in enumerate(gen):
        run['seed'].append(seed)

        # observation_function = ObservationFunction(problem=args.problem)
        observation_function = ec.observation.NodeBipartite()
        policy = Policy(problem=args.problem)
        policy.load_weights("models/learn2branch-set_cover-250-250-actor_critic-gae.pt")
        integral_function = BoundIntegral()

        # branching environment driven by the learned policy
        env = Environment(
            time_limit=time_limit,
            observation_function=observation_function,
            reward_function=-integral_function,  # negated integral (minimization)
            scip_params={'limits/memory': memory_limit},
        )
        # plain SCIP baseline: no observations, no learned decisions
        benchmark_naive_env = BenchmarkEnv(
            time_limit=time_limit,
            observation_function=None,
            reward_function=None,
        )

        # seed both the agent and the environments (deterministic behavior)
        # observation_function.seed(seed)
        policy.seed(seed)
        env.seed(seed)
        benchmark_naive_env.seed(seed)

        # set up the reward function parameters for that instance
        initial_primal_bound = instance.primal_bound
        initial_dual_bound = instance.dual_bound
        objective_offset = 0

        integral_function.set_parameters(
            initial_primal_bound=initial_primal_bound,
            initial_dual_bound=initial_dual_bound,
            objective_offset=objective_offset)

        print()
        print(f"Instance {instance.name}")
        print(f"  seed: {seed}")
        # print(f"  initial primal bound: {initial_primal_bound}")
        # print(f"  initial dual bound: {initial_dual_bound}")
        # print(f"  objective offset: {objective_offset}")

        print()
        print("  RL:")
        info_rl = simulate(args, env, policy, instance, initial_primal_bound)
        run['solvingtime_rl'].append(info_rl['solvingtime'])
        run['nnodes_rl'].append(info_rl['nnodes'])
        run['dual_bound_rl'].append(info_rl['dual_bound'])
        run['primal_bound_rl'].append(info_rl['primal_bound'])
        # run['time_rl'].append(info_rl['status'])

        print()
        print("  SCIP:")
        info_scip = simulate(args, benchmark_naive_env, None, instance, initial_primal_bound)
        run['solvingtime_scip'].append(info_scip['solvingtime'])
        run['nnodes_scip'].append(info_scip['nnodes'])
        run['dual_bound_scip'].append(info_scip['dual_bound'])
        run['primal_bound_scip'].append(info_scip['primal_bound'])
        # run['time_scip'].append(info_scip['status'])

        # stop after seeds 0..100 (101 instances)
        if seed == 100:
            run.stop()
            exit(0)