import argparse
import csv
import json
import pathlib

import ecole as ec
import neptune
import numpy as np
import torch

# set up the proper agent, environment and reward function for the task
from agents.dual import Policy, ObservationFunction
from environments import Branching as Environment
from environments import DefaultInformationFunction as Information
from environments import Configuring as BenchmarkEnv
from rewards import TimeLimitDualIntegral as BoundIntegral
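

# Evaluation script: roll out the trained branching policy and default SCIP
# side by side on generated set-cover instances, logging results to Neptune.
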
def simulate(args, env, policy, instance, initial_primal_bound):
    """Run one episode of `env` on `instance` and return the final info dict.

    When `policy` is None, an empty action is sent at each step, leaving all
    decisions to the solver's defaults (the SCIP baseline).
    """
    # reset the environment
    observation, action_set, reward, done, info = env.reset(instance, objective_limit=initial_primal_bound)
    if args.debug:
        print(f" info: {info}")
        print(f" reward: {reward}")
        print(f" action_set: {action_set}")

    # cumulated_reward = 0  # discard initial reward

    # loop over the environment
    while not done:
        if policy is not None:
            action = policy(action_set, observation)
        else:
            action = {}
        observation, action_set, reward, done, info = env.step(action)
        if args.debug:
            print(f" info: {info}")
            print(f" reward: {reward}")
            print(f" action_set: {action_set}")
        # cumulated_reward += reward

    # print(f" cumulated reward (to be maximized): {cumulated_reward}")
    # print(f" time to solve: {info['solvingtime']}")
    # print(f" number of nodes: {info['nnodes']}")
    # print(f" dual bound: {info['dual_bound']}")
    # print(f" primal bound: {info['primal_bound']}")
    # print(f" status: {info['status']}")

    return info

    # save instance results
    # with open(results_file, mode='a') as csv_file:
    #     writer = csv.DictWriter(csv_file, fieldnames=results_fieldnames)
    #     writer.writerow({
    #         'instance': str(instance),
    #         'seed': seed,
    #         'initial_primal_bound': initial_primal_bound,
    #         'initial_dual_bound': initial_dual_bound,
    #         'objective_offset': objective_offset,
    #         'cumulated_reward': cumulated_reward,
    #     })
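

# main driver: parse the command line, build the instance generator, then
# compare the learned branching policy against default SCIP on each instance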
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # parser.add_argument(
    #     'problem',
    #     help='Problem benchmark to process.',
    #     choices=['item_placement', 'load_balancing', 'anonymous'],
    # )
    parser.add_argument(
        '-t', '--timelimit',
        help='Episode time limit (in seconds).',
        default=argparse.SUPPRESS,
        type=float,
    )
    parser.add_argument(
        '-d', '--debug',
        help='Print debug traces.',
        action='store_true',
    )
    args = parser.parse_args()
    args.problem = 'set_cover'  # the problem benchmark is hard-coded here

    run = neptune.init_run(
        project="571-project/learn-to-branch",
    )
    # print(f"Evaluating the {args.task} task agent on the {args.problem} problem.")

    # set up the instance generator
    if args.problem == 'set_cover':
        gen = ec.instance.SetCoverGenerator(
            n_rows=100,
            n_cols=125,
            density=0.05,
        )
    elif args.problem == 'auction':
        gen = ec.instance.CombinatorialAuctionGenerator()

    time_limit = 15 * 60
    memory_limit = 8796093022207  # maximum value accepted by SCIP (in MB)

    # override from command-line argument if provided
    time_limit = getattr(args, "timelimit", time_limit)
    # evaluation loop
    for seed, instance in enumerate(gen):
        run['seed'].append(seed)

        # observation_function = ObservationFunction(problem=args.problem)
        observation_function = ec.observation.NodeBipartite()
        policy = Policy(problem=args.problem)
        policy.load_weights("models/learn2branch-set_cover-250-250-actor_critic-gae.pt")
        integral_function = BoundIntegral()

        # branching environment driven by the learned policy
        env = Environment(
            time_limit=time_limit,
            observation_function=observation_function,
            reward_function=-integral_function,  # negated integral (minimization)
            scip_params={'limits/memory': memory_limit},
        )
        # baseline environment: default SCIP with no observation or custom reward
        benchmark_naive_env = BenchmarkEnv(
            time_limit=time_limit,
            observation_function=None,
            reward_function=None,
        )

        # seed both the agent and the environments (deterministic behavior)
        # observation_function.seed(seed)
        policy.seed(seed)
        env.seed(seed)
        benchmark_naive_env.seed(seed)

        # set up the reward function parameters for that instance
        initial_primal_bound = instance.primal_bound
        initial_dual_bound = instance.dual_bound
        objective_offset = 0
        integral_function.set_parameters(
            initial_primal_bound=initial_primal_bound,
            initial_dual_bound=initial_dual_bound,
            objective_offset=objective_offset,
        )
        print()
        print(f"Instance {instance.name}")
        print(f" seed: {seed}")
        # print(f" initial primal bound: {initial_primal_bound}")
        # print(f" initial dual bound: {initial_dual_bound}")
        # print(f" objective offset: {objective_offset}")

        print()
        print(" RL:")
        info_rl = simulate(args, env, policy, instance, initial_primal_bound)
        run['solvingtime_rl'].append(info_rl['solvingtime'])
        run['nnodes_rl'].append(info_rl['nnodes'])
        run['dual_bound_rl'].append(info_rl['dual_bound'])
        run['primal_bound_rl'].append(info_rl['primal_bound'])
        # run['status_rl'].append(info_rl['status'])

        print()
        print(" SCIP:")
        info_scip = simulate(args, benchmark_naive_env, None, instance, initial_primal_bound)
        run['solvingtime_scip'].append(info_scip['solvingtime'])
        run['nnodes_scip'].append(info_scip['nnodes'])
        run['dual_bound_scip'].append(info_scip['dual_bound'])
        run['primal_bound_scip'].append(info_scip['primal_bound'])
        # run['status_scip'].append(info_scip['status'])

        # stop after 101 instances (seeds 0 through 100)
        if seed == 100:
            run.stop()
            break
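
# example invocation (the script filename is an assumption):
#   python evaluate.py --timelimit 900 --debug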