"""Small MILP test problems and a random-policy smoke test for BranchAndBoundEnv."""

import numpy as np
from environment import BranchAndBoundEnv
from scipy.optimize import linprog
from collections import namedtuple
from utils import read_mps

# One mixed-integer linear program:
#   name            - label printed by the driver
#   c               - objective coefficients (the problem is a minimisation)
#   A_ub, b_ub      - inequality constraints  A_ub @ x <= b_ub
#   A_eq, b_eq      - equality constraints, or None when the problem has none
#   bounds          - one (lower, upper) pair per variable; None = unbounded
#   integer_indices - indices of the variables that must take integer values
MILPExample = namedtuple(
    'MILPExample',
    ['name', 'c', 'A_ub', 'b_ub', 'A_eq', 'b_eq', 'bounds', 'integer_indices'],
)


def get_problems():
    """Return the list of MILP test problems.

    The four "easy" problems are hand-written and have the form::

        min  c'x
        s.t. A_ub @ x <= b_ub
             bounds[i][0] <= x[i] <= bounds[i][1]
             x[i] integer for every i in integer_indices

    They have no equality constraints (``A_eq`` and ``b_eq`` are ``None``).
    All of their variables are integer and non-negative, except ``x[0]`` of
    ``easy_3`` which has no bounds at all.

    The fifth problem, ``hard_1``, is whatever ``read_mps()`` returns when
    called without arguments (presumably a default MPS instance -- see
    ``utils.read_mps``).

    Returns:
        list[MILPExample]: the problems in the order easy_1, easy_2, easy_3,
        easy_4, hard_1.
    """
    examples = [
        # Problem 1
        MILPExample(
            name='easy_1',
            c=np.array([-2, -1]),
            A_ub=np.array([[3, -5], [3, 5]]),
            b_ub=np.array([0, 15]),
            A_eq=None,
            b_eq=None,
            bounds=[(0, None)] * 2,
            integer_indices=np.array([0, 1]),
        ),
        # Problem 2
        MILPExample(
            name='easy_2',
            c=np.array([1, 1]),
            A_ub=np.array([[2, -2], [-8, 10]]),
            b_ub=np.array([-1, 13]),
            A_eq=None,
            b_eq=None,
            bounds=[(0, None)] * 2,
            integer_indices=np.array([0, 1]),
        ),
        # Problem 3 (x[0] is unbounded in both directions)
        MILPExample(
            name='easy_3',
            c=np.array([-2, -3]),
            A_ub=np.array([[-4, 2], [2, 2], [-0.5, 1]]),
            b_ub=np.array([0, 30, 3.75]),
            A_eq=None,
            b_eq=None,
            bounds=[(None, None), (0, None)],
            integer_indices=np.array([0, 1]),
        ),
        # Problem 4
        MILPExample(
            name='easy_4',
            c=np.array([-20, -6, -8]),
            A_ub=np.array([[0.8, 0.2, 0.3], [0.4, 0.3, 0], [0.2, 0, 0.1]]),
            b_ub=np.array([20, 10, 5]),
            A_eq=None,
            b_eq=None,
            bounds=[(0, None)] * 3,
            integer_indices=np.array([0, 1, 2]),
        ),
    ]

    # Problem 5: loaded from file. The last returned value (the continuous
    # variable indices) is not part of MILPExample, so it is discarded.
    c, A_ub, b_ub, A_eq, b_eq, bounds, integer_indices, _continuous_indices = read_mps()
    examples.append(
        MILPExample(
            name='hard_1',
            c=c,
            A_ub=A_ub,
            b_ub=b_ub,
            A_eq=A_eq,
            b_eq=b_eq,
            bounds=bounds,
            integer_indices=integer_indices,
        )
    )
    return examples


def _run_random_episode(env):
    """Play one episode on *env* with uniformly random valid actions.

    Each step reads ``obs['action_mask']``, samples one of the actions whose
    mask entry is truthy, and passes it to ``env.step``. The episode stops when
    the environment reports ``done`` or when the mask leaves no valid action
    (in which case ``'failed'`` is printed).

    Args:
        env: environment exposing ``reset()`` and a ``step(action)`` that
            returns ``(obs, reward, done, info)``.

    Returns:
        The sum of the rewards collected during the episode.
    """
    obs = env.reset()
    done = False
    total_reward = 0
    while not done:
        valid_actions = np.where(obs['action_mask'])[0]
        if valid_actions.size == 0:
            # Nothing can be sampled; stop instead of letting
            # np.random.choice raise on an empty population.
            print('failed')
            break
        # Sample an element of valid_actions (an action id), not a position
        # in that array -- the two differ whenever an earlier action is masked.
        # NOTE(review): assumes env.step expects the action index itself.
        action = int(np.random.choice(valid_actions))
        obs, reward, done, _info = env.step(action)
        total_reward += reward
        print(f"Action: {action}, Reward: {reward}, Done: {done}")
    return total_reward


def main():
    """Run the random-policy smoke test and print the results."""
    # Only the first example is exercised for now.
    examples = get_problems()[:1]
    for example in examples:
        print(f"Testing example: {example.name}")
        env = BranchAndBoundEnv(
            c=example.c,
            A_ub=example.A_ub,
            b_ub=example.b_ub,
            A_eq=example.A_eq,
            b_eq=example.b_eq,
            bounds=example.bounds,
            integer_indices=example.integer_indices,
        )
        expected_return = _run_random_episode(env)

        print("===RL===")
        print(f"f(x*): {env.best_obj_value}")
        print(f"expected return: {expected_return}")

        # TODO: cross-check the result against scipy, e.g.
        # actual_solution = linprog(c=example.c, A_ub=example.A_ub,
        #                           b_ub=example.b_ub, bounds=example.bounds,
        #                           integrality=1)
        # print("==linprog==")
        # print(f"f(x*): {actual_solution.fun}, x* = {actual_solution.x}")


if __name__ == "__main__":
    main()