RL_self_implemented/test_environment.py · EECE571F-project

import numpy as np
from environment import BranchAndBoundEnv
from scipy.optimize import linprog
from collections import namedtuple
from utils import read_mps

# Record describing one mixed-integer linear program used as a test case.
MILPExample = namedtuple(
    'MILPExample',
    [
        'name',             # short label printed when the example is run
        'c',                # objective coefficients (problems are stated as min c'x)
        'A_ub',             # inequality matrix, rows of A_ub @ x <= b_ub
        'b_ub',             # inequality right-hand side
        'A_eq',             # equality matrix, or None when the problem has none
        'b_eq',             # equality right-hand side, or None
        'bounds',           # one (lower, upper) pair per variable; None = unbounded
        'integer_indices',  # indices of the variables that must be integer
    ],
)

def _inequality_example(name, c, A_ub, b_ub, bounds=None):
    """Build an all-integer MILPExample that only has ``A_ub @ x <= b_ub`` rows.

    Args:
        name: Label stored on the example.
        c: Objective coefficients (one per variable).
        A_ub: Inequality constraint matrix as nested lists.
        b_ub: Inequality right-hand side.
        bounds: Optional list of (lower, upper) pairs; when omitted every
            variable is bounded below by 0 and unbounded above.

    Returns:
        A MILPExample with ``A_eq``/``b_eq`` set to None and every variable
        index listed in ``integer_indices``.
    """
    c = np.array(c)
    if bounds is None:
        bounds = [(0, None)] * len(c)
    return MILPExample(
        name=name,
        c=c,
        A_ub=np.array(A_ub),
        b_ub=np.array(b_ub),
        A_eq=None,
        b_eq=None,
        bounds=bounds,
        integer_indices=np.arange(len(c)),
    )


def get_problems():
    """Return the MILP instances used to exercise the branch-and-bound environment.

    Each instance is stated in minimisation form::

        min  c'x
        s.t. A_ub @ x <= b_ub
             lower[i] <= x[i] <= upper[i]   (pairs in ``bounds``, None = unbounded)
             x[i] integer for every i in ``integer_indices``

    ``A_eq``/``b_eq`` are None for the hand-written problems. For the loaded
    problem they hold whatever ``read_mps`` returns (presumably the rows of
    ``A_eq @ x == b_eq``, following the scipy ``linprog`` naming).

    Returns:
        list[MILPExample]: four small hand-written problems (``easy_1`` ..
        ``easy_4``) followed by ``hard_1``, which is loaded through
        ``read_mps()``.
    """
    examples = [
        # Problem 1: two non-negative integer variables.
        _inequality_example(
            'easy_1',
            c=[-2, -1],
            A_ub=[[3, -5], [3, 5]],
            b_ub=[0, 15],
        ),
        # Problem 2: two non-negative integer variables.
        _inequality_example(
            'easy_2',
            c=[1, 1],
            A_ub=[[2, -2], [-8, 10]],
            b_ub=[-1, 13],
        ),
        # Problem 3: x[0] is free (no lower bound), x[1] >= 0.
        _inequality_example(
            'easy_3',
            c=[-2, -3],
            A_ub=[[-4, 2], [2, 2], [-0.5, 1]],
            b_ub=[0, 30, 3.75],
            bounds=[(None, None), (0, None)],
        ),
        # Problem 4: three non-negative integer variables.
        _inequality_example(
            'easy_4',
            c=[-20, -6, -8],
            A_ub=[[0.8, 0.2, 0.3], [0.4, 0.3, 0], [0.2, 0, 0.1]],
            b_ub=[20, 10, 5],
        ),
    ]

    # Problem 5: loaded from an MPS file. read_mps() is called without
    # arguments, so the file that gets read is decided inside utils. The
    # last returned item (continuous variable indices) is not needed here.
    c, A_ub, b_ub, A_eq, b_eq, bounds, integer_indices, _continuous_indices = read_mps()
    examples.append(
        MILPExample(
            name='hard_1',
            c=c,
            A_ub=A_ub,
            b_ub=b_ub,
            A_eq=A_eq,
            b_eq=b_eq,
            bounds=bounds,
            integer_indices=integer_indices,
        )
    )

    return examples

def sample_valid_action(action_mask):
    """Pick one action uniformly at random among those the mask allows.

    Args:
        action_mask: Array-like whose truthy entries mark the action indices
            that may be taken (assumed 1-D; only the first axis is inspected).

    Returns:
        The chosen action index as a plain int, or None when the mask allows
        no action at all.
    """
    valid_actions = np.where(action_mask)[0]
    if len(valid_actions) == 0:
        return None
    # Draw from the valid indices themselves; drawing from
    # range(len(valid_actions)) would yield a position, not an action.
    return int(np.random.choice(valid_actions))


if __name__ == "__main__":
    # Only the first example is exercised for now.
    examples = get_problems()[:1]
    for example_i in examples:
        print(f"Testing example: {example_i.name}")
        env = BranchAndBoundEnv(
            c=example_i.c,
            A_ub=example_i.A_ub,
            b_ub=example_i.b_ub,
            A_eq=example_i.A_eq,
            b_eq=example_i.b_eq,
            bounds=example_i.bounds,
            integer_indices=example_i.integer_indices,
        )

        obs = env.reset()
        done = False

        # Sum of the rewards collected over this single random rollout.
        expected_return = 0
        while not done:
            # For testing, select a random valid action.
            action = sample_valid_action(obs['action_mask'])
            if action is None:
                # Nothing can be stepped; stop instead of sampling from an
                # empty set (which would raise inside numpy).
                print('failed')
                break
            obs, reward, done, _info = env.step(action)
            expected_return += reward
            print(f"Action: {action}, Reward: {reward}, Done: {done}")

        print("===RL===")
        print(f"f(x*): {env.best_obj_value}")
        print(f"expected return: {expected_return}")

        # NOTE: a reference solve with scipy.optimize.linprog (using its
        # `integrality` argument) could be compared against
        # env.best_obj_value here; that check is not implemented yet.