''' Created on July 1, 2022
Tensorflow Implementation of Adaptive Adversarial Contrastive Learning
for Cross-Domain Recommendation
@author: Chi-Wei Hsu (apple.iim09g@nycu.edu.tw)
'''
import os

# Must be set BEFORE tensorflow is imported, otherwise the C++ log-level
# filter is ignored (original code set it after the import).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import argparse
import datetime
import time

import numpy as np
import tensorflow as tf
import torch
from tqdm import tqdm

from data import *
from GraphEnv import GraphEnv
from DDQN_load import *
from utility.parser import parse_args

if __name__ == '__main__':
    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id)

    # args.dataset is "<source>_<target>"; build one data generator per domain.
    source_name, target_name = args.dataset.split('_')
    data_generator_s = Data(path=args.data_path + source_name,
                            batch_size=args.batch_size, neg_num=args.neg_num)
    data_generator_t = Data(path=args.data_path + target_name,
                            batch_size=args.batch_size, neg_num=args.neg_num)

    env = GraphEnv(args, data_generator_s, data_generator_t)

    device = "cuda:0"
    num_action_type = 3
    dqn = DQN(env, num_action_type, device=device)

    score = list()  # per-episode max hit metric
    now = datetime.datetime.now().strftime("%m%d-%H%M")  # run timestamp tag

    N_EPISODES = 100
    MEMORY_CAPACITY = 10  # start learning once the replay buffer holds this many transitions
    start = time.time()
    for i in tqdm(range(N_EPISODES)):
        if i % 10 == 0:
            # Periodically reset the environment back to its initial embeddings.
            s = env.reset()
        a = dqn.choose_action(s.reshape(-1))
        a = np.argmax(a, axis=1)
        s_, r, max_hit, max_hit_action, max_reward, max_reward_action = env.step(a)
        score.append(max_hit)
        # Store the transition BEFORE advancing the state. The original code
        # did `s = s_` first, so the replay buffer recorded the next state in
        # both the state and next-state slots and the DQN never trained on a
        # real (s, a, r, s') transition.
        dqn.store_transition(s.reshape(-1), a.reshape(-1), r, s_.reshape(-1))
        s = s_
        if dqn.memory_counter > MEMORY_CAPACITY:
            dqn.learn()
            # Release cached GPU memory between learning steps.
            torch.cuda.empty_cache()
        print("Episode: {} - Reward: {:.4f}, Time cost: {:.4f}".format(
            i, r, time.time() - start))