Debugging Multiagent algorithms. Added plan_on function to multiagent q_learner to run updates as a planner instead of learner.
danieldritter committed Jan 17, 2021
1 parent 5af13b6 commit 0fcab63
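
The plan_on function named in the commit message is not rendered in this view (only three of the seven changed files appear below). As a rough, hypothetical sketch of what "running updates as a planner instead of learner" could mean, with every helper and attribute marked below assumed rather than taken from the repository:

# Hypothetical sketch only; the actual plan_on body is not shown in this diff.
# A planner sweeps every state and joint action and applies the same update
# rule the learner uses, instead of stepping through sampled episodes.
def plan_on(self, problem, iterations=100):
    q_values = self.initialize_q_values(problem)      # assumed helper
    for _ in range(iterations):
        for state in problem.state_list:              # assumed attribute
            if problem.is_terminal(state):
                continue
            for joint_action in problem.joint_action_list:
                next_state = problem.next_state_dist(state, joint_action).sample()      # assumed API
                joint_rewards = problem.joint_rewards(state, joint_action, next_state)  # assumed API
                for agent in self.learning_agents:
                    q_target = self.update(agent, joint_action, q_values, joint_rewards,
                                           state, next_state, problem)
                    # Assumed blending; the base class may combine the update differently.
                    q_values[agent][state][joint_action] = (
                        (1 - self.lr) * q_values[agent][state][joint_action]
                        + self.lr * q_target)
    return q_values
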
Showing 7 changed files with 32,465 additions and 32,670 deletions.
64,823 changes: 32,243 additions & 32,580 deletions demos/MultiAgentQLearning.ipynb

Large diffs are not rendered by default.

146 changes: 96 additions & 50 deletions msdm/algorithms/correlatedq.py
@@ -9,7 +9,10 @@
from tqdm import tqdm
from cvxopt.modeling import op
from cvxopt.modeling import variable
from cvxopt import solvers
import cvxopt
import numpy as np
from scipy.optimize import linprog


class CorrelatedQLearner(TabularMultiAgentQLearner):
@@ -35,53 +38,97 @@ def __init__(self,learning_agents: Iterable,
else:
raise Exception("Please enter one of ['Utilitarian','Egalitarian','Republican','Libertarian'] for the objective_func parameter")
self.equilibrium_type = objective_func
self.curr_equilibrium = None
self.curr_equilibrium_policy = None

def utilitarian_Q(self,q_values,next_state,problem,agent_name=None):
q_matrix = np.zeros((len(self.all_agents),len(problem.joint_action_list)))
for agent_i,agent in enumerate(self.all_agents):
for ai, action in enumerate(problem.joint_action_list):
q_matrix[agent_i][ai] = q_values[agent][next_state][action]
q_matrix = np.sum(q_matrix,axis=0)
return q_matrix

def utilitarian_Q(self,agent_name,q_values,policy,curr_state,problem):
agents = []
agents.extend(self.learning_agents)
agents.extend(list(self.other_policies.keys()))
total_val = 0.0
for agent in agents:
for ai,action in enumerate(problem.joint_action_list):
total_val += policy[ai]*q_values[agent][curr_state][action]
# Negated so the LP solver will maximize instead of minimize this value
return -total_val

def egalitarian_Q(self,agent_name,q_values,policy,curr_state,problem):
agents = []
agents.extend(self.learning_agents)
agents.extend(list(self.other_policies.keys()))
agent_vals = []
for agent in agents:
agent_total = 0.0
for ai,action in enumerate(problem.joint_action_list):
agent_total += policy[ai]*q_values[agent][curr_state][action]
agent_vals.append(agent_total)
# Negated so the LP solver will maximize instead of minimize this value
return -cvxopt.modeling.min(agent_vals)
def egalitarian_Q(self,q_values,next_state,problem,agent_name=None):
q_matrix = np.zeros((len(self.all_agents),len(problem.joint_action_list)))
for agent_i,agent in enumerate(self.all_agents):
for ai, action in enumerate(problem.joint_action_list):
q_matrix[agent_i][ai] = q_values[agent][next_state][action]
return np.amin(q_matrix,axis=0)

def republican_Q(self,agent_name,q_values,policy,curr_state,problem):
agents = []
agents.extend(self.learning_agents)
agents.extend(list(self.other_policies.keys()))
agent_vals = []
for agent in agents:
agent_total = 0.0
# subtracting the values to allow for the min function at the end (max throws an error in the LP)
for ai,action in enumerate(problem.joint_action_list):
agent_total -= policy[ai]*q_values[agent][curr_state][action]
agent_vals.append(agent_total)
# Negated so the LP solver will maximize instead of minimize this value
return -cvxopt.modeling.min(agent_vals)
def republican_Q(self,q_values,next_state,problem,agent_name=None):
q_matrix = np.zeros((len(self.all_agents),len(problem.joint_action_list)))
for agent_i,agent in enumerate(self.all_agents):
for ai, action in enumerate(problem.joint_action_list):
q_matrix[agent_i][ai] = q_values[agent][next_state][action]
return np.amax(q_matrix,axis=0)

def libertarian_Q(self,agent_name,q_values,policy,curr_state,problem):
agent_total = 0.0
for ai,action in enumerate(problem.joint_action_list):
agent_total += policy[ai]*q_values[agent_name][curr_state][action]
# Negated so the LP solver will maximize instead of minimize this value
return -agent_total
def libertarian_Q(self,q_values,next_state,problem,agent_name):
q_matrix = np.zeros((len(problem.joint_action_list)))
for ai, action in enumerate(problem.joint_action_list):
q_matrix[ai] = q_values[agent_name][next_state][action]
return q_matrix
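
# Taken together, each helper above produces one objective coefficient per joint
# action a at the next state s':
#   utilitarian:  sum_i Q_i(s', a)      egalitarian:  min_i Q_i(s', a)
#   republican:   max_i Q_i(s', a)      libertarian:  Q_j(s', a) for a single agent j
# The equilibrium LP below then maximizes sum_a sigma(a) * c_a over correlated
# policies sigma, subject to the no-deviation (correlated equilibrium) constraints.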

def update(self,agent_name,actions,q_values,joint_rewards,curr_state,next_state,problem):
if problem.is_terminal(next_state):
return self.lr*(joint_rewards[agent_name])
if problem.is_terminal(curr_state):
return 0.0

if self.equilibrium_type != "Libertarian":
# compute equilibrium for first agent, then reuse for others
if agent_name == self.learning_agents[0]:
self.curr_equilibrium_policy,self.curr_equilibrium = self.cvxopt_equilibrium(q_values,next_state,problem)
expected_val = 0.0
for ai, action in enumerate(problem.joint_action_list):
q_val = q_values[agent_name][next_state][action]
expected_val += self.curr_equilibrium_policy[ai]*q_val
q_del = joint_rewards[agent_name] + self.dr*expected_val
return q_del
# Have to recompute the equilibrium separately for each agent for lCEQ
else:
self.curr_equilibrium_policy,self.curr_equilibrium = self.compute_equilibrium(q_values,next_state,problem,agent_name=agent_name)
expected_val = 0.0
for ai, action in enumerate(problem.joint_action_list):
q_val = q_values[agent_name][next_state][action]
expected_val += self.curr_equilibrium_policy[ai]*q_val
q_del = joint_rewards[agent_name] + self.dr*expected_val
return q_del

def compute_equilibrium(self,q_values,next_state,problem,agent_name=None):
next_actions = problem.joint_actions(next_state)
joint_actions = problem.joint_action_list
# Assumes all agents have same action space
individual_actions = list(next_actions[self.all_agents[0]])
num_variables = len(joint_actions)
num_inequality_constraints = len(individual_actions)*len(self.all_agents)
num_equality_constraints = 1
A_ineq = np.zeros((num_inequality_constraints,len(joint_actions)))
for ai,action in enumerate(joint_actions):
curr_index = 0
for agent_i,agent in enumerate(self.all_agents):
altered_action = copy.deepcopy(action)
q_val = q_values[agent][next_state][action]
for iai,indiv_action in enumerate(individual_actions):
if indiv_action == action[agent]:
curr_index +=1
continue
else:
altered_action[agent] = indiv_action
altered_q_val = q_values[agent][next_state][altered_action]
constraint_val = (q_val - altered_q_val)
A_ineq[curr_index,ai] = constraint_val
curr_index += 1

A_eq = np.ones((1,num_variables))
b_ineq = np.zeros((num_inequality_constraints))
c = self.objective_func(q_values,next_state,problem,agent_name)
lp = linprog(-c,A_ub=-A_ineq,b_ub=b_ineq,A_eq=A_eq,b_eq=1)
equilibrium = -1*lp.fun
policy = lp.x
return policy,equilibrium
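
# The sign flips above follow scipy.optimize.linprog conventions: linprog always
# minimizes c @ x subject to A_ub @ x <= b_ub, so the objective is negated to
# maximize and A_ineq is negated to express the ">= 0" incentive constraints.
# A standalone illustration of that pattern with arbitrary toy numbers:
import numpy as np
from scipy.optimize import linprog

c = np.array([1.0, 2.0, 0.5])          # coefficients to maximize
A = np.array([[1.0, -1.0, 0.0],
              [0.0, 1.0, -1.0]])       # want A @ x >= 0
res = linprog(-c,                                    # negate: linprog minimizes
              A_ub=-A, b_ub=np.zeros(2),             # -A @ x <= 0  <=>  A @ x >= 0
              A_eq=np.ones((1, 3)), b_eq=[1.0])      # x sums to 1; default bounds keep x >= 0
print(res.x, -res.fun)                 # maximizing distribution and maximized value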

def cvxopt_equilibrium(self,q_values,next_state,problem,agent_name=None):
cvxopt.solvers.options['show_progress'] = False
next_actions = problem.joint_actions(next_state)
joint_actions = problem.joint_action_list
@@ -94,10 +141,10 @@ def update(self,agent_name,actions,q_values,joint_rewards,curr_state,next_state,
for agent in action:
indiv_actions = list(separate_actions[agent])
altered_action = copy.deepcopy(action)
q_val = q_values[agent][curr_state][action]
q_val = q_values[agent][next_state][action]
for indiv_action in indiv_actions:
altered_action[agent] = indiv_action
altered_q_val = q_values[agent][curr_state][altered_action]
altered_q_val = q_values[agent][next_state][altered_action]
constraint_val = (q_val - altered_q_val)*policy_var
total_sum[agent] += constraint_val
for agent in total_sum:
@@ -108,11 +155,10 @@ def update(self,agent_name,actions,q_values,joint_rewards,curr_state,next_state,
constraints.append(sum_constraint)
constraints.append(non_negative)
constraints.append(less_than_one)
objective_value = self.objective_func(agent_name,q_values,policies,curr_state,problem)
lp = op(objective_value,constraints)
objective_value = cvxopt.matrix(self.objective_func(q_values,next_state,problem,agent_name))
objective_value = cvxopt.modeling.dot(objective_value,policies)
lp = op(-objective_value,constraints)
lp.solve()
equilibrium = float(lp.objective.value()[0])
q_del = self.lr*(joint_rewards[agent_name] + self.dr*equilibrium)
return q_del


policies = list(policies.value)
return policies,equilibrium
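
# The cvxopt path uses the same negate-to-maximize idiom, since cvxopt.modeling.op
# also minimizes its objective. A minimal standalone sketch of that pattern, with
# an arbitrary coefficient vector standing in for the objective_func output:
import cvxopt
from cvxopt.modeling import variable, op

cvxopt.solvers.options['show_progress'] = False
coeffs = cvxopt.matrix([1.0, 2.0, 0.5])
p = variable(3, "policy")
constraints = [(cvxopt.modeling.sum(p) == 1.0), (p >= 0.0), (p <= 1.0)]
lp = op(-cvxopt.modeling.dot(coeffs, p), constraints)   # op minimizes, so negate
lp.solve()
maximized_value = -float(lp.objective.value()[0])
policy = list(p.value)                                  # solution as plain floats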
22 changes: 13 additions & 9 deletions msdm/algorithms/friendfoeq.py
@@ -25,8 +25,11 @@ def __init__(self,learning_agents: Iterable,
alg_name=alg_name,render=render,render_from=render_from)
self.friends = friends
self.foes = foes
self.equilibria = []

def update(self,agent_name,actions,q_values,joint_rewards,curr_state,next_state,problem):
if problem.is_terminal(next_state):
return self.lr*joint_rewards[agent_name]
# Pure friend-Q case:
if len(self.foes[agent_name]) == 0:
ffq_equilibrium = max(q_values[agent_name][next_state].items(),key=lambda x:x[1])[1]
@@ -58,21 +61,22 @@ def update(self,agent_name,actions,q_values,joint_rewards,curr_state,next_state,
joint_action[friend] = action[friend_order[friend]]
for foe in foe_order:
joint_action[foe] = foe_action[foe_order[foe]]
payoff_matrix[i,j] = q_vals[joint_action]

payoff_matrix[i][j] = q_vals[joint_action]
cvxopt.solvers.options['show_progress'] = False
payoff_matrix = cvxopt.matrix(payoff_matrix)
# pi = variable(len(friendly_actions),"policy")
pi = variable(len(foe_actions),"policy")
cvx_payoff_matrix = cvxopt.matrix(payoff_matrix.T)
pi = variable(len(friendly_actions),"policy")
c1 = (cvxopt.modeling.sum(pi) == 1.0)
c2 = (pi >= 0.0)
c4 = (pi <= 1.0)
minimax_value = variable(1,"minimax_value")
c3 = (minimax_value >= payoff_matrix*pi)
c3 = (minimax_value >= cvx_payoff_matrix*pi)
constraints = [c1,c2,c3,c4]
lp = op(minimax_value,constraints)
lp = op(-minimax_value,constraints)
lp.solve()
ffq_equilibrium = float(lp.objective.value()[0])
q_del = self.lr*(joint_rewards[agent_name] + self.dr*ffq_equilibrium)
policy = np.array(pi.value)
expected_val = np.amin(np.dot(payoff_matrix.T,policy))
ffq_equilibrium = expected_val
self.equilibria.append((ffq_equilibrium,next_state))
q_del = (joint_rewards[agent_name] + self.dr*ffq_equilibrium)
return q_del
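
# The rewritten ending reads the security value directly off the payoff matrix:
# the learner's mixed policy is evaluated against every foe action and the worst
# case is taken. A small standalone check of that formula on rock-paper-scissors
# payoffs (toy numbers, not from this repository):
import numpy as np

payoff_matrix = np.array([[ 0.0, -1.0,  1.0],
                          [ 1.0,  0.0, -1.0],
                          [-1.0,  1.0,  0.0]])   # row player's payoffs
policy = np.array([1/3, 1/3, 1/3])               # uniform play is minimax-optimal here
expected_val = np.amin(np.dot(payoff_matrix.T, policy))  # worst case over foe actions
print(expected_val)                              # 0.0, the value of the game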


