package agents.rlboa;

import genius.core.boaframework.NegotiationSession;
import genius.core.boaframework.OpponentModel;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;

public class PriorBeliefQlearningStrategy extends QlearningStrategy {

    public PriorBeliefQlearningStrategy(NegotiationSession negotiationSession, OpponentModel opponentModel) {
        super(negotiationSession, opponentModel);
    }

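    /**
     * Seeds the Q-table with a prior belief for the initial state: every
     * action value starts at 0.0 except the last one, which is set to 1.0.
     * Since the opening action space has one action per bin, this biases the
     * first greedy pick towards the last bin (presumably the bin holding the
     * agent's highest-utility bids; see determineOpeningBin below).
     */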
    @Override
    protected void initQTable() {
        this.qTable = new HashMap<Integer, ArrayList<Double>>();

        ArrayList<Double> initialBelief = new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 0.0));
        initialBelief.set(this.state.getActionSize() - 1, 1.0);

        // Initial state has a different action space
        this.qTable.putIfAbsent(this.state.hash(), initialBelief);
    }

    @Override
    public String getName() {
        return "Inverted Q-offering";
    }

    /**
     * The general action function for the RL agent. The target bin is found by
     * moving up a bin (retracting the offer), moving down a bin (conceding) or
     * staying in the current bin.
     * @param currentBin the bin of the agent's previous offer
     * @return the target bin after applying the chosen action
     */
    @Override
    protected int determineTargetBin(int currentBin) {
        int targetBin = currentBin;
        ArrayList<Double> defaultActionValues = new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 1.0));

        List<Double> qValues = this.qTable.getOrDefault(this.state.hash(), defaultActionValues);
        int action = this.epsilonGreedy(qValues);
        this.actions.add(action);

        // Apply the action to the current bin: 0 = move down, 1 = move up, 2 = stay
        switch (action) {
            case 0: targetBin--;
                break;
            case 1: targetBin++;
                break;
            case 2: break;
        }

        System.out.println("Reactive bid:");
        System.out.println(action);

        // Clamp the target bin so it can't go out of bounds
        // TODO: Discuss impact on learning algorithm
        targetBin = Math.min(targetBin, this.getNBins() - 1);
        targetBin = Math.max(targetBin, 0);

        return targetBin;
    }

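    /*
     * For reference: epsilonGreedy(qValues), qTable, actions and state are
     * inherited from QlearningStrategy and are not shown in this file. A
     * minimal sketch of the usual epsilon-greedy rule, assuming a `random`
     * source and an `epsilon` field (names are assumptions, not the actual
     * superclass code):
     *
     *   int epsilonGreedy(List<Double> qValues) {
     *       if (random.nextDouble() < epsilon) {
     *           return random.nextInt(qValues.size());         // explore
     *       }
     *       return qValues.indexOf(Collections.max(qValues));  // exploit
     *   }
     */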
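
    /**
     * Picks the opening bin directly: in the initial state the action space
     * has one action per bin, so the epsilon-greedy choice is itself the
     * target bin.
     */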
    @Override
    protected int determineOpeningBin() {
        ArrayList<Double> defaultInitialActionValues = new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 1.0));
        List<Double> qValues = this.qTable.getOrDefault(this.state.hash(), defaultInitialActionValues);
        int action = this.epsilonGreedy(qValues);
        this.actions.add(action);

        System.out.println("Opening bid:");
        System.out.println(action);

        return action;
    }

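    /**
     * Tabular Q-learning update (the method name matches the abstract method
     * it overrides in QlearningStrategy):
     *
     *   Q(s,a) <- Q(s,a) + alpha * (reward + gamma * max_a' Q(s',a') - Q(s,a))
     *
     * Unseen states are seeded before the update: the current state with the
     * same prior belief as in initQTable (zeros, plus 1.0 on the last bin when
     * the action space spans all bins), the successor state optimistically
     * with ones.
     */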
    @Override
    protected void updateQFuction(AbstractState state, int action, double reward, AbstractState newState) {
        // Initialize default action values for states that may be new. If the
        // agent hasn't made an opening bid yet, the action space has one value
        // per bin; otherwise just 3 values (up/down/stay).
        ArrayList<Double> stateDefaultActionValues = new ArrayList<Double>(Collections.nCopies(state.getActionSize(), 0.0));
        ArrayList<Double> newStateDefaultActionValues = new ArrayList<Double>(Collections.nCopies(newState.getActionSize(), 1.0));

        if (state.getActionSize() == this.getNBins()) {
            stateDefaultActionValues.set(state.getActionSize() - 1, 1.0);
        }
        if (newState.getActionSize() == this.getNBins()) {
            newStateDefaultActionValues.set(newState.getActionSize() - 1, 1.0);
        }

        // Make entries in the qTable if they don't exist yet
        this.qTable.putIfAbsent(state.hash(), stateDefaultActionValues);
        this.qTable.putIfAbsent(newState.hash(), newStateDefaultActionValues);

        // Perform the update
        Double Qnext = this.maxActionValue(newState);
        Double newActionValue = this.qFunction(state, action) + this.alpha * (reward + this.gamma * Qnext - this.qFunction(state, action));
        this.qTable.get(state.hash()).set(action, newActionValue);
    }
}
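
/*
 * Usage sketch (illustrative only): this offering strategy is constructed
 * with the session and opponent model supplied by the BOA framework. How it
 * is registered with an agent lives outside this file; the surrounding setup
 * here is assumed:
 *
 *   NegotiationSession session = ...;  // provided by the framework
 *   OpponentModel model = ...;         // e.g. a frequency-based model
 *   PriorBeliefQlearningStrategy offering =
 *       new PriorBeliefQlearningStrategy(session, model);
 */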