source: src/main/java/uva/projectai/y2018/jasparon/InvertedQlearningStrategy.java@126

Last change on this file since 126 was 126, checked in by Aron Hammond, 6 years ago

Added a function to calculate opposition to MultiLateralAnalysis.java

Moved code that adds RLBOA listeners to RLBOAUtils in the misc package

Added input for strategyParameters to SessionPanel (gui)

!! Close SessionInfo after the tournament; leaving it open caused /tmp/ to fill up with GeniusData files

Our own package:

  • Added opponents and strategies that are mentioned in the report
  • Changed the class hierarchy; agents can now extend RLBOAagentBilateral to inherit RL functionality.
  • States extend AbstractState
File size: 3.1 KB
package uva.projectai.y2018.jasparon;

import genius.core.boaframework.NegotiationSession;
import genius.core.boaframework.OpponentModel;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;

public class InvertedQlearningStrategy extends QlearningStrategy {

    public InvertedQlearningStrategy(NegotiationSession negotiationSession, OpponentModel opponentModel) {
        super(negotiationSession, opponentModel);
    }

    @Override
    protected void initQTable() {
        this.qTable = new HashMap<Integer, ArrayList<Double>>();

        // Initial state has a different action space
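        // (one Q-value per bin, since the opening action picks a bin directly,
        // rather than the three relative moves used in later states)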
        this.qTable.putIfAbsent(this.state.hash(), new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 1.0)));
    }

    @Override
    public String getName() {
        return "Inverted Q-offering";
    }

    /**
     * This is the general action function for the RL agent. We determine the target
     * bin by either moving up (retracting the offer), doing nothing, or moving down
     * (conceding).
     *
     * @param currentBin the bin of the current offer
     * @return the bin to target with the next offer
     */
    @Override
    protected int determineTargetBin(int currentBin) {
        int targetBin = currentBin;
        ArrayList<Double> defaultActionValues = new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 1.0));

        List<Double> qValues = this.qTable.getOrDefault(this.state.hash(), defaultActionValues);
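        // epsilonGreedy is inherited from QlearningStrategy; assumed to be standard
        // epsilon-greedy selection (greedy action with probability 1 - epsilon, random otherwise)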
        int action = this.epsilonGreedy(qValues);
        this.actions.add(action);

        // Apply the action to the current bin: 0 moves one bin down, 1 moves one bin up, 2 stays
        switch (action) {
            case 0: targetBin--;
                break;
            case 1: targetBin++;
                break;
            case 2: break;
        }

        // Clamp so the target bin can't go out of bounds
        // TODO: Discuss impact on learning algorithm
        targetBin = Math.min(targetBin, this.getNBins() - 1);
        targetBin = Math.max(targetBin, 0);

        return targetBin;
    }

    @Override
    protected int determineOpeningBin() {
        ArrayList<Double> defaultInitialActionValues = new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 1.0));
        List<Double> qValues = this.qTable.getOrDefault(this.state.hash(), defaultInitialActionValues);
        int action = this.epsilonGreedy(qValues);
        this.actions.add(action);

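        // In the opening state there is one action per bin, so the chosen
        // action index is itself the opening bin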
        return action;
    }

    @Override
    protected void updateQFuction(AbstractState state, int action, double reward, AbstractState newState) {
        // Initialize states if they are new. If the agent hasn't made an opening
        // bid yet, the action values span the number of bins; otherwise there are
        // just 3 values (up/down/nothing).
        ArrayList<Double> stateDefaultActionValues = new ArrayList<Double>(Collections.nCopies(state.getActionSize(), 1.0));
        ArrayList<Double> newStateDefaultActionValues = new ArrayList<Double>(Collections.nCopies(newState.getActionSize(), 1.0));

        // Make entries in the qTable if they don't exist yet
        this.qTable.putIfAbsent(state.hash(), stateDefaultActionValues);
        this.qTable.putIfAbsent(newState.hash(), newStateDefaultActionValues);

        // Perform the update
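        // Q-learning update rule: Q(s,a) <- Q(s,a) + alpha * (reward + gamma * max_a' Q(s',a') - Q(s,a))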
        Double Qnext = this.maxActionValue(newState);
        Double newActionValue = this.qFunction(state, action) + this.alpha * (reward + this.gamma * Qnext - this.qFunction(state, action));
        this.qTable.get(state.hash()).set(action, newActionValue);
    }
}