source: src/main/java/uva/projectai/y2018/jasparon/InvertedQlearningStrategy.java@126

Last change on this file since 126 was 126, checked in by Aron Hammond, 6 years ago

Added a function to calculate opposition to MultiLateralAnalysis.java

Moved code that adds RLBOA listeners to RLBOAUtils in the misc package

Added input for strategyParameters to SessionPanel (gui)

!! Close SessionInfo after the tournament; leaving it open caused /tmp/ to fill up with GeniusData files

Our own package:

  • Added opponents and strategies that are mentioned in the report
  • Changed the class hierarchy; agents can now extend RLBOAagentBilateral to inherit RL functionality.
  • States extend AbstractState
File size: 3.1 KB
package uva.projectai.y2018.jasparon;

import genius.core.boaframework.NegotiationSession;
import genius.core.boaframework.OpponentModel;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;

public class InvertedQlearningStrategy extends QlearningStrategy {

    public InvertedQlearningStrategy(NegotiationSession negotiationSession, OpponentModel opponentModel) {
        super(negotiationSession, opponentModel);
    }

    @Override
    protected void initQTable() {
        this.qTable = new HashMap<Integer, ArrayList<Double>>();

        // Initial state has a different action space
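        // (one Q-value per bin, since the opening action picks a bin directly,
        // rather than the three relative moves used in later states)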
        this.qTable.putIfAbsent(this.state.hash(), new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 1.0)));
    }

    @Override
    public String getName() {
        return "Inverted Q-offering";
    }

    /**
     * This is the general action function for the RL agent. We determine the target
     * bin by either moving up (retracting the offer), doing nothing, or moving down
     * (conceding).
     *
     * @param currentBin the bin of the current offer
     * @return the bin to target with the next offer
     */
    @Override
    protected int determineTargetBin(int currentBin) {
        int targetBin = currentBin;
        ArrayList<Double> defaultActionValues = new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 1.0));

        List<Double> qValues = this.qTable.getOrDefault(this.state.hash(), defaultActionValues);
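        // epsilonGreedy is inherited from QlearningStrategy; assumed to be standard
        // epsilon-greedy selection (greedy action with probability 1 - epsilon, random otherwise)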
        int action = this.epsilonGreedy(qValues);
        this.actions.add(action);

        // Apply the action to the current bin: 0 moves one bin down, 1 moves one bin up, 2 stays
        switch (action) {
            case 0: targetBin--;
                break;
            case 1: targetBin++;
                break;
            case 2: break;
        }

        // Clamp so the target bin can't go out of bounds
        // TODO: Discuss impact on learning algorithm
        targetBin = Math.min(targetBin, this.getNBins() - 1);
        targetBin = Math.max(targetBin, 0);

        return targetBin;
    }

    @Override
    protected int determineOpeningBin() {
        ArrayList<Double> defaultInitialActionValues = new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 1.0));
        List<Double> qValues = this.qTable.getOrDefault(this.state.hash(), defaultInitialActionValues);
        int action = this.epsilonGreedy(qValues);
        this.actions.add(action);

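        // In the opening state there is one action per bin, so the chosen
        // action index is itself the opening bin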
        return action;
    }

    @Override
    protected void updateQFuction(AbstractState state, int action, double reward, AbstractState newState) {
        // Initialize states if they are new. If the agent hasn't made an opening
        // bid yet, the action values span the number of bins; otherwise there are
        // just 3 values (up/down/nothing).
        ArrayList<Double> stateDefaultActionValues = new ArrayList<Double>(Collections.nCopies(state.getActionSize(), 1.0));
        ArrayList<Double> newStateDefaultActionValues = new ArrayList<Double>(Collections.nCopies(newState.getActionSize(), 1.0));

        // Make entries in the qTable if they don't exist yet
        this.qTable.putIfAbsent(state.hash(), stateDefaultActionValues);
        this.qTable.putIfAbsent(newState.hash(), newStateDefaultActionValues);

        // Perform the update
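        // Q-learning update rule: Q(s,a) <- Q(s,a) + alpha * (reward + gamma * max_a' Q(s',a') - Q(s,a))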
        Double Qnext = this.maxActionValue(newState);
        Double newActionValue = this.qFunction(state, action) + this.alpha * (reward + this.gamma * Qnext - this.qFunction(state, action));
        this.qTable.get(state.hash()).set(action, newActionValue);
    }
}