source: src/main/java/uva/projectai/y2018/jasparon/PriorBeliefQlearningStrategy.java@126

Last change on this file since 126 was 126, checked in by Aron Hammond, 6 years ago

Added a function to MultiLateralAnalysis.java that calculates opposition

Moved the code that adds RLBOA listeners to RLBOAUtils in the misc package

Added input for strategyParameters to SessionPanel (gui)

!! Close SessionInfo after the tournament; leaving it open caused /tmp/ to fill up with GeniusData files

Our own package:

  • Added opponents and strategies that are mentioned in the report
  • Changed the class hierarchy; agents can now extend RLBOAagentBilateral to inherit RL functionality.
  • States now extend AbstractState
File size: 3.6 KB
package uva.projectai.y2018.jasparon;

import genius.core.boaframework.NegotiationSession;
import genius.core.boaframework.OpponentModel;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;

public class PriorBeliefQlearningStrategy extends QlearningStrategy {

    public PriorBeliefQlearningStrategy(NegotiationSession negotiationSession, OpponentModel opponentModel) {
        super(negotiationSession, opponentModel);
    }

    @Override
    protected void initQTable() {
        this.qTable = new HashMap<Integer, ArrayList<Double>>();

        // Prior belief: put all initial value mass on the last action
        ArrayList<Double> initialBelief = new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 0.0));
        initialBelief.set(this.state.getActionSize() - 1, 1.0);
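        // For example, with getActionSize() == 5 this row starts as
        // [0.0, 0.0, 0.0, 0.0, 1.0], so the first epsilon-greedy pick is
        // biased towards the last bin.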

        // The initial state has a different action space
        this.qTable.putIfAbsent(this.state.hash(), initialBelief);
    }

    @Override
    public String getName() {
        return "Inverted Q-offering";
    }

    /**
     * This is the general action function for the RL agent. We determine a target bin by
     * either moving up (retracting the offer), doing nothing, or moving down (conceding).
     * @param currentBin the bin that the current offer falls into
     * @return the target bin for the next offer, clamped to the valid range of bins
     */
    @Override
    protected int determineTargetBin(int currentBin) {
        int targetBin = currentBin;
        ArrayList<Double> defaultActionValues = new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 1.0));

        List<Double> qValues = this.qTable.getOrDefault(this.state.hash(), defaultActionValues);
        int action = this.epsilonGreedy(qValues);
        this.actions.add(action);

        // Apply the action to the current bin (i.e. move up, move down or stay)
        switch (action) {
            case 0: targetBin--;
                break;
            case 1: targetBin++;
                break;
            case 2: break;
        }
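        // In non-initial states the action space has exactly three entries
        // (see the comment in updateQFuction below), so cases 0-2 are exhaustive.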

        System.out.println("Reactive bid: " + action);

        // Can't go out of bounds
        // TODO: Discuss impact on learning algorithm
        targetBin = Math.min(targetBin, this.getNBins() - 1);
        targetBin = Math.max(targetBin, 0);

        return targetBin;
    }

    @Override
    protected int determineOpeningBin() {
        ArrayList<Double> defaultInitialActionValues = new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 1.0));
        List<Double> qValues = this.qTable.getOrDefault(this.state.hash(), defaultInitialActionValues);
        int action = this.epsilonGreedy(qValues);
        this.actions.add(action);

        System.out.println("Opening bid: " + action);

        return action;
    }

    @Override
    protected void updateQFuction(AbstractState state, int action, double reward, AbstractState newState) {
        // Initialize the states if they are new.
        // If the agent hasn't made an opening bid yet, the action values span the full
        // number of bins; otherwise there are just 3 values (up/down/nothing).
        ArrayList<Double> stateDefaultActionValues = new ArrayList<Double>(Collections.nCopies(state.getActionSize(), 0.0));
        ArrayList<Double> newStateDefaultActionValues = new ArrayList<Double>(Collections.nCopies(newState.getActionSize(), 1.0));

        if (state.getActionSize() == this.getNBins()) {
            stateDefaultActionValues.set(state.getActionSize() - 1, 1.0);
        }
        if (newState.getActionSize() == this.getNBins()) {
            newStateDefaultActionValues.set(newState.getActionSize() - 1, 1.0);
        }

        // Make entries in the qTable if they don't exist yet
        this.qTable.putIfAbsent(state.hash(), stateDefaultActionValues);
        this.qTable.putIfAbsent(newState.hash(), newStateDefaultActionValues);

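        // Perform the standard Q-learning update:
        //   Q(s, a) <- Q(s, a) + alpha * (reward + gamma * max_a' Q(s', a') - Q(s, a))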
        Double qNext = this.maxActionValue(newState);
        Double newActionValue = this.qFunction(state, action) + this.alpha * (reward + this.gamma * qNext - this.qFunction(state, action));
        this.qTable.get(state.hash()).set(action, newActionValue);
    }
}
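
The helper members used above (epsilonGreedy, maxActionValue, qFunction, getNBins, alpha, gamma, state, actions and qTable) are inherited from QlearningStrategy and are not part of this file. As a rough sketch of the contract this class relies on (an illustrative assumption, not the superclass's actual code), the epsilon-greedy selection over a row of Q-values could look like this:

    // Hypothetical sketch of the inherited helper; QlearningStrategy's real
    // implementation may differ. `epsilon` is assumed to be the exploration rate.
    protected int epsilonGreedy(List<Double> qValues) {
        if (Math.random() < this.epsilon) {
            // Explore: pick a uniformly random action
            return (int) (Math.random() * qValues.size());
        }
        // Exploit: pick the action with the highest Q-value
        int best = 0;
        for (int i = 1; i < qValues.size(); i++) {
            if (qValues.get(i) > qValues.get(best)) {
                best = i;
            }
        }
        return best;
    }

Under this reading, the prior belief set in initQTable makes the very first exploitation step pick the last bin, which is what gives the strategy its initial bias.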