source: src/main/java/agents/rlboa/PriorBeliefQlearningStrategy.java

Last change on this file was 153, checked in by Aron Hammond, 6 years ago

Added a function to MultiLateralAnalysis.java that calculates opposition

Moved the code that adds RLBOA listeners to RLBOAUtils in the misc package

!! Close SessionInfo after the tournament; not doing so caused /tmp/ to fill up with GeniusData files

This commit finalizes the RLBOA project, and it is now ready for use

Our own package (uva.project.):

  • Moved to agents.rlboa
  • Added the opponents and strategies that are mentioned in the report
  • Changed the class hierarchy; agents can now extend RLBOAagentBilateral to inherit RL functionality
  • States extend AbstractState
File size: 3.6 KB
package agents.rlboa;

import genius.core.boaframework.NegotiationSession;
import genius.core.boaframework.OpponentModel;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;

public class PriorBeliefQlearningStrategy extends QlearningStrategy {

    public PriorBeliefQlearningStrategy(NegotiationSession negotiationSession, OpponentModel opponentModel) {
        super(negotiationSession, opponentModel);
    }

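    // Prior belief: Q-values for the initial state start at 0.0 for every bin except the
    // last one, which starts at 1.0, so epsilon-greedy exploitation initially prefers that bin.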
    @Override
    protected void initQTable() {
        this.qTable = new HashMap<Integer, ArrayList<Double>>();

        ArrayList<Double> initialBelief = new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 0.0));
        initialBelief.set(this.state.getActionSize() - 1, 1.0);

        // Initial state has a different action space
        this.qTable.putIfAbsent(this.state.hash(), initialBelief);
    }

    @Override
    public String getName() {
        return "Inverted Q-offering";
    }

    /**
     * This is the general action function for the RL-agent. We determine a target bin by either
     * moving up (retracting the offer), doing nothing, or moving down (conceding the offer).
     * @param currentBin the bin the agent is currently targeting
     * @return the bin to target with the next offer
     */
    @Override
    protected int determineTargetBin(int currentBin) {
        int targetBin = currentBin;
        ArrayList<Double> defaultActionValues = new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 1.0));

        List<Double> qValues = this.qTable.getOrDefault(this.state.hash(), defaultActionValues);
        int action = this.epsilonGreedy(qValues);
        this.actions.add(action);

        // Apply the action to the current bin (i.e. move up, move down or stay)
        switch (action) {
            case 0: targetBin--;
                break;
            case 1: targetBin++;
                break;
            case 2: break;
        }

        System.out.println("Reactive bid:");
        System.out.println(action);

        // Can't go out of bounds
        // TODO: Discuss impact on learning algorithm
        targetBin = Math.min(targetBin, this.getNBins() - 1);
        targetBin = Math.max(targetBin, 0);

        return targetBin;
    }

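    // The opening bid is chosen directly over all bins: the initial state's action space
    // spans every bin, whereas later states only offer the three relative moves above.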
    @Override
    protected int determineOpeningBin() {
        ArrayList<Double> defaultInitialActionValues = new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 1.0));
        List<Double> qValues = this.qTable.getOrDefault(this.state.hash(), defaultInitialActionValues);
        int action = this.epsilonGreedy(qValues);
        this.actions.add(action);

        System.out.println("Opening bid:");
        System.out.println(action);

        return action;
    }

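    // Standard Q-learning update:
    // Q(s,a) <- Q(s,a) + alpha * (reward + gamma * max_a' Q(s',a') - Q(s,a))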
    @Override
    protected void updateQFuction(AbstractState state, int action, double reward, AbstractState newState) {
        // Initialize the states if they are new.
        // If the agent hasn't made an opening bid yet, the action values are initialized over the
        // number of bins; otherwise there are just 3 values (up/down/nothing).
        ArrayList<Double> stateDefaultActionValues = new ArrayList<Double>(Collections.nCopies(state.getActionSize(), 0.0));
        ArrayList<Double> newStateDefaultActionValues = new ArrayList<Double>(Collections.nCopies(newState.getActionSize(), 1.0));

        if (state.getActionSize() == this.getNBins()) {
            stateDefaultActionValues.set(state.getActionSize() - 1, 1.0);
        }
        if (newState.getActionSize() == this.getNBins()) {
            newStateDefaultActionValues.set(newState.getActionSize() - 1, 1.0);
        }

        // Make entries in the qTable if they don't exist yet
        this.qTable.putIfAbsent(state.hash(), stateDefaultActionValues);
        this.qTable.putIfAbsent(newState.hash(), newStateDefaultActionValues);

        // Perform the update
        Double Qnext = this.maxActionValue(newState);
        Double newActionValue = this.qFunction(state, action) + this.alpha * (reward + this.gamma * Qnext - this.qFunction(state, action));
        this.qTable.get(state.hash()).set(action, newActionValue);
    }
}