source: src/main/java/agents/rlboa/InvertedQlearningStrategy.java

Last change on this file was 153, checked in by Aron Hammond, 6 years ago

Added function to calculate opposition to MultiLateralAnalysis.java

Moved code that adds RLBOA listeners to RLBOAUtils in the misc package

!! Close SessionInfo after the tournament; not doing so caused /tmp/ to fill up with GeniusData files

This commit finalizes the RLBOA project; it is now ready for use

Our own package (uva.project.):

  • Moved to agents.rlboa
  • Added the opponents and strategies that are mentioned in the report (see the sketch after this list)
  • Changed the class hierarchy: agents can now extend RLBOAagentBilateral to inherit RL functionality
  • States extend AbstractState
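
The strategies follow the pattern visible in the file below: subclass QlearningStrategy and override its hooks. Below is a minimal sketch of such a subclass, assuming QlearningStrategy provides defaults for the hooks that are not overridden; the class name and concession rule are hypothetical, not code from this repository:

    package agents.rlboa;

    import genius.core.boaframework.NegotiationSession;
    import genius.core.boaframework.OpponentModel;

    // Hypothetical illustration, not part of the repository.
    public class AlwaysConcedeStrategy extends QlearningStrategy {

        public AlwaysConcedeStrategy(NegotiationSession negotiationSession, OpponentModel opponentModel) {
            super(negotiationSession, opponentModel);
        }

        @Override
        public String getName() {
            return "Always-concede offering";
        }

        @Override
        protected int determineTargetBin(int currentBin) {
            // Ignore the learned Q-values and always concede one bin,
            // clamped at the lowest bin.
            return Math.max(currentBin - 1, 0);
        }
    }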
File size: 3.1 KB
package agents.rlboa;

import genius.core.boaframework.NegotiationSession;
import genius.core.boaframework.OpponentModel;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
public class InvertedQlearningStrategy extends QlearningStrategy {

    public InvertedQlearningStrategy(NegotiationSession negotiationSession, OpponentModel opponentModel) {
        super(negotiationSession, opponentModel);
    }

    @Override
    protected void initQTable() {
        this.qTable = new HashMap<Integer, ArrayList<Double>>();

        // The initial state has a different action space (one action per bin)
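        // Q-values start at 1.0 rather than 0.0; presumably this optimistic
        // initialization encourages exploration of untried actions early on.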
        this.qTable.putIfAbsent(this.state.hash(), new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 1.0)));
    }

    @Override
    public String getName() {
        return "Inverted Q-offering";
    }

    /**
     * This is the general action function for the RL-agent. It determines the target bin
     * by either moving up (retracting the offer), doing nothing, or moving down
     * (conceding the offer).
     * @param currentBin the bin of the current offer
     * @return the bin to target with the next offer
     */
    @Override
    protected int determineTargetBin(int currentBin) {
        int targetBin = currentBin;
        ArrayList<Double> defaultActionValues = new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 1.0));

        List<Double> qValues = this.qTable.getOrDefault(this.state.hash(), defaultActionValues);
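        // Pick an action epsilon-greedily over the Q-values of the current state;
        // states that were never visited fall back to the optimistic defaults above.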
        int action = this.epsilonGreedy(qValues);
        this.actions.add(action);

        // Apply the action to the current bin (i.e. move up, down or stay)
        switch (action) {
            case 0: targetBin--;
                break;
            case 1: targetBin++;
                break;
            case 2: break;
        }

        // Can't go out of bounds
        // TODO: Discuss impact on learning algorithm
        targetBin = Math.min(targetBin, this.getNBins() - 1);
        targetBin = Math.max(targetBin, 0);

        return targetBin;
    }

    @Override
    protected int determineOpeningBin() {
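        // In the initial state there is one action per bin, so the chosen
        // action index is itself the opening bin.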
        ArrayList<Double> defaultInitialActionValues = new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 1.0));
        List<Double> qValues = this.qTable.getOrDefault(this.state.hash(), defaultInitialActionValues);
        int action = this.epsilonGreedy(qValues);
        this.actions.add(action);

        return action;
    }

    @Override
    protected void updateQFuction(AbstractState state, int action, double reward, AbstractState newState) {
        // Initialize states if they are new.
        // If the agent hasn't made an opening bid yet, the action space has one value
        // per bin; otherwise it has just 3 values (up/down/nothing).
        ArrayList<Double> stateDefaultActionValues = new ArrayList<Double>(Collections.nCopies(state.getActionSize(), 1.0));
        ArrayList<Double> newStateDefaultActionValues = new ArrayList<Double>(Collections.nCopies(newState.getActionSize(), 1.0));

        // Make entries in the qTable if they don't exist yet
        this.qTable.putIfAbsent(state.hash(), stateDefaultActionValues);
        this.qTable.putIfAbsent(newState.hash(), newStateDefaultActionValues);

        // Perform update
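        // Standard Q-learning (TD) update, which the line below implements:
        //   Q(s,a) <- Q(s,a) + alpha * (reward + gamma * max_a' Q(s',a') - Q(s,a))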
        Double qNext = this.maxActionValue(newState);
        Double newActionValue = this.qFunction(state, action) + this.alpha * (reward + this.gamma * qNext - this.qFunction(state, action));
        this.qTable.get(state.hash()).set(action, newActionValue);
    }
}
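
The epsilonGreedy, maxActionValue and qFunction helpers are inherited from QlearningStrategy and are not shown on this page. For reference, here is a minimal sketch of the standard epsilon-greedy selection that epsilonGreedy presumably implements; the class name and exploration rate are assumptions, not the repository's actual code:

    import java.util.List;
    import java.util.Random;

    // Sketch of standard epsilon-greedy action selection; the actual
    // implementation in QlearningStrategy may differ.
    class EpsilonGreedySketch {
        private final Random random = new Random();
        private final double epsilon = 0.1; // assumed exploration rate

        int epsilonGreedy(List<Double> qValues) {
            if (random.nextDouble() < epsilon) {
                // Explore: pick a uniformly random action.
                return random.nextInt(qValues.size());
            }
            // Exploit: pick the action with the highest Q-value.
            int best = 0;
            for (int a = 1; a < qValues.size(); a++) {
                if (qValues.get(a) > qValues.get(best)) {
                    best = a;
                }
            }
            return best;
        }
    }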