package agents.rlboa;

import genius.core.boaframework.NegotiationSession;
import genius.core.boaframework.OpponentModel;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;

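/**
 * Offering strategy that extends {@link QlearningStrategy} by initializing the
 * Q-table with a prior belief over the opening bid (see {@link #initQTable()}).
 */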
public class PriorBeliefQlearningStrategy extends QlearningStrategy {

    public PriorBeliefQlearningStrategy(NegotiationSession negotiationSession, OpponentModel opponentModel) {
        super(negotiationSession, opponentModel);
    }

    @Override
    protected void initQTable() {
        this.qTable = new HashMap<Integer, ArrayList<Double>>();

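        // Prior belief: an initial action value of 1.0 for the last bin and 0.0
        // for all others, so the first greedy opening bid targets the last bin.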
        ArrayList<Double> initialBelief = new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 0.0));
        initialBelief.set(this.state.getActionSize() - 1, 1.0);

        // The initial state has a different action space (one action per bin)
        this.qTable.putIfAbsent(this.state.hash(), initialBelief);
    }

    @Override
    public String getName() {
        return "Inverted Q-offering";
    }

    /**
     * This is the general action function for the RL-agent. We determine the
     * target bin by either moving up (retracting the offer), doing nothing, or
     * moving down (conceding).
     * @param currentBin the bin the agent's current offer is in
     * @return the bin to target with the next offer
     */
    @Override
    protected int determineTargetBin(int currentBin) {
        int targetBin = currentBin;
        ArrayList<Double> defaultActionValues = new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 1.0));

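        // Look up the Q-values for the current state, falling back to optimistic
        // defaults of 1.0 for unseen states so every action looks worth trying.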
        List<Double> qValues = this.qTable.getOrDefault(this.state.hash(), defaultActionValues);
        int action = this.epsilonGreedy(qValues);
        this.actions.add(action);

        // Apply the action to the current bin (i.e. move up, move down or stay)
        switch (action) {
            case 0: targetBin--;
                break;
            case 1: targetBin++;
                break;
            case 2: break;
        }

        System.out.println("Reactive bid:");
        System.out.println(action);

        // Clamp the target bin so it can't go out of bounds
        // TODO: Discuss impact on learning algorithm
        targetBin = Math.min(targetBin, this.getNBins() - 1);
        targetBin = Math.max(targetBin, 0);

        return targetBin;
    }

    @Override
    protected int determineOpeningBin() {
        ArrayList<Double> defaultInitialActionValues = new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 1.0));
        List<Double> qValues = this.qTable.getOrDefault(this.state.hash(), defaultInitialActionValues);
        int action = this.epsilonGreedy(qValues);
        this.actions.add(action);

        System.out.println("Opening bid:");
        System.out.println(action);

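        // In the opening state each action corresponds directly to a bin, so
        // the selected action index is itself the opening bin.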
        return action;
    }

    @Override
    protected void updateQFuction(AbstractState state, int action, double reward, AbstractState newState) {
        // Initialize states in the Q-table if they are new. If the agent hasn't
        // made an opening bid yet, the action space has one action per bin;
        // otherwise it has just 3 actions (up/down/nothing).
        ArrayList<Double> stateDefaultActionValues = new ArrayList<Double>(Collections.nCopies(state.getActionSize(), 0.0));
        ArrayList<Double> newStateDefaultActionValues = new ArrayList<Double>(Collections.nCopies(newState.getActionSize(), 1.0));

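        // A state whose action space equals the number of bins is an opening
        // state; give it the same prior as in initQTable. (For newState the
        // values already default to 1.0, so its set below is a no-op.)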
        if (state.getActionSize() == this.getNBins()) {
            stateDefaultActionValues.set(state.getActionSize() - 1, 1.0);
        }
        if (newState.getActionSize() == this.getNBins()) {
            newStateDefaultActionValues.set(newState.getActionSize() - 1, 1.0);
        }

        // Make entries in qTable if they don't exist yet
        this.qTable.putIfAbsent(state.hash(), stateDefaultActionValues);
        this.qTable.putIfAbsent(newState.hash(), newStateDefaultActionValues);

        // Perform update
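        // Standard Q-learning rule: Q(s,a) <- Q(s,a) + alpha * (reward + gamma * max_a' Q(s',a') - Q(s,a)).
        // For example, with alpha = 0.1, gamma = 0.9, Q(s,a) = 0.5, reward = 0.2
        // and max_a' Q(s',a') = 1.0: 0.5 + 0.1 * (0.2 + 0.9 * 1.0 - 0.5) = 0.56.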
        Double Qnext = this.maxActionValue(newState);
        Double newActionValue = this.qFunction(state, action) + this.alpha * (reward + this.gamma * Qnext - this.qFunction(state, action));
        this.qTable.get(state.hash()).set(action, newActionValue);
    }
}