package agents.rlboa;

import genius.core.boaframework.NegotiationSession;
import genius.core.boaframework.OpponentModel;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;

| 11 | public class InvertedQlearningStrategy extends QlearningStrategy {
|
---|
| 12 |
|
---|
| 13 | public InvertedQlearningStrategy(NegotiationSession negotiationSession, OpponentModel opponentModel) {
|
---|
| 14 | super(negotiationSession, opponentModel);
|
---|
| 15 | }
|
---|
| 16 |
|
---|
| 17 | @Override
|
---|
| 18 | protected void initQTable() {
|
---|
| 19 | this.qTable = new HashMap<Integer, ArrayList<Double>>();
|
---|
| 20 |
|
---|
| 21 | // Initial state has different action space
|
---|
| 22 | this.qTable.putIfAbsent(this.state.hash(), new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 1.0)));
|
---|
| 23 | }
|
---|
| 24 |
|
---|
| 25 | @Override
|
---|
| 26 | public String getName() {
|
---|
| 27 | return "Inverted Q-offering";
|
---|
| 28 | }
|
---|
| 29 |
|
---|
| 30 | /**
|
---|
| 31 | * This is the general action function for the RL-agent. We determine a bin by either
|
---|
| 32 | * moving up (retracting offer), doing nothing or moving down (conceding offer).
|
---|
| 33 | * @param currentBin
|
---|
| 34 | * @return
|
---|
| 35 | */
|
---|
| 36 | @Override
|
---|
| 37 | protected int determineTargetBin(int currentBin) {
|
---|
| 38 | int targetBin = currentBin;
|
---|
| 39 | ArrayList<Double> defaultActionValues = new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 1.0));
|
---|
| 40 |
|
---|
| 41 | List<Double> qValues = this.qTable.getOrDefault(this.state.hash(), defaultActionValues);
|
---|
| 42 | int action = this.epsilonGreedy(qValues);
|
---|
| 43 | this.actions.add(action);
|
---|
| 44 |
|
---|
| 45 | // Apply action current bin (ie. move up, down or stay)
|
---|
| 46 | switch (action) {
|
---|
| 47 | case 0: targetBin--;
|
---|
| 48 | break;
|
---|
| 49 | case 1: targetBin++;
|
---|
| 50 | break;
|
---|
| 51 | case 2: break;
|
---|
| 52 | }
|
---|
| 53 |
|
---|
| 54 | // Can't go out of bounds
|
---|
| 55 | // TODO: Discuss impact on learning algorithm
|
---|
| 56 | targetBin = Math.min(targetBin, this.getNBins() - 1);
|
---|
| 57 | targetBin = Math.max(targetBin, 0);
|
---|
| 58 |
|
---|
| 59 | return targetBin;
|
---|
| 60 | }
|
---|
| 61 |
|
---|
| 62 | @Override
|
---|
| 63 | protected int determineOpeningBin() {
|
---|
| 64 | ArrayList<Double> defaultInitialActionValues = new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 1.0));
|
---|
| 65 | List<Double> qValues = this.qTable.getOrDefault(this.state.hash(), defaultInitialActionValues);
|
---|
| 66 | int action = this.epsilonGreedy(qValues);
|
---|
| 67 | this.actions.add(action);
|
---|
| 68 |
|
---|
| 69 | return action;
|
---|
| 70 | }
|
---|
| 71 |
|
---|
| 72 | @Override
|
---|
[153] | 73 | protected void updateQFuction(AbstractState state, int action, double reward, AbstractState newState) {
|
---|
[67] | 74 | // initialize state if it is new
|
---|
| 75 |
|
---|
| 76 | // If agent hasn't done a opening bid, initialize action values to number of bins, otherwise
|
---|
| 77 | // just 3 values (up/down/nothing).
|
---|
| 78 | ArrayList<Double> stateDefaultActionValues = new ArrayList<Double>(Collections.nCopies(state.getActionSize(), 1.0));
|
---|
| 79 | ArrayList<Double> newStateDefaultActionValues = new ArrayList<Double>(Collections.nCopies(newState.getActionSize(), 1.0));
|
---|
| 80 |
|
---|
| 81 | // Make entries in qTable if they don't exist yet
|
---|
| 82 | this.qTable.putIfAbsent(state.hash(), stateDefaultActionValues);
|
---|
| 83 | this.qTable.putIfAbsent(newState.hash(), newStateDefaultActionValues);
|
---|
| 84 |
|
---|
| 85 | // Perform update
|
---|
| 86 | Double Qnext = this.maxActionValue(newState);
|
---|
| 87 | Double newActionValue = this.qFunction(state, action) + this.alpha * (reward + this.gamma * Qnext - this.qFunction(state, action));
|
---|
| 88 | this.qTable.get(state.hash()).set(action, newActionValue);
|
---|
| 89 | }
|
---|
| 90 | }