[153] | 1 | package agents.rlboa;
|
---|
[67] | 2 |
|
---|
| 3 | import java.util.ArrayList;
|
---|
| 4 | import java.util.Collections;
|
---|
| 5 |
|
---|
| 6 | import genius.core.StrategyParameters;
|
---|
| 7 | import genius.core.boaframework.NegotiationSession;
|
---|
| 8 | import genius.core.boaframework.OpponentModel;
|
---|
| 9 |
|
---|
| 10 | public class QLambdaStrategy extends QlearningStrategy {
|
---|
| 11 |
|
---|
| 12 | double lambda;
|
---|
| 13 |
|
---|
| 14 | public QLambdaStrategy(NegotiationSession negotiationSession, OpponentModel opponentModel) {
|
---|
| 15 | super(negotiationSession, opponentModel);
|
---|
| 16 | // TODO Auto-generated constructor stub
|
---|
| 17 | }
|
---|
| 18 |
|
---|
| 19 | @Override
|
---|
[153] | 20 | protected void updateQFuction(AbstractState state, int action, double reward, AbstractState newState) {
|
---|
[67] | 21 | // initialize state if it is new
|
---|
| 22 |
|
---|
| 23 | // If agent hasn't done a opening bid, initialize action values to number of bins, otherwise
|
---|
| 24 | // just 3 values (up/down/nothing).
|
---|
| 25 | ArrayList<Double> stateDefaultActionValues = new ArrayList<Double>(Collections.nCopies(state.getActionSize(), 0.0));
|
---|
| 26 | ArrayList<Double> newStateDefaultActionValues = new ArrayList<Double>(Collections.nCopies(newState.getActionSize(), 0.0));
|
---|
| 27 |
|
---|
| 28 | // Make entries in qTable if they don't exist yet
|
---|
| 29 | this.qTable.putIfAbsent(state.hash(), stateDefaultActionValues);
|
---|
| 30 | this.qTable.putIfAbsent(newState.hash(), newStateDefaultActionValues);
|
---|
| 31 |
|
---|
| 32 | // Perform update
|
---|
| 33 | Double Qnext = this.maxActionValue(newState);
|
---|
| 34 | Double newActionValue = this.qFunction(state, action) + this.alpha * (reward + this.gamma * Qnext - this.qFunction(state, action));
|
---|
| 35 | this.qTable.get(state.hash()).set(action, newActionValue);
|
---|
| 36 |
|
---|
| 37 |
|
---|
| 38 |
|
---|
| 39 | // Initialize eligibility trace if it is non-existing or empty
|
---|
| 40 |
|
---|
| 41 | // If
|
---|
| 42 |
|
---|
| 43 | // Add current state, action, reward to eligibility trace
|
---|
| 44 |
|
---|
| 45 | // If terminal state update all states in eligibility trace
|
---|
| 46 | }
|
---|
| 47 | public void setHyperparameters(StrategyParameters properties) {
|
---|
| 48 | super.setHyperparameters(properties);
|
---|
| 49 | this.lambda = properties.getValueAsDouble("lambda");
|
---|
| 50 | }
|
---|
| 51 | }
|
---|