1 | package agents.rlboa;
|
---|
2 |
|
---|
3 | import java.util.ArrayList;
|
---|
4 | import java.util.Collections;
|
---|
5 |
|
---|
6 | import genius.core.StrategyParameters;
|
---|
7 | import genius.core.boaframework.NegotiationSession;
|
---|
8 | import genius.core.boaframework.OpponentModel;
|
---|
9 |
|
---|
10 | public class QLambdaStrategy extends QlearningStrategy {
|
---|
11 |
|
---|
12 | double lambda;
|
---|
13 |
|
---|
14 | public QLambdaStrategy(NegotiationSession negotiationSession, OpponentModel opponentModel) {
|
---|
15 | super(negotiationSession, opponentModel);
|
---|
16 | // TODO Auto-generated constructor stub
|
---|
17 | }
|
---|
18 |
|
---|
19 | @Override
|
---|
20 | protected void updateQFuction(AbstractState state, int action, double reward, AbstractState newState) {
|
---|
21 | // initialize state if it is new
|
---|
22 |
|
---|
23 | // If agent hasn't done a opening bid, initialize action values to number of bins, otherwise
|
---|
24 | // just 3 values (up/down/nothing).
|
---|
25 | ArrayList<Double> stateDefaultActionValues = new ArrayList<Double>(Collections.nCopies(state.getActionSize(), 0.0));
|
---|
26 | ArrayList<Double> newStateDefaultActionValues = new ArrayList<Double>(Collections.nCopies(newState.getActionSize(), 0.0));
|
---|
27 |
|
---|
28 | // Make entries in qTable if they don't exist yet
|
---|
29 | this.qTable.putIfAbsent(state.hash(), stateDefaultActionValues);
|
---|
30 | this.qTable.putIfAbsent(newState.hash(), newStateDefaultActionValues);
|
---|
31 |
|
---|
32 | // Perform update
|
---|
33 | Double Qnext = this.maxActionValue(newState);
|
---|
34 | Double newActionValue = this.qFunction(state, action) + this.alpha * (reward + this.gamma * Qnext - this.qFunction(state, action));
|
---|
35 | this.qTable.get(state.hash()).set(action, newActionValue);
|
---|
36 |
|
---|
37 |
|
---|
38 |
|
---|
39 | // Initialize eligibility trace if it is non-existing or empty
|
---|
40 |
|
---|
41 | // If
|
---|
42 |
|
---|
43 | // Add current state, action, reward to eligibility trace
|
---|
44 |
|
---|
45 | // If terminal state update all states in eligibility trace
|
---|
46 | }
|
---|
47 | public void setHyperparameters(StrategyParameters properties) {
|
---|
48 | super.setHyperparameters(properties);
|
---|
49 | this.lambda = properties.getValueAsDouble("lambda");
|
---|
50 | }
|
---|
51 | }
|
---|