source: src/main/java/uva/projectai/y2018/jasparon/ 126

Last change on this file since 126 was 126, checked in by Aron Hammond, 6 years ago

Added function to calculate opposition to

Moved code to add RLBOA listeners to RLBOAUtils in the misc package

Added input for strategyParameters to SessionPanel (gui)

!! close SessionInfo after tournament; this caused /tmp/ to fill up with GeniusData files

Our own package:

  • Added opponents and strategies that are mentioned in the report
  • Changed class hierarchy; agents can now extend RLBOAagentBilateral to inherit RL functionality.
  • States extend from AbstractState
File size: 10.8 KB
1package uva.projectai.y2018.jasparon;
3import genius.core.StrategyParameters;
4import genius.core.bidding.BidDetails;
5import genius.core.boaframework.NegotiationSession;
6import genius.core.boaframework.OfferingStrategy;
7import genius.core.boaframework.OpponentModel;
8import genius.core.boaframework.OutcomeSpace;
9import genius.core.boaframework.SortedOutcomeSpace;
10import genius.core.misc.Range;
12import java.util.ArrayList;
13import java.util.Collections;
14import java.util.HashMap;
15import java.util.List;
16import java.util.Random;
18public class QlearningStrategy extends OfferingStrategy {
20 protected HashMap<Integer, ArrayList<Double>> qTable;
21 protected ArrayList<Integer> actions = new ArrayList<Integer>();
22 protected int bins;
23 protected Double eps;
24 protected Double alpha;
25 protected Double gamma;
26 protected AbstractState state;
27 protected String mode;
28 protected int timeBins;
29 protected Range minMaxBin;
31 public QlearningStrategy(NegotiationSession negotiationSession, OpponentModel opponentModel) {
32 super.init(negotiationSession, null);
33 this.opponentModel = opponentModel;
34 this.endNegotiation = false;
35 this.state = State.INITIAL;
37 OutcomeSpace outcomeSpace = new SortedOutcomeSpace(negotiationSession.getUtilitySpace());
38 this.negotiationSession.setOutcomeSpace(outcomeSpace);
39 }
41 public ArrayList<Integer> getActions() {
42 return this.actions;
43 }
45 /**
46 * @return int representing the last action taken by the strategy
47 * @throws IndexOutOfBoundsException if called before any action has been performed
48 */
49 public int getLastAction() throws IndexOutOfBoundsException {
50 return this.actions.get(this.actions.size() - 1);
51 }
53 public void setMinMaxBin(Range minMaxBin) {
54 this.minMaxBin = minMaxBin;
55 }
57 public Range getMinMaxBin() {
58 return this.minMaxBin;
59 }
61 protected void initQTable() {
62 this.qTable = new HashMap<Integer, ArrayList<Double>>();
64 // Initial state has different action space
65 this.qTable.putIfAbsent(this.state.hash(), new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 0.0)));
66 }
68 public void initQtable(HashMap<Integer, ArrayList<Double>> qTable) {
69 if (qTable != null) {
70 this.qTable = qTable;
71 }
72 else {
73 this.initQTable();
74 }
75 }
77 @Override
78 public BidDetails determineOpeningBid() {
79 // Open the negotiation with a free bid (one of N bins)
80 int targetBin = this.determineOpeningBin();
81 return this.pickBidInBin(targetBin);
82 }
84 @Override
85 public BidDetails determineNextBid() {
86 // HACK(?) this QlearningStrategy works for all states that represent the world in bins,
87 // so we needed a way to recognize these. Therefore the interface BinnedRepresentation
88 int targetBin = this.determineTargetBin(((BinnedRepresentation) this.state).getMyBin());
89 return this.pickBidInBin(targetBin);
90 }
92 @Override
93 public String getName() {
94 return "Q-Offering";
95 }
97 /**
98 * Check if the bid falls inside the lower and upper bounds
99 * @param lower lower bound of utility (inclusive)
100 * @param upper upper bound of utility (exclusive)
101 * @param bidDetails bid to check (has util and time)
102 * @return boolean
103 */
104 private boolean isInBin(double lower, double upper, BidDetails bidDetails) {
105 double myUtil = bidDetails.getMyUndiscountedUtil();
106 return myUtil < upper && myUtil >= lower;
107 }
109 /**
110 * Make the opponent model select a bid that is in the provided target bin
111 * @param targetBin index of the bin in which to pick a bid
112 * @return BidDetails of the selected bid
113 */
114 protected BidDetails pickBidInBin(int targetBin) {
116 double lowerBound = targetBin * this.getBinSize();
117 double upperBound = lowerBound + this.getBinSize();
119 // getBidsInRange behaves weirdly and returns bids that are outise of there range (false positives)
120 List<BidDetails> bidsInRange = this.negotiationSession.getOutcomeSpace().getBidsinRange(new Range(lowerBound, upperBound));
121 bidsInRange.removeIf( bid -> !this.isInBin(lowerBound, upperBound, bid) );
123 // If there are no bids possible within this bin, recursively choose another bin by the following logic:
124 // if you conceded this round, concede further, etc.
125 if (bidsInRange.isEmpty()) {
127 Random random = new Random();
128 int newBin = 0;
129 int direction = -1;
131 // Check if this is the opening action or not; if it is we just pick randomly
132 if (this.actions.size() > 1) {
133 direction = this.actions.get(this.actions.size() - 1);
134 } else {
135 newBin = random.nextInt(this.bins);
136 }
138 // conceded last time
139 if (direction == 0) {
140 newBin = determineTargetBin(targetBin - 1);
141 }
143 // retracted last time
144 if (direction == 1) {
145 newBin = determineTargetBin(targetBin + 1);
146 }
148 // stayed last time
149 if (direction == 2) {
150 int randomUpOrDown = random.nextBoolean() ? 1 : -1;
151 newBin = determineTargetBin(targetBin + randomUpOrDown);
152 }
154 return this.pickBidInBin(newBin);
155 }
158 return this.maxBidForOpponent(bidsInRange);
159 }
161 /**
162 * This is the general action function for the RL-agent. We determine a bin by either
163 * moving up (retracting offer), doing nothing or moving down (conceding offer).
164 * @param currentBin
165 * @return
166 */
167 protected int determineTargetBin(int currentBin) {
168 int targetBin = currentBin;
169 ArrayList<Double> defaultActionValues = new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 0.0));
171 List<Double> qValues = this.qTable.getOrDefault(this.state.hash(), defaultActionValues);
172 int action = this.epsilonGreedy(qValues);
173 this.actions.add(action);
175 // Apply action current bin (ie. move up, down or stay)
176 switch (action) {
177 case 0: targetBin--;
178 break;
179 case 1: targetBin++;
180 break;
181 case 2: break;
182 }
184 // Can't go outside of the range of relevant bins.
185 targetBin = Math.min(targetBin, (int) this.minMaxBin.getUpperbound());
186 targetBin = Math.max(targetBin, (int) this.minMaxBin.getLowerbound());
188 return targetBin;
189 }
191 protected int determineOpeningBin() {
192 ArrayList<Double> defaultInitialActionValues = new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 0.0));
193 List<Double> qValues = this.qTable.getOrDefault(this.state.hash(), defaultInitialActionValues);
194 int action = this.epsilonGreedy(qValues);
195 this.actions.add(action);
197 return action;
198 }
200 /**
201 * @param list List of doubles
202 * @return The index of the highest value in the list
203 */
204 protected int indifferentArgMax(List<Double> list) {
205 double maximum = Collections.max(list);
207 List<Integer> maximaIdxs = new ArrayList<Integer>();
209 // collect indices of all occurrences of maximum
210 for (int i = 0; i < list.size(); i++) {
211 if (list.get(i) == maximum) {
212 maximaIdxs.add(i);
213 }
214 }
216 // pick a random index from the list (this is the indifferent part)
217 Random rnd = new Random();
218 int choice = rnd.nextInt(maximaIdxs.size());
220 return maximaIdxs.get(choice);
221 }
223 protected int epsilonGreedy(List<Double> qValues) {
224 int action;
226 // With probability epsilon, pick a random action (epsilon greedy)
227 if (Math.random() < this.eps && this.isTraining()) {
228 Random random = new Random();
229 action = random.nextInt(qValues.size());
230 }
231 else {
232 action = this.indifferentArgMax(qValues);
233 }
235 return action;
236 }
238 /**
239 * @return The number of bins in which the each utility axis is divided
240 */
241 int getNBins() {
242 return this.bins;
243 }
245 /**
246 * @return The width of the bins in which the each utility axis is divided
247 */
248 protected double getBinSize() {
249 return 1.0 / this.getNBins();
250 }
252 /**
253 * Setter for the state property
254 * @param state new {@link State}
255 *
256 */
257 protected void setState(State state) {
258 this.state = state;
259 }
261 /**
262 * Getter for the state property
263 * @return
264 */
265 protected AbstractState getState() {
266 return this.state;
267 }
269 /**
270 * Determine the bid with the highest expected utility for the opponent from a list of bids
271 * @param bids
272 * @return BidDetails with representing the maximum bid
273 */
274 protected BidDetails maxBidForOpponent(List<BidDetails> bids) {
275 BidDetails maxBid = null;
277 for (BidDetails bid : bids) {
278 if (maxBid == null) {
279 maxBid = bid;
280 }
281 else if (this.opponentModel.getBidEvaluation(bid.getBid()) > this.opponentModel.getBidEvaluation(maxBid.getBid())) {
282 maxBid = bid;
283 }
284 }
286 return maxBid;
287 }
289 /**
290 * Gets called by Negotiator when a relevant negotiation event occurs
291 * @param reward
292 * @param newState
293 */
294 public void observeEnvironment(double reward, AbstractState newState) {
296 // Only start updating after an action is performed
297 // Only update if training is enabled
298 if (this.actions.size() > 0 && this.isTraining()) {
299 this.updateQFuction(this.state, this.getLastAction(), reward, newState);
300 }
301 this.state = newState;
302 }
304 public HashMap<Integer, ArrayList<Double>> getQTable() {
305 return this.qTable;
306 }
308 protected void updateQFuction(AbstractState state, int action, double reward, AbstractState newState) {
309 // initialize state if it is new
311 // If agent hasn't done a opening bid, initialize action values to number of bins, otherwise
312 // just 3 values (up/down/nothing).
313 ArrayList<Double> stateDefaultActionValues = new ArrayList<Double>(Collections.nCopies(state.getActionSize(), 0.0));
314 ArrayList<Double> newStateDefaultActionValues = new ArrayList<Double>(Collections.nCopies(newState.getActionSize(), 0.0));
316 // Make entries in qTable if they don't exist yet
317 this.qTable.putIfAbsent(state.hash(), stateDefaultActionValues);
318 this.qTable.putIfAbsent(newState.hash(), newStateDefaultActionValues);
321 // To remind ourselves that the below function is correct =>
322 // the gamma comes from the domain/preference profile through reward which is discounted.
323 Double gamma = 1.0;
324 // Perform update
325 Double Qnext = this.maxActionValue(newState);
326 Double newActionValue = this.qFunction(state, action) + this.alpha * (reward + gamma * Qnext - this.qFunction(state, action));
327 this.qTable.get(state.hash()).set(action, newActionValue);
328 }
330 /**
331 * Determine max_a Q(s,a)
332 * @param state The hash of the state for which to retrieve the max action value
333 * @return Value of optimal action given that can be taken in the provided state (0 if state is unknown)
334 */
335 protected Double maxActionValue(AbstractState state) {
336 return Collections.max(this.qTable.get(state.hash()));
337 }
339 /**
340 * Get the Q value associated with the provided (state, action) pair.
341 * @param state
342 * @param action
343 * @return
344 */
345 protected Double qFunction(AbstractState state, int action) {
346 ArrayList<Double> actionValues = this.qTable.get(state.hash());
347 return actionValues.get(action);
348 }
350 public void setHyperparameters(StrategyParameters properties) {
351 this.eps = properties.getValueAsDouble("epsilon");
352 this.alpha = properties.getValueAsDouble("alpha");
353 this.bins = (int) properties.getValueAsDouble("bins");
354 this.mode = properties.getValueAsString("_mode");
355 this.timeBins = (int) properties.getValueAsDouble("time_bins");
356 }
358 protected boolean isTraining() {
359 return this.mode.equals("train");
360 }
Note: See TracBrowser for help on using the repository browser.