package agents.rlboa;

import genius.core.StrategyParameters;
import genius.core.bidding.BidDetails;
import genius.core.boaframework.NegotiationSession;
import genius.core.boaframework.OfferingStrategy;
import genius.core.boaframework.OpponentModel;
import genius.core.boaframework.OutcomeSpace;
import genius.core.boaframework.SortedOutcomeSpace;
import genius.core.misc.Range;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Random;

public class QlearningStrategy extends OfferingStrategy {

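    // The Q-table maps a hashed state to one Q-value per action available in
    // that state. After the opening bid, the action encoding used throughout
    // this class is: 0 = concede (move one bin down), 1 = retract (move one
    // bin up), 2 = stay in the current bin.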
    protected HashMap<Integer, ArrayList<Double>> qTable;
    protected ArrayList<Integer> actions = new ArrayList<Integer>();
    protected int bins;
    protected Double eps;
    protected Double alpha;
    protected Double gamma;
    protected AbstractState state;
    protected String mode;
    protected int timeBins;
    protected Range minMaxBin;

    public QlearningStrategy(NegotiationSession negotiationSession, OpponentModel opponentModel) {
        super.init(negotiationSession, null);
        this.opponentModel = opponentModel;
        this.endNegotiation = false;
        this.state = State.INITIAL;

        OutcomeSpace outcomeSpace = new SortedOutcomeSpace(negotiationSession.getUtilitySpace());
        this.negotiationSession.setOutcomeSpace(outcomeSpace);
    }

    public ArrayList<Integer> getActions() {
        return this.actions;
    }

    /**
     * @return int representing the last action taken by the strategy
     * @throws IndexOutOfBoundsException if called before any action has been performed
     */
    public int getLastAction() throws IndexOutOfBoundsException {
        return this.actions.get(this.actions.size() - 1);
    }

    public void setMinMaxBin(Range minMaxBin) {
        this.minMaxBin = minMaxBin;
    }

    public Range getMinMaxBin() {
        return this.minMaxBin;
    }

    protected void initQTable() {
        this.qTable = new HashMap<Integer, ArrayList<Double>>();

        // The initial state has a different action space
        this.qTable.putIfAbsent(this.state.hash(), new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 0.0)));
    }

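    /**
     * Initialize the Q-table, either from a previously learned table (for
     * example to continue training, or to exploit a trained policy) or from
     * scratch when none is provided.
     * @param qTable previously learned Q-table, or null to start fresh
     */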
    public void initQtable(HashMap<Integer, ArrayList<Double>> qTable) {
        if (qTable != null) {
            this.qTable = qTable;
        }
        else {
            this.initQTable();
        }
    }

    @Override
    public BidDetails determineOpeningBid() {
        // Open the negotiation with a free bid (one of N bins)
        int targetBin = this.determineOpeningBin();
        return this.pickBidInBin(targetBin);
    }

    @Override
    public BidDetails determineNextBid() {
        // HACK(?): this QlearningStrategy works for all states that represent the
        // world in bins, so we needed a way to recognize these; hence the
        // BinnedRepresentation interface.
        int targetBin = this.determineTargetBin(((BinnedRepresentation) this.state).getMyBin());
        return this.pickBidInBin(targetBin);
    }

    @Override
    public String getName() {
        return "Q-Offering";
    }

    /**
     * Check if the bid falls inside the lower and upper bounds
     * @param lower lower bound of utility (inclusive)
     * @param upper upper bound of utility (exclusive)
     * @param bidDetails bid to check (has util and time)
     * @return boolean
     */
    private boolean isInBin(double lower, double upper, BidDetails bidDetails) {
        double myUtil = bidDetails.getMyUndiscountedUtil();
        return myUtil < upper && myUtil >= lower;
    }

    /**
     * Make the opponent model select a bid that is in the provided target bin
     * @param targetBin index of the bin in which to pick a bid
     * @return BidDetails of the selected bid
     */
    protected BidDetails pickBidInBin(int targetBin) {

        double lowerBound = targetBin * this.getBinSize();
        double upperBound = lowerBound + this.getBinSize();

        // getBidsinRange can return bids that are outside of the requested range
        // (false positives), so filter those out explicitly
        List<BidDetails> bidsInRange = this.negotiationSession.getOutcomeSpace().getBidsinRange(new Range(lowerBound, upperBound));
        bidsInRange.removeIf(bid -> !this.isInBin(lowerBound, upperBound, bid));

        // If no bids are possible within this bin, recursively choose another bin:
        // if we conceded this round, concede further; if we retracted, retract
        // further; if we stayed, move randomly up or down.
        if (bidsInRange.isEmpty()) {

            Random random = new Random();
            int newBin = 0;
            int direction = -1;

            // Check if this is the opening action; if it is, just pick a bin randomly
            if (this.actions.size() > 1) {
                direction = this.actions.get(this.actions.size() - 1);
            } else {
                newBin = random.nextInt(this.bins);
            }

            // conceded last time
            if (direction == 0) {
                newBin = determineTargetBin(targetBin - 1);
            }

            // retracted last time
            if (direction == 1) {
                newBin = determineTargetBin(targetBin + 1);
            }

            // stayed last time
            if (direction == 2) {
                int randomUpOrDown = random.nextBoolean() ? 1 : -1;
                newBin = determineTargetBin(targetBin + randomUpOrDown);
            }

            return this.pickBidInBin(newBin);
        }

        return this.maxBidForOpponent(bidsInRange);
    }

    /**
     * This is the general action function for the RL agent. We determine a bin by
     * either moving up (retracting the offer), doing nothing, or moving down
     * (conceding the offer).
     * @param currentBin the bin our last bid was in
     * @return index of the bin to bid in next
     */
    protected int determineTargetBin(int currentBin) {
        int targetBin = currentBin;
        ArrayList<Double> defaultActionValues = new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 0.0));

        List<Double> qValues = this.qTable.getOrDefault(this.state.hash(), defaultActionValues);
        int action = this.epsilonGreedy(qValues);
        this.actions.add(action);

        // Apply the action to the current bin (i.e. move down, move up, or stay)
        switch (action) {
            case 0: targetBin--;
                break;
            case 1: targetBin++;
                break;
            case 2: break;
        }

        // Clamp to the range of relevant bins
        targetBin = Math.min(targetBin, (int) this.minMaxBin.getUpperbound());
        targetBin = Math.max(targetBin, (int) this.minMaxBin.getLowerbound());

        return targetBin;
    }

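    /**
     * Choose the opening bin epsilon-greedily from the Q-values of the current
     * (initial) state. In the initial state every bin is a separate action, so
     * the chosen action index is itself the bin index.
     * @return index of the bin in which to place the opening bid
     */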
    protected int determineOpeningBin() {
        ArrayList<Double> defaultInitialActionValues = new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 0.0));
        List<Double> qValues = this.qTable.getOrDefault(this.state.hash(), defaultInitialActionValues);
        int action = this.epsilonGreedy(qValues);
        this.actions.add(action);

        return action;
    }

    /**
     * @param list List of doubles
     * @return The index of the highest value in the list, with ties broken randomly
     */
    protected int indifferentArgMax(List<Double> list) {
        double maximum = Collections.max(list);

        List<Integer> maximaIdxs = new ArrayList<Integer>();

        // collect the indices of all occurrences of the maximum
        for (int i = 0; i < list.size(); i++) {
            if (list.get(i) == maximum) {
                maximaIdxs.add(i);
            }
        }

        // pick a random index from the list (this is the indifferent part)
        Random rnd = new Random();
        int choice = rnd.nextInt(maximaIdxs.size());

        return maximaIdxs.get(choice);
    }

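    /**
     * Epsilon-greedy action selection: with probability epsilon (and only while
     * training) explore by picking a uniformly random action; otherwise exploit
     * by picking an action with the highest Q-value, breaking ties randomly via
     * {@link #indifferentArgMax(List)}.
     * @param qValues Q-values of the actions available in the current state
     * @return index of the selected action
     */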
    protected int epsilonGreedy(List<Double> qValues) {
        int action;

        // With probability epsilon, pick a random action; exploration only
        // happens while training
        if (Math.random() < this.eps && this.isTraining()) {
            Random random = new Random();
            action = random.nextInt(qValues.size());
        }
        else {
            action = this.indifferentArgMax(qValues);
        }

        return action;
    }

    /**
     * @return The number of bins in which each utility axis is divided
     */
    int getNBins() {
        return this.bins;
    }

    /**
     * @return The width of the bins in which each utility axis is divided
     */
    protected double getBinSize() {
        return 1.0 / this.getNBins();
    }

    /**
     * Setter for the state property
     * @param state new {@link State}
     */
    protected void setState(State state) {
        this.state = state;
    }

    /**
     * Getter for the state property
     * @return the current {@link AbstractState}
     */
    protected AbstractState getState() {
        return this.state;
    }

    /**
     * Determine the bid with the highest expected utility for the opponent from
     * a list of bids
     * @param bids candidate bids
     * @return BidDetails representing the maximum bid
     */
    protected BidDetails maxBidForOpponent(List<BidDetails> bids) {
        BidDetails maxBid = null;

        for (BidDetails bid : bids) {
            if (maxBid == null) {
                maxBid = bid;
            }
            else if (this.opponentModel.getBidEvaluation(bid.getBid()) > this.opponentModel.getBidEvaluation(maxBid.getBid())) {
                maxBid = bid;
            }
        }

        return maxBid;
    }

    /**
     * Gets called by the Negotiator when a relevant negotiation event occurs
     * @param reward reward observed since the last action
     * @param newState state the negotiation has transitioned into
     */
    public void observeEnvironment(double reward, AbstractState newState) {

        // Only start updating after an action has been performed, and only
        // update while training is enabled
        if (this.actions.size() > 0 && this.isTraining()) {
            this.updateQFuction(this.state, this.getLastAction(), reward, newState);
        }
        this.state = newState;
    }

    public HashMap<Integer, ArrayList<Double>> getQTable() {
        return this.qTable;
    }

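    /**
     * Standard Q-learning update rule:
     *   Q(s,a) &lt;- Q(s,a) + alpha * (reward + gamma * max_a' Q(s',a') - Q(s,a))
     * @param state state in which the action was taken
     * @param action index of the action that was taken
     * @param reward reward observed after taking the action
     * @param newState state reached after taking the action
     */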
    protected void updateQFuction(AbstractState state, int action, double reward, AbstractState newState) {
        // Initialize states in the Q-table if they are new. If the agent hasn't
        // made an opening bid yet, the action values are initialized to the
        // number of bins; otherwise to just 3 values (up/down/stay).
        ArrayList<Double> stateDefaultActionValues = new ArrayList<Double>(Collections.nCopies(state.getActionSize(), 0.0));
        ArrayList<Double> newStateDefaultActionValues = new ArrayList<Double>(Collections.nCopies(newState.getActionSize(), 0.0));

        // Make entries in the qTable if they don't exist yet
        this.qTable.putIfAbsent(state.hash(), stateDefaultActionValues);
        this.qTable.putIfAbsent(newState.hash(), newStateDefaultActionValues);

        // Gamma is fixed to 1.0 here: discounting already enters through the
        // reward, which comes from the (possibly discounted) preference profile.
        Double gamma = 1.0;

        // Perform the update
        Double Qnext = this.maxActionValue(newState);
        Double newActionValue = this.qFunction(state, action) + this.alpha * (reward + gamma * Qnext - this.qFunction(state, action));
        this.qTable.get(state.hash()).set(action, newActionValue);
    }

    /**
     * Determine max_a Q(s,a)
     * @param state The state for which to retrieve the maximum action value
     * @return Value of the optimal action that can be taken in the provided state
     */
    protected Double maxActionValue(AbstractState state) {
        return Collections.max(this.qTable.get(state.hash()));
    }

    /**
     * Get the Q-value associated with the provided (state, action) pair.
     * @param state state in which the action was taken
     * @param action index of the action
     * @return the Q-value Q(state, action)
     */
    protected Double qFunction(AbstractState state, int action) {
        ArrayList<Double> actionValues = this.qTable.get(state.hash());
        return actionValues.get(action);
    }

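    /**
     * Read the Q-learning hyperparameters from the strategy configuration.
     * A hypothetical parameter string matching the keys read below (values
     * chosen purely for illustration) could look like:
     *   epsilon=0.1;alpha=0.3;bins=10;_mode=train;time_bins=5
     * @param properties strategy parameters supplied by the BOA framework
     */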
    public void setHyperparameters(StrategyParameters properties) {
        this.eps = properties.getValueAsDouble("epsilon");
        this.alpha = properties.getValueAsDouble("alpha");
        this.bins = (int) properties.getValueAsDouble("bins");
        this.mode = properties.getValueAsString("_mode");
        this.timeBins = (int) properties.getValueAsDouble("time_bins");
    }

    protected boolean isTraining() {
        return this.mode.equals("train");
    }
}