package agents.rlboa;

import genius.core.boaframework.NegotiationSession;
import genius.core.boaframework.OpponentModel;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;

| 11 | public class InvertedQlearningStrategy extends QlearningStrategy {
|
---|
| 12 |
|
---|
| 13 | public InvertedQlearningStrategy(NegotiationSession negotiationSession, OpponentModel opponentModel) {
|
---|
| 14 | super(negotiationSession, opponentModel);
|
---|
| 15 | }
|
---|
| 16 |
|
---|
| 17 | @Override
|
---|
| 18 | protected void initQTable() {
|
---|
| 19 | this.qTable = new HashMap<Integer, ArrayList<Double>>();
|
---|
| 20 |
|
---|
| 21 | // Initial state has different action space
|
---|
| 22 | this.qTable.putIfAbsent(this.state.hash(), new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 1.0)));
|
---|
| 23 | }
|
---|
| 24 |
|
---|
| 25 | @Override
|
---|
| 26 | public String getName() {
|
---|
| 27 | return "Inverted Q-offering";
|
---|
| 28 | }
|
---|
| 29 |
|
---|
| 30 | /**
|
---|
| 31 | * This is the general action function for the RL-agent. We determine a bin by either
|
---|
| 32 | * moving up (retracting offer), doing nothing or moving down (conceding offer).
|
---|
| 33 | * @param currentBin
|
---|
| 34 | * @return
|
---|
| 35 | */
|
---|
| 36 | @Override
|
---|
| 37 | protected int determineTargetBin(int currentBin) {
|
---|
| 38 | int targetBin = currentBin;
|
---|
| 39 | ArrayList<Double> defaultActionValues = new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 1.0));
|
---|
| 40 |
|
---|
| 41 | List<Double> qValues = this.qTable.getOrDefault(this.state.hash(), defaultActionValues);
|
---|
| 42 | int action = this.epsilonGreedy(qValues);
|
---|
| 43 | this.actions.add(action);
|
---|
| 44 |
|
---|
| 45 | // Apply action current bin (ie. move up, down or stay)
|
---|
| 46 | switch (action) {
|
---|
| 47 | case 0: targetBin--;
|
---|
| 48 | break;
|
---|
| 49 | case 1: targetBin++;
|
---|
| 50 | break;
|
---|
| 51 | case 2: break;
|
---|
| 52 | }
|
---|
| 53 |
|
---|
| 54 | // Can't go out of bounds
|
---|
| 55 | // TODO: Discuss impact on learning algorithm
|
---|
| 56 | targetBin = Math.min(targetBin, this.getNBins() - 1);
|
---|
| 57 | targetBin = Math.max(targetBin, 0);
|
---|
| 58 |
|
---|
| 59 | return targetBin;
|
---|
| 60 | }
|
---|
| 61 |
|
---|
| 62 | @Override
|
---|
| 63 | protected int determineOpeningBin() {
|
---|
| 64 | ArrayList<Double> defaultInitialActionValues = new ArrayList<Double>(Collections.nCopies(this.state.getActionSize(), 1.0));
|
---|
| 65 | List<Double> qValues = this.qTable.getOrDefault(this.state.hash(), defaultInitialActionValues);
|
---|
| 66 | int action = this.epsilonGreedy(qValues);
|
---|
| 67 | this.actions.add(action);
|
---|
| 68 |
|
---|
| 69 | return action;
|
---|
| 70 | }
|
---|
| 71 |
|
---|
| 72 | @Override
|
---|
[153] | 73 | protected void updateQFuction(AbstractState state, int action, double reward, AbstractState newState) {
|
---|
[67] | 74 | // initialize state if it is new
|
---|
| 75 |
|
---|
| 76 | // If agent hasn't done a opening bid, initialize action values to number of bins, otherwise
|
---|
| 77 | // just 3 values (up/down/nothing).
|
---|
| 78 | ArrayList<Double> stateDefaultActionValues = new ArrayList<Double>(Collections.nCopies(state.getActionSize(), 1.0));
|
---|
| 79 | ArrayList<Double> newStateDefaultActionValues = new ArrayList<Double>(Collections.nCopies(newState.getActionSize(), 1.0));
|
---|
| 80 |
|
---|
| 81 | // Make entries in qTable if they don't exist yet
|
---|
| 82 | this.qTable.putIfAbsent(state.hash(), stateDefaultActionValues);
|
---|
| 83 | this.qTable.putIfAbsent(newState.hash(), newStateDefaultActionValues);
|
---|
| 84 |
|
---|
| 85 | // Perform update
|
---|
| 86 | Double Qnext = this.maxActionValue(newState);
|
---|
| 87 | Double newActionValue = this.qFunction(state, action) + this.alpha * (reward + this.gamma * Qnext - this.qFunction(state, action));
|
---|
| 88 | this.qTable.get(state.hash()).set(action, newActionValue);
|
---|
| 89 | }
|
---|
| 90 | }