source: src/main/java/agents/rlboa/Qlearner.java

Last change on this file was 153, checked in by Aron Hammond, 6 years ago

Added function to calculate opposition to MultiLateralAnalysis.java

Moved code to add RLBOA listeners to RLBOAUtils in misc package

!! close SessionInfo after tournament; this caused /tmp/ to fill up with GeniusData files

This commit finalized the RLBOA project and it is now ready for use

Our own package (uva.project):

  • Moved to agents.rlboa
  • Added opponents and strategies that are mentioned in the report
  • Change class hierarchy, agents can now extend from RLBOAagentBilateral to inherit RL functionality.
  • States extend from AbstractState
File size: 8.1 KB
RevLine 
[153]1package agents.rlboa;
[67]2
3import java.io.FileInputStream;
4import java.io.File;
5import java.io.FileNotFoundException;
6import java.io.FileOutputStream;
7import java.io.ObjectInputStream;
8import java.io.ObjectOutputStream;
9import java.util.ArrayList;
10import java.util.HashMap;
11
12import genius.core.Bid;
[153]13import genius.core.StrategyParameters;
[67]14import genius.core.actions.Action;
15import genius.core.actions.EndNegotiation;
16import genius.core.events.MultipartyNegoActionEvent;
[153]17import genius.core.misc.Range;
[67]18import negotiator.boaframework.acceptanceconditions.other.AC_Next;
19import negotiator.boaframework.omstrategy.BestBid;
[153]20import negotiator.boaframework.opponentmodel.AgentXFrequencyModel;
[67]21import negotiator.boaframework.opponentmodel.PerfectModel;
[127]22
[67]23@SuppressWarnings("serial")
[153]24public class Qlearner extends RLBOAagentBilateral {
[67]25
26 QlearningStrategy offeringStrategy;
27
[153]28 @SuppressWarnings("unchecked")
[67]29 @Override
30 public void agentSetup() {
[153]31
[67]32 HashMap<String, Double> params = new HashMap<String, Double>();
33
[153]34 // Initialize opponent model
35 switch (this.getStrategyParameters().getValueAsString("opponentModel")) {
36 case "PerfectModel":
37 opponentModel = new PerfectModel();
38 break;
39 case "FrequencyModel":
40 opponentModel = new AgentXFrequencyModel();
41 break;
42 default:
43 break;
44 }
[67]45 opponentModel.init(negotiationSession, params);
46
47 // Initialize offeringStrategy (is a RL-component)
48 switch (this.getStrategyParameters().getValueAsString("strategy")) {
[153]49 case "QlearningStrategy":
50 offeringStrategy = new QlearningStrategy(negotiationSession, opponentModel);
51 break;
52 case "PriorBeliefQlearningStrategy":
53 offeringStrategy = new PriorBeliefQlearningStrategy(negotiationSession, opponentModel);
54 break;
55 case "QLambdaStrategy":
56 offeringStrategy = new QLambdaStrategy(negotiationSession, opponentModel);
57 break;
[67]58 }
59 offeringStrategy.setHyperparameters(this.getStrategyParameters());
[153]60
61 // Initialize q-table
62 String pathToQtable = this.strategyParameters.getValueAsString("_path_to_qtable");
63 String filepath = this.parsePathToQtable(pathToQtable);
64 HashMap<Integer, ArrayList<Double>> qTable = (HashMap<Integer, ArrayList<Double>>) this.readObjectFromFile(filepath);
[67]65 offeringStrategy.initQtable(qTable);
66
67 // Accept if the incoming offer is higher than what you would offer yourself
68 acceptConditions = new AC_Next(negotiationSession, offeringStrategy, 1, 0);
69
70 // Opponent model strategy always selects best bid it has available
71 omStrategy = new BestBid();
72 omStrategy.init(negotiationSession, opponentModel, params);
73 setDecoupledComponents(acceptConditions, offeringStrategy, opponentModel, omStrategy);
[153]74
75 // Get reservation value and max bid to determine relevant range of bins
76 double minUtility;
77 double maxUtility;
78
79 try {
80 Bid maxUtilBid = this.utilitySpace.getMaxUtilityBid();
81 Bid minUtilBid = this.utilitySpace.getMinUtilityBid();
82 maxUtility = this.utilitySpace.getUtility(maxUtilBid);
83 minUtility = this.utilitySpace.getUtility(minUtilBid);
84 minUtility = Math.max(minUtility, this.utilitySpace.getReservationValueUndiscounted());
85 } catch (Exception e) {
86 // exception is thrown by getMaxUtilityBid if there are no bids in the outcomespace
87 // but I guess that's pretty rare. Default to 0.0 - 1.0 to prevent crashes.
88 maxUtility = 1.0;
89 minUtility = 0.0;
90 }
91
92 int minBin = this.getBinIndex(minUtility);
93 int maxBin = this.getBinIndex(maxUtility);
94
95 offeringStrategy.setMinMaxBin(new Range(minBin, maxBin));
[67]96 }
97
98 @Override
99 public String getName() {
100 return "Q-learner";
101 }
102
[153]103 /**
104 * @param negoEvent
105 * @return AbstractState object representing the current state of the agent
106 * given the negoEvent
107 */
108 public AbstractState getStateRepresentation(MultipartyNegoActionEvent negoEvent) {
[67]109 Bid oppLastBid = negotiationSession.getOpponentBidHistory().getLastBid();
110 Bid myLastBid = negotiationSession.getOwnBidHistory().getLastBid();
111 Bid agreement = negoEvent.getAgreement();
112 Action currentAction = negoEvent.getAction();
113
[153]114 if (agreement != null || currentAction.getClass() == EndNegotiation.class || negoEvent.getTime() == 1.0) {
[67]115 return State.TERMINAL;
116 }
117
[153]118 int myBin = this.getBinIndex(myLastBid);
119 int oppBin = this.getBinIndex(oppLastBid);
[67]120
121 double time = negotiationSession.getTime();
122
123 State state = new State(myBin, oppBin, this.getTimeBinIndex(time));
124
125 return state;
126 }
127
[153]128 /**
129 * Checks if a valid bid is passed and calculates the bin in which it would fall
130 * otherwise return extreme number to indicate that the bid doesn't exist yet.
131 *
132 * @param bid
133 * @return
134 */
135 protected int getBinIndex(Bid bid) {
136 int bin;
137 if (bid != null) {
138 double bidUtil = this.getUtility(bid);
139 bin = this.getBinIndex(bidUtil);
140 } else {
141 bin = Integer.MIN_VALUE;
142 }
143
144 return bin;
145 }
146
147 /**
148 * Helper function that calculates the bin index based
149 * on a specified utility. Is called by the similarly
150 * named function that takes a bid as argument.
151 *
152 * @param util
153 * @return
154 */
[67]155 private int getBinIndex(double util) {
[153]156 util = Math.min(0.999, util); // ensures maximum bid is in bin (N_BINS - 1)
[67]157 int n_bins = offeringStrategy.getNBins();
158 return (int) Math.floor(util * n_bins);
159 }
160
[153]161 /**
162 *
163 * @param time
164 * @return bin index that represents the current time in the state. Is binned
165 * based on a strategy parameter
166 */
167 protected int getTimeBinIndex(double time) {
168 return (int) Math.floor(time * this.getStrategyParameters().getValueAsDouble("time_bins"));
[67]169 }
170
171 @Override
[153]172 public double getReward(Bid agreement) {
[67]173 double reward = 0.0;
[153]174
[67]175 if (agreement != null) {
[153]176 reward = this.getUtility(agreement);
[67]177 }
178
179 return reward;
180 }
181
182 @Override
[153]183 public void observeEnvironment(double reward, AbstractState newState) {
[67]184 this.offeringStrategy.observeEnvironment(reward, newState);
185
186 if (newState.isTerminalState()) {
187 this.writeObjectToFile(this.offeringStrategy.getQTable());
188 }
189 }
190
191 public void writeObjectToFile(Object serObj) {
192
[153]193 String pathToQtable = this.strategyParameters.getValueAsString("_path_to_qtable");
194 String filepath = parsePathToQtable(pathToQtable);
195
196 File outputFile = new File(filepath);
197 outputFile.getParentFile().mkdirs();
[67]198 try {
199 FileOutputStream fileOut = new FileOutputStream(filepath);
200 ObjectOutputStream objectOut = new ObjectOutputStream(fileOut);
201 objectOut.writeObject(serObj);
202 objectOut.close();
[153]203 System.out.println("The Q-table was succesfully written to a file");
[67]204
205 } catch (Exception ex) {
206 ex.printStackTrace();
207 }
208 }
209
210 public Object readObjectFromFile(String filepath) {
211 Object obj = null;
212
213 try {
214 FileInputStream fileIn = new FileInputStream(filepath);
215 ObjectInputStream objectIn = new ObjectInputStream(fileIn);
216 obj = objectIn.readObject();
217 objectIn.close();
218 System.out.println("Succesfully read object");
[153]219 } catch (Exception ex) {
220 if (ex instanceof FileNotFoundException) {
[67]221 System.out.println("qTable file does not exist. A new file will be created.");
[153]222 } else {
[67]223 ex.printStackTrace();
224 }
225 }
226
227 return obj;
228 }
[153]229
230 private String parsePathToQtable(String rawPath) {
231 String filepath;
232 File check = new File(rawPath);
233 if (check.isFile() || rawPath.endsWith(".table")) {
234 filepath = rawPath;
235 } else {
236 filepath = rawPath + this.instanceIdentifier();
237 }
238 return filepath;
239 }
[67]240
241 public String instanceIdentifier() {
[153]242 String domainName = this.negotiationSession.getDomain().getName().replace(".xml", "").replace("etc/templates", "").replace("/", "_");
243 return String.format("%s-%s-%s-%s", this.getName(), domainName,
244 this.utilitySpace.getFileName().replace('/', '_').replace(domainName, ""),
245 this.filterStrategyParameters(this.getStrategyParameters()).replace(';', '-').replace('=', '@')); // because ; is csv // delimiter);
[67]246 }
[153]247
248 public String filterStrategyParameters(StrategyParameters parameters) {
249 String fullString = parameters.toString();
250 String filteredString = "";
251
252 // filter out pairs that look like this: _key=value
253 // these are not part of the identifier. Convention adapted from
254 // pythonic 'private variable' indication (self._privatevar)
255 for (String pair : fullString.split(";")) {
256 if (!pair.startsWith("_") && pair.contains("=")) {
257 filteredString = filteredString + ";" + pair;
258 }
259 }
260
261 return filteredString;
262 }
[67]263}
Note: See TracBrowser for help on using the repository browser.