1 | package agents.rlboa;
|
---|
2 |
|
---|
3 | import java.io.FileInputStream;
|
---|
4 | import java.io.File;
|
---|
5 | import java.io.FileNotFoundException;
|
---|
6 | import java.io.FileOutputStream;
|
---|
7 | import java.io.ObjectInputStream;
|
---|
8 | import java.io.ObjectOutputStream;
|
---|
9 | import java.util.ArrayList;
|
---|
10 | import java.util.HashMap;
|
---|
11 |
|
---|
12 | import genius.core.Bid;
|
---|
13 | import genius.core.StrategyParameters;
|
---|
14 | import genius.core.actions.Action;
|
---|
15 | import genius.core.actions.EndNegotiation;
|
---|
16 | import genius.core.events.MultipartyNegoActionEvent;
|
---|
17 | import genius.core.misc.Range;
|
---|
18 | import negotiator.boaframework.acceptanceconditions.other.AC_Next;
|
---|
19 | import negotiator.boaframework.omstrategy.BestBid;
|
---|
20 | import negotiator.boaframework.opponentmodel.AgentXFrequencyModel;
|
---|
21 | import negotiator.boaframework.opponentmodel.PerfectModel;
|
---|
22 |
|
---|
23 | @SuppressWarnings("serial")
|
---|
24 | public class Qlearner extends RLBOAagentBilateral {
|
---|
25 |
|
---|
26 | QlearningStrategy offeringStrategy;
|
---|
27 |
|
---|
28 | @SuppressWarnings("unchecked")
|
---|
29 | @Override
|
---|
30 | public void agentSetup() {
|
---|
31 |
|
---|
32 | HashMap<String, Double> params = new HashMap<String, Double>();
|
---|
33 |
|
---|
34 | // Initialize opponent model
|
---|
35 | switch (this.getStrategyParameters().getValueAsString("opponentModel")) {
|
---|
36 | case "PerfectModel":
|
---|
37 | opponentModel = new PerfectModel();
|
---|
38 | break;
|
---|
39 | case "FrequencyModel":
|
---|
40 | opponentModel = new AgentXFrequencyModel();
|
---|
41 | break;
|
---|
42 | default:
|
---|
43 | break;
|
---|
44 | }
|
---|
45 | opponentModel.init(negotiationSession, params);
|
---|
46 |
|
---|
47 | // Initialize offeringStrategy (is a RL-component)
|
---|
48 | switch (this.getStrategyParameters().getValueAsString("strategy")) {
|
---|
49 | case "QlearningStrategy":
|
---|
50 | offeringStrategy = new QlearningStrategy(negotiationSession, opponentModel);
|
---|
51 | break;
|
---|
52 | case "PriorBeliefQlearningStrategy":
|
---|
53 | offeringStrategy = new PriorBeliefQlearningStrategy(negotiationSession, opponentModel);
|
---|
54 | break;
|
---|
55 | case "QLambdaStrategy":
|
---|
56 | offeringStrategy = new QLambdaStrategy(negotiationSession, opponentModel);
|
---|
57 | break;
|
---|
58 | }
|
---|
59 | offeringStrategy.setHyperparameters(this.getStrategyParameters());
|
---|
60 |
|
---|
61 | // Initialize q-table
|
---|
62 | String pathToQtable = this.strategyParameters.getValueAsString("_path_to_qtable");
|
---|
63 | String filepath = this.parsePathToQtable(pathToQtable);
|
---|
64 | HashMap<Integer, ArrayList<Double>> qTable = (HashMap<Integer, ArrayList<Double>>) this.readObjectFromFile(filepath);
|
---|
65 | offeringStrategy.initQtable(qTable);
|
---|
66 |
|
---|
67 | // Accept if the incoming offer is higher than what you would offer yourself
|
---|
68 | acceptConditions = new AC_Next(negotiationSession, offeringStrategy, 1, 0);
|
---|
69 |
|
---|
70 | // Opponent model strategy always selects best bid it has available
|
---|
71 | omStrategy = new BestBid();
|
---|
72 | omStrategy.init(negotiationSession, opponentModel, params);
|
---|
73 | setDecoupledComponents(acceptConditions, offeringStrategy, opponentModel, omStrategy);
|
---|
74 |
|
---|
75 | // Get reservation value and max bid to determine relevant range of bins
|
---|
76 | double minUtility;
|
---|
77 | double maxUtility;
|
---|
78 |
|
---|
79 | try {
|
---|
80 | Bid maxUtilBid = this.utilitySpace.getMaxUtilityBid();
|
---|
81 | Bid minUtilBid = this.utilitySpace.getMinUtilityBid();
|
---|
82 | maxUtility = this.utilitySpace.getUtility(maxUtilBid);
|
---|
83 | minUtility = this.utilitySpace.getUtility(minUtilBid);
|
---|
84 | minUtility = Math.max(minUtility, this.utilitySpace.getReservationValueUndiscounted());
|
---|
85 | } catch (Exception e) {
|
---|
86 | // exception is thrown by getMaxUtilityBid if there are no bids in the outcomespace
|
---|
87 | // but I guess that's pretty rare. Default to 0.0 - 1.0 to prevent crashes.
|
---|
88 | maxUtility = 1.0;
|
---|
89 | minUtility = 0.0;
|
---|
90 | }
|
---|
91 |
|
---|
92 | int minBin = this.getBinIndex(minUtility);
|
---|
93 | int maxBin = this.getBinIndex(maxUtility);
|
---|
94 |
|
---|
95 | offeringStrategy.setMinMaxBin(new Range(minBin, maxBin));
|
---|
96 | }
|
---|
97 |
|
---|
98 | @Override
|
---|
99 | public String getName() {
|
---|
100 | return "Q-learner";
|
---|
101 | }
|
---|
102 |
|
---|
103 | /**
|
---|
104 | * @param negoEvent
|
---|
105 | * @return AbstractState object representing the current state of the agent
|
---|
106 | * given the negoEvent
|
---|
107 | */
|
---|
108 | public AbstractState getStateRepresentation(MultipartyNegoActionEvent negoEvent) {
|
---|
109 | Bid oppLastBid = negotiationSession.getOpponentBidHistory().getLastBid();
|
---|
110 | Bid myLastBid = negotiationSession.getOwnBidHistory().getLastBid();
|
---|
111 | Bid agreement = negoEvent.getAgreement();
|
---|
112 | Action currentAction = negoEvent.getAction();
|
---|
113 |
|
---|
114 | if (agreement != null || currentAction.getClass() == EndNegotiation.class || negoEvent.getTime() == 1.0) {
|
---|
115 | return State.TERMINAL;
|
---|
116 | }
|
---|
117 |
|
---|
118 | int myBin = this.getBinIndex(myLastBid);
|
---|
119 | int oppBin = this.getBinIndex(oppLastBid);
|
---|
120 |
|
---|
121 | double time = negotiationSession.getTime();
|
---|
122 |
|
---|
123 | State state = new State(myBin, oppBin, this.getTimeBinIndex(time));
|
---|
124 |
|
---|
125 | return state;
|
---|
126 | }
|
---|
127 |
|
---|
128 | /**
|
---|
129 | * Checks if a valid bid is passed and calculates the bin in which it would fall
|
---|
130 | * otherwise return extreme number to indicate that the bid doesn't exist yet.
|
---|
131 | *
|
---|
132 | * @param bid
|
---|
133 | * @return
|
---|
134 | */
|
---|
135 | protected int getBinIndex(Bid bid) {
|
---|
136 | int bin;
|
---|
137 | if (bid != null) {
|
---|
138 | double bidUtil = this.getUtility(bid);
|
---|
139 | bin = this.getBinIndex(bidUtil);
|
---|
140 | } else {
|
---|
141 | bin = Integer.MIN_VALUE;
|
---|
142 | }
|
---|
143 |
|
---|
144 | return bin;
|
---|
145 | }
|
---|
146 |
|
---|
147 | /**
|
---|
148 | * Helper function that calculates the bin index based
|
---|
149 | * on a specified utility. Is called by the similarly
|
---|
150 | * named function that takes a bid as argument.
|
---|
151 | *
|
---|
152 | * @param util
|
---|
153 | * @return
|
---|
154 | */
|
---|
155 | private int getBinIndex(double util) {
|
---|
156 | util = Math.min(0.999, util); // ensures maximum bid is in bin (N_BINS - 1)
|
---|
157 | int n_bins = offeringStrategy.getNBins();
|
---|
158 | return (int) Math.floor(util * n_bins);
|
---|
159 | }
|
---|
160 |
|
---|
161 | /**
|
---|
162 | *
|
---|
163 | * @param time
|
---|
164 | * @return bin index that represents the current time in the state. Is binned
|
---|
165 | * based on a strategy parameter
|
---|
166 | */
|
---|
167 | protected int getTimeBinIndex(double time) {
|
---|
168 | return (int) Math.floor(time * this.getStrategyParameters().getValueAsDouble("time_bins"));
|
---|
169 | }
|
---|
170 |
|
---|
171 | @Override
|
---|
172 | public double getReward(Bid agreement) {
|
---|
173 | double reward = 0.0;
|
---|
174 |
|
---|
175 | if (agreement != null) {
|
---|
176 | reward = this.getUtility(agreement);
|
---|
177 | }
|
---|
178 |
|
---|
179 | return reward;
|
---|
180 | }
|
---|
181 |
|
---|
182 | @Override
|
---|
183 | public void observeEnvironment(double reward, AbstractState newState) {
|
---|
184 | this.offeringStrategy.observeEnvironment(reward, newState);
|
---|
185 |
|
---|
186 | if (newState.isTerminalState()) {
|
---|
187 | this.writeObjectToFile(this.offeringStrategy.getQTable());
|
---|
188 | }
|
---|
189 | }
|
---|
190 |
|
---|
191 | public void writeObjectToFile(Object serObj) {
|
---|
192 |
|
---|
193 | String pathToQtable = this.strategyParameters.getValueAsString("_path_to_qtable");
|
---|
194 | String filepath = parsePathToQtable(pathToQtable);
|
---|
195 |
|
---|
196 | File outputFile = new File(filepath);
|
---|
197 | outputFile.getParentFile().mkdirs();
|
---|
198 | try {
|
---|
199 | FileOutputStream fileOut = new FileOutputStream(filepath);
|
---|
200 | ObjectOutputStream objectOut = new ObjectOutputStream(fileOut);
|
---|
201 | objectOut.writeObject(serObj);
|
---|
202 | objectOut.close();
|
---|
203 | System.out.println("The Q-table was succesfully written to a file");
|
---|
204 |
|
---|
205 | } catch (Exception ex) {
|
---|
206 | ex.printStackTrace();
|
---|
207 | }
|
---|
208 | }
|
---|
209 |
|
---|
210 | public Object readObjectFromFile(String filepath) {
|
---|
211 | Object obj = null;
|
---|
212 |
|
---|
213 | try {
|
---|
214 | FileInputStream fileIn = new FileInputStream(filepath);
|
---|
215 | ObjectInputStream objectIn = new ObjectInputStream(fileIn);
|
---|
216 | obj = objectIn.readObject();
|
---|
217 | objectIn.close();
|
---|
218 | System.out.println("Succesfully read object");
|
---|
219 | } catch (Exception ex) {
|
---|
220 | if (ex instanceof FileNotFoundException) {
|
---|
221 | System.out.println("qTable file does not exist. A new file will be created.");
|
---|
222 | } else {
|
---|
223 | ex.printStackTrace();
|
---|
224 | }
|
---|
225 | }
|
---|
226 |
|
---|
227 | return obj;
|
---|
228 | }
|
---|
229 |
|
---|
230 | private String parsePathToQtable(String rawPath) {
|
---|
231 | String filepath;
|
---|
232 | File check = new File(rawPath);
|
---|
233 | if (check.isFile() || rawPath.endsWith(".table")) {
|
---|
234 | filepath = rawPath;
|
---|
235 | } else {
|
---|
236 | filepath = rawPath + this.instanceIdentifier();
|
---|
237 | }
|
---|
238 | return filepath;
|
---|
239 | }
|
---|
240 |
|
---|
241 | public String instanceIdentifier() {
|
---|
242 | String domainName = this.negotiationSession.getDomain().getName().replace(".xml", "").replace("etc/templates", "").replace("/", "_");
|
---|
243 | return String.format("%s-%s-%s-%s", this.getName(), domainName,
|
---|
244 | this.utilitySpace.getFileName().replace('/', '_').replace(domainName, ""),
|
---|
245 | this.filterStrategyParameters(this.getStrategyParameters()).replace(';', '-').replace('=', '@')); // because ; is csv // delimiter);
|
---|
246 | }
|
---|
247 |
|
---|
248 | public String filterStrategyParameters(StrategyParameters parameters) {
|
---|
249 | String fullString = parameters.toString();
|
---|
250 | String filteredString = "";
|
---|
251 |
|
---|
252 | // filter out pairs that look like this: _key=value
|
---|
253 | // these are not part of the identifier. Convention adapted from
|
---|
254 | // pythonic 'private variable' indication (self._privatevar)
|
---|
255 | for (String pair : fullString.split(";")) {
|
---|
256 | if (!pair.startsWith("_") && pair.contains("=")) {
|
---|
257 | filteredString = filteredString + ";" + pair;
|
---|
258 | }
|
---|
259 | }
|
---|
260 |
|
---|
261 | return filteredString;
|
---|
262 | }
|
---|
263 | }
|
---|