[153] | 1 | package agents.rlboa;
|
---|
[67] | 2 |
|
---|
| 3 | import java.io.FileInputStream;
|
---|
| 4 | import java.io.File;
|
---|
| 5 | import java.io.FileNotFoundException;
|
---|
| 6 | import java.io.FileOutputStream;
|
---|
| 7 | import java.io.ObjectInputStream;
|
---|
| 8 | import java.io.ObjectOutputStream;
|
---|
| 9 | import java.util.ArrayList;
|
---|
| 10 | import java.util.HashMap;
|
---|
| 11 |
|
---|
| 12 | import genius.core.Bid;
|
---|
[153] | 13 | import genius.core.StrategyParameters;
|
---|
[67] | 14 | import genius.core.actions.Action;
|
---|
| 15 | import genius.core.actions.EndNegotiation;
|
---|
| 16 | import genius.core.events.MultipartyNegoActionEvent;
|
---|
[153] | 17 | import genius.core.misc.Range;
|
---|
[67] | 18 | import negotiator.boaframework.acceptanceconditions.other.AC_Next;
|
---|
| 19 | import negotiator.boaframework.omstrategy.BestBid;
|
---|
[153] | 20 | import negotiator.boaframework.opponentmodel.AgentXFrequencyModel;
|
---|
[67] | 21 | import negotiator.boaframework.opponentmodel.PerfectModel;
|
---|
[127] | 22 |
|
---|
[67] | 23 | @SuppressWarnings("serial")
|
---|
[153] | 24 | public class Qlearner extends RLBOAagentBilateral {
|
---|
[67] | 25 |
|
---|
| 26 | QlearningStrategy offeringStrategy;
|
---|
| 27 |
|
---|
[153] | 28 | @SuppressWarnings("unchecked")
|
---|
[67] | 29 | @Override
|
---|
| 30 | public void agentSetup() {
|
---|
[153] | 31 |
|
---|
[67] | 32 | HashMap<String, Double> params = new HashMap<String, Double>();
|
---|
| 33 |
|
---|
[153] | 34 | // Initialize opponent model
|
---|
| 35 | switch (this.getStrategyParameters().getValueAsString("opponentModel")) {
|
---|
| 36 | case "PerfectModel":
|
---|
| 37 | opponentModel = new PerfectModel();
|
---|
| 38 | break;
|
---|
| 39 | case "FrequencyModel":
|
---|
| 40 | opponentModel = new AgentXFrequencyModel();
|
---|
| 41 | break;
|
---|
| 42 | default:
|
---|
| 43 | break;
|
---|
| 44 | }
|
---|
[67] | 45 | opponentModel.init(negotiationSession, params);
|
---|
| 46 |
|
---|
| 47 | // Initialize offeringStrategy (is a RL-component)
|
---|
| 48 | switch (this.getStrategyParameters().getValueAsString("strategy")) {
|
---|
[153] | 49 | case "QlearningStrategy":
|
---|
| 50 | offeringStrategy = new QlearningStrategy(negotiationSession, opponentModel);
|
---|
| 51 | break;
|
---|
| 52 | case "PriorBeliefQlearningStrategy":
|
---|
| 53 | offeringStrategy = new PriorBeliefQlearningStrategy(negotiationSession, opponentModel);
|
---|
| 54 | break;
|
---|
| 55 | case "QLambdaStrategy":
|
---|
| 56 | offeringStrategy = new QLambdaStrategy(negotiationSession, opponentModel);
|
---|
| 57 | break;
|
---|
[67] | 58 | }
|
---|
| 59 | offeringStrategy.setHyperparameters(this.getStrategyParameters());
|
---|
[153] | 60 |
|
---|
| 61 | // Initialize q-table
|
---|
| 62 | String pathToQtable = this.strategyParameters.getValueAsString("_path_to_qtable");
|
---|
| 63 | String filepath = this.parsePathToQtable(pathToQtable);
|
---|
| 64 | HashMap<Integer, ArrayList<Double>> qTable = (HashMap<Integer, ArrayList<Double>>) this.readObjectFromFile(filepath);
|
---|
[67] | 65 | offeringStrategy.initQtable(qTable);
|
---|
| 66 |
|
---|
| 67 | // Accept if the incoming offer is higher than what you would offer yourself
|
---|
| 68 | acceptConditions = new AC_Next(negotiationSession, offeringStrategy, 1, 0);
|
---|
| 69 |
|
---|
| 70 | // Opponent model strategy always selects best bid it has available
|
---|
| 71 | omStrategy = new BestBid();
|
---|
| 72 | omStrategy.init(negotiationSession, opponentModel, params);
|
---|
| 73 | setDecoupledComponents(acceptConditions, offeringStrategy, opponentModel, omStrategy);
|
---|
[153] | 74 |
|
---|
| 75 | // Get reservation value and max bid to determine relevant range of bins
|
---|
| 76 | double minUtility;
|
---|
| 77 | double maxUtility;
|
---|
| 78 |
|
---|
| 79 | try {
|
---|
| 80 | Bid maxUtilBid = this.utilitySpace.getMaxUtilityBid();
|
---|
| 81 | Bid minUtilBid = this.utilitySpace.getMinUtilityBid();
|
---|
| 82 | maxUtility = this.utilitySpace.getUtility(maxUtilBid);
|
---|
| 83 | minUtility = this.utilitySpace.getUtility(minUtilBid);
|
---|
| 84 | minUtility = Math.max(minUtility, this.utilitySpace.getReservationValueUndiscounted());
|
---|
| 85 | } catch (Exception e) {
|
---|
| 86 | // exception is thrown by getMaxUtilityBid if there are no bids in the outcomespace
|
---|
| 87 | // but I guess that's pretty rare. Default to 0.0 - 1.0 to prevent crashes.
|
---|
| 88 | maxUtility = 1.0;
|
---|
| 89 | minUtility = 0.0;
|
---|
| 90 | }
|
---|
| 91 |
|
---|
| 92 | int minBin = this.getBinIndex(minUtility);
|
---|
| 93 | int maxBin = this.getBinIndex(maxUtility);
|
---|
| 94 |
|
---|
| 95 | offeringStrategy.setMinMaxBin(new Range(minBin, maxBin));
|
---|
[67] | 96 | }
|
---|
| 97 |
|
---|
| 98 | @Override
|
---|
| 99 | public String getName() {
|
---|
| 100 | return "Q-learner";
|
---|
| 101 | }
|
---|
| 102 |
|
---|
[153] | 103 | /**
|
---|
| 104 | * @param negoEvent
|
---|
| 105 | * @return AbstractState object representing the current state of the agent
|
---|
| 106 | * given the negoEvent
|
---|
| 107 | */
|
---|
| 108 | public AbstractState getStateRepresentation(MultipartyNegoActionEvent negoEvent) {
|
---|
[67] | 109 | Bid oppLastBid = negotiationSession.getOpponentBidHistory().getLastBid();
|
---|
| 110 | Bid myLastBid = negotiationSession.getOwnBidHistory().getLastBid();
|
---|
| 111 | Bid agreement = negoEvent.getAgreement();
|
---|
| 112 | Action currentAction = negoEvent.getAction();
|
---|
| 113 |
|
---|
[153] | 114 | if (agreement != null || currentAction.getClass() == EndNegotiation.class || negoEvent.getTime() == 1.0) {
|
---|
[67] | 115 | return State.TERMINAL;
|
---|
| 116 | }
|
---|
| 117 |
|
---|
[153] | 118 | int myBin = this.getBinIndex(myLastBid);
|
---|
| 119 | int oppBin = this.getBinIndex(oppLastBid);
|
---|
[67] | 120 |
|
---|
| 121 | double time = negotiationSession.getTime();
|
---|
| 122 |
|
---|
| 123 | State state = new State(myBin, oppBin, this.getTimeBinIndex(time));
|
---|
| 124 |
|
---|
| 125 | return state;
|
---|
| 126 | }
|
---|
| 127 |
|
---|
[153] | 128 | /**
|
---|
| 129 | * Checks if a valid bid is passed and calculates the bin in which it would fall
|
---|
| 130 | * otherwise return extreme number to indicate that the bid doesn't exist yet.
|
---|
| 131 | *
|
---|
| 132 | * @param bid
|
---|
| 133 | * @return
|
---|
| 134 | */
|
---|
| 135 | protected int getBinIndex(Bid bid) {
|
---|
| 136 | int bin;
|
---|
| 137 | if (bid != null) {
|
---|
| 138 | double bidUtil = this.getUtility(bid);
|
---|
| 139 | bin = this.getBinIndex(bidUtil);
|
---|
| 140 | } else {
|
---|
| 141 | bin = Integer.MIN_VALUE;
|
---|
| 142 | }
|
---|
| 143 |
|
---|
| 144 | return bin;
|
---|
| 145 | }
|
---|
| 146 |
|
---|
| 147 | /**
|
---|
| 148 | * Helper function that calculates the bin index based
|
---|
| 149 | * on a specified utility. Is called by the similarly
|
---|
| 150 | * named function that takes a bid as argument.
|
---|
| 151 | *
|
---|
| 152 | * @param util
|
---|
| 153 | * @return
|
---|
| 154 | */
|
---|
[67] | 155 | private int getBinIndex(double util) {
|
---|
[153] | 156 | util = Math.min(0.999, util); // ensures maximum bid is in bin (N_BINS - 1)
|
---|
[67] | 157 | int n_bins = offeringStrategy.getNBins();
|
---|
| 158 | return (int) Math.floor(util * n_bins);
|
---|
| 159 | }
|
---|
| 160 |
|
---|
[153] | 161 | /**
|
---|
| 162 | *
|
---|
| 163 | * @param time
|
---|
| 164 | * @return bin index that represents the current time in the state. Is binned
|
---|
| 165 | * based on a strategy parameter
|
---|
| 166 | */
|
---|
| 167 | protected int getTimeBinIndex(double time) {
|
---|
| 168 | return (int) Math.floor(time * this.getStrategyParameters().getValueAsDouble("time_bins"));
|
---|
[67] | 169 | }
|
---|
| 170 |
|
---|
| 171 | @Override
|
---|
[153] | 172 | public double getReward(Bid agreement) {
|
---|
[67] | 173 | double reward = 0.0;
|
---|
[153] | 174 |
|
---|
[67] | 175 | if (agreement != null) {
|
---|
[153] | 176 | reward = this.getUtility(agreement);
|
---|
[67] | 177 | }
|
---|
| 178 |
|
---|
| 179 | return reward;
|
---|
| 180 | }
|
---|
| 181 |
|
---|
| 182 | @Override
|
---|
[153] | 183 | public void observeEnvironment(double reward, AbstractState newState) {
|
---|
[67] | 184 | this.offeringStrategy.observeEnvironment(reward, newState);
|
---|
| 185 |
|
---|
| 186 | if (newState.isTerminalState()) {
|
---|
| 187 | this.writeObjectToFile(this.offeringStrategy.getQTable());
|
---|
| 188 | }
|
---|
| 189 | }
|
---|
| 190 |
|
---|
| 191 | public void writeObjectToFile(Object serObj) {
|
---|
| 192 |
|
---|
[153] | 193 | String pathToQtable = this.strategyParameters.getValueAsString("_path_to_qtable");
|
---|
| 194 | String filepath = parsePathToQtable(pathToQtable);
|
---|
| 195 |
|
---|
| 196 | File outputFile = new File(filepath);
|
---|
| 197 | outputFile.getParentFile().mkdirs();
|
---|
[67] | 198 | try {
|
---|
| 199 | FileOutputStream fileOut = new FileOutputStream(filepath);
|
---|
| 200 | ObjectOutputStream objectOut = new ObjectOutputStream(fileOut);
|
---|
| 201 | objectOut.writeObject(serObj);
|
---|
| 202 | objectOut.close();
|
---|
[153] | 203 | System.out.println("The Q-table was succesfully written to a file");
|
---|
[67] | 204 |
|
---|
| 205 | } catch (Exception ex) {
|
---|
| 206 | ex.printStackTrace();
|
---|
| 207 | }
|
---|
| 208 | }
|
---|
| 209 |
|
---|
| 210 | public Object readObjectFromFile(String filepath) {
|
---|
| 211 | Object obj = null;
|
---|
| 212 |
|
---|
| 213 | try {
|
---|
| 214 | FileInputStream fileIn = new FileInputStream(filepath);
|
---|
| 215 | ObjectInputStream objectIn = new ObjectInputStream(fileIn);
|
---|
| 216 | obj = objectIn.readObject();
|
---|
| 217 | objectIn.close();
|
---|
| 218 | System.out.println("Succesfully read object");
|
---|
[153] | 219 | } catch (Exception ex) {
|
---|
| 220 | if (ex instanceof FileNotFoundException) {
|
---|
[67] | 221 | System.out.println("qTable file does not exist. A new file will be created.");
|
---|
[153] | 222 | } else {
|
---|
[67] | 223 | ex.printStackTrace();
|
---|
| 224 | }
|
---|
| 225 | }
|
---|
| 226 |
|
---|
| 227 | return obj;
|
---|
| 228 | }
|
---|
[153] | 229 |
|
---|
| 230 | private String parsePathToQtable(String rawPath) {
|
---|
| 231 | String filepath;
|
---|
| 232 | File check = new File(rawPath);
|
---|
| 233 | if (check.isFile() || rawPath.endsWith(".table")) {
|
---|
| 234 | filepath = rawPath;
|
---|
| 235 | } else {
|
---|
| 236 | filepath = rawPath + this.instanceIdentifier();
|
---|
| 237 | }
|
---|
| 238 | return filepath;
|
---|
| 239 | }
|
---|
[67] | 240 |
|
---|
| 241 | public String instanceIdentifier() {
|
---|
[153] | 242 | String domainName = this.negotiationSession.getDomain().getName().replace(".xml", "").replace("etc/templates", "").replace("/", "_");
|
---|
| 243 | return String.format("%s-%s-%s-%s", this.getName(), domainName,
|
---|
| 244 | this.utilitySpace.getFileName().replace('/', '_').replace(domainName, ""),
|
---|
| 245 | this.filterStrategyParameters(this.getStrategyParameters()).replace(';', '-').replace('=', '@')); // because ; is csv // delimiter);
|
---|
[67] | 246 | }
|
---|
[153] | 247 |
|
---|
| 248 | public String filterStrategyParameters(StrategyParameters parameters) {
|
---|
| 249 | String fullString = parameters.toString();
|
---|
| 250 | String filteredString = "";
|
---|
| 251 |
|
---|
| 252 | // filter out pairs that look like this: _key=value
|
---|
| 253 | // these are not part of the identifier. Convention adapted from
|
---|
| 254 | // pythonic 'private variable' indication (self._privatevar)
|
---|
| 255 | for (String pair : fullString.split(";")) {
|
---|
| 256 | if (!pair.startsWith("_") && pair.contains("=")) {
|
---|
| 257 | filteredString = filteredString + ";" + pair;
|
---|
| 258 | }
|
---|
| 259 | }
|
---|
| 260 |
|
---|
| 261 | return filteredString;
|
---|
| 262 | }
|
---|
[67] | 263 | }
|
---|