source: anac2020/BlingBling/src/main/java/geniusweb/blingbling/Blingbling.java@ 33

Last change on this file since 33 was 33, checked in by wouter, 3 years ago

#3

File size: 19.3 KB
Line 
1package geniusweb.blingbling;
2
3//import java.awt.List;
4import java.io.IOException;
5import java.math.BigInteger;
6import java.util.ArrayList;
7import java.util.Arrays;
8import java.util.Collections;
9import java.util.Comparator;
10import java.util.HashMap;
11import java.util.HashSet;
12import java.util.List;
13import java.util.Random;
14import java.util.logging.Level;
15
16import geniusweb.actions.Accept;
17import geniusweb.actions.Action;
18import geniusweb.actions.Comparison;
19import geniusweb.actions.ElicitComparison;
20import geniusweb.actions.Offer;
21import geniusweb.actions.PartyId;
22import geniusweb.bidspace.AllBidsList;
23import geniusweb.inform.ActionDone;
24import geniusweb.inform.Finished;
25import geniusweb.inform.Inform;
26import geniusweb.inform.Settings;
27import geniusweb.inform.YourTurn;
28import geniusweb.issuevalue.Bid;
29import geniusweb.issuevalue.Value;
30import geniusweb.party.Capabilities;
31import geniusweb.party.DefaultParty;
32import geniusweb.profile.PartialOrdering;
33import geniusweb.profile.Profile;
34import geniusweb.profileconnection.ProfileConnectionFactory;
35import geniusweb.profileconnection.ProfileInterface;
36import geniusweb.progress.ProgressRounds;
37import tudelft.utilities.logging.Reporter;
38
39/**
40 * A simple implementation of a SHAOP party that can handle only bilateral
41 * negotiations (1 other party). It will ignore all other parties except the one
42 * that has the turn right before us. It estimates the utilities of bids by
43 * assigning a linear increasing utility from the orderings that have been
44 * created.
45 * <p>
46 * <b>Requirement<b> the initial {@link PartialOrdering} must contain at least
47 * the bids with lowest utility and highest utility, and the proper comparison
48 * info for these two bids.
49 */
50public class Blingbling extends DefaultParty {
51
52// private static final BigDecimal N09 = new BigDecimal("0.9");
53 private Bid nowReceivedBid = null; // we ignore all others
54 private Bid lastReceivedBid = null; // the bid received in the last round.
55 private PartyId me;
56 private final Random random = new Random();
57 protected ProfileInterface profileint; // the preference profile
58 private ProgressRounds progress;
59 private MyProfile myProfile;
60 private OpponentProfile oppProfile;
61 private Boolean startRecord = false;
62
63 private HashMap<String, Value> fixedZoneValue = new HashMap<>();
64 private List<String> negoZone = new ArrayList<>();
65// private List<Bid> FirstReceivedBidList = null;
66 private AllBidsList allbids = null;
67 private List<Bid> acceptableBids = new ArrayList<>();
68 private List<Bid> elicitlist = new ArrayList<>();
69 private List<Bid> paretoBids = new ArrayList<>();
70
71 private Bid elicitBid = null;
72 // params for training
73 private double LearningRate = 0.01;
74 private int Epoch = 40;
75 // params for elicit
76 private double elicitthreshold = 0.7;// if opputility of bid is above this threshold, then elicit the space.
77 private double elicitcost = 0.01;
78 private int elicitflag = 0;
79 private int elicitcnt = 0;
80 // params for strategy threshold.
81 private double threshold = 1.0;// for generate bid;
82 private double acceptthreshold = 1.0;// to determine acceptable bid.
83 private double estimateNashThreshold = 0.7; // the my utility of estimated nash point
84 private double reservationThreshold;
85 private int rethreshold;// 第一次获得可接受的bid的轮数,获得可接受的threshold之后,则就在该acceptthreshold之上寻找新的bid。
86 // params for opponent modeling.
87// private int opprecordlength = 100;//记录前100个不同的
88// private int recordcnt = 0;
89 // params for estimate nash point.
90 private double maxUtilityOppForMe;// record the my max utility among the opponent bid
91 private double nashratio = 1.7;// determine the shape of the pareto frontier.
92// private int nashrecordlength = 20;
93// private int nashcnt = 0;
94 // threshold parameter
95 private int exploreRounds = 150; // for opponent modeling and understand the opposition
96 private double exploreStageThreshold = 0.95;
97 private int agreementSearchRounds = 195; // for agreement search, gradually drop to estimate Nash Point.
98// private double concedeRangeRatio = 0.05; // this is for the case that my estimate nash point is not accurate. the ratio between reservation and nash estimate point
99 private double generateRangeSize = 0.01; // a value from 0.1 to 0.2.
100 private double randomlevel = 0.5;
101// private double slope = 0.02;//if we use the slope
102 private int useAllflag = 0;
103
104 public Blingbling() {
105 }
106
107 public Blingbling(Reporter reporter) {
108 super(reporter); // for debugging
109 }
110
111 @Override
112 public void notifyChange(Inform info) {
113 try {
114 if (info instanceof Settings) {// init
115 Settings settings = (Settings) info;
116
117 // params setting
118 if (settings.getParameters().get("lr") != null) {
119 LearningRate = (double) settings.getParameters().get("lr");
120 }
121 if (settings.getParameters().get("epoch") != null) {
122 Epoch = (int) settings.getParameters().get("epoch");
123 }
124 if (settings.getParameters().get("explorernd") != null) {
125 exploreRounds = (int) settings.getParameters().get("explorernd");
126 }
127 if (settings.getParameters().get("explorethreshold") != null) {
128 exploreStageThreshold = (double) settings.getParameters().get("explorethreshold");
129 }
130 if (settings.getParameters().get("agreementrnd") != null) {
131 agreementSearchRounds = (int) settings.getParameters().get("agreementrnd");
132 }
133 if (settings.getParameters().get("concederratio") != null) {
134 exploreStageThreshold = (double) settings.getParameters().get("explorethreshold");
135 }
136 if (settings.getParameters().get("generaterange") != null) {
137 generateRangeSize = (double) settings.getParameters().get("generaterange");
138 }
139
140 if (settings.getParameters().get("elicitthreshold") != null) {
141 elicitthreshold = (double) settings.getParameters().get("elicitthreshold");
142 }
143 if (settings.getParameters().get("elicitationcost") != null) {
144 elicitcost = (double) settings.getParameters().get("elicitationcost");
145 }
146 if (settings.getParameters().get("nashutility") != null) {
147 estimateNashThreshold = (double) settings.getParameters().get("nashutility");
148 }
149 if (settings.getParameters().get("useall") != null) {
150 useAllflag = (int) settings.getParameters().get("useall");
151 }
152 if (settings.getParameters().get("randomlevel") != null) {
153 randomlevel = (int) settings.getParameters().get("randomlevel");
154 }
155 this.profileint = ProfileConnectionFactory.create(settings.getProfile().getURI(), getReporter());
156 this.me = settings.getID();
157 this.progress = (ProgressRounds) settings.getProgress();
158 this.oppProfile = new OpponentProfile(profileint.getProfile().getDomain());// set opponent model
159
160 this.myProfile = new MyProfile(profileint.getProfile(), Epoch, LearningRate);
161
162 allbids = new AllBidsList(myProfile.getDomain());
163
164 } else if (info instanceof ActionDone) { // the relationship bettween ActionDne and Myturn??
165 Action otheract = ((ActionDone) info).getAction();
166 if (otheract instanceof Offer) {
167
168 this.nowReceivedBid = ((Offer) otheract).getBid();
169
170 } else if (otheract instanceof Comparison) {
171 myProfile.update(((Comparison) otheract).getBid(), ((Comparison) otheract).getBetter(),
172 ((Comparison) otheract).getWorse()); // update the estimate profile
173 myTurn();
174 }
175 } else if (info instanceof YourTurn) {
176 myTurn();
177 } else if (info instanceof Finished) {
178 getReporter().log(Level.INFO, "Final ourcome:" + info);
179// getReporter().log(Level.INFO, "Final EstUtility" + myProfile.getRankUtility(((Finished) info).getAgreement()));
180
181 }
182 } catch (Exception e) {
183 throw new RuntimeException("Fail:", e);
184 }
185 }
186
187 @Override
188 public Capabilities getCapabilities() {
189 return new Capabilities(new HashSet<>(Arrays.asList("SAOP", "SHAOP")), Collections.singleton(Profile.class));
190 }
191
192 @Override
193 public String getDescription() {
194 return "The Blingbling Agent from TU Delft! A stage based method can be optimized via AUTOML!";
195 }
196
197 /**
198 * Called when it's (still) our turn and we should take some action. Also
199 * Updates the progress if necessary.
200 */
201 private void myTurn() throws IOException {// equals choose action
202 int round = progress.getCurrentRound(); // only support round deadline
203 int tround = progress.getTotalRounds();
204 Action action = null;
205
206 if (myProfile == null) {
207 myProfile = new MyProfile(profileint.getProfile(), Epoch, LearningRate);
208 }
209 if (round <= 7) {
210 myProfile.subtrain(Epoch, LearningRate);
211 }
212 if (nowReceivedBid != null && round <= 2) {
213 oppProfile.registerOffer(nowReceivedBid);// register the first offer
214 }
215
216 if (lastReceivedBid != null && lastReceivedBid != nowReceivedBid) {
217 startRecord = true; // start record from the first different
218 }
219 if (startRecord && round <= exploreRounds) {// if possible the explore rounds could be the result of hyperparam
220 // optimation
221 oppProfile.registerOffer(nowReceivedBid);
222// estimateNashPoint();
223 }
224
225 // update the threshold
226 setThreshold(round, tround);
227
228 // accept or not?
229 if (nowReceivedBid != null) {
230 // then we do the action now, no need to ask user
231 if (isAcceptable(nowReceivedBid, round)) {
232 action = new Accept(me, nowReceivedBid);
233 }
234
235 }
236 // elicit or not?
237 int elicitrnd = 0;
238 if (elicitcost >= 0.05 && elicitcost <= 0.07) {
239 elicitrnd = 1;
240 } else {
241 elicitrnd = (int) (0.05 / elicitcost);
242 }
243
244 if (action == null && round > exploreRounds && elicitcnt < elicitrnd && elicitflag == 0) {// right after the
245 // explore rounds,
246 // we determine
247 // whether to elicit
248 // compare or not.
249 List<Bid> elicitbidlist = myProfile.getElicitBid();// 获取比较的bid, which combined via the values with low
250 // frequency in pairwise data.
251 if (ifElicitComparison(elicitbidlist)) {// determine whether to elicit or not
252 if (useAllflag == 1) {
253 action = new ElicitComparison(me, elicitBid, myProfile.getAllBidlist()); // can compare with all the
254 // bids
255 // myProfile.getBidlist()//compare
256 // with exist bid
257 }
258 if (useAllflag == 0) {
259 action = new ElicitComparison(me, elicitBid, myProfile.getBidlist()); // can compare with all the
260 // bids
261 // myProfile.getBidlist()//compare
262 // with exist bid
263 }
264 elicitcnt++;
265 elicitflag = 1;
266 }
267 }
268 if (elicitflag == 1) {
269 myProfile.subtrain(Epoch, LearningRate);
270 }
271 if (elicitcnt == elicitrnd) {
272 getNash();
273 elicitcnt++;
274 }
275
276 // Otherwise just offer a bid. Note the situation at the accept stage. and last
277 // two rounds.
278 if (action == null && round <= tround - 1) {
279 action = generateNextBid(threshold);
280 }
281 if (action == null && round == tround) {// final round
282 if (myProfile.getRankUtility(nowReceivedBid) > myProfile.getRankUtility(myProfile.getReservationBid())) {
283 action = new Accept(me, nowReceivedBid);
284 }
285 List<Bid> oppbid = getSortedAcceptable(oppProfile.getOppOffers());
286 if (myProfile.getRankUtility(oppbid.get(0)) > myProfile.getRankUtility(myProfile.getReservationBid())) {
287 action = new Offer(me, oppbid.get(0));
288 } else {
289 action = generateNextBid(threshold);// why I don't set it to the reservation value? Because I aim to get
290 // the Nash output.
291 }
292 }
293
294 lastReceivedBid = nowReceivedBid;
295 if (action instanceof Offer) {
296 elicitflag = 0;
297
298 }
299 getConnection().send(action);// report Action
300 progress = progress.advance();
301
302 }
303
304// private void reduceSpace(Bid bid, Domain domain) {
305// //here we only us the first bid, use first N bid and pick the maximum is a better choice.
306// //devide the negotiation space to setzone and negotiation zone. return the issue list of negotiatable zone
307// for (String issue : bid.getIssues()) {
308// if (bid.getValue(issue).toString().equals(bestBid.getValue(issue).toString())){
309// fixedZoneValue.put(issue, bid.getValue(issue));
310// }else {
311// negoZone.add(issue);
312// }
313// }
314// }
315
316 private boolean isAcceptable(Bid bid, int round) {
317 if (round > agreementSearchRounds) {
318 if (myProfile.getRankUtility(bid) >= threshold) {
319 return true;
320 } else {
321 return false;
322 }
323 } else {
324 if (acceptableBids.isEmpty()) {
325 if (myProfile.getRankUtility(bid) >= threshold) {
326 acceptthreshold = threshold;
327 rethreshold = round;
328 acceptableBids.add(bid);
329 threshold = 1.0;// restart from 1.0
330 }
331 return false;
332 } else {
333 if (myProfile.getRankUtility(bid) >= threshold) {
334 return true;
335 }
336 if (myProfile.getRankUtility(bid) >= acceptthreshold) {
337 if (!acceptableBids.contains(bid)) {
338 acceptableBids.add(bid);
339 }
340 }
341 return false;
342 }
343 }
344
345 }
346
347 private boolean ifElicitComparison(List<Bid> inbidlist) {
348 if (inbidlist.isEmpty()) {
349 return false;
350 }
351 // if need elicit comparison. need more discussion.
352 Collections.sort(inbidlist, new Comparator<Bid>() {
353
354 @Override
355 public int compare(Bid b1, Bid b2) {
356 return (oppProfile.getPredOppUtility(b1) >= oppProfile.getPredOppUtility(b2)) ? -1 : 1;
357// return new Double(oppProfile.getPredOppUtility(b1)).compareTo(new Double(oppProfile.getPredOppUtility(b2)));
358 }
359
360 });
361 for (Bid bid : inbidlist) {
362 if (!elicitlist.contains(bid)) {
363 if (oppProfile.getPredOppUtility(bid) >= elicitthreshold) {
364 elicitBid = bid;
365 elicitlist.add(bid);
366 return true;
367 }
368 }
369 }
370
371 return false;
372 }
373
374 private void setThreshold(int round, int totalRound) {
375 // Here we drop the threshold gradually, this will help the search of outcome
376 // space.
377 // pay attention to the boundary case
378 if (acceptableBids.isEmpty()) {// 如果到agreement Stage不空的话,那么已经结束了。
379 if (round < 10) {
380 this.threshold = 0.95;// the first several steps.
381 } else if (round < this.exploreRounds) {
382 this.threshold = (this.exploreRounds - round) * (1.0 - this.exploreStageThreshold)
383 / (this.exploreRounds - 10) + this.exploreStageThreshold;
384 } else if (round <= totalRound - 1) {
385 this.threshold = (totalRound - 1 - round) * (this.exploreStageThreshold - this.estimateNashThreshold)
386 / (totalRound - 1 - this.exploreRounds) + this.estimateNashThreshold;
387 }
388// else if (round <= totalRound-1) {
389// this.threshold = this.concedeRangeRatio*(totalRound - 1 - round) * (this.estimateNashThreshold - this.reservationThreshold)
390// /(totalRound-1-this.agreementSearchRounds) + this.reservationThreshold;
391// }
392
393 } else {
394 this.threshold = Math
395 .min((totalRound - 1 - round) * (1 - this.acceptthreshold) / (totalRound - 1 - this.rethreshold)
396 + this.acceptthreshold, 1.0);// decrease from 1.0. we can also use a slope for this.
397 }
398 }
399
400 private List<Bid> getSortedAcceptable(List<Bid> accbidlist) {// decending order according to myProfile utility
401 List<Bid> temp = accbidlist;
402 Collections.sort(temp, new Comparator<Bid>() {
403
404 @Override
405 public int compare(Bid b1, Bid b2) {
406 return (myProfile.getRankUtility(b1) >= myProfile.getRankUtility(b2)) ? -1 : 1;
407 }
408
409 });
410 return temp;
411 }
412
413 private void estimateNashPoint() {
414 // use the AutoML to determine the ratio? //use two profiles to estimate nash
415 // point.
416 double thisutility = myProfile.getRankUtility(nowReceivedBid);
417 if (thisutility > maxUtilityOppForMe) {
418 maxUtilityOppForMe = thisutility;
419 }
420 estimateNashThreshold = (1 - maxUtilityOppForMe) / nashratio + maxUtilityOppForMe; // is there a better function
421 // to estimate the nash
422 // point utility?
423 }
424
425 private Offer generateParetoNextBid(double generatethreshold) {
426 // update paretoBids
427 getParetoFrontier();
428 Bid candidatebid = null;
429 double distance = 1;
430 for (Bid bid : paretoBids) {
431 if (myProfile.getRankUtility(bid) >= generatethreshold) {
432 double d = myProfile.getRankUtility(bid) - generatethreshold;
433 if (d < distance) {
434 distance = d;
435 candidatebid = bid;
436 }
437 }
438 }
439 return new Offer(me, candidatebid);
440 }
441
442 private Offer generateNextBid(double generatethreshold) {
443 // return Offer,
444 long spacesize = this.allbids.size().intValue();
445 List<Bid> candidate = new ArrayList<Bid>();
446 List<Bid> candidatebackup = new ArrayList<Bid>();
447 List<Bid> rand = new ArrayList<Bid>();
448 int candidatenum;
449 for (int n = 0; n < spacesize; n++) {
450 Bid bid = allbids.get(BigInteger.valueOf(n));
451 if (myProfile.getRankUtility(bid) >= generatethreshold
452 && myProfile.getRankUtility(bid) < (generatethreshold + generateRangeSize)) {
453 candidate.add(bid);
454 }
455 if (myProfile.getRankUtility(bid) >= generatethreshold) {
456 candidatebackup.add(bid);
457 }
458 }
459
460 // generate random bid to explore? not sure about the performance
461 if (!candidate.isEmpty()) {
462 candidatenum = (int) (Math.floor(candidate.size() * randomlevel) + 1); // 取size的一半。还是全取?
463 for (int i = 0; i < candidatenum; i++) {
464 int temp = random.nextInt(candidate.size());// explore
465 if (!rand.contains(candidate.get(temp))) {
466 rand.add(candidate.get(temp));
467 } else {
468 i--;
469 }
470 }
471 } else {
472 candidatenum = (int) (Math.floor(candidatebackup.size() * randomlevel) + 1); // 取size的一半。还是全取?,can be a
473 // parameter here.
474 for (int i = 0; i < candidatenum; i++) {
475 int temp = random.nextInt(candidatebackup.size());//
476 if (!rand.contains(candidatebackup.get(temp))) {
477 rand.add(candidatebackup.get(temp));
478 } else {
479 i--;// if generate the same, then this should not count in.
480 }
481 }
482 }
483
484 Collections.sort(rand, new Comparator<Bid>() {
485 @Override
486 public int compare(Bid b1, Bid b2) {
487 return oppProfile.getPredOppUtility(b1) >= oppProfile.getPredOppUtility(b2) ? -1 : 1;
488 }
489
490 });
491
492 Bid bid = rand.get(0);// return the maximum utility bid of the opponent
493 if (!acceptableBids.isEmpty()) {
494 acceptableBids = getSortedAcceptable(acceptableBids);
495 if (myProfile.getRankUtility(acceptableBids.get(0)) >= generatethreshold
496 && myProfile.getRankUtility(acceptableBids.get(0)) < (generatethreshold + generateRangeSize)) {
497 bid = acceptableBids.get(0);
498 }
499 }
500
501 return new Offer(me, bid);
502 }
503
504 public void getNash() {// note here we didn't consider the reservation value.
505 if (paretoBids.isEmpty()) {
506 return;
507 }
508 Bid nashbid = null;
509 double maxnashproduct = 0;
510 for (Bid bid : paretoBids) {
511 double nashproduct = myProfile.getRankUtility(bid) * oppProfile.getPredOppUtility(bid);
512 if (nashproduct > maxnashproduct) {
513 nashbid = bid;
514 maxnashproduct = nashproduct;
515 }
516 }
517 if (myProfile.getRankUtility(nashbid) > estimateNashThreshold) {
518 estimateNashThreshold = Math.max(myProfile.getRankUtility(nashbid),
519 myProfile.getRankUtility(myProfile.getReservationBid()));
520 }
521
522 }
523
524 // return the pareto frontier.
525 public void getParetoFrontier() {
526 for (Bid bid : myProfile.getAllBidlist()) {
527 if (paretoBids.isEmpty()) {
528 paretoBids.add(bid);
529 continue;
530 }
531 for (Bid pbid : paretoBids) {
532 if (isDominated(pbid, bid)) {
533 paretoBids.remove(pbid);
534 paretoBids.add(bid);
535 } else {
536 paretoBids.add(bid);
537 }
538 }
539
540 }
541 }
542
543 // bid1 is dominatedby bid2 or not.
544 public boolean isDominated(Bid bid1, Bid bid2) {
545 double bid1_myutility = myProfile.getRankUtility(bid1);
546 double bid1_opputility = oppProfile.getPredOppUtility(bid1);
547 double bid2_myutility = myProfile.getRankUtility(bid2);
548 double bid2_opputility = oppProfile.getPredOppUtility(bid2);
549 if (bid2_myutility >= bid1_myutility && bid2_opputility >= bid1_opputility) {
550 return true;
551 } else {
552 return false;
553 }
554
555 }
556
557}
Note: See TracBrowser for help on using the repository browser.