package geniusweb.blingbling; //import java.awt.List; import java.io.IOException; import java.math.BigInteger; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Random; import java.util.logging.Level; import geniusweb.actions.Accept; import geniusweb.actions.Action; import geniusweb.actions.Comparison; import geniusweb.actions.ElicitComparison; import geniusweb.actions.Offer; import geniusweb.actions.PartyId; import geniusweb.bidspace.AllBidsList; import geniusweb.inform.ActionDone; import geniusweb.inform.Finished; import geniusweb.inform.Inform; import geniusweb.inform.Settings; import geniusweb.inform.YourTurn; import geniusweb.issuevalue.Bid; import geniusweb.issuevalue.Value; import geniusweb.party.Capabilities; import geniusweb.party.DefaultParty; import geniusweb.profile.DefaultPartialOrdering; import geniusweb.profile.PartialOrdering; import geniusweb.profileconnection.ProfileConnectionFactory; import geniusweb.profileconnection.ProfileInterface; import geniusweb.progress.ProgressRounds; import tudelft.utilities.logging.Reporter; /** * A simple implementation of a SHAOP party that can handle only bilateral * negotiations (1 other party). It will ignore all other parties except the one * that has the turn right before us. It estimates the utilities of bids by * assigning a linear increasing utility from the orderings that have been * created. *

* Requirement the initial {@link PartialOrdering} must contain at least * the bids with lowest utility and highest utility, and the proper comparison * info for these two bids. */ public class Blingbling extends DefaultParty { // private static final BigDecimal N09 = new BigDecimal("0.9"); private Bid nowReceivedBid = null; // we ignore all others private Bid lastReceivedBid = null; // the bid received in the last round. private PartyId me; private final Random random = new Random(); protected ProfileInterface profileint; // the preference profile private ProgressRounds progress; private MyProfile myProfile; private OpponentProfile oppProfile; private Boolean startRecord = false; private HashMap fixedZoneValue = new HashMap<>(); private List negoZone = new ArrayList<>(); // private List FirstReceivedBidList = null; private AllBidsList allbids = null; private List acceptableBids = new ArrayList<>(); private List elicitlist = new ArrayList<>(); private List paretoBids = new ArrayList<>(); private Bid elicitBid = null; // params for training private double LearningRate = 0.01; private int Epoch = 40; // params for elicit private double elicitthreshold = 0.7;// if opputility of bid is above this threshold, then elicit the space. private double elicitcost = 0.01; private int elicitflag = 0; private int elicitcnt = 0; // params for strategy threshold. private double threshold = 1.0;// for generate bid; private double acceptthreshold = 1.0;// to determine acceptable bid. private double estimateNashThreshold = 0.7; // the my utility of estimated nash point private double reservationThreshold; private int rethreshold;// 第一次获得可接受的bid的轮数,获得可接受的threshold之后,则就在该acceptthreshold之上寻找新的bid。 // params for opponent modeling. // private int opprecordlength = 100;//记录前100个不同的 // private int recordcnt = 0; // params for estimate nash point. private double maxUtilityOppForMe;// record the my max utility among the opponent bid private double nashratio = 1.7;// determine the shape of the pareto frontier. // private int nashrecordlength = 20; // private int nashcnt = 0; // threshold parameter private int exploreRounds = 150; // for opponent modeling and understand the opposition private double exploreStageThreshold = 0.95; private int agreementSearchRounds = 195; // for agreement search, gradually drop to estimate Nash Point. // private double concedeRangeRatio = 0.05; // this is for the case that my estimate nash point is not accurate. the ratio between reservation and nash estimate point private double generateRangeSize = 0.01; // a value from 0.1 to 0.2. private double randomlevel = 0.5; // private double slope = 0.02;//if we use the slope private int useAllflag = 0; public Blingbling() { } public Blingbling(Reporter reporter) { super(reporter); // for debugging } @Override public void notifyChange(Inform info) { try { if (info instanceof Settings) {// init Settings settings = (Settings) info; // params setting if (settings.getParameters().get("lr") != null) { LearningRate = (double) settings.getParameters().get("lr"); } if (settings.getParameters().get("epoch") != null) { Epoch = (int) settings.getParameters().get("epoch"); } if (settings.getParameters().get("explorernd") != null) { exploreRounds = (int) settings.getParameters().get("explorernd"); } if (settings.getParameters().get("explorethreshold") != null) { exploreStageThreshold = (double) settings.getParameters().get("explorethreshold"); } if (settings.getParameters().get("agreementrnd") != null) { agreementSearchRounds = (int) settings.getParameters().get("agreementrnd"); } if (settings.getParameters().get("concederratio") != null) { exploreStageThreshold = (double) settings.getParameters().get("explorethreshold"); } if (settings.getParameters().get("generaterange") != null) { generateRangeSize = (double) settings.getParameters().get("generaterange"); } if (settings.getParameters().get("elicitthreshold") != null) { elicitthreshold = (double) settings.getParameters().get("elicitthreshold"); } if (settings.getParameters().get("elicitationcost") != null) { elicitcost = (double) settings.getParameters().get("elicitationcost"); } if (settings.getParameters().get("nashutility") != null) { estimateNashThreshold = (double) settings.getParameters().get("nashutility"); } if (settings.getParameters().get("useall") != null) { useAllflag = (int) settings.getParameters().get("useall"); } if (settings.getParameters().get("randomlevel") != null) { randomlevel = (int) settings.getParameters().get("randomlevel"); } this.profileint = ProfileConnectionFactory.create(settings.getProfile().getURI(), getReporter()); this.me = settings.getID(); this.progress = (ProgressRounds) settings.getProgress(); this.oppProfile = new OpponentProfile(profileint.getProfile().getDomain());// set opponent model this.myProfile = new MyProfile(profileint.getProfile(), Epoch, LearningRate); allbids = new AllBidsList(myProfile.getDomain()); } else if (info instanceof ActionDone) { // the relationship bettween ActionDne and Myturn?? Action otheract = ((ActionDone) info).getAction(); if (otheract instanceof Offer) { this.nowReceivedBid = ((Offer) otheract).getBid(); } else if (otheract instanceof Comparison) { myProfile.update(((Comparison) otheract).getBid(), ((Comparison) otheract).getBetter(), ((Comparison) otheract).getWorse()); // update the estimate profile myTurn(); } } else if (info instanceof YourTurn) { myTurn(); } else if (info instanceof Finished) { getReporter().log(Level.INFO, "Final ourcome:" + info); // getReporter().log(Level.INFO, "Final EstUtility" + myProfile.getRankUtility(((Finished) info).getAgreement())); } } catch (Exception e) { throw new RuntimeException("Fail:", e); } } @Override public Capabilities getCapabilities() { return new Capabilities(new HashSet<>(Arrays.asList("SAOP", "SHAOP")), Collections.singleton(DefaultPartialOrdering.class)); } @Override public String getDescription() { return "The Blingbling Agent from TU Delft! A stage based method can be optimized via AUTOML!"; } /** * Called when it's (still) our turn and we should take some action. Also * Updates the progress if necessary. */ private void myTurn() throws IOException {// equals choose action int round = progress.getCurrentRound(); // only support round deadline int tround = progress.getTotalRounds(); Action action = null; if (myProfile == null) { myProfile = new MyProfile(profileint.getProfile(), Epoch, LearningRate); } if (round <= 7) { myProfile.subtrain(Epoch, LearningRate); } if (nowReceivedBid != null && round <= 2) { oppProfile.registerOffer(nowReceivedBid);// register the first offer } if (lastReceivedBid != null && lastReceivedBid != nowReceivedBid) { startRecord = true; // start record from the first different } if (startRecord && round <= exploreRounds) {// if possible the explore rounds could be the result of hyperparam // optimation oppProfile.registerOffer(nowReceivedBid); // estimateNashPoint(); } // update the threshold setThreshold(round, tround); // accept or not? if (nowReceivedBid != null) { // then we do the action now, no need to ask user if (isAcceptable(nowReceivedBid, round)) { action = new Accept(me, nowReceivedBid); } } // elicit or not? int elicitrnd = 0; if (elicitcost >= 0.05 && elicitcost <= 0.07) { elicitrnd = 1; } else { elicitrnd = (int) (0.05 / elicitcost); } if (action == null && round > exploreRounds && elicitcnt < elicitrnd && elicitflag == 0) {// right after the // explore rounds, // we determine // whether to elicit // compare or not. List elicitbidlist = myProfile.getElicitBid();// 获取比较的bid, which combined via the values with low // frequency in pairwise data. if (ifElicitComparison(elicitbidlist)) {// determine whether to elicit or not if (useAllflag == 1) { action = new ElicitComparison(me, elicitBid, myProfile.getAllBidlist()); // can compare with all the // bids // myProfile.getBidlist()//compare // with exist bid } if (useAllflag == 0) { action = new ElicitComparison(me, elicitBid, myProfile.getBidlist()); // can compare with all the // bids // myProfile.getBidlist()//compare // with exist bid } elicitcnt++; elicitflag = 1; } } if (elicitflag == 1) { myProfile.subtrain(Epoch, LearningRate); } if (elicitcnt == elicitrnd) { getNash(); elicitcnt++; } // Otherwise just offer a bid. Note the situation at the accept stage. and last // two rounds. if (action == null && round <= tround - 1) { action = generateNextBid(threshold); } if (action == null && round == tround) {// final round if (myProfile.getRankUtility(nowReceivedBid) > myProfile.getRankUtility(myProfile.getReservationBid())) { action = new Accept(me, nowReceivedBid); } List oppbid = getSortedAcceptable(oppProfile.getOppOffers()); if (myProfile.getRankUtility(oppbid.get(0)) > myProfile.getRankUtility(myProfile.getReservationBid())) { action = new Offer(me, oppbid.get(0)); } else { action = generateNextBid(threshold);// why I don't set it to the reservation value? Because I aim to get // the Nash output. } } lastReceivedBid = nowReceivedBid; if (action instanceof Offer) { elicitflag = 0; } getConnection().send(action);// report Action progress = progress.advance(); } // private void reduceSpace(Bid bid, Domain domain) { // //here we only us the first bid, use first N bid and pick the maximum is a better choice. // //devide the negotiation space to setzone and negotiation zone. return the issue list of negotiatable zone // for (String issue : bid.getIssues()) { // if (bid.getValue(issue).toString().equals(bestBid.getValue(issue).toString())){ // fixedZoneValue.put(issue, bid.getValue(issue)); // }else { // negoZone.add(issue); // } // } // } private boolean isAcceptable(Bid bid, int round) { if (round > agreementSearchRounds) { if (myProfile.getRankUtility(bid) >= threshold) { return true; } else { return false; } } else { if (acceptableBids.isEmpty()) { if (myProfile.getRankUtility(bid) >= threshold) { acceptthreshold = threshold; rethreshold = round; acceptableBids.add(bid); threshold = 1.0;// restart from 1.0 } return false; } else { if (myProfile.getRankUtility(bid) >= threshold) { return true; } if (myProfile.getRankUtility(bid) >= acceptthreshold) { if (!acceptableBids.contains(bid)) { acceptableBids.add(bid); } } return false; } } } private boolean ifElicitComparison(List inbidlist) { if (inbidlist.isEmpty()) { return false; } // if need elicit comparison. need more discussion. Collections.sort(inbidlist, new Comparator() { @Override public int compare(Bid b1, Bid b2) { return (oppProfile.getPredOppUtility(b1) >= oppProfile.getPredOppUtility(b2)) ? -1 : 1; // return new Double(oppProfile.getPredOppUtility(b1)).compareTo(new Double(oppProfile.getPredOppUtility(b2))); } }); for (Bid bid : inbidlist) { if (!elicitlist.contains(bid)) { if (oppProfile.getPredOppUtility(bid) >= elicitthreshold) { elicitBid = bid; elicitlist.add(bid); return true; } } } return false; } private void setThreshold(int round, int totalRound) { // Here we drop the threshold gradually, this will help the search of outcome // space. // pay attention to the boundary case if (acceptableBids.isEmpty()) {// 如果到agreement Stage不空的话,那么已经结束了。 if (round < 10) { this.threshold = 0.95;// the first several steps. } else if (round < this.exploreRounds) { this.threshold = (this.exploreRounds - round) * (1.0 - this.exploreStageThreshold) / (this.exploreRounds - 10) + this.exploreStageThreshold; } else if (round <= totalRound - 1) { this.threshold = (totalRound - 1 - round) * (this.exploreStageThreshold - this.estimateNashThreshold) / (totalRound - 1 - this.exploreRounds) + this.estimateNashThreshold; } // else if (round <= totalRound-1) { // this.threshold = this.concedeRangeRatio*(totalRound - 1 - round) * (this.estimateNashThreshold - this.reservationThreshold) // /(totalRound-1-this.agreementSearchRounds) + this.reservationThreshold; // } } else { this.threshold = Math .min((totalRound - 1 - round) * (1 - this.acceptthreshold) / (totalRound - 1 - this.rethreshold) + this.acceptthreshold, 1.0);// decrease from 1.0. we can also use a slope for this. } } private List getSortedAcceptable(List accbidlist) {// decending order according to myProfile utility List temp = accbidlist; Collections.sort(temp, new Comparator() { @Override public int compare(Bid b1, Bid b2) { return (myProfile.getRankUtility(b1) >= myProfile.getRankUtility(b2)) ? -1 : 1; } }); return temp; } private void estimateNashPoint() { // use the AutoML to determine the ratio? //use two profiles to estimate nash // point. double thisutility = myProfile.getRankUtility(nowReceivedBid); if (thisutility > maxUtilityOppForMe) { maxUtilityOppForMe = thisutility; } estimateNashThreshold = (1 - maxUtilityOppForMe) / nashratio + maxUtilityOppForMe; // is there a better function // to estimate the nash // point utility? } private Offer generateParetoNextBid(double generatethreshold) { // update paretoBids getParetoFrontier(); Bid candidatebid = null; double distance = 1; for (Bid bid : paretoBids) { if (myProfile.getRankUtility(bid) >= generatethreshold) { double d = myProfile.getRankUtility(bid) - generatethreshold; if (d < distance) { distance = d; candidatebid = bid; } } } return new Offer(me, candidatebid); } private Offer generateNextBid(double generatethreshold) { // return Offer, long spacesize = this.allbids.size().intValue(); List candidate = new ArrayList(); List candidatebackup = new ArrayList(); List rand = new ArrayList(); int candidatenum; for (int n = 0; n < spacesize; n++) { Bid bid = allbids.get(BigInteger.valueOf(n)); if (myProfile.getRankUtility(bid) >= generatethreshold && myProfile.getRankUtility(bid) < (generatethreshold + generateRangeSize)) { candidate.add(bid); } if (myProfile.getRankUtility(bid) >= generatethreshold) { candidatebackup.add(bid); } } // generate random bid to explore? not sure about the performance if (!candidate.isEmpty()) { candidatenum = (int) (Math.floor(candidate.size() * randomlevel) + 1); // 取size的一半。还是全取? for (int i = 0; i < candidatenum; i++) { int temp = random.nextInt(candidate.size());// explore if (!rand.contains(candidate.get(temp))) { rand.add(candidate.get(temp)); } else { i--; } } } else { candidatenum = (int) (Math.floor(candidatebackup.size() * randomlevel) + 1); // 取size的一半。还是全取?,can be a // parameter here. for (int i = 0; i < candidatenum; i++) { int temp = random.nextInt(candidatebackup.size());// if (!rand.contains(candidatebackup.get(temp))) { rand.add(candidatebackup.get(temp)); } else { i--;// if generate the same, then this should not count in. } } } Collections.sort(rand, new Comparator() { @Override public int compare(Bid b1, Bid b2) { return oppProfile.getPredOppUtility(b1) >= oppProfile.getPredOppUtility(b2) ? -1 : 1; } }); Bid bid = rand.get(0);// return the maximum utility bid of the opponent if (!acceptableBids.isEmpty()) { acceptableBids = getSortedAcceptable(acceptableBids); if (myProfile.getRankUtility(acceptableBids.get(0)) >= generatethreshold && myProfile.getRankUtility(acceptableBids.get(0)) < (generatethreshold + generateRangeSize)) { bid = acceptableBids.get(0); } } return new Offer(me, bid); } public void getNash() {// note here we didn't consider the reservation value. if (paretoBids.isEmpty()) { return; } Bid nashbid = null; double maxnashproduct = 0; for (Bid bid : paretoBids) { double nashproduct = myProfile.getRankUtility(bid) * oppProfile.getPredOppUtility(bid); if (nashproduct > maxnashproduct) { nashbid = bid; maxnashproduct = nashproduct; } } if (myProfile.getRankUtility(nashbid) > estimateNashThreshold) { estimateNashThreshold = Math.max(myProfile.getRankUtility(nashbid), myProfile.getRankUtility(myProfile.getReservationBid())); } } // return the pareto frontier. public void getParetoFrontier() { for (Bid bid : myProfile.getAllBidlist()) { if (paretoBids.isEmpty()) { paretoBids.add(bid); continue; } for (Bid pbid : paretoBids) { if (isDominated(pbid, bid)) { paretoBids.remove(pbid); paretoBids.add(bid); } else { paretoBids.add(bid); } } } } // bid1 is dominatedby bid2 or not. public boolean isDominated(Bid bid1, Bid bid2) { double bid1_myutility = myProfile.getRankUtility(bid1); double bid1_opputility = oppProfile.getPredOppUtility(bid1); double bid2_myutility = myProfile.getRankUtility(bid2); double bid2_opputility = oppProfile.getPredOppUtility(bid2); if (bid2_myutility >= bid1_myutility && bid2_opputility >= bid1_opputility) { return true; } else { return false; } } }