Context Navigation

IAMhaggler2011.java

Last change on this file was 1, checked in by Wouter Pasman, 6 years ago
Initial import : Genius 9.0.0
File size: 18.4 KB

Line
1	package agents.anac.y2012.IAMhaggler2012.agents2011;
2
3	import java.util.ArrayList;
4
5	import agents.Jama.Matrix;
6	import agents.anac.y2012.IAMhaggler2012.agents2011.southampton.utils.BidCreator;
7	import agents.anac.y2012.IAMhaggler2012.agents2011.southampton.utils.Pair;
8	import agents.anac.y2012.IAMhaggler2012.agents2011.southampton.utils.RandomBidCreator;
9	import agents.org.apache.commons.math.MathException;
10	import agents.org.apache.commons.math.MaxIterationsExceededException;
11	import agents.org.apache.commons.math.special.Erf;
12	import agents.uk.ac.soton.ecs.gp4j.bmc.BasicPrior;
13	import agents.uk.ac.soton.ecs.gp4j.bmc.GaussianProcessMixture;
14	import agents.uk.ac.soton.ecs.gp4j.bmc.GaussianProcessMixturePrediction;
15	import agents.uk.ac.soton.ecs.gp4j.bmc.GaussianProcessRegressionBMC;
16	import agents.uk.ac.soton.ecs.gp4j.gp.covariancefunctions.CovarianceFunction;
17	import agents.uk.ac.soton.ecs.gp4j.gp.covariancefunctions.Matern3CovarianceFunction;
18	import agents.uk.ac.soton.ecs.gp4j.gp.covariancefunctions.NoiseCovarianceFunction;
19	import agents.uk.ac.soton.ecs.gp4j.gp.covariancefunctions.SumCovarianceFunction;
20	import genius.core.Bid;
21
22	/**
23	* @author Colin Williams
24	*
25	* The IAMhaggler Agent, created for ANAC 2011. Designed by C. R.
26	* Williams, V. Robu, E. H. Gerding and N. R. Jennings.
27	*
28	*/
29	public class IAMhaggler2011 extends SouthamptonAgent {
30
31	protected double RISK_PARAMETER = 1;
32
33	private Matrix utilitySamples;
34	private Matrix timeSamples;
35	private Matrix utility;
36	private GaussianProcessRegressionBMC regression;
37	private double lastRegressionTime = 0;
38	private double lastRegressionUtility = 1;
39	private ArrayList<Double> opponentTimes = new ArrayList<Double>();
40	private ArrayList<Double> opponentUtilities = new ArrayList<Double>();
41
42	private double maxUtilityInTimeSlot;
43	private int lastTimeSlot = -1;
44	private Matrix means;
45	private Matrix variances;
46
47	private double maxUtility;
48
49	private Bid bestReceivedBid;
50
51	private double previousTargetUtility;
52
53	protected BidCreator bidCreator;
54
55	private double intercept;
56
57	private Matrix matrixTimeSamplesAdjust;
58
59	private double maxOfferedUtility = Double.MIN_VALUE;
60	private double minOfferedUtility = Double.MAX_VALUE;
61
62	public IAMhaggler2011() {
63	debug = true;
64	}
65
66	/*
67	* (non-Javadoc)
68	*
69	* @see agents.southampton.SouthamptonAgent#init()
70	*/
71	@Override
72	public void init() {
73	log("Run init...");
74	try{
75	super.init();
76	} catch (Exception ex) {
77	ex.printStackTrace();
78	}
79	double discountingFactor = 0.5;
80	try
81	{
82	discountingFactor = adjustDiscountFactor(utilitySpace
83	.getDiscountFactor());
84	}
85	catch(Exception ex)
86	{
87	logError("Unable to get discounting factor, assuming 0.5");
88	ex.printStackTrace();
89	}
90	if(discountingFactor == 0)
91	discountingFactor = 1;
92	log("Discounting factor is " + discountingFactor);
93	makeUtilitySamples(100);
94	makeTimeSamples(100);
95	Matrix discounting = generateDiscountingFunction(discountingFactor);
96	Matrix risk = generateRiskFunction(RISK_PARAMETER);
97	utility = risk.arrayTimes(discounting);
98
99	log(utility);
100
101	log("Setting up GP");
102	flushLog();
103
104	BasicPrior[] bps = { new BasicPrior(11, 0.252, 0.5),
105	new BasicPrior(11, 0.166, 0.5), new BasicPrior(1, .01, 1.0) };
106	CovarianceFunction cf = new SumCovarianceFunction(
107	Matern3CovarianceFunction.getInstance(),
108	NoiseCovarianceFunction.getInstance());
109
110	regression = new GaussianProcessRegressionBMC();
111	regression.setCovarianceFunction(cf);
112	regression.setPriors(bps);
113
114	//regression.calculateRegression(new Matrix(new double[] {}, 0), new Matrix(new double[] {}, 0));
115
116	maxUtility = 0;
117	previousTargetUtility = 1;
118
119	bidCreator = new RandomBidCreator();
120
121	log("init complete.");
122	flushLog();
123	}
124
125	@Override
126	public String getName() {
127	return "IAMhaggler2012";
128	}
129
130	/**
131	* Create an m-by-1 matrix of utility samples.
132	*
133	* @param m
134	* The sample size.
135	*/
136	private void makeUtilitySamples(int m) {
137	double[] utilitySamplesArray = new double[m];
138	{
139	for (int i = 0; i < utilitySamplesArray.length; i++) {
140	utilitySamplesArray[i] = 1.0 - ((double) i + 0.5) / ((double) m + 1.0);
141	}
142	}
143	utilitySamples = new Matrix(utilitySamplesArray,
144	utilitySamplesArray.length);
145	}
146
147	/**
148	* Create a 1-by-n matrix of time samples.
149	*
150	* @param n
151	* The sample size.
152	*/
153	private void makeTimeSamples(int n) {
154	double[] timeSamplesArray = new double[n + 1];
155	{
156	for (int i = 0; i < timeSamplesArray.length; i++) {
157	timeSamplesArray[i] = ((double) i) / ((double) n);
158	}
159	}
160	timeSamples = new Matrix(timeSamplesArray, 1);
161	}
162
163	/*
164	* (non-Javadoc)
165	*
166	* @see agents.southampton.SouthamptonAgent#proposeInitialBid()
167	*/
168	@Override
169	protected Bid proposeInitialBid() throws Exception {
170	return utilitySpace.getMaxUtilityBid();
171	}
172
173	/*
174	* (non-Javadoc)
175	*
176	* @see agents.southampton.SouthamptonAgent#proposeNextBid(negotiator.Bid)
177	*/
178	@Override
179	protected Bid proposeNextBid(Bid opponentBid) throws Exception {
180	double opponentUtility = utilitySpace.getUtility(opponentBid);
181
182	if(opponentUtility > maxUtility)
183	{
184	bestReceivedBid = opponentBid;
185	maxUtility = opponentUtility;
186	}
187
188	log("Opponent utility is " + opponentUtility);
189
190	double targetUtility = getTarget(opponentUtility, getTime());
191
192	log("Target utility is " + targetUtility);
193
194	if(targetUtility <= maxUtility && previousTargetUtility > maxUtility)
195	return bestReceivedBid;
196	previousTargetUtility = targetUtility;
197
198	flushLog();
199
200	// Now get a random bid in the range targetUtility � 0.025
201	return bidCreator.getBid(utilitySpace, targetUtility - 0.025,
202	targetUtility + 0.025);
203	}
204
205	/**
206	* Get the target at a given time, recording the opponent's utility.
207	*
208	* @param opponentUtility
209	* The utility of the most recent offer made by the opponent.
210	* @param time
211	* The current time.
212	* @return the target.
213	*/
214	protected double getTarget(double opponentUtility, double time) {
215	log("++>>> IAMhaggler 2011 <<<++");
216
217	log("getTarget: " + opponentUtility);
218
219	maxOfferedUtility = Math.max(maxOfferedUtility, opponentUtility);
220	minOfferedUtility = Math.min(minOfferedUtility, opponentUtility);
221
222	// Calculate the current time slot
223	int timeSlot = (int) Math.floor(time * 36);
224
225	boolean regressionUpdateRequired = false;
226	if (lastTimeSlot == -1) {
227	regressionUpdateRequired = true;
228	}
229
230	// If the time slot has changed
231	if (timeSlot != lastTimeSlot) {
232	if (lastTimeSlot != -1) {
233	// Store the data from the time slot
234	opponentTimes.add((lastTimeSlot + 0.5) / 36.0);
235	if(opponentUtilities.size() == 0)
236	{
237	intercept = Math.max(0.5, maxUtilityInTimeSlot);
238	double[] timeSamplesAdjust = new double[timeSamples.getColumnDimension()];
239	int i = 0;
240	double gradient = 0.9 - intercept;
241	for (double d : timeSamples.getRowPackedCopy()) {
242	timeSamplesAdjust[i++] = intercept + (gradient * d);
243	}
244	matrixTimeSamplesAdjust = new Matrix(timeSamplesAdjust, timeSamplesAdjust.length);
245	}
246	opponentUtilities.add(maxUtilityInTimeSlot);
247	// Flag regression receiveMessage required
248	regressionUpdateRequired = true;
249	}
250	// Update the time slot
251	lastTimeSlot = timeSlot;
252	// Reset the max utility
253	maxUtilityInTimeSlot = 0;
254	}
255
256	log("intercept: " + intercept);
257
258	// Calculate the maximum utility observed in the current time slot
259	maxUtilityInTimeSlot = Math.max(maxUtilityInTimeSlot, opponentUtility);
260
261	if (timeSlot == 0) {
262	return 1.0 - time / 2.0;
263	}
264
265	if (regressionUpdateRequired) {
266	double gradient = 0.9 - intercept;
267	/*
268	double[] x = new double[opponentTimes.size()];
269	double[] yAdjust = new double[opponentTimes.size()];
270	double[] y = new double[opponentUtilities.size()];
271
272	int i;
273	i = 0;
274	for (double d : opponentTimes) {
275	x[i++] = d;
276	}
277	i = 0;
278	for (double d : opponentTimes) {
279	yAdjust[i++] = intercept + (gradient * d);
280	}
281	i = 0;
282	for (double d : opponentUtilities) {
283	y[i++] = d;
284	}
285
286	Matrix matrixX = new Matrix(x, x.length);
287	Matrix matrixYAdjust = new Matrix(yAdjust, yAdjust.length);
288	Matrix matrixY = new Matrix(y, y.length);
289
290	matrixY.minusEquals(matrixYAdjust);
291
292	//GaussianProcessMixture predictor = regression.calculateRegression(matrixX, matrixY);
293	*/
294
295	GaussianProcessMixture predictor;
296
297	if(lastTimeSlot == -1)
298	{
299	predictor = regression.calculateRegression(new double[] {}, new double[] {});
300	}
301	else
302	{
303	double x;
304	double y;
305	try {
306	x = opponentTimes.get(opponentTimes.size() - 1);
307	y = opponentUtilities.get(opponentUtilities.size() - 1);
308	} catch(Exception ex) {
309	System.err.println("Error getting x or y. Aiming for previous target utility of " + previousTargetUtility);
310	return previousTargetUtility;
311	// throw new Error(ex);
312	}
313
314	predictor = regression.updateRegression(
315	new Matrix(new double[] {x}, 1),
316	new Matrix(new double[] {y - intercept - (gradient * x)}, 1));
317	}
318
319	GaussianProcessMixturePrediction prediction = predictor
320	.calculatePrediction(timeSamples.transpose());
321
322	// Store the means and variances
323	means = prediction.getMean().plus(matrixTimeSamplesAdjust);
324	variances = prediction.getVariance();
325
326	log(means.transpose());
327	log(variances.transpose());
328	}
329
330	Pair<Matrix, Matrix> acceptMatrices = generateProbabilityAccept(means, variances,
331	time);
332	Matrix probabilityAccept = acceptMatrices.fst;
333	Matrix cumulativeAccept = acceptMatrices.snd;
334
335	Matrix probabilityExpectedUtility = probabilityAccept.arrayTimes(utility);
336	Matrix cumulativeExpectedUtility = cumulativeAccept.arrayTimes(utility);
337
338	if(regressionUpdateRequired) {
339	log(probabilityAccept);
340	log(cumulativeAccept);
341	log(probabilityExpectedUtility);
342	log(cumulativeExpectedUtility);
343	}
344
345	Pair<Double, Double> bestAgreement = getExpectedBestAgreement(
346	probabilityExpectedUtility, cumulativeExpectedUtility, time);
347	double bestTime = bestAgreement.fst;
348	double bestUtility = bestAgreement.snd;
349
350	double targetUtility = lastRegressionUtility
351	+ ((time - lastRegressionTime)
352	* (bestUtility - lastRegressionUtility) / (bestTime - lastRegressionTime));
353
354	log(time + "," + bestTime + "," + bestUtility + "," + lastRegressionTime + "," + lastRegressionUtility + "," + targetUtility);
355
356	// Store the target utility and time
357	lastRegressionUtility = targetUtility;
358	lastRegressionTime = time;
359
360	log("-->>> IAMhaggler 2011 <<<--");
361
362	return limitConcession(targetUtility);
363	}
364
365	private double limitConcession(double targetUtility) {
366	double limit = 1.0 - ((maxOfferedUtility - minOfferedUtility) + 0.1);
367	if(limit > targetUtility)
368	{
369	log("Limiting concession to " + limit);
370	return limit;
371	}
372	return targetUtility;
373	}
374
375	/**
376	* Generate an n-by-m matrix representing the effect of the discounting
377	* factor for a given utility-time combination. The combinations are given
378	* by the time and utility samples stored in timeSamples and utilitySamples
379	* respectively.
380	*
381	* @param discountingFactor
382	* The discounting factor, in the range (0, 1].
383	* @return An n-by-m matrix representing the discounted utilities.
384	*/
385	private Matrix generateDiscountingFunction(double discountingFactor) {
386	double[] discountingSamples = timeSamples.getRowPackedCopy();
387	double[][] m = new double[utilitySamples.getRowDimension()][timeSamples
388	.getColumnDimension()];
389	for (int i = 0; i < m.length; i++) {
390	for (int j = 0; j < m[i].length; j++) {
391	m[i][j] = Math.pow(discountingFactor, discountingSamples[j]);
392	}
393	}
394	return new Matrix(m);
395	}
396
397	/**
398	* Generate an (n-1)-by-m matrix representing the probability of acceptance for
399	* a given utility-time combination. The combinations are given by the time
400	* and utility samples stored in timeSamples and utilitySamples
401	* respectively.
402	*
403	* @param mean
404	* The means, at each of the sample time points.
405	* @param variance
406	* The variances, at each of the sample time points.
407	* @param time
408	* The current time, in the range [0, 1].
409	* @return An (n-1)-by-m matrix representing the probability of acceptance.
410	*/
411	private Pair<Matrix, Matrix> generateProbabilityAccept(Matrix mean, Matrix variance,
412	double time) {
413	int i = 0;
414	for (; i < timeSamples.getColumnDimension(); i++) {
415	if (timeSamples.get(0, i) > time)
416	break;
417	}
418	Matrix cumulativeAccept = new Matrix(utilitySamples.getRowDimension(),
419	timeSamples.getColumnDimension(), 0);
420	Matrix probabilityAccept = new Matrix(utilitySamples.getRowDimension(),
421	timeSamples.getColumnDimension(), 0);
422
423	double interval = 1.0/utilitySamples.getRowDimension();
424
425	for (; i < timeSamples.getColumnDimension(); i++) {
426	double s = Math.sqrt(2 * variance.get(i, 0));
427	double m = mean.get(i, 0);
428
429	double minp = (1.0 - (0.5 * (1 + erf((utilitySamples.get(0, 0) + (interval/2.0) - m)
430	/ s))));
431	double maxp = (1.0 - (0.5 * (1 + erf((utilitySamples.get(utilitySamples.getRowDimension()-1, 0) - (interval/2.0) - m)
432	/ s))));
433
434	for (int j = 0; j < utilitySamples.getRowDimension(); j++) {
435	double utility = utilitySamples.get(j, 0);
436	double p = (1.0 - (0.5 * (1 + erf((utility - m)
437	/ s))));
438	double p1 = (1.0 - (0.5 * (1 + erf((utility - (interval/2.0) - m)
439	/ s))));
440	double p2 = (1.0 - (0.5 * (1 + erf((utility + (interval/2.0) - m)
441	/ s))));
442
443	cumulativeAccept.set(j, i, (p-minp)/(maxp-minp));
444	probabilityAccept.set(j, i, (p1-p2)/(maxp-minp));
445	}
446	}
447	return new Pair<Matrix, Matrix>(probabilityAccept, cumulativeAccept);
448	}
449
450	/**
451	* Wrapper for the erf function.
452	*
453	* @param x
454	* @return
455	*/
456	private double erf(double x) {
457	if (x > 6)
458	return 1;
459	if (x < -6)
460	return -1;
461	try {
462	double d = Erf.erf(x);
463	if (d > 1)
464	return 1;
465	if (d < -1)
466	return -1;
467	return d;
468	} catch (MaxIterationsExceededException e) {
469	if (x > 0)
470	return 1;
471	else
472	return -1;
473	} catch (MathException e) {
474	e.printStackTrace();
475	return 0;
476	}
477	}
478
479	/**
480	* Generate an n-by-m matrix representing the risk based utility for a given
481	* utility-time combination. The combinations are given by the time and
482	* utility samples stored in timeSamples and utilitySamples
483	*
484	* @param riskParameter
485	* The risk parameter.
486	* @return an n-by-m matrix representing the risk based utility.
487	*/
488	protected Matrix generateRiskFunction(double riskParameter) {
489	double mmin = generateRiskFunction(riskParameter, 0.0);
490	double mmax = generateRiskFunction(riskParameter, 1.0);
491	double range = mmax - mmin;
492
493	double[] riskSamples = utilitySamples.getColumnPackedCopy();
494	double[][] m = new double[utilitySamples.getRowDimension()][timeSamples
495	.getColumnDimension()];
496	for (int i = 0; i < m.length; i++) {
497	double val;
498	if (range == 0) {
499	val = riskSamples[i];
500	} else {
501	val = (generateRiskFunction(riskParameter, riskSamples[i]) - mmin)
502	/ range;
503	}
504	for (int j = 0; j < m[i].length; j++) {
505	m[i][j] = val;
506	}
507	}
508	return new Matrix(m);
509	}
510
511	/**
512	* Generate the risk based utility for a given actual utility.
513	*
514	* @param riskParameter
515	* The risk parameter.
516	* @param utility
517	* The actual utility to calculate the risk based utility from.
518	* @return the risk based utility.
519	*/
520	protected double generateRiskFunction(double riskParameter, double utility) {
521	return Math.pow(utility, riskParameter);
522	}
523
524	/**
525	* Get a pair representing the time and utility value of the expected best
526	* agreement.
527	*
528	* @param expectedValues
529	* A matrix of expected utility values at the sampled time and
530	* utilities given by timeSamples and utilitySamples
531	* respectively.
532	* @param time
533	* The current time.
534	* @return a pair representing the time and utility value of the expected
535	* best agreement.
536	*/
537	private Pair<Double, Double> getExpectedBestAgreement(
538	Matrix probabilityExpectedValues, Matrix cumulativeExpectedValues, double time) {
539	log("probabilityExpectedValues is " + probabilityExpectedValues.getRowDimension() + "x" + probabilityExpectedValues.getColumnDimension());
540	log("time is " + time);
541	Matrix probabilityFutureExpectedValues = getFutureExpectedValues(probabilityExpectedValues, time);
542	Matrix cumulativeFutureExpectedValues = getFutureExpectedValues(cumulativeExpectedValues, time);
543
544	log("probabilityFutureExpectedValues is " + probabilityFutureExpectedValues.getRowDimension() + "x" + probabilityFutureExpectedValues.getColumnDimension());
545
546	double[][] probabilityFutureExpectedValuesArray = probabilityFutureExpectedValues.getArray();
547	double[][] cumulativeFutureExpectedValuesArray = cumulativeFutureExpectedValues.getArray();
548
549	Double bestX = null;
550	Double bestY = null;
551
552	double[] colSums = new double[probabilityFutureExpectedValuesArray[0].length];
553	double bestColSum = 0;
554	int bestCol = 0;
555
556	for (int x = 0; x < probabilityFutureExpectedValuesArray[0].length; x++) {
557	colSums[x] = 0;
558	for (int y = 0; y < probabilityFutureExpectedValuesArray.length; y++) {
559	colSums[x] += probabilityFutureExpectedValuesArray[y][x];
560	}
561
562	if (colSums[x] >= bestColSum) {
563	bestColSum = colSums[x];
564	bestCol = x;
565	}
566	}
567
568	log(new Matrix(colSums, 1));
569
570	int bestRow = 0;
571	double bestRowValue = 0;
572
573	for (int y = 0; y < cumulativeFutureExpectedValuesArray.length; y++) {
574	double expectedValue = cumulativeFutureExpectedValuesArray[y][bestCol];
575	if(expectedValue > bestRowValue) {
576	bestRowValue = expectedValue;
577	bestRow = y;
578	}
579	}
580
581	bestX = timeSamples.get(0, bestCol
582	+ probabilityExpectedValues.getColumnDimension()
583	- probabilityFutureExpectedValues.getColumnDimension());
584	bestY = utilitySamples.get(bestRow, 0);
585
586	log("About to return the best agreement at " + bestX + ", " + bestY);
587	return new Pair<Double, Double>(bestX, bestY);
588	}
589
590	/**
591	* Get a matrix of expected utility values at the sampled time and utilities
592	* given by timeSamples and utilitySamples, for times in the future.
593	*
594	* @param expectedValues
595	* A matrix of expected utility values at the sampled time and
596	* utilities given by timeSamples and utilitySamples
597	* respectively.
598	* @param time
599	* The current time.
600	* @return a matrix of expected utility values for future time.
601	*/
602	private Matrix getFutureExpectedValues(Matrix expectedValues, double time) {
603	int i = 0;
604	for (; i < timeSamples.getColumnDimension(); i++) {
605	if (timeSamples.get(0, i) > time)
606	break;
607	}
608	return expectedValues.getMatrix(0,
609	expectedValues.getRowDimension() - 1, i, expectedValues
610	.getColumnDimension() - 1);
611	}
612	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: src/main/java/agents/anac/y2012/IAMhaggler2012/agents2011/IAMhaggler2011.java

Download in other formats: