Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

LearnedData.py@ 77

Last change on this file since 77 was 75, checked in by wouter, 2 years ago
#6 added ANAC2022 parties
File size: 9.0 KB

Rev	Line
[75]	1	import math
	2	from math import sqrt
	3
	4	from .NegotiationData import NegotiationData
	5
	6
	7	class LearnedData:
	8	"""This class hold the learned data of our agent.
	9	"""
	10
	11	__tSplit: int = 40
	12	__tPhase: float = 0.2
	13	__newWeight: float = 0.3
	14	__newWeightForReject: float = 0.3
	15	__smoothWidth: int = 3 # from each side of the element
	16	__smoothWidthForReject: int = 3 # from each side of the element
	17	__opponentDecrease: float = 0.65
	18	__defualtAlpha: float = 10.7
	19
	20	def __init__(self):
	21
	22	self.__opponentName: str = None
	23	# average utility of agreement
	24	self.__avgUtility: float = 0.0
	25	# num of negotiations against this opponent
	26	self.__numEncounters: int = 0
	27	self.__avgMaxUtilityOpponent: float = 0.0
	28
	29	# our new data structures
	30	self.__stdUtility: float = 0.0
	31	self.__negoResults: list = []
	32	self.__avgOpponentUtility: float = 0.0
	33	self.__opponentAlpha: float = 0.0
	34	self.__opponentUtilByTime: list = []
	35	self.__opponentMaxReject: list = [0.0] * self.__tSplit
	36
	37	def encode(self, paramList: list):
	38	""" This function get deserialize json
	39	"""
	40	self.__opponentName = paramList[0]
	41	self.__avgUtility = paramList[1]
	42	self.__numEncounters = paramList[2]
	43	self.__avgMaxUtilityOpponent = paramList[3]
	44	self.__stdUtility = paramList[4]
	45	self.__negoResults = paramList[5]
	46	self.__avgOpponentUtility = paramList[6]
	47	self.__opponentAlpha = paramList[7]
	48	self.__opponentUtilByTime = paramList[8]
	49	self.__opponentMaxReject = paramList[9]
	50
	51	def update(self, negotiationData: NegotiationData):
	52	""" Update the learned data with a negotiation data of a previous negotiation
	53	session
	54	negotiationData NegotiationData class holding the negotiation data
	55	that is obtain during a negotiation session.
	56	"""
	57	# Keep track of the average utility that we obtained Double
	58	newUtil = negotiationData.getAgreementUtil() if (negotiationData.getAgreementUtil() > 0) \
	59	else self.__avgUtility - 1.1 * pow(self.__stdUtility, 2)
	60
	61	self.__avgUtility = (self.__avgUtility * self.__numEncounters + newUtil) \
	62	/ (self.__numEncounters + 1)
	63
	64	# add utility to UtiList calculate std deviation of results
	65	self.__negoResults.append(negotiationData.getAgreementUtil())
	66	self.__stdUtility = 0.0
	67
	68	for util in self.__negoResults:
	69	self.__stdUtility += pow(util - self.__avgUtility, 2)
	70	self.__stdUtility = sqrt(self.__stdUtility / (self.__numEncounters + 1))
	71
	72	# Track the average value of the maximum that an opponent has offered us across
	73	# multiple negotiation sessions Double
	74	self.__avgMaxUtilityOpponent = (
	75	self.__avgMaxUtilityOpponent * self.__numEncounters + negotiationData.getMaxReceivedUtil()) \
	76	/ (self.__numEncounters + 1)
	77
	78	self.__avgOpponentUtility = (
	79	self.__avgOpponentUtility * self.__numEncounters + negotiationData.getOpponentUtil()) \
	80	/ (self.__numEncounters + 1)
	81
	82	# update opponent utility over time
	83	opponentTimeUtil: list = [0.0] * self.__tSplit if self.__opponentUtilByTime == [] else self.__opponentUtilByTime
	84	# update opponent reject over time
	85	opponentMaxReject: list = [0.0] * self.__tSplit if self.__opponentMaxReject == [] else self.__opponentMaxReject
	86
	87	# update values in the array
	88	newUtilData: list = negotiationData.getOpponentUtilByTime()
	89	newOpponentMaxReject = negotiationData.getOpponentMaxReject()
	90
	91	if self.__numEncounters == 0:
	92	self.__opponentUtilByTime = newUtilData
	93	self.__opponentMaxReject = newOpponentMaxReject
	94
	95	else:
	96	# find the ratio of decrease in the array, for updating 0 - s in the array
	97	ratio: float = ((1 - self.__newWeight) * opponentTimeUtil[0] + self.__newWeight * newUtilData[0]) / \
	98	opponentTimeUtil[0] \
	99	if opponentTimeUtil[0] > 0.0 else 1
	100
	101	# update the array
	102	for i in range(self.__tSplit):
	103	if (newUtilData[i] > 0):
	104	opponentTimeUtil[i] = (
	105	(1 - self.__newWeight) * opponentTimeUtil[i] + self.__newWeight * newUtilData[i])
	106	else:
	107	opponentTimeUtil[i] *= ratio
	108
	109	self.__opponentUtilByTime = opponentTimeUtil
	110
	111	# find the ratio of decrease in the array, for updating 0 - s in the array
	112	ratio: float = ((1 - self.__newWeightForReject) * opponentMaxReject[0] + self.__newWeightForReject *
	113	newOpponentMaxReject[0]) / \
	114	opponentMaxReject[0] \
	115	if opponentMaxReject[0] > 0.0 else 1
	116
	117	# update the array
	118	for i in range(self.__tSplit):
	119	if (newOpponentMaxReject[i] > 0):
	120	opponentMaxReject[i] = (
	121	(1 - self.__newWeightForReject) * opponentMaxReject[i] + self.__newWeightForReject *
	122	newOpponentMaxReject[i])
	123	else:
	124	opponentMaxReject[i] *= ratio
	125
	126	self.__opponentMaxReject = opponentMaxReject
	127
	128	self.__opponentAlpha = self.calcAlpha()
	129
	130	# Keep track of the number of negotiations that we performed
	131	self.__numEncounters += 1
	132
	133	def calcAlpha(self):
	134	# smoothing with smooth width of smoothWidth
	135	alphaArray: list = self.getSmoothThresholdOverTime()
	136
	137	# find the last index with data in alphaArray
	138
	139	maxIndex: int = 0
	140	while maxIndex < self.__tSplit and alphaArray[maxIndex] > 0.2:
	141	maxIndex += 1
	142
	143	# find t, time that threshold decrease by 50 %
	144	maxValue: float = alphaArray[0]
	145	minValue: float = alphaArray[max(maxIndex - self.__smoothWidth - 1, 0)]
	146
	147	# if there is no clear trend-line, return default value
	148	if maxValue - minValue < 0.1:
	149	return self.__defualtAlpha
	150
	151	t: int = 0
	152	while t < maxIndex and alphaArray[t] > (maxValue - self.__opponentDecrease * (maxValue - minValue)):
	153	t += 1
	154
	155	calibratedPolynom: list = [572.83, -1186.7, 899.29, -284.68, 32.911]
	156	alpha: float = calibratedPolynom[0]
	157
	158	tTime: float = self.__tPhase + (1 - self.__tPhase) * (
	159	maxIndex * (float(t) / self.__tSplit) + (self.__tSplit - maxIndex) * 0.85) / self.__tSplit
	160	for i in range(1, len(calibratedPolynom)):
	161	alpha = alpha * tTime + calibratedPolynom[i]
	162
	163	return alpha
	164
	165	def getSmoothThresholdOverTime(self):
	166	# smoothing with smooth width of smoothWidth
	167	smoothedTimeUtil: list = [0.0] * self.__tSplit
	168
	169	# ignore zeros in end of the array
	170	tSplitWithoutZero = self.__tSplit - 1
	171	while self.__opponentUtilByTime[tSplitWithoutZero] == 0 and tSplitWithoutZero > 0:
	172	tSplitWithoutZero -= 1
	173	tSplitWithoutZero += 1
	174	for i in range(tSplitWithoutZero):
	175	for j in range(max(i - self.__smoothWidth, 0), min(i + self.__smoothWidth + 1, tSplitWithoutZero)):
	176	smoothedTimeUtil[i] += self.__opponentUtilByTime[j]
	177	smoothedTimeUtil[i] /= (min(i + self.__smoothWidth + 1, tSplitWithoutZero) - max(i - self.__smoothWidth, 0))
	178
	179	return smoothedTimeUtil
	180
	181	def getSmoothRejectOverTime(self):
	182	# smoothing with smooth width of smoothWidth
	183	smoothedRejectUtil: list = [0.0] * self.__tSplit
	184
	185	# ignore zeros in end of the array
	186	tSplitWithoutZero = self.__tSplit - 1
	187	while self.__opponentMaxReject[tSplitWithoutZero] == 0 and tSplitWithoutZero > 0:
	188	tSplitWithoutZero -= 1
	189	tSplitWithoutZero += 1
	190	for i in range(tSplitWithoutZero):
	191	for j in range(max(i - self.__smoothWidthForReject, 0),
	192	min(i + self.__smoothWidthForReject + 1, tSplitWithoutZero)):
	193	smoothedRejectUtil[i] += self.__opponentMaxReject[j]
	194	smoothedRejectUtil[i] /= (min(i + self.__smoothWidthForReject + 1, tSplitWithoutZero) - max(
	195	i - self.__smoothWidthForReject, 0))
	196
	197	return smoothedRejectUtil
	198
	199	def getAvgUtility(self):
	200	return self.__avgUtility
	201
	202	def getStdUtility(self):
	203	return self.__stdUtility
	204
	205	def getOpponentAlpha(self):
	206	return self.__opponentAlpha
	207
	208	def getOpUtility(self):
	209	return self.__avgOpponentUtility
	210
	211	def getAvgMaxUtility(self):
	212	return self.__avgMaxUtilityOpponent
	213
	214	def getOpponentEncounters(self):
	215	return self.__numEncounters
	216
	217	def setOpponentName(self, opponentName):
	218	self.__opponentName = opponentName

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: ANL2022/compromising_agent/LearnedData.py@ 77

Download in other formats: