source: ANL2022/learning_agent/LearnedData.py

Last change on this file was 75, checked in by wouter, 21 months ago

#6 added ANAC2022 parties

File size: 9.0 KB
Line 
1import math
2from math import sqrt
3
4from .NegotiationData import NegotiationData
5
6
class LearnedData:
    """Hold the data our agent has learned about one opponent.

    The object accumulates running statistics over repeated negotiation
    sessions (see ``update``) and can be restored from a flat list of
    previously stored values (see ``encode``).

    NOTE(review): ``encode`` reads the values positionally in the same order
    in which ``__init__`` creates the instance attributes, so that order
    presumably matters to whatever serializes this object -- keep both in
    sync and confirm against the caller before reordering anything.
    """

    # Length of the per-session "over time" series (number of time buckets).
    __tSplit: int = 40
    # Constant offset (and scale ``1 - tPhase``) applied to the time estimate
    # that is fed to the calibrated polynomial in ``calcAlpha``.
    __tPhase: float = 0.2
    # Weight of the newest session when blending it into the stored
    # opponent-utility series.
    __newWeight: float = 0.3
    # Weight of the newest session when blending it into the stored
    # max-reject series.
    __newWeightForReject: float = 0.3
    __smoothWidth: int = 3  # from each side of the element
    __smoothWidthForReject: int = 3  # from each side of the element
    # Fraction of the (max - min) drop of the smoothed series that
    # ``calcAlpha`` looks for when locating the time index ``t``.
    __opponentDecrease: float = 0.65
    # Alpha returned by ``calcAlpha`` when the series shows no clear trend.
    # (The misspelled name is kept on purpose so existing references work.)
    __defualtAlpha: float = 10.7

    def __init__(self):
        # Do not reorder these assignments; see the class docstring.
        self.__opponentName: str = None
        # average utility of agreement
        self.__avgUtility: float = 0.0
        # num of negotiations against this opponent
        self.__numEncounters: int = 0
        self.__avgMaxUtilityOpponent: float = 0.0

        # our new data structures
        self.__stdUtility: float = 0.0
        self.__negoResults: list = []
        self.__avgOpponentUtility: float = 0.0
        self.__opponentAlpha: float = 0.0
        self.__opponentUtilByTime: list = []
        self.__opponentMaxReject: list = [0.0] * self.__tSplit

    def encode(self, paramList: list):
        """Restore the learned state from a flat list of stored values.

        Despite its name this method *loads* data (the original comment
        describes it as handling deserialized JSON).

        paramList  sequence with at least ten entries, in this order:
                   opponent name, average utility, number of encounters,
                   average max utility offered by the opponent, utility
                   standard deviation, list of negotiation results, average
                   opponent utility, opponent alpha, opponent utility by
                   time, opponent max reject by time.
        """
        self.__opponentName = paramList[0]
        self.__avgUtility = paramList[1]
        self.__numEncounters = paramList[2]
        self.__avgMaxUtilityOpponent = paramList[3]
        self.__stdUtility = paramList[4]
        self.__negoResults = paramList[5]
        self.__avgOpponentUtility = paramList[6]
        self.__opponentAlpha = paramList[7]
        self.__opponentUtilByTime = paramList[8]
        self.__opponentMaxReject = paramList[9]

    def update(self, negotiationData: "NegotiationData"):
        """Update the learned data with the data of a finished negotiation.

        negotiationData  NegotiationData object holding the data that was
                         obtained during a negotiation session. It must
                         provide getAgreementUtil(), getMaxReceivedUtil(),
                         getOpponentUtil(), getOpponentUtilByTime() and
                         getOpponentMaxReject(); the two series are indexed
                         up to ``tSplit`` entries.
        """
        encounters = self.__numEncounters
        agreementUtil = negotiationData.getAgreementUtil()

        # Keep track of the average utility that we obtained. A session
        # without agreement (utility <= 0) is counted as the current average
        # minus 1.1 times the current variance instead of as a plain zero.
        if agreementUtil > 0:
            newUtil = agreementUtil
        else:
            newUtil = self.__avgUtility - 1.1 * pow(self.__stdUtility, 2)
        self.__avgUtility = (self.__avgUtility * encounters + newUtil) / (encounters + 1)

        # Record the raw result and recompute the standard deviation of all
        # recorded results around the new average.
        self.__negoResults.append(agreementUtil)
        squaredError = 0.0
        for util in self.__negoResults:
            squaredError += pow(util - self.__avgUtility, 2)
        self.__stdUtility = sqrt(squaredError / (encounters + 1))

        # Track the average value of the maximum that an opponent has offered
        # us across multiple negotiation sessions.
        self.__avgMaxUtilityOpponent = (
            self.__avgMaxUtilityOpponent * encounters + negotiationData.getMaxReceivedUtil()
        ) / (encounters + 1)

        self.__avgOpponentUtility = (
            self.__avgOpponentUtility * encounters + negotiationData.getOpponentUtil()
        ) / (encounters + 1)

        newUtilData: list = negotiationData.getOpponentUtilByTime()
        newOpponentMaxReject: list = negotiationData.getOpponentMaxReject()

        if encounters == 0:
            # First session: adopt the series as they are. Store copies so
            # the in-place blending of later sessions cannot modify the
            # caller's lists.
            self.__opponentUtilByTime = list(newUtilData)
            self.__opponentMaxReject = list(newOpponentMaxReject)
        else:
            self.__opponentUtilByTime = self._blendSeries(
                self.__opponentUtilByTime, newUtilData, self.__newWeight)
            self.__opponentMaxReject = self._blendSeries(
                self.__opponentMaxReject, newOpponentMaxReject, self.__newWeightForReject)

        self.__opponentAlpha = self.calcAlpha()

        # Keep track of the number of negotiations that we performed
        self.__numEncounters += 1

    def _blendSeries(self, stored: list, fresh: list, weight: float) -> list:
        """Blend the newest per-time series into the stored one (in place).

        Buckets for which the new session has data (value > 0) become the
        weighted mean ``(1 - weight) * stored + weight * fresh``. Buckets
        without new data are scaled by the relative change of bucket 0, so
        they follow the overall movement of the series.

        stored  series kept so far; an empty one is replaced by zeros
        fresh   series of the newest session
        weight  weight given to the newest session
        Returns the updated ``stored`` list.
        """
        if not stored:
            stored = [0.0] * self.__tSplit

        # Ratio must be taken before bucket 0 itself is overwritten below.
        if stored[0] > 0.0:
            ratio = ((1 - weight) * stored[0] + weight * fresh[0]) / stored[0]
        else:
            ratio = 1

        for i in range(self.__tSplit):
            if fresh[i] > 0:
                stored[i] = (1 - weight) * stored[i] + weight * fresh[i]
            else:
                stored[i] *= ratio
        return stored

    def calcAlpha(self) -> float:
        """Estimate the opponent's alpha from the smoothed utility series.

        Returns the default alpha when the smoothed series has no clear
        trend (its first value and a value near its end differ by less than
        0.1); otherwise the value of a calibrated degree-4 polynomial at a
        time estimate derived from the series.
        """
        # smoothing with smooth width of smoothWidth
        alphaArray: list = self.getSmoothThresholdOverTime()

        # find the last index with data in alphaArray
        maxIndex: int = 0
        while maxIndex < self.__tSplit and alphaArray[maxIndex] > 0.2:
            maxIndex += 1

        maxValue: float = alphaArray[0]
        # Step back from the end of the data by the smoothing width (+1)
        # before reading the "low" value.
        minValue: float = alphaArray[max(maxIndex - self.__smoothWidth - 1, 0)]

        # if there is no clear trend-line, return default value
        if maxValue - minValue < 0.1:
            return self.__defualtAlpha

        # find t, the first index at which the series has dropped by the
        # fraction ``opponentDecrease`` of the (maxValue - minValue) range
        dropLevel = maxValue - self.__opponentDecrease * (maxValue - minValue)
        t: int = 0
        while t < maxIndex and alphaArray[t] > dropLevel:
            t += 1

        calibratedPolynom: list = [572.83, -1186.7, 899.29, -284.68, 32.911]

        # Buckets with data contribute t / tSplit each, the remaining
        # buckets contribute a fixed 0.85; the mean is mapped into
        # [tPhase, 1].
        tTime: float = self.__tPhase + (1 - self.__tPhase) * (
            maxIndex * (float(t) / self.__tSplit) + (self.__tSplit - maxIndex) * 0.85
        ) / self.__tSplit

        # Horner evaluation, highest-order coefficient first.
        alpha: float = calibratedPolynom[0]
        for coefficient in calibratedPolynom[1:]:
            alpha = alpha * tTime + coefficient

        return alpha

    def _smoothSeries(self, series: list, width: int) -> list:
        """Return a moving-average smoothed copy of ``series``.

        Only the first ``tSplit`` entries are considered and trailing zeros
        are ignored (the first entry is always kept). Every remaining entry
        becomes the mean of the entries at most ``width`` positions away,
        with the window clipped at both ends of the data. The result always
        has ``tSplit`` entries; positions without data stay 0.0. An empty or
        short ``series`` is handled and does not raise.
        """
        smoothed: list = [0.0] * self.__tSplit

        # ignore zeros in end of the array
        end = min(len(series), self.__tSplit)
        while end > 1 and series[end - 1] == 0:
            end -= 1

        for i in range(end):
            low = max(i - width, 0)
            high = min(i + width + 1, end)
            total = 0.0
            for j in range(low, high):
                total += series[j]
            smoothed[i] = total / (high - low)

        return smoothed

    def getSmoothThresholdOverTime(self) -> list:
        """Return the stored opponent-utility series smoothed over time."""
        return self._smoothSeries(self.__opponentUtilByTime, self.__smoothWidth)

    def getSmoothRejectOverTime(self) -> list:
        """Return the stored opponent max-reject series smoothed over time."""
        return self._smoothSeries(self.__opponentMaxReject, self.__smoothWidthForReject)

    def getAvgUtility(self):
        """Return the running average of the utility we obtained."""
        return self.__avgUtility

    def getStdUtility(self):
        """Return the standard deviation of the recorded results."""
        return self.__stdUtility

    def getOpponentAlpha(self):
        """Return the alpha computed by the most recent ``update``."""
        return self.__opponentAlpha

    def getOpUtility(self):
        """Return the running average of the opponent's utility."""
        return self.__avgOpponentUtility

    def getAvgMaxUtility(self):
        """Return the running average of the best utility offered to us."""
        return self.__avgMaxUtilityOpponent

    def getOpponentEncounters(self):
        """Return the number of sessions recorded against this opponent."""
        return self.__numEncounters

    def setOpponentName(self, opponentName):
        """Set the name of the opponent this data belongs to."""
        self.__opponentName = opponentName
Note: See TracBrowser for help on using the repository browser.