You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

615 lines
16 KiB

  1. // Copyright 2017 Tendermint. All rights reserved.
  2. // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
  3. package trust
  4. import (
  5. "encoding/json"
  6. "math"
  7. "sync"
  8. "time"
  9. cmn "github.com/tendermint/tmlibs/common"
  10. dbm "github.com/tendermint/tmlibs/db"
  11. )
  12. const defaultStorePeriodicSaveInterval = 1 * time.Minute
  13. // TrustMetricStore - Manages all trust metrics for peers
  14. type TrustMetricStore struct {
  15. cmn.BaseService
  16. // Maps a Peer.Key to that peer's TrustMetric
  17. peerMetrics map[string]*TrustMetric
  18. // Mutex that protects the map and history data file
  19. mtx sync.Mutex
  20. // The db where peer trust metric history data will be stored
  21. db dbm.DB
  22. // This configuration will be used when creating new TrustMetrics
  23. config TrustMetricConfig
  24. }
  25. // NewTrustMetricStore returns a store that saves data to the DB
  26. // and uses the config when creating new trust metrics
  27. func NewTrustMetricStore(db dbm.DB, tmc TrustMetricConfig) *TrustMetricStore {
  28. tms := &TrustMetricStore{
  29. peerMetrics: make(map[string]*TrustMetric),
  30. db: db,
  31. config: tmc,
  32. }
  33. tms.BaseService = *cmn.NewBaseService(nil, "TrustMetricStore", tms)
  34. return tms
  35. }
  36. // OnStart implements Service
  37. func (tms *TrustMetricStore) OnStart() error {
  38. tms.BaseService.OnStart()
  39. tms.mtx.Lock()
  40. defer tms.mtx.Unlock()
  41. tms.loadFromDB()
  42. go tms.saveRoutine()
  43. return nil
  44. }
  45. // OnStop implements Service
  46. func (tms *TrustMetricStore) OnStop() {
  47. tms.BaseService.OnStop()
  48. tms.mtx.Lock()
  49. defer tms.mtx.Unlock()
  50. // Stop all trust metric go-routines
  51. for _, tm := range tms.peerMetrics {
  52. tm.Stop()
  53. }
  54. // Make the final trust history data save
  55. tms.saveToDB()
  56. }
  57. // Size returns the number of entries in the trust metric store
  58. func (tms *TrustMetricStore) Size() int {
  59. tms.mtx.Lock()
  60. defer tms.mtx.Unlock()
  61. return tms.size()
  62. }
  63. // GetPeerTrustMetric returns a trust metric by peer key
  64. func (tms *TrustMetricStore) GetPeerTrustMetric(key string) *TrustMetric {
  65. tms.mtx.Lock()
  66. defer tms.mtx.Unlock()
  67. tm, ok := tms.peerMetrics[key]
  68. if !ok {
  69. // If the metric is not available, we will create it
  70. tm = NewMetricWithConfig(tms.config)
  71. // The metric needs to be in the map
  72. tms.peerMetrics[key] = tm
  73. }
  74. return tm
  75. }
  76. // PeerDisconnected pauses the trust metric associated with the peer identified by the key
  77. func (tms *TrustMetricStore) PeerDisconnected(key string) {
  78. tms.mtx.Lock()
  79. defer tms.mtx.Unlock()
  80. // If the Peer that disconnected has a metric, pause it
  81. if tm, ok := tms.peerMetrics[key]; ok {
  82. tm.Pause()
  83. }
  84. }
  85. // Saves the history data for all peers to the store DB.
  86. // This public method acquires the trust metric store lock
  87. func (tms *TrustMetricStore) SaveToDB() {
  88. tms.mtx.Lock()
  89. defer tms.mtx.Unlock()
  90. tms.saveToDB()
  91. }
  92. /* Private methods */
  93. // size returns the number of entries in the store without acquiring the mutex
  94. func (tms *TrustMetricStore) size() int {
  95. return len(tms.peerMetrics)
  96. }
  97. /* Loading & Saving */
  98. /* Both loadFromDB and saveToDB assume the mutex has been acquired */
  99. var trustMetricKey = []byte("trustMetricStore")
  100. // Loads the history data for all peers from the store DB
  101. // cmn.Panics if file is corrupt
  102. func (tms *TrustMetricStore) loadFromDB() bool {
  103. // Obtain the history data we have so far
  104. bytes := tms.db.Get(trustMetricKey)
  105. if bytes == nil {
  106. return false
  107. }
  108. peers := make(map[string]MetricHistoryJSON, 0)
  109. err := json.Unmarshal(bytes, &peers)
  110. if err != nil {
  111. cmn.PanicCrisis(cmn.Fmt("Could not unmarshal Trust Metric Store DB data: %v", err))
  112. }
  113. // If history data exists in the file,
  114. // load it into trust metric
  115. for key, p := range peers {
  116. tm := NewMetricWithConfig(tms.config)
  117. tm.Init(p)
  118. // Load the peer trust metric into the store
  119. tms.peerMetrics[key] = tm
  120. }
  121. return true
  122. }
  123. // Saves the history data for all peers to the store DB
  124. func (tms *TrustMetricStore) saveToDB() {
  125. tms.Logger.Debug("Saving TrustHistory to DB", "size", tms.size())
  126. peers := make(map[string]MetricHistoryJSON, 0)
  127. for key, tm := range tms.peerMetrics {
  128. // Add an entry for the peer identified by key
  129. peers[key] = tm.HistoryJSON()
  130. }
  131. // Write all the data back to the DB
  132. bytes, err := json.Marshal(peers)
  133. if err != nil {
  134. tms.Logger.Error("Failed to encode the TrustHistory", "err", err)
  135. return
  136. }
  137. tms.db.SetSync(trustMetricKey, bytes)
  138. }
  139. // Periodically saves the trust history data to the DB
  140. func (tms *TrustMetricStore) saveRoutine() {
  141. t := time.NewTicker(defaultStorePeriodicSaveInterval)
  142. defer t.Stop()
  143. loop:
  144. for {
  145. select {
  146. case <-t.C:
  147. tms.SaveToDB()
  148. case <-tms.Quit:
  149. break loop
  150. }
  151. }
  152. }
  153. //---------------------------------------------------------------------------------------
  154. const (
  155. // The weight applied to the derivative when current behavior is >= previous behavior
  156. defaultDerivativeGamma1 = 0
  157. // The weight applied to the derivative when current behavior is less than previous behavior
  158. defaultDerivativeGamma2 = 1.0
  159. // The weight applied to history data values when calculating the history value
  160. defaultHistoryDataWeight = 0.8
  161. )
  162. // TrustMetric - keeps track of peer reliability
  163. // See tendermint/docs/architecture/adr-006-trust-metric.md for details
  164. type TrustMetric struct {
  165. // Mutex that protects the metric from concurrent access
  166. mtx sync.Mutex
  167. // Determines the percentage given to current behavior
  168. proportionalWeight float64
  169. // Determines the percentage given to prior behavior
  170. integralWeight float64
  171. // Count of how many time intervals this metric has been tracking
  172. numIntervals int
  173. // Size of the time interval window for this trust metric
  174. maxIntervals int
  175. // The time duration for a single time interval
  176. intervalLen time.Duration
  177. // Stores the trust history data for this metric
  178. history []float64
  179. // Weights applied to the history data when calculating the history value
  180. historyWeights []float64
  181. // The sum of the history weights used when calculating the history value
  182. historyWeightSum float64
  183. // The current number of history data elements
  184. historySize int
  185. // The maximum number of history data elements
  186. historyMaxSize int
  187. // The calculated history value for the current time interval
  188. historyValue float64
  189. // The number of recorded good and bad events for the current time interval
  190. bad, good float64
  191. // While true, history data is not modified
  192. paused bool
  193. // Signal channel for stopping the trust metric go-routine
  194. stop chan struct{}
  195. }
  196. // MetricHistoryJSON - history data necessary to save the trust metric
  197. type MetricHistoryJSON struct {
  198. NumIntervals int `json:"intervals"`
  199. History []float64 `json:"history"`
  200. }
  201. // Returns a snapshot of the trust metric history data
  202. func (tm *TrustMetric) HistoryJSON() MetricHistoryJSON {
  203. tm.mtx.Lock()
  204. defer tm.mtx.Unlock()
  205. return MetricHistoryJSON{
  206. NumIntervals: tm.numIntervals,
  207. History: tm.history,
  208. }
  209. }
  210. // Instantiates a trust metric by loading the history data for a single peer.
  211. // This is called only once and only right after creation, which is why the
  212. // lock is not held while accessing the trust metric struct members
  213. func (tm *TrustMetric) Init(hist MetricHistoryJSON) {
  214. // Restore the number of time intervals we have previously tracked
  215. if hist.NumIntervals > tm.maxIntervals {
  216. hist.NumIntervals = tm.maxIntervals
  217. }
  218. tm.numIntervals = hist.NumIntervals
  219. // Restore the history and its current size
  220. if len(hist.History) > tm.historyMaxSize {
  221. // Keep the history no larger than historyMaxSize
  222. last := len(hist.History) - tm.historyMaxSize
  223. hist.History = hist.History[last:]
  224. }
  225. tm.history = hist.History
  226. tm.historySize = len(tm.history)
  227. // Create the history weight values and weight sum
  228. for i := 1; i <= tm.numIntervals; i++ {
  229. x := math.Pow(defaultHistoryDataWeight, float64(i)) // Optimistic weight
  230. tm.historyWeights = append(tm.historyWeights, x)
  231. }
  232. for _, v := range tm.historyWeights {
  233. tm.historyWeightSum += v
  234. }
  235. // Calculate the history value based on the loaded history data
  236. tm.historyValue = tm.calcHistoryValue()
  237. }
  238. // Pause tells the metric to pause recording data over time intervals.
  239. // All method calls that indicate events will unpause the metric
  240. func (tm *TrustMetric) Pause() {
  241. tm.mtx.Lock()
  242. defer tm.mtx.Unlock()
  243. // Pause the metric for now
  244. tm.paused = true
  245. }
  246. // Stop tells the metric to stop recording data over time intervals
  247. func (tm *TrustMetric) Stop() {
  248. tm.stop <- struct{}{}
  249. }
  250. // BadEvents indicates that an undesirable event(s) took place
  251. func (tm *TrustMetric) BadEvents(num int) {
  252. tm.mtx.Lock()
  253. defer tm.mtx.Unlock()
  254. tm.unpause()
  255. tm.bad += float64(num)
  256. }
  257. // GoodEvents indicates that a desirable event(s) took place
  258. func (tm *TrustMetric) GoodEvents(num int) {
  259. tm.mtx.Lock()
  260. defer tm.mtx.Unlock()
  261. tm.unpause()
  262. tm.good += float64(num)
  263. }
  264. // TrustValue gets the dependable trust value; always between 0 and 1
  265. func (tm *TrustMetric) TrustValue() float64 {
  266. tm.mtx.Lock()
  267. defer tm.mtx.Unlock()
  268. return tm.calcTrustValue()
  269. }
  270. // TrustScore gets a score based on the trust value always between 0 and 100
  271. func (tm *TrustMetric) TrustScore() int {
  272. score := tm.TrustValue() * 100
  273. return int(math.Floor(score))
  274. }
  275. // NextTimeInterval saves current time interval data and prepares for the following interval
  276. func (tm *TrustMetric) NextTimeInterval() {
  277. tm.mtx.Lock()
  278. defer tm.mtx.Unlock()
  279. if tm.paused {
  280. // Do not prepare for the next time interval while paused
  281. return
  282. }
  283. // Add the current trust value to the history data
  284. newHist := tm.calcTrustValue()
  285. tm.history = append(tm.history, newHist)
  286. // Update history and interval counters
  287. if tm.historySize < tm.historyMaxSize {
  288. tm.historySize++
  289. } else {
  290. // Keep the history no larger than historyMaxSize
  291. last := len(tm.history) - tm.historyMaxSize
  292. tm.history = tm.history[last:]
  293. }
  294. if tm.numIntervals < tm.maxIntervals {
  295. tm.numIntervals++
  296. // Add the optimistic weight for the new time interval
  297. wk := math.Pow(defaultHistoryDataWeight, float64(tm.numIntervals))
  298. tm.historyWeights = append(tm.historyWeights, wk)
  299. tm.historyWeightSum += wk
  300. }
  301. // Update the history data using Faded Memories
  302. tm.updateFadedMemory()
  303. // Calculate the history value for the upcoming time interval
  304. tm.historyValue = tm.calcHistoryValue()
  305. tm.good = 0
  306. tm.bad = 0
  307. }
  308. // Copy returns a new trust metric with members containing the same values
  309. func (tm *TrustMetric) Copy() *TrustMetric {
  310. if tm == nil {
  311. return nil
  312. }
  313. return &TrustMetric{
  314. proportionalWeight: tm.proportionalWeight,
  315. integralWeight: tm.integralWeight,
  316. numIntervals: tm.numIntervals,
  317. maxIntervals: tm.maxIntervals,
  318. intervalLen: tm.intervalLen,
  319. history: tm.history,
  320. historyWeights: tm.historyWeights,
  321. historyWeightSum: tm.historyWeightSum,
  322. historySize: tm.historySize,
  323. historyMaxSize: tm.historyMaxSize,
  324. historyValue: tm.historyValue,
  325. good: tm.good,
  326. bad: tm.bad,
  327. paused: tm.paused,
  328. stop: make(chan struct{}),
  329. }
  330. }
  331. // TrustMetricConfig - Configures the weight functions and time intervals for the metric
  332. type TrustMetricConfig struct {
  333. // Determines the percentage given to current behavior
  334. ProportionalWeight float64
  335. // Determines the percentage given to prior behavior
  336. IntegralWeight float64
  337. // The window of time that the trust metric will track events across.
  338. // This can be set to cover many days without issue
  339. TrackingWindow time.Duration
  340. // Each interval should be short for adapability.
  341. // Less than 30 seconds is too sensitive,
  342. // and greater than 5 minutes will make the metric numb
  343. IntervalLength time.Duration
  344. }
  345. // DefaultConfig returns a config with values that have been tested and produce desirable results
  346. func DefaultConfig() TrustMetricConfig {
  347. return TrustMetricConfig{
  348. ProportionalWeight: 0.4,
  349. IntegralWeight: 0.6,
  350. TrackingWindow: (time.Minute * 60 * 24) * 14, // 14 days.
  351. IntervalLength: 1 * time.Minute,
  352. }
  353. }
  354. // NewMetric returns a trust metric with the default configuration
  355. func NewMetric() *TrustMetric {
  356. return NewMetricWithConfig(DefaultConfig())
  357. }
  358. // NewMetricWithConfig returns a trust metric with a custom configuration
  359. func NewMetricWithConfig(tmc TrustMetricConfig) *TrustMetric {
  360. tm := new(TrustMetric)
  361. config := customConfig(tmc)
  362. // Setup using the configuration values
  363. tm.proportionalWeight = config.ProportionalWeight
  364. tm.integralWeight = config.IntegralWeight
  365. tm.intervalLen = config.IntervalLength
  366. // The maximum number of time intervals is the tracking window / interval length
  367. tm.maxIntervals = int(config.TrackingWindow / tm.intervalLen)
  368. // The history size will be determined by the maximum number of time intervals
  369. tm.historyMaxSize = intervalToHistoryOffset(tm.maxIntervals) + 1
  370. // This metric has a perfect history so far
  371. tm.historyValue = 1.0
  372. // Setup the stop channel
  373. tm.stop = make(chan struct{})
  374. go tm.processRequests()
  375. return tm
  376. }
  377. /* Private methods */
  378. // Ensures that all configuration elements have valid values
  379. func customConfig(tmc TrustMetricConfig) TrustMetricConfig {
  380. config := DefaultConfig()
  381. // Check the config for set values, and setup appropriately
  382. if tmc.ProportionalWeight > 0 {
  383. config.ProportionalWeight = tmc.ProportionalWeight
  384. }
  385. if tmc.IntegralWeight > 0 {
  386. config.IntegralWeight = tmc.IntegralWeight
  387. }
  388. if tmc.IntervalLength > time.Duration(0) {
  389. config.IntervalLength = tmc.IntervalLength
  390. }
  391. if tmc.TrackingWindow > time.Duration(0) &&
  392. tmc.TrackingWindow >= config.IntervalLength {
  393. config.TrackingWindow = tmc.TrackingWindow
  394. }
  395. return config
  396. }
  397. // Wakes the trust metric up if it is currently paused
  398. // This method needs to be called with the mutex locked
  399. func (tm *TrustMetric) unpause() {
  400. // Check if this is the first experience with
  401. // what we are tracking since being paused
  402. if tm.paused {
  403. tm.good = 0
  404. tm.bad = 0
  405. // New events cause us to unpause the metric
  406. tm.paused = false
  407. }
  408. }
  409. // Calculates the derivative component
  410. func (tm *TrustMetric) derivativeValue() float64 {
  411. return tm.proportionalValue() - tm.historyValue
  412. }
  413. // Strengthens the derivative component when the change is negative
  414. func (tm *TrustMetric) weightedDerivative() float64 {
  415. var weight float64 = defaultDerivativeGamma1
  416. d := tm.derivativeValue()
  417. if d < 0 {
  418. weight = defaultDerivativeGamma2
  419. }
  420. return weight * d
  421. }
  422. // Performs the update for our Faded Memories process, which allows the
  423. // trust metric tracking window to be large while maintaining a small
  424. // number of history data values
  425. func (tm *TrustMetric) updateFadedMemory() {
  426. if tm.historySize < 2 {
  427. return
  428. }
  429. end := tm.historySize - 1
  430. // Keep the most recent history element
  431. for count := 1; count < tm.historySize; count++ {
  432. i := end - count
  433. // The older the data is, the more we spread it out
  434. x := math.Pow(2, float64(count))
  435. // Two history data values are merged into a single value
  436. tm.history[i] = ((tm.history[i] * (x - 1)) + tm.history[i+1]) / x
  437. }
  438. }
  439. // Map the interval value down to an offset from the beginning of history
  440. func intervalToHistoryOffset(interval int) int {
  441. // The system maintains 2^m interval values in the form of m history
  442. // data values. Therefore, we access the ith interval by obtaining
  443. // the history data index = the floor of log2(i)
  444. return int(math.Floor(math.Log2(float64(interval))))
  445. }
  446. // Retrieves the actual history data value that represents the requested time interval
  447. func (tm *TrustMetric) fadedMemoryValue(interval int) float64 {
  448. first := tm.historySize - 1
  449. if interval == 0 {
  450. // Base case
  451. return tm.history[first]
  452. }
  453. offset := intervalToHistoryOffset(interval)
  454. return tm.history[first-offset]
  455. }
  456. // Calculates the integral (history) component of the trust value
  457. func (tm *TrustMetric) calcHistoryValue() float64 {
  458. var hv float64
  459. for i := 0; i < tm.numIntervals; i++ {
  460. hv += tm.fadedMemoryValue(i) * tm.historyWeights[i]
  461. }
  462. return hv / tm.historyWeightSum
  463. }
  464. // Calculates the current score for good/bad experiences
  465. func (tm *TrustMetric) proportionalValue() float64 {
  466. value := 1.0
  467. total := tm.good + tm.bad
  468. if total > 0 {
  469. value = tm.good / total
  470. }
  471. return value
  472. }
  473. // Calculates the trust value for the request processing
  474. func (tm *TrustMetric) calcTrustValue() float64 {
  475. weightedP := tm.proportionalWeight * tm.proportionalValue()
  476. weightedI := tm.integralWeight * tm.historyValue
  477. weightedD := tm.weightedDerivative()
  478. tv := weightedP + weightedI + weightedD
  479. // Do not return a negative value.
  480. if tv < 0 {
  481. tv = 0
  482. }
  483. return tv
  484. }
  485. // This method is for a goroutine that handles all requests on the metric
  486. func (tm *TrustMetric) processRequests() {
  487. t := time.NewTicker(tm.intervalLen)
  488. defer t.Stop()
  489. loop:
  490. for {
  491. select {
  492. case <-t.C:
  493. tm.NextTimeInterval()
  494. case <-tm.stop:
  495. // Stop all further tracking for this metric
  496. break loop
  497. }
  498. }
  499. }