You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

617 lines
16 KiB

7 years ago
  1. // Copyright 2017 Tendermint. All rights reserved.
  2. // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
  3. package trust
  4. import (
  5. "encoding/json"
  6. "math"
  7. "sync"
  8. "time"
  9. cmn "github.com/tendermint/tmlibs/common"
  10. dbm "github.com/tendermint/tmlibs/db"
  11. )
  // defaultStorePeriodicSaveInterval is the period of the ticker that drives
  // the store's periodic save of the trust history data.
  const defaultStorePeriodicSaveInterval = time.Minute
  13. // TrustMetricStore - Manages all trust metrics for peers
  14. type TrustMetricStore struct {
  15. cmn.BaseService
  16. // Maps a Peer.Key to that peer's TrustMetric
  17. peerMetrics map[string]*TrustMetric
  18. // Mutex that protects the map and history data file
  19. mtx sync.Mutex
  20. // The db where peer trust metric history data will be stored
  21. db dbm.DB
  22. // This configuration will be used when creating new TrustMetrics
  23. config TrustMetricConfig
  24. }
  25. // NewTrustMetricStore returns a store that saves data to the DB
  26. // and uses the config when creating new trust metrics
  27. func NewTrustMetricStore(db dbm.DB, tmc TrustMetricConfig) *TrustMetricStore {
  28. tms := &TrustMetricStore{
  29. peerMetrics: make(map[string]*TrustMetric),
  30. db: db,
  31. config: tmc,
  32. }
  33. tms.BaseService = *cmn.NewBaseService(nil, "TrustMetricStore", tms)
  34. return tms
  35. }
  36. // OnStart implements Service
  37. func (tms *TrustMetricStore) OnStart() error {
  38. if err := tms.BaseService.OnStart(); err != nil {
  39. return err
  40. }
  41. tms.mtx.Lock()
  42. defer tms.mtx.Unlock()
  43. tms.loadFromDB()
  44. go tms.saveRoutine()
  45. return nil
  46. }
  47. // OnStop implements Service
  48. func (tms *TrustMetricStore) OnStop() {
  49. tms.BaseService.OnStop()
  50. tms.mtx.Lock()
  51. defer tms.mtx.Unlock()
  52. // Stop all trust metric go-routines
  53. for _, tm := range tms.peerMetrics {
  54. tm.Stop()
  55. }
  56. // Make the final trust history data save
  57. tms.saveToDB()
  58. }
  59. // Size returns the number of entries in the trust metric store
  60. func (tms *TrustMetricStore) Size() int {
  61. tms.mtx.Lock()
  62. defer tms.mtx.Unlock()
  63. return tms.size()
  64. }
  65. // GetPeerTrustMetric returns a trust metric by peer key
  66. func (tms *TrustMetricStore) GetPeerTrustMetric(key string) *TrustMetric {
  67. tms.mtx.Lock()
  68. defer tms.mtx.Unlock()
  69. tm, ok := tms.peerMetrics[key]
  70. if !ok {
  71. // If the metric is not available, we will create it
  72. tm = NewMetricWithConfig(tms.config)
  73. // The metric needs to be in the map
  74. tms.peerMetrics[key] = tm
  75. }
  76. return tm
  77. }
  78. // PeerDisconnected pauses the trust metric associated with the peer identified by the key
  79. func (tms *TrustMetricStore) PeerDisconnected(key string) {
  80. tms.mtx.Lock()
  81. defer tms.mtx.Unlock()
  82. // If the Peer that disconnected has a metric, pause it
  83. if tm, ok := tms.peerMetrics[key]; ok {
  84. tm.Pause()
  85. }
  86. }
  87. // Saves the history data for all peers to the store DB.
  88. // This public method acquires the trust metric store lock
  89. func (tms *TrustMetricStore) SaveToDB() {
  90. tms.mtx.Lock()
  91. defer tms.mtx.Unlock()
  92. tms.saveToDB()
  93. }
  94. /* Private methods */
  95. // size returns the number of entries in the store without acquiring the mutex
  96. func (tms *TrustMetricStore) size() int {
  97. return len(tms.peerMetrics)
  98. }
  /* Loading & Saving */
  /* Both loadFromDB and saveToDB assume the mutex has been acquired */

  // trustMetricKey is the DB key under which the history data of all peers
  // is read and written.
  var trustMetricKey = []byte("trustMetricStore")
  102. // Loads the history data for all peers from the store DB
  103. // cmn.Panics if file is corrupt
  104. func (tms *TrustMetricStore) loadFromDB() bool {
  105. // Obtain the history data we have so far
  106. bytes := tms.db.Get(trustMetricKey)
  107. if bytes == nil {
  108. return false
  109. }
  110. peers := make(map[string]MetricHistoryJSON, 0)
  111. err := json.Unmarshal(bytes, &peers)
  112. if err != nil {
  113. cmn.PanicCrisis(cmn.Fmt("Could not unmarshal Trust Metric Store DB data: %v", err))
  114. }
  115. // If history data exists in the file,
  116. // load it into trust metric
  117. for key, p := range peers {
  118. tm := NewMetricWithConfig(tms.config)
  119. tm.Init(p)
  120. // Load the peer trust metric into the store
  121. tms.peerMetrics[key] = tm
  122. }
  123. return true
  124. }
  125. // Saves the history data for all peers to the store DB
  126. func (tms *TrustMetricStore) saveToDB() {
  127. tms.Logger.Debug("Saving TrustHistory to DB", "size", tms.size())
  128. peers := make(map[string]MetricHistoryJSON, 0)
  129. for key, tm := range tms.peerMetrics {
  130. // Add an entry for the peer identified by key
  131. peers[key] = tm.HistoryJSON()
  132. }
  133. // Write all the data back to the DB
  134. bytes, err := json.Marshal(peers)
  135. if err != nil {
  136. tms.Logger.Error("Failed to encode the TrustHistory", "err", err)
  137. return
  138. }
  139. tms.db.SetSync(trustMetricKey, bytes)
  140. }
  141. // Periodically saves the trust history data to the DB
  142. func (tms *TrustMetricStore) saveRoutine() {
  143. t := time.NewTicker(defaultStorePeriodicSaveInterval)
  144. defer t.Stop()
  145. loop:
  146. for {
  147. select {
  148. case <-t.C:
  149. tms.SaveToDB()
  150. case <-tms.Quit:
  151. break loop
  152. }
  153. }
  154. }
  155. //---------------------------------------------------------------------------------------
  const (
  	// defaultDerivativeGamma1 weights the derivative when current behavior
  	// is >= previous behavior.
  	defaultDerivativeGamma1 = 0

  	// defaultDerivativeGamma2 weights the derivative when current behavior
  	// is less than previous behavior.
  	defaultDerivativeGamma2 = 1.0

  	// defaultHistoryDataWeight is the base of the per-interval weights
  	// applied to history data values when calculating the history value.
  	defaultHistoryDataWeight = 0.8
  )
  // TrustMetric keeps track of peer reliability.
  // See tendermint/docs/architecture/adr-006-trust-metric.md for details
  type TrustMetric struct {
  	// mtx protects the metric from concurrent access.
  	mtx sync.Mutex

  	// proportionalWeight is the percentage given to current behavior.
  	proportionalWeight float64

  	// integralWeight is the percentage given to prior behavior.
  	integralWeight float64

  	// numIntervals counts the time intervals this metric has been tracking.
  	numIntervals int

  	// maxIntervals is the size of the time interval window.
  	maxIntervals int

  	// intervalLen is the duration of a single time interval.
  	intervalLen time.Duration

  	// history stores the trust history data for this metric.
  	history []float64

  	// historyWeights are applied to the history data when calculating the
  	// history value.
  	historyWeights []float64

  	// historyWeightSum is the sum of historyWeights.
  	historyWeightSum float64

  	// historySize is the current number of history data elements.
  	historySize int

  	// historyMaxSize is the maximum number of history data elements.
  	historyMaxSize int

  	// historyValue is the calculated history value for the current time
  	// interval.
  	historyValue float64

  	// bad and good count the events recorded in the current time interval.
  	bad  float64
  	good float64

  	// paused: while true, history data is not modified.
  	paused bool

  	// stop signals the trust metric go-routine to stop.
  	stop chan struct{}
  }
  // MetricHistoryJSON holds the history data needed to save and later
  // restore a trust metric.
  type MetricHistoryJSON struct {
  	// NumIntervals is the number of time intervals tracked so far.
  	NumIntervals int `json:"intervals"`
  	// History is the trust history data of the metric.
  	History []float64 `json:"history"`
  }
  203. // Returns a snapshot of the trust metric history data
  204. func (tm *TrustMetric) HistoryJSON() MetricHistoryJSON {
  205. tm.mtx.Lock()
  206. defer tm.mtx.Unlock()
  207. return MetricHistoryJSON{
  208. NumIntervals: tm.numIntervals,
  209. History: tm.history,
  210. }
  211. }
  212. // Instantiates a trust metric by loading the history data for a single peer.
  213. // This is called only once and only right after creation, which is why the
  214. // lock is not held while accessing the trust metric struct members
  215. func (tm *TrustMetric) Init(hist MetricHistoryJSON) {
  216. // Restore the number of time intervals we have previously tracked
  217. if hist.NumIntervals > tm.maxIntervals {
  218. hist.NumIntervals = tm.maxIntervals
  219. }
  220. tm.numIntervals = hist.NumIntervals
  221. // Restore the history and its current size
  222. if len(hist.History) > tm.historyMaxSize {
  223. // Keep the history no larger than historyMaxSize
  224. last := len(hist.History) - tm.historyMaxSize
  225. hist.History = hist.History[last:]
  226. }
  227. tm.history = hist.History
  228. tm.historySize = len(tm.history)
  229. // Create the history weight values and weight sum
  230. for i := 1; i <= tm.numIntervals; i++ {
  231. x := math.Pow(defaultHistoryDataWeight, float64(i)) // Optimistic weight
  232. tm.historyWeights = append(tm.historyWeights, x)
  233. }
  234. for _, v := range tm.historyWeights {
  235. tm.historyWeightSum += v
  236. }
  237. // Calculate the history value based on the loaded history data
  238. tm.historyValue = tm.calcHistoryValue()
  239. }
  240. // Pause tells the metric to pause recording data over time intervals.
  241. // All method calls that indicate events will unpause the metric
  242. func (tm *TrustMetric) Pause() {
  243. tm.mtx.Lock()
  244. defer tm.mtx.Unlock()
  245. // Pause the metric for now
  246. tm.paused = true
  247. }
  248. // Stop tells the metric to stop recording data over time intervals
  249. func (tm *TrustMetric) Stop() {
  250. tm.stop <- struct{}{}
  251. }
  252. // BadEvents indicates that an undesirable event(s) took place
  253. func (tm *TrustMetric) BadEvents(num int) {
  254. tm.mtx.Lock()
  255. defer tm.mtx.Unlock()
  256. tm.unpause()
  257. tm.bad += float64(num)
  258. }
  259. // GoodEvents indicates that a desirable event(s) took place
  260. func (tm *TrustMetric) GoodEvents(num int) {
  261. tm.mtx.Lock()
  262. defer tm.mtx.Unlock()
  263. tm.unpause()
  264. tm.good += float64(num)
  265. }
  266. // TrustValue gets the dependable trust value; always between 0 and 1
  267. func (tm *TrustMetric) TrustValue() float64 {
  268. tm.mtx.Lock()
  269. defer tm.mtx.Unlock()
  270. return tm.calcTrustValue()
  271. }
  272. // TrustScore gets a score based on the trust value always between 0 and 100
  273. func (tm *TrustMetric) TrustScore() int {
  274. score := tm.TrustValue() * 100
  275. return int(math.Floor(score))
  276. }
  277. // NextTimeInterval saves current time interval data and prepares for the following interval
  278. func (tm *TrustMetric) NextTimeInterval() {
  279. tm.mtx.Lock()
  280. defer tm.mtx.Unlock()
  281. if tm.paused {
  282. // Do not prepare for the next time interval while paused
  283. return
  284. }
  285. // Add the current trust value to the history data
  286. newHist := tm.calcTrustValue()
  287. tm.history = append(tm.history, newHist)
  288. // Update history and interval counters
  289. if tm.historySize < tm.historyMaxSize {
  290. tm.historySize++
  291. } else {
  292. // Keep the history no larger than historyMaxSize
  293. last := len(tm.history) - tm.historyMaxSize
  294. tm.history = tm.history[last:]
  295. }
  296. if tm.numIntervals < tm.maxIntervals {
  297. tm.numIntervals++
  298. // Add the optimistic weight for the new time interval
  299. wk := math.Pow(defaultHistoryDataWeight, float64(tm.numIntervals))
  300. tm.historyWeights = append(tm.historyWeights, wk)
  301. tm.historyWeightSum += wk
  302. }
  303. // Update the history data using Faded Memories
  304. tm.updateFadedMemory()
  305. // Calculate the history value for the upcoming time interval
  306. tm.historyValue = tm.calcHistoryValue()
  307. tm.good = 0
  308. tm.bad = 0
  309. }
  310. // Copy returns a new trust metric with members containing the same values
  311. func (tm *TrustMetric) Copy() *TrustMetric {
  312. if tm == nil {
  313. return nil
  314. }
  315. return &TrustMetric{
  316. proportionalWeight: tm.proportionalWeight,
  317. integralWeight: tm.integralWeight,
  318. numIntervals: tm.numIntervals,
  319. maxIntervals: tm.maxIntervals,
  320. intervalLen: tm.intervalLen,
  321. history: tm.history,
  322. historyWeights: tm.historyWeights,
  323. historyWeightSum: tm.historyWeightSum,
  324. historySize: tm.historySize,
  325. historyMaxSize: tm.historyMaxSize,
  326. historyValue: tm.historyValue,
  327. good: tm.good,
  328. bad: tm.bad,
  329. paused: tm.paused,
  330. stop: make(chan struct{}),
  331. }
  332. }
  // TrustMetricConfig configures the weight functions and time intervals
  // for the metric.
  type TrustMetricConfig struct {
  	// ProportionalWeight is the percentage given to current behavior.
  	ProportionalWeight float64

  	// IntegralWeight is the percentage given to prior behavior.
  	IntegralWeight float64

  	// TrackingWindow is the window of time that the trust metric will
  	// track events across. This can be set to cover many days without issue.
  	TrackingWindow time.Duration

  	// IntervalLength is the length of one time interval. Each interval
  	// should be short for adaptability. Less than 30 seconds is too
  	// sensitive, and greater than 5 minutes will make the metric numb.
  	IntervalLength time.Duration
  }
  347. // DefaultConfig returns a config with values that have been tested and produce desirable results
  348. func DefaultConfig() TrustMetricConfig {
  349. return TrustMetricConfig{
  350. ProportionalWeight: 0.4,
  351. IntegralWeight: 0.6,
  352. TrackingWindow: (time.Minute * 60 * 24) * 14, // 14 days.
  353. IntervalLength: 1 * time.Minute,
  354. }
  355. }
  356. // NewMetric returns a trust metric with the default configuration
  357. func NewMetric() *TrustMetric {
  358. return NewMetricWithConfig(DefaultConfig())
  359. }
  360. // NewMetricWithConfig returns a trust metric with a custom configuration
  361. func NewMetricWithConfig(tmc TrustMetricConfig) *TrustMetric {
  362. tm := new(TrustMetric)
  363. config := customConfig(tmc)
  364. // Setup using the configuration values
  365. tm.proportionalWeight = config.ProportionalWeight
  366. tm.integralWeight = config.IntegralWeight
  367. tm.intervalLen = config.IntervalLength
  368. // The maximum number of time intervals is the tracking window / interval length
  369. tm.maxIntervals = int(config.TrackingWindow / tm.intervalLen)
  370. // The history size will be determined by the maximum number of time intervals
  371. tm.historyMaxSize = intervalToHistoryOffset(tm.maxIntervals) + 1
  372. // This metric has a perfect history so far
  373. tm.historyValue = 1.0
  374. // Setup the stop channel
  375. tm.stop = make(chan struct{})
  376. go tm.processRequests()
  377. return tm
  378. }
  379. /* Private methods */
  380. // Ensures that all configuration elements have valid values
  381. func customConfig(tmc TrustMetricConfig) TrustMetricConfig {
  382. config := DefaultConfig()
  383. // Check the config for set values, and setup appropriately
  384. if tmc.ProportionalWeight > 0 {
  385. config.ProportionalWeight = tmc.ProportionalWeight
  386. }
  387. if tmc.IntegralWeight > 0 {
  388. config.IntegralWeight = tmc.IntegralWeight
  389. }
  390. if tmc.IntervalLength > time.Duration(0) {
  391. config.IntervalLength = tmc.IntervalLength
  392. }
  393. if tmc.TrackingWindow > time.Duration(0) &&
  394. tmc.TrackingWindow >= config.IntervalLength {
  395. config.TrackingWindow = tmc.TrackingWindow
  396. }
  397. return config
  398. }
  399. // Wakes the trust metric up if it is currently paused
  400. // This method needs to be called with the mutex locked
  401. func (tm *TrustMetric) unpause() {
  402. // Check if this is the first experience with
  403. // what we are tracking since being paused
  404. if tm.paused {
  405. tm.good = 0
  406. tm.bad = 0
  407. // New events cause us to unpause the metric
  408. tm.paused = false
  409. }
  410. }
  411. // Calculates the derivative component
  412. func (tm *TrustMetric) derivativeValue() float64 {
  413. return tm.proportionalValue() - tm.historyValue
  414. }
  415. // Strengthens the derivative component when the change is negative
  416. func (tm *TrustMetric) weightedDerivative() float64 {
  417. var weight float64 = defaultDerivativeGamma1
  418. d := tm.derivativeValue()
  419. if d < 0 {
  420. weight = defaultDerivativeGamma2
  421. }
  422. return weight * d
  423. }
  424. // Performs the update for our Faded Memories process, which allows the
  425. // trust metric tracking window to be large while maintaining a small
  426. // number of history data values
  427. func (tm *TrustMetric) updateFadedMemory() {
  428. if tm.historySize < 2 {
  429. return
  430. }
  431. end := tm.historySize - 1
  432. // Keep the most recent history element
  433. for count := 1; count < tm.historySize; count++ {
  434. i := end - count
  435. // The older the data is, the more we spread it out
  436. x := math.Pow(2, float64(count))
  437. // Two history data values are merged into a single value
  438. tm.history[i] = ((tm.history[i] * (x - 1)) + tm.history[i+1]) / x
  439. }
  440. }
  // intervalToHistoryOffset maps the interval value down to an offset from
  // the beginning of history.
  //
  // The system maintains 2^m interval values in the form of m history data
  // values, so the ith interval is found at history data index floor(log2(i)).
  // The logarithm is computed with integer shifts, which is exact for every
  // positive interval. For interval <= 1 the result is 0; this includes zero
  // and negative values, for which a floating-point logarithm has no usable
  // integer result.
  func intervalToHistoryOffset(interval int) int {
  	offset := 0
  	for v := interval; v > 1; v >>= 1 {
  		offset++
  	}
  	return offset
  }
  448. // Retrieves the actual history data value that represents the requested time interval
  449. func (tm *TrustMetric) fadedMemoryValue(interval int) float64 {
  450. first := tm.historySize - 1
  451. if interval == 0 {
  452. // Base case
  453. return tm.history[first]
  454. }
  455. offset := intervalToHistoryOffset(interval)
  456. return tm.history[first-offset]
  457. }
  458. // Calculates the integral (history) component of the trust value
  459. func (tm *TrustMetric) calcHistoryValue() float64 {
  460. var hv float64
  461. for i := 0; i < tm.numIntervals; i++ {
  462. hv += tm.fadedMemoryValue(i) * tm.historyWeights[i]
  463. }
  464. return hv / tm.historyWeightSum
  465. }
  466. // Calculates the current score for good/bad experiences
  467. func (tm *TrustMetric) proportionalValue() float64 {
  468. value := 1.0
  469. total := tm.good + tm.bad
  470. if total > 0 {
  471. value = tm.good / total
  472. }
  473. return value
  474. }
  475. // Calculates the trust value for the request processing
  476. func (tm *TrustMetric) calcTrustValue() float64 {
  477. weightedP := tm.proportionalWeight * tm.proportionalValue()
  478. weightedI := tm.integralWeight * tm.historyValue
  479. weightedD := tm.weightedDerivative()
  480. tv := weightedP + weightedI + weightedD
  481. // Do not return a negative value.
  482. if tv < 0 {
  483. tv = 0
  484. }
  485. return tv
  486. }
  487. // This method is for a goroutine that handles all requests on the metric
  488. func (tm *TrustMetric) processRequests() {
  489. t := time.NewTicker(tm.intervalLen)
  490. defer t.Stop()
  491. loop:
  492. for {
  493. select {
  494. case <-t.C:
  495. tm.NextTimeInterval()
  496. case <-tm.stop:
  497. // Stop all further tracking for this metric
  498. break loop
  499. }
  500. }
  501. }