abci: localClient improvements & bugfixes & pubsub Unsubscribe issues (#2748)
* use READ lock/unlock in ConsensusState#GetLastHeight
Refs #2721
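Roughly the shape of this change, as a minimal sketch (the stand-in type and field names below are hypothetical, not the real ConsensusState):
```
package example

import "sync"

// hypothetical stand-in for ConsensusState; the fields are made up
type state struct {
	mtx    sync.RWMutex
	height int64
}

// GetLastHeight returns the height of the last committed block. It only
// reads, so a read lock is enough and concurrent readers no longer serialize.
func (s *state) GetLastHeight() int64 {
	s.mtx.RLock()
	defer s.mtx.RUnlock()
	return s.height - 1
}
```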
* do not use defers when there's no need
* fix peer formatting (output its address instead of the pointer)
```
[54310]: E[11-02|11:59:39.851] Connection failed @ sendRoutine module=p2p peer=0xb78f00 conn=MConn{74.207.236.148:26656} err="pong timeout"
```
https://github.com/tendermint/tendermint/issues/2721#issuecomment-435326581
* panic if peer has no state
https://github.com/tendermint/tendermint/issues/2721#issuecomment-435347165
It's confusing that we sometimes check whether a peer has a state, but most
of the time we expect it to be there:
1. https://github.com/tendermint/tendermint/blob/add79700b5fe84417538202b6c927c8cc5383672/mempool/reactor.go#L138
2. https://github.com/tendermint/tendermint/blob/add79700b5fe84417538202b6c927c8cc5383672/rpc/core/consensus.go#L196
I will change everything to always assume the peer has a state and panic
otherwise; that should help identify issues earlier.
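A sketch of the pattern being adopted (the Peer interface and the "ConsensusReactor" key are simplified stand-ins for the real p2p and consensus types, not the exact call sites):
```
package example

import "fmt"

// minimal stand-ins for the real p2p.Peer and consensus.PeerState types
type Peer interface {
	Get(key string) interface{}
	ID() string
}

type PeerState struct{ /* ... */ }

// getPeerState panics instead of silently skipping the peer, so a missing
// state surfaces immediately rather than hiding a bug.
func getPeerState(peer Peer) *PeerState {
	ps, ok := peer.Get("ConsensusReactor").(*PeerState)
	if !ok {
		panic(fmt.Sprintf("peer %v has no state", peer.ID()))
	}
	return ps
}
```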
* abci/localclient: extend lock on app callback
The app callback should be protected by the lock as well (note this was
already done for InitChainAsync but, oddly, not for the other methods).
Otherwise, while we're executing a block, a tx might come in and call the
callback at the same time we're updating it in execBlockOnProxyApp => DATA
RACE. A sketch of the fix follows the traces below.
Fixes #2721
Consensus state is locked
```
goroutine 113333 [semacquire, 309 minutes]:
sync.runtime_SemacquireMutex(0xc00180009c, 0xc0000c7e00)
/usr/local/go/src/runtime/sema.go:71 +0x3d
sync.(*RWMutex).RLock(0xc001800090)
/usr/local/go/src/sync/rwmutex.go:50 +0x4e
github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/consensus.(*ConsensusState).GetRoundState(0xc001800000, 0x0)
/root/go/src/github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/consensus/state.go:218 +0x46
github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/consensus.(*ConsensusReactor).queryMaj23Routine(0xc0017def80, 0x11104a0, 0xc0072488f0, 0xc007248
9c0)
/root/go/src/github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/consensus/reactor.go:735 +0x16d
created by github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/consensus.(*ConsensusReactor).AddPeer
/root/go/src/github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/consensus/reactor.go:172 +0x236
```
because localClient is locked
```
goroutine 1899 [semacquire, 309 minutes]:
sync.runtime_SemacquireMutex(0xc00003363c, 0xc0000cb500)
/usr/local/go/src/runtime/sema.go:71 +0x3d
sync.(*Mutex).Lock(0xc000033638)
/usr/local/go/src/sync/mutex.go:134 +0xff
github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/abci/client.(*localClient).SetResponseCallback(0xc0001fb560, 0xc007868540)
/root/go/src/github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/abci/client/local_client.go:32 +0x33
github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/proxy.(*appConnConsensus).SetResponseCallback(0xc00002f750, 0xc007868540)
/root/go/src/github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/proxy/app_conn.go:57 +0x40
github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/state.execBlockOnProxyApp(0x1104e20, 0xc002ca0ba0, 0x11092a0, 0xc00002f750, 0xc0001fe960, 0xc000bfc660, 0x110cfe0, 0xc000090330, 0xc9d12, 0xc000d9d5a0, ...)
/root/go/src/github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/state/execution.go:230 +0x1fd
github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/state.(*BlockExecutor).ApplyBlock(0xc002c2a230, 0x7, 0x0, 0xc000eae880, 0x6, 0xc002e52c60, 0x16, 0x1f927, 0xc9d12, 0xc000d9d5a0, ...)
/root/go/src/github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/state/execution.go:96 +0x142
github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/consensus.(*ConsensusState).finalizeCommit(0xc001800000, 0x1f928)
/root/go/src/github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/consensus/state.go:1339 +0xa3e
github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/consensus.(*ConsensusState).tryFinalizeCommit(0xc001800000, 0x1f928)
/root/go/src/github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/consensus/state.go:1270 +0x451
github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/consensus.(*ConsensusState).enterCommit.func1(0xc001800000, 0x0, 0x1f928)
/root/go/src/github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/consensus/state.go:1218 +0x90
github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/consensus.(*ConsensusState).enterCommit(0xc001800000, 0x1f928, 0x0)
/root/go/src/github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/consensus/state.go:1247 +0x6b8
github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/consensus.(*ConsensusState).addVote(0xc001800000, 0xc003d8dea0, 0xc000cf4cc0, 0x28, 0xf1, 0xc003bc7ad0, 0xc003bc7b10)
/root/go/src/github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/consensus/state.go:1659 +0xbad
github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/consensus.(*ConsensusState).tryAddVote(0xc001800000, 0xc003d8dea0, 0xc000cf4cc0, 0x28, 0xf1, 0xf1, 0xf1)
/root/go/src/github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/consensus/state.go:1517 +0x59
github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/consensus.(*ConsensusState).handleMsg(0xc001800000, 0xd98200, 0xc0070dbed0, 0xc000cf4cc0, 0x28)
/root/go/src/github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/consensus/state.go:660 +0x64b
github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/consensus.(*ConsensusState).receiveRoutine(0xc001800000, 0x0)
/root/go/src/github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/consensus/state.go:617 +0x670
created by github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/consensus.(*ConsensusState).OnStart
/root/go/src/github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/consensus/state.go:311 +0x132
```
A tx comes in and CheckTx is executed right as we're executing the block
```
goroutine 111044 [semacquire, 309 minutes]:
sync.runtime_SemacquireMutex(0xc00003363c, 0x0)
/usr/local/go/src/runtime/sema.go:71 +0x3d
sync.(*Mutex).Lock(0xc000033638)
/usr/local/go/src/sync/mutex.go:134 +0xff
github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/abci/client.(*localClient).CheckTxAsync(0xc0001fb0e0, 0xc002d94500, 0x13f, 0x280, 0x0)
/root/go/src/github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/abci/client/local_client.go:85 +0x47
github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/proxy.(*appConnMempool).CheckTxAsync(0xc00002f720, 0xc002d94500, 0x13f, 0x280, 0x1)
/root/go/src/github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/proxy/app_conn.go:114 +0x51
github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/mempool.(*Mempool).CheckTx(0xc002d3a320, 0xc002d94500, 0x13f, 0x280, 0xc0072355f0, 0x0, 0x0)
/root/go/src/github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/mempool/mempool.go:316 +0x17b
github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/rpc/core.BroadcastTxSync(0xc002d94500, 0x13f, 0x280, 0x0, 0x0, 0x0)
/root/go/src/github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/rpc/core/mempool.go:93 +0xb8
reflect.Value.call(0xd85560, 0x10326c0, 0x13, 0xec7b8b, 0x4, 0xc00663f180, 0x1, 0x1, 0xc00663f180, 0xc00663f188, ...)
/usr/local/go/src/reflect/value.go:447 +0x449
reflect.Value.Call(0xd85560, 0x10326c0, 0x13, 0xc00663f180, 0x1, 0x1, 0x0, 0x0, 0xc005cc9344)
/usr/local/go/src/reflect/value.go:308 +0xa4
github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/rpc/lib/server.makeHTTPHandler.func2(0x1102060, 0xc00663f100, 0xc0082d7900)
/root/go/src/github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/rpc/lib/server/handlers.go:269 +0x188
net/http.HandlerFunc.ServeHTTP(0xc002c81f20, 0x1102060, 0xc00663f100, 0xc0082d7900)
/usr/local/go/src/net/http/server.go:1964 +0x44
net/http.(*ServeMux).ServeHTTP(0xc002c81b60, 0x1102060, 0xc00663f100, 0xc0082d7900)
/usr/local/go/src/net/http/server.go:2361 +0x127
github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/rpc/lib/server.maxBytesHandler.ServeHTTP(0x10f8a40, 0xc002c81b60, 0xf4240, 0x1102060, 0xc00663f100, 0xc0082d7900)
/root/go/src/github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/rpc/lib/server/http_server.go:219 +0xcf
github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/rpc/lib/server.RecoverAndLogHandler.func1(0x1103220, 0xc00121e620, 0xc0082d7900)
/root/go/src/github.com/MinterTeam/minter-go-node/vendor/github.com/tendermint/tendermint/rpc/lib/server/http_server.go:192 +0x394
net/http.HandlerFunc.ServeHTTP(0xc002c06ea0, 0x1103220, 0xc00121e620, 0xc0082d7900)
/usr/local/go/src/net/http/server.go:1964 +0x44
net/http.serverHandler.ServeHTTP(0xc001a1aa90, 0x1103220, 0xc00121e620, 0xc0082d7900)
/usr/local/go/src/net/http/server.go:2741 +0xab
net/http.(*conn).serve(0xc00785a3c0, 0x11041a0, 0xc000f844c0)
/usr/local/go/src/net/http/server.go:1847 +0x646
created by net/http.(*Server).Serve
/usr/local/go/src/net/http/server.go:2851 +0x2f5
```
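The fix, as a heavily simplified sketch of abci/client/local_client.go (types and signatures are reduced to the essentials, not the real API): hold the client's mutex both when the callback is swapped and for the whole duration of each request, so the two can no longer race.
```
package example

import "sync"

type Response struct{ /* ... */ }

// Application is a stand-in for the ABCI application interface.
type Application interface {
	CheckTx(tx []byte) Response
}

type localClient struct {
	mtx *sync.Mutex
	app Application
	cb  func(Response)
}

// SetResponseCallback is what execBlockOnProxyApp calls before replaying txs.
func (cli *localClient) SetResponseCallback(cb func(Response)) {
	cli.mtx.Lock()
	defer cli.mtx.Unlock()
	cli.cb = cb
}

// CheckTxAsync now holds the lock across the app call *and* the callback, so
// it can no longer race with SetResponseCallback above.
func (cli *localClient) CheckTxAsync(tx []byte) {
	cli.mtx.Lock()
	defer cli.mtx.Unlock()
	res := cli.app.CheckTx(tx)
	if cli.cb != nil {
		cli.cb(res)
	}
}
```
In the real code the mutex is shared between the connections created from the same local client creator, which appears to be why the consensus and mempool traces above block on the very same lock (0xc000033638).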
* consensus: use read lock in Receive#VoteMessage
* use defer to unlock mutex because application might panic
* use defer in every method of the localClient
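Why the defer matters here, illustrated with a generic sketch (not the actual localClient code): the application callback may panic, and an explicit Unlock placed after the call would then never run.
```
package example

import "sync"

type client struct {
	mtx sync.Mutex
}

// badCall: if doWork panics, Unlock is never reached and every later caller
// deadlocks on the mutex.
func (c *client) badCall(doWork func()) {
	c.mtx.Lock()
	doWork()
	c.mtx.Unlock()
}

// goodCall: the deferred Unlock also runs while the panic unwinds, so the
// mutex is released and a recover further up can keep the node alive.
func (c *client) goodCall(doWork func()) {
	c.mtx.Lock()
	defer c.mtx.Unlock()
	doWork()
}
```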
* add a changelog entry
* drain channels before Unsubscribe(All)
Read https://github.com/tendermint/tendermint/blob/55362ed76630f3e1ebec159a598f6a9fb5892cb1/libs/pubsub/pubsub.go#L13
for the detailed explanation of the issue.
We'll need to fix it someday. Make sure to keep an eye on
https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-033-pubsub.md
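The workaround this applies, sketched as a self-contained helper (the function and interface names are made up; the pattern itself is the one documented in libs/pubsub/pubsub.go and quoted in the diff below):
```
package example

import "context"

// Unsubscriber matches the slice of the pubsub server's API used here.
type Unsubscriber interface {
	UnsubscribeAll(ctx context.Context, clientID string) error
}

// drainAndUnsubscribe empties out before calling UnsubscribeAll, so a pending
// blocking send from the pubsub server cannot wedge on an abandoned channel.
func drainAndUnsubscribe(ctx context.Context, s Unsubscriber, clientID string, out <-chan interface{}) error {
LOOP:
	for {
		select {
		case <-out:
		default:
			break LOOP
		}
	}
	return s.UnsubscribeAll(ctx, clientID)
}
```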
* retry instead of panicking when a peer has no state in reactors other than consensus
in the /dump_consensus_state RPC endpoint, skip peers with no state
* rpc/core/mempool: simplify error messages
* rpc/core/mempool: use time.After instead of timer
also, do not log DeliverTx result (to be consistent with other methods)
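A sketch of the time.After change (the function and channel here are made up; in the real handler the result arrives from an event-bus subscription):
```
package example

import (
	"errors"
	"time"
)

// waitForResult shows the shape of the change: time.After replaces an
// explicit time.NewTimer/Stop pair, which is fine for a one-shot timeout.
func waitForResult(resCh <-chan interface{}, timeout time.Duration) (interface{}, error) {
	select {
	case res := <-resCh:
		return res, nil
	case <-time.After(timeout):
		return nil, errors.New("timed out waiting for tx to be included in a block")
	}
}
```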
* unlock before calling the callback in ReqRes#SetCallback
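A reduced sketch of that ReqRes change (field names simplified from abci/client): the user callback runs outside the critical section, so it is free to touch the ReqRes or the client again without deadlocking.
```
package example

import "sync"

type Response struct{ /* ... */ }

// ReqRes is a reduced version of the abci/client request/response wrapper.
type ReqRes struct {
	mtx  sync.Mutex
	done bool
	res  *Response
	cb   func(*Response)
}

// SetCallback stores cb, or invokes it immediately if the response has
// already arrived. The mutex is released before the call.
func (r *ReqRes) SetCallback(cb func(*Response)) {
	r.mtx.Lock()
	if r.done {
		r.mtx.Unlock()
		cb(r.res)
		return
	}
	r.cb = cb
	r.mtx.Unlock()
}
```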
[libs/pubsub] fix memory leak
Refs #1755
I started by writing a test for wsConnection (WebsocketManager) in which I:
- create a WS connection
- do a simple echo call
- close it
No leaking goroutines and no leaking memory were detected.
For useful shortcuts see my blog post
https://blog.cosmos.network/debugging-the-memory-leak-in-tendermint-210186711420
Then I went to the rpc tests to see if calling Subscribe results in
memory growth. It did.
I used a slightly modified version of the TestHeaderEvents function:
```
func TestHeaderEvents(t *testing.T) {
	// memory heap before
	f, err := os.Create("/tmp/mem1.mprof")
	if err != nil {
		t.Fatal(err)
	}
	pprof.WriteHeapProfile(f)
	f.Close()

	for i := 0; i < 100; i++ {
		c := getHTTPClient()
		err = c.Start()
		require.Nil(t, err)

		evtTyp := types.EventNewBlockHeader
		evt, err := client.WaitForOneEvent(c, evtTyp, waitForEventTimeout)
		require.Nil(t, err)
		_, ok := evt.(types.EventDataNewBlockHeader)
		require.True(t, ok)

		c.Stop()
		c = nil
	}

	runtime.GC()

	// memory heap after
	f, err = os.Create("/tmp/mem2.mprof")
	if err != nil {
		t.Fatal(err)
	}
	pprof.WriteHeapProfile(f)
	f.Close()

	// dump all running goroutines
	time.Sleep(10 * time.Second)
	pprof.Lookup("goroutine").WriteTo(os.Stdout, 1)
}
```
```
Showing nodes accounting for 35159.16kB, 100% of 35159.16kB total
Showing top 10 nodes out of 48
flat flat% sum% cum cum%
32022.23kB 91.08% 91.08% 32022.23kB 91.08% github.com/tendermint/tendermint/libs/pubsub/query.(*QueryParser).Init
1056.33kB 3.00% 94.08% 1056.33kB 3.00% bufio.NewReaderSize
528.17kB 1.50% 95.58% 528.17kB 1.50% bufio.NewWriterSize
528.17kB 1.50% 97.09% 528.17kB 1.50% github.com/tendermint/tendermint/consensus.NewConsensusState
512.19kB 1.46% 98.54% 512.19kB 1.46% runtime.malg
512.08kB 1.46% 100% 512.08kB 1.46% syscall.ByteSliceFromString
0 0% 100% 512.08kB 1.46% github.com/tendermint/tendermint/consensus.(*ConsensusState).(github.com/tendermint/tendermint/consensus.defaultDecideProposal)-fm
0 0% 100% 512.08kB 1.46% github.com/tendermint/tendermint/consensus.(*ConsensusState).addVote
0 0% 100% 512.08kB 1.46% github.com/tendermint/tendermint/consensus.(*ConsensusState).defaultDecideProposal
0 0% 100% 512.08kB 1.46% github.com/tendermint/tendermint/consensus.(*ConsensusState).enterNewRound
```
100 subscriptions produce 32MB.
Again, no additional goroutines are running after the end of the test
(wsConnection's readRoutine and writeRoutine both finish). **It means
that some existing goroutine or object is holding a reference to the
*Query objects, which are leaking.**
One of them is pubsub#loop. It uses state.queries to map queries to
clients and state.clients to map clients to queries.
Before this commit, we were not thoroughly cleaning up state.queries, which
was the reason for the memory leak.
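The essence of the fix, compressed into a sketch (the same bookkeeping is visible in full in the file below): every unsubscribe must clean state.queries as well, and the query itself must be dropped once no client references it.
```
package example

// Query is the parsed subscription query, the object leaking in the profile.
type Query interface{ String() string }

type state struct {
	queries map[Query]map[string]chan<- interface{} // query -> client -> ch
	clients map[string]map[Query]struct{}           // client -> set of queries
}

// removeAll drops every subscription of clientID and, crucially, also removes
// the query from state.queries once its last subscriber is gone; without that
// last delete, the *Query values (and their channels) accumulate forever.
func (s *state) removeAll(clientID string) {
	for q := range s.clients[clientID] {
		close(s.queries[q][clientID])
		delete(s.queries[q], clientID)
		if len(s.queries[q]) == 0 {
			delete(s.queries, q)
		}
	}
	delete(s.clients, clientID)
}
```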
- // Package pubsub implements a pub-sub model with a single publisher (Server)
- // and multiple subscribers (clients).
- //
- // Though you can have multiple publishers by sharing a pointer to a server or
- // by giving the same channel to each publisher and publishing messages from
- // that channel (fan-in).
- //
- // Clients subscribe for messages, which could be of any type, using a query.
- // When some message is published, we match it with all queries. If there is a
- // match, this message will be pushed to all clients, subscribed to that query.
- // See query subpackage for our implementation.
- //
- // Due to the blocking send implementation, a single subscriber can freeze an
- // entire server by not reading messages before it unsubscribes. To avoid such
- // a scenario, subscribers must either:
- //
- // a) make sure they continue to read from the out channel until
- // Unsubscribe(All) is called
- //
- // s.Subscribe(ctx, sub, qry, out)
- // go func() {
- // for msg := range out {
- // // handle msg
- // // will exit automatically when out is closed by Unsubscribe(All)
- // }
- // }()
- // s.UnsubscribeAll(ctx, sub)
- //
- // b) drain the out channel before calling Unsubscribe(All)
- //
- // s.Subscribe(ctx, sub, qry, out)
- // defer func() {
- // // drain out to make sure we don't block
- // LOOP:
- // for {
- // select {
- // case <-out:
- // default:
- // break LOOP
- // }
- // }
- // s.UnsubscribeAll(ctx, sub)
- // }()
- // for msg := range out {
- // // handle msg
- // if err != nil {
- // return err
- // }
- // }
- //
- package pubsub
-
- import (
- "context"
- "errors"
- "sync"
-
- cmn "github.com/tendermint/tendermint/libs/common"
- )
-
- type operation int
-
- const (
- sub operation = iota
- pub
- unsub
- shutdown
- )
-
- var (
- // ErrSubscriptionNotFound is returned when a client tries to unsubscribe
- // from not existing subscription.
- ErrSubscriptionNotFound = errors.New("subscription not found")
-
- // ErrAlreadySubscribed is returned when a client tries to subscribe twice or
- // more using the same query.
- ErrAlreadySubscribed = errors.New("already subscribed")
- )
-
- type cmd struct {
- op operation
- query Query
- ch chan<- interface{}
- clientID string
- msg interface{}
- tags TagMap
- }
-
- // Query defines an interface for a query to be used for subscribing.
- type Query interface {
- Matches(tags TagMap) bool
- String() string
- }
-
- // Server allows clients to subscribe/unsubscribe for messages, publishing
- // messages with or without tags, and manages internal state.
- type Server struct {
- cmn.BaseService
-
- cmds chan cmd
- cmdsCap int
-
- mtx sync.RWMutex
- subscriptions map[string]map[string]Query // subscriber -> query (string) -> Query
- }
-
- // Option sets a parameter for the server.
- type Option func(*Server)
-
- // TagMap is used to associate tags to a message.
- // They can be queried by subscribers to choose messages they will receive.
- type TagMap interface {
- // Get returns the value for a key, or nil if no value is present.
- // The ok result indicates whether value was found in the tags.
- Get(key string) (value string, ok bool)
- // Len returns the number of tags.
- Len() int
- }
-
- type tagMap map[string]string
-
- var _ TagMap = (*tagMap)(nil)
-
- // NewTagMap constructs a new immutable tag set from a map.
- func NewTagMap(data map[string]string) TagMap {
- return tagMap(data)
- }
-
- // Get returns the value for a key, or nil if no value is present.
- // The ok result indicates whether value was found in the tags.
- func (ts tagMap) Get(key string) (value string, ok bool) {
- value, ok = ts[key]
- return
- }
-
- // Len returns the number of tags.
- func (ts tagMap) Len() int {
- return len(ts)
- }
-
- // NewServer returns a new server. See the commentary on the Option functions
- // for a detailed description of how to configure buffering. If no options are
- // provided, the resulting server's queue is unbuffered.
- func NewServer(options ...Option) *Server {
- s := &Server{
- subscriptions: make(map[string]map[string]Query),
- }
- s.BaseService = *cmn.NewBaseService(nil, "PubSub", s)
-
- for _, option := range options {
- option(s)
- }
-
- // if BufferCapacity option was not set, the channel is unbuffered
- s.cmds = make(chan cmd, s.cmdsCap)
-
- return s
- }
-
- // BufferCapacity allows you to specify capacity for the internal server's
- // queue. Since the server, given Y subscribers, could only process X messages,
- // this option could be used to survive spikes (e.g. high amount of
- // transactions during peak hours).
- func BufferCapacity(cap int) Option {
- return func(s *Server) {
- if cap > 0 {
- s.cmdsCap = cap
- }
- }
- }
-
- // BufferCapacity returns capacity of the internal server's queue.
- func (s *Server) BufferCapacity() int {
- return s.cmdsCap
- }
-
- // Subscribe creates a subscription for the given client. It accepts a channel
- // on which messages matching the given query can be received. An error will be
- // returned to the caller if the context is canceled or if subscription already
- // exist for pair clientID and query.
- func (s *Server) Subscribe(ctx context.Context, clientID string, query Query, out chan<- interface{}) error {
- s.mtx.RLock()
- clientSubscriptions, ok := s.subscriptions[clientID]
- if ok {
- _, ok = clientSubscriptions[query.String()]
- }
- s.mtx.RUnlock()
- if ok {
- return ErrAlreadySubscribed
- }
-
- select {
- case s.cmds <- cmd{op: sub, clientID: clientID, query: query, ch: out}:
- s.mtx.Lock()
- if _, ok = s.subscriptions[clientID]; !ok {
- s.subscriptions[clientID] = make(map[string]Query)
- }
- // preserve original query
- // see Unsubscribe
- s.subscriptions[clientID][query.String()] = query
- s.mtx.Unlock()
- return nil
- case <-ctx.Done():
- return ctx.Err()
- case <-s.Quit():
- return nil
- }
- }
-
- // Unsubscribe removes the subscription on the given query. An error will be
- // returned to the caller if the context is canceled or if subscription does
- // not exist.
- func (s *Server) Unsubscribe(ctx context.Context, clientID string, query Query) error {
- var origQuery Query
- s.mtx.RLock()
- clientSubscriptions, ok := s.subscriptions[clientID]
- if ok {
- origQuery, ok = clientSubscriptions[query.String()]
- }
- s.mtx.RUnlock()
- if !ok {
- return ErrSubscriptionNotFound
- }
-
- // original query is used here because we're using pointers as map keys
- select {
- case s.cmds <- cmd{op: unsub, clientID: clientID, query: origQuery}:
- s.mtx.Lock()
- delete(clientSubscriptions, query.String())
- s.mtx.Unlock()
- return nil
- case <-ctx.Done():
- return ctx.Err()
- case <-s.Quit():
- return nil
- }
- }
-
- // UnsubscribeAll removes all client subscriptions. An error will be returned
- // to the caller if the context is canceled or if subscription does not exist.
- func (s *Server) UnsubscribeAll(ctx context.Context, clientID string) error {
- s.mtx.RLock()
- _, ok := s.subscriptions[clientID]
- s.mtx.RUnlock()
- if !ok {
- return ErrSubscriptionNotFound
- }
-
- select {
- case s.cmds <- cmd{op: unsub, clientID: clientID}:
- s.mtx.Lock()
- delete(s.subscriptions, clientID)
- s.mtx.Unlock()
- return nil
- case <-ctx.Done():
- return ctx.Err()
- case <-s.Quit():
- return nil
- }
- }
-
- // Publish publishes the given message. An error will be returned to the caller
- // if the context is canceled.
- func (s *Server) Publish(ctx context.Context, msg interface{}) error {
- return s.PublishWithTags(ctx, msg, NewTagMap(make(map[string]string)))
- }
-
- // PublishWithTags publishes the given message with the set of tags. The set is
- // matched with clients queries. If there is a match, the message is sent to
- // the client.
- func (s *Server) PublishWithTags(ctx context.Context, msg interface{}, tags TagMap) error {
- select {
- case s.cmds <- cmd{op: pub, msg: msg, tags: tags}:
- return nil
- case <-ctx.Done():
- return ctx.Err()
- case <-s.Quit():
- return nil
- }
- }
-
- // OnStop implements Service.OnStop by shutting down the server.
- func (s *Server) OnStop() {
- s.cmds <- cmd{op: shutdown}
- }
-
- // NOTE: not goroutine safe
- type state struct {
- // query -> client -> ch
- queries map[Query]map[string]chan<- interface{}
- // client -> query -> struct{}
- clients map[string]map[Query]struct{}
- }
-
- // OnStart implements Service.OnStart by starting the server.
- func (s *Server) OnStart() error {
- go s.loop(state{
- queries: make(map[Query]map[string]chan<- interface{}),
- clients: make(map[string]map[Query]struct{}),
- })
- return nil
- }
-
- // OnReset implements Service.OnReset
- func (s *Server) OnReset() error {
- return nil
- }
-
- func (s *Server) loop(state state) {
- loop:
- for cmd := range s.cmds {
- switch cmd.op {
- case unsub:
- if cmd.query != nil {
- state.remove(cmd.clientID, cmd.query)
- } else {
- state.removeAll(cmd.clientID)
- }
- case shutdown:
- for clientID := range state.clients {
- state.removeAll(clientID)
- }
- break loop
- case sub:
- state.add(cmd.clientID, cmd.query, cmd.ch)
- case pub:
- state.send(cmd.msg, cmd.tags)
- }
- }
- }
-
- func (state *state) add(clientID string, q Query, ch chan<- interface{}) {
-
- // initialize clientToChannelMap per query if needed
- if _, ok := state.queries[q]; !ok {
- state.queries[q] = make(map[string]chan<- interface{})
- }
-
- // create subscription
- state.queries[q][clientID] = ch
-
- // add client if needed
- if _, ok := state.clients[clientID]; !ok {
- state.clients[clientID] = make(map[Query]struct{})
- }
- state.clients[clientID][q] = struct{}{}
- }
-
- func (state *state) remove(clientID string, q Query) {
- clientToChannelMap, ok := state.queries[q]
- if !ok {
- return
- }
-
- ch, ok := clientToChannelMap[clientID]
- if ok {
- close(ch)
-
- delete(state.clients[clientID], q)
-
- // if it is not subscribed to anything else, remove the client
- if len(state.clients[clientID]) == 0 {
- delete(state.clients, clientID)
- }
-
- delete(state.queries[q], clientID)
- if len(state.queries[q]) == 0 {
- delete(state.queries, q)
- }
- }
- }
-
- func (state *state) removeAll(clientID string) {
- queryMap, ok := state.clients[clientID]
- if !ok {
- return
- }
-
- for q := range queryMap {
- ch := state.queries[q][clientID]
- close(ch)
-
- delete(state.queries[q], clientID)
- if len(state.queries[q]) == 0 {
- delete(state.queries, q)
- }
- }
- delete(state.clients, clientID)
- }
-
- func (state *state) send(msg interface{}, tags TagMap) {
- for q, clientToChannelMap := range state.queries {
- if q.Matches(tags) {
- for _, ch := range clientToChannelMap {
- ch <- msg
- }
- }
- }
- }
|