Browse Source

Merge pull request #777 from silasdavis/fix-blocking-ws-client

Fix WSClient deadlock in the readRoutine after Stop() is called
pull/725/merge
Ethan Buchman 7 years ago
committed by GitHub
parent
commit
b2d5546cf8
5 changed files with 75 additions and 48 deletions
  1. +1
    -1
      benchmarks/simu/counter.go
  2. +7
    -5
      rpc/client/httpclient.go
  3. +16
    -18
      rpc/lib/client/ws_client.go
  4. +29
    -8
      rpc/lib/client/ws_client_test.go
  5. +22
    -16
      rpc/lib/rpc_test.go

+ 1
- 1
benchmarks/simu/counter.go View File

@ -21,7 +21,7 @@ func main() {
// Read a bunch of responses
go func() {
for {
_, ok := <-wsc.ResultsCh
_, ok := <-wsc.ResponsesCh
if !ok {
break
}


+ 7
- 5
rpc/client/httpclient.go View File

@ -318,16 +318,18 @@ func (w *WSEvents) redoSubscriptions() {
func (w *WSEvents) eventListener() {
for {
select {
case res := <-w.ws.ResultsCh:
case resp := <-w.ws.ResponsesCh:
// res is json.RawMessage
err := w.parseEvent(res)
if resp.Error != nil {
// FIXME: better logging/handling of errors??
fmt.Printf("ws err: %+v\n", resp.Error.Error())
continue
}
err := w.parseEvent(*resp.Result)
if err != nil {
// FIXME: better logging/handling of errors??
fmt.Printf("ws result: %+v\n", err)
}
case err := <-w.ws.ErrorsCh:
// FIXME: better logging/handling of errors??
fmt.Printf("ws err: %+v\n", err)
case <-w.quit:
// send a message so we can wait for the routine to exit
// before cleaning up the w.ws stuff


+ 16
- 18
rpc/lib/client/ws_client.go View File

@ -40,18 +40,17 @@ type WSClient struct {
// https://godoc.org/github.com/rcrowley/go-metrics#Timer.
PingPongLatencyTimer metrics.Timer
// user facing channels, closed only when the client is being stopped.
ResultsCh chan json.RawMessage
ErrorsCh chan error
// Single user facing channel to read RPCResponses from, closed only when the client is being stopped.
ResponsesCh chan types.RPCResponse
// Callback, which will be called each time after successful reconnect.
onReconnect func()
// internal channels
send chan types.RPCRequest // user requests
backlog chan types.RPCRequest // stores a single user request received during a conn failure
reconnectAfter chan error // reconnect requests
readRoutineQuit chan struct{} // a way for readRoutine to close writeRoutine
send chan types.RPCRequest // user requests
backlog chan types.RPCRequest // stores a single user request received during a conn failure
reconnectAfter chan error // reconnect requests
readRoutineQuit chan struct{} // a way for readRoutine to close writeRoutine
wg sync.WaitGroup
@ -148,8 +147,7 @@ func (c *WSClient) OnStart() error {
return err
}
c.ResultsCh = make(chan json.RawMessage)
c.ErrorsCh = make(chan error)
c.ResponsesCh = make(chan types.RPCResponse)
c.send = make(chan types.RPCRequest)
// 1 additional error may come from the read/write
@ -174,8 +172,7 @@ func (c *WSClient) Stop() bool {
success := c.BaseService.Stop()
// only close user-facing channels when we can't write to them
c.wg.Wait()
close(c.ResultsCh)
close(c.ErrorsCh)
close(c.ResponsesCh)
return success
}
@ -192,7 +189,7 @@ func (c *WSClient) IsActive() bool {
}
// Send the given RPC request to the server. Results will be available on
// ResultsCh, errors, if any, on ErrorsCh. Will block until send succeeds or
// ResponsesCh, errors, if any, on ErrorsCh. Will block until send succeeds or
// ctx.Done is closed.
func (c *WSClient) Send(ctx context.Context, request types.RPCRequest) error {
select {
@ -433,15 +430,16 @@ func (c *WSClient) readRoutine() {
err = json.Unmarshal(data, &response)
if err != nil {
c.Logger.Error("failed to parse response", "err", err, "data", string(data))
c.ErrorsCh <- err
continue
}
if response.Error != nil {
c.ErrorsCh <- response.Error
continue
}
c.Logger.Info("got response", "resp", response.Result)
c.ResultsCh <- *response.Result
// Combine a non-blocking read on BaseService.Quit with a non-blocking write on ResponsesCh to avoid blocking
// c.wg.Wait() in c.Stop(). Note we rely on Quit being closed so that it sends unlimited Quit signals to stop
// both readRoutine and writeRoutine
select {
case <-c.Quit:
case c.ResponsesCh <- response:
}
}
}


+ 29
- 8
rpc/lib/client/ws_client_test.go View File

@ -125,8 +125,7 @@ func TestWSClientReconnectFailure(t *testing.T) {
go func() {
for {
select {
case <-c.ResultsCh:
case <-c.ErrorsCh:
case <-c.ResponsesCh:
case <-c.Quit:
return
}
@ -162,6 +161,29 @@ func TestWSClientReconnectFailure(t *testing.T) {
}
}
func TestNotBlockingOnStop(t *testing.T) {
timeout := 2 * time.Second
s := httptest.NewServer(&myHandler{})
c := startClient(t, s.Listener.Addr())
c.Call(context.Background(), "a", make(map[string]interface{}))
// Let the readRoutine get around to blocking
time.Sleep(time.Second)
passCh := make(chan struct{})
go func() {
// Unless we have a non-blocking write to ResponsesCh from readRoutine
// this blocks forever ont the waitgroup
c.Stop()
passCh <- struct{}{}
}()
select {
case <-passCh:
// Pass
case <-time.After(timeout):
t.Fatalf("WSClient did failed to stop within %v seconds - is one of the read/write routines blocking?",
timeout.Seconds())
}
}
func startClient(t *testing.T, addr net.Addr) *WSClient {
c := NewWSClient(addr.String(), "/websocket")
_, err := c.Start()
@ -178,13 +200,12 @@ func call(t *testing.T, method string, c *WSClient) {
func callWgDoneOnResult(t *testing.T, c *WSClient, wg *sync.WaitGroup) {
for {
select {
case res := <-c.ResultsCh:
if res != nil {
wg.Done()
case resp := <-c.ResponsesCh:
if resp.Error != nil {
t.Fatalf("unexpected error: %v", resp.Error)
}
case err := <-c.ErrorsCh:
if err != nil {
t.Fatalf("unexpected error: %v", err)
if *resp.Result != nil {
wg.Done()
}
case <-c.Quit:
return


+ 22
- 16
rpc/lib/rpc_test.go View File

@ -217,15 +217,17 @@ func echoViaWS(cl *client.WSClient, val string) (string, error) {
}
select {
case msg := <-cl.ResultsCh:
case msg := <-cl.ResponsesCh:
if msg.Error != nil {
return "", err
}
result := new(ResultEcho)
err = json.Unmarshal(msg, result)
err = json.Unmarshal(*msg.Result, result)
if err != nil {
return "", nil
}
return result.Value, nil
case err := <-cl.ErrorsCh:
return "", err
}
}
@ -239,15 +241,17 @@ func echoBytesViaWS(cl *client.WSClient, bytes []byte) ([]byte, error) {
}
select {
case msg := <-cl.ResultsCh:
case msg := <-cl.ResponsesCh:
if msg.Error != nil {
return []byte{}, msg.Error
}
result := new(ResultEchoBytes)
err = json.Unmarshal(msg, result)
err = json.Unmarshal(*msg.Result, result)
if err != nil {
return []byte{}, nil
}
return result.Value, nil
case err := <-cl.ErrorsCh:
return []byte{}, err
}
}
@ -319,14 +323,15 @@ func TestWSNewWSRPCFunc(t *testing.T) {
require.Nil(t, err)
select {
case msg := <-cl.ResultsCh:
case msg := <-cl.ResponsesCh:
if msg.Error != nil {
t.Fatal(err)
}
result := new(ResultEcho)
err = json.Unmarshal(msg, result)
err = json.Unmarshal(*msg.Result, result)
require.Nil(t, err)
got := result.Value
assert.Equal(t, got, val)
case err := <-cl.ErrorsCh:
t.Fatal(err)
}
}
@ -343,14 +348,15 @@ func TestWSHandlesArrayParams(t *testing.T) {
require.Nil(t, err)
select {
case msg := <-cl.ResultsCh:
case msg := <-cl.ResponsesCh:
if msg.Error != nil {
t.Fatalf("%+v", err)
}
result := new(ResultEcho)
err = json.Unmarshal(msg, result)
err = json.Unmarshal(*msg.Result, result)
require.Nil(t, err)
got := result.Value
assert.Equal(t, got, val)
case err := <-cl.ErrorsCh:
t.Fatalf("%+v", err)
}
}


Loading…
Cancel
Save