commit 5a9c875f0f1ee83bd5889dd1ad53e9da43e6c34e Author: Willy Tarreau Date: Fri Aug 2 07:52:08 2019 +0200 BUG/MEDIUM: mux-h2: split the stream's and connection's window sizes The SETTINGS frame parser updates all streams' window for each INITIAL_WINDOW_SIZE setting received on the connection (like h2spec does in test 6.5.3), which can start to be expensive if repeated when there are many streams (up to 100 by default). A quick test shows that it's possible to parse only 35000 settings per second on a 3 GHz core for 100 streams, which is rather low. Given that window sizes are relative and may be negative, there's no point in pre-initializing them for each stream and updating them from the settings. Instead, let's make them relative to the connection's initial window size so that any change immediately affects all streams. The only thing that remains needed is to wake up the streams that were unblocked by the update, which is now done once at the end of h2_process_demux() instead of once per setting. This now results in 5.7 million settings being processed per second, which is way better. In order to keep the change small, the h2s' mws field was renamed to "sws" for "stream window size", and an h2s_mws() function was added to add it to the connection's initial window setting and determine the window size to use when muxing. The h2c_update_all_ws() function was renamed to h2c_unblock_sfctl() since it's now only used to unblock previously blocked streams. This needs to be backported to all versions till 1.8. 
(cherry picked from commit 1d4a0f88100daeb17dd0c9470c659b1ec288bc07) [wt: context adjustment, port to legacy parts] Signed-off-by: Willy Tarreau diff --git a/src/mux_h2.c b/src/mux_h2.c index d605fe94..f90e9435 100644 --- a/src/mux_h2.c +++ b/src/mux_h2.c @@ -208,7 +208,7 @@ struct h2s { struct eb32_node by_id; /* place in h2c's streams_by_id */ int32_t id; /* stream ID */ uint32_t flags; /* H2_SF_* */ - int mws; /* mux window size for this stream */ + int sws; /* stream window size, to be added to the mux's initial window size */ enum h2_err errcode; /* H2 err code (H2_ERR_*) */ enum h2_ss st; uint16_t status; /* HTTP response status */ @@ -707,6 +707,14 @@ static inline __maybe_unused int h2s_id(const struct h2s *h2s) return h2s ? h2s->id : 0; } +/* returns the sum of the stream's own window size and the mux's initial + * window, which together form the stream's effective window size. + */ +static inline int h2s_mws(const struct h2s *h2s) +{ + return h2s->sws + h2s->h2c->miw; +} + /* returns true of the mux is currently busy as seen from stream */ static inline __maybe_unused int h2c_mux_busy(const struct h2c *h2c, const struct h2s *h2s) { @@ -945,7 +953,7 @@ static struct h2s *h2s_new(struct h2c *h2c, int id) LIST_INIT(&h2s->sending_list); h2s->h2c = h2c; h2s->cs = NULL; - h2s->mws = h2c->miw; + h2s->sws = 0; h2s->flags = H2_SF_NONE; h2s->errcode = H2_ERR_NO_ERROR; h2s->st = H2_SS_IDLE; @@ -1543,30 +1551,23 @@ static void h2_wake_some_streams(struct h2c *h2c, int last) } } -/* Increase all streams' outgoing window size by the difference passed in - * argument. This is needed upon receipt of the settings frame if the initial - * window size is different. The difference may be negative and the resulting - * window size as well, for the time it takes to receive some window updates. +/* Wake up all blocked streams whose window size has become positive after the + * mux's initial window was adjusted. 
This should be done after having processed + * SETTINGS frames which have updated the mux's initial window size. */ -static void h2c_update_all_ws(struct h2c *h2c, int diff) +static void h2c_unblock_sfctl(struct h2c *h2c) { struct h2s *h2s; struct eb32_node *node; - if (!diff) - return; - node = eb32_first(&h2c->streams_by_id); while (node) { h2s = container_of(node, struct h2s, by_id); - h2s->mws += diff; - - if (h2s->mws > 0 && (h2s->flags & H2_SF_BLK_SFCTL)) { + if (h2s->flags & H2_SF_BLK_SFCTL && h2s_mws(h2s) > 0) { h2s->flags &= ~H2_SF_BLK_SFCTL; if (h2s->send_wait && !LIST_ADDED(&h2s->list)) LIST_ADDQ(&h2c->send_list, &h2s->list); } - node = eb32_next(node); } } @@ -1607,7 +1608,6 @@ static int h2c_handle_settings(struct h2c *h2c) error = H2_ERR_FLOW_CONTROL_ERROR; goto fail; } - h2c_update_all_ws(h2c, arg - h2c->miw); h2c->miw = arg; break; case H2_SETTINGS_MAX_FRAME_SIZE: @@ -1869,13 +1869,13 @@ static int h2c_handle_window_update(struct h2c *h2c, struct h2s *h2s) goto strm_err; } - if (h2s->mws >= 0 && h2s->mws + inc < 0) { + if (h2s_mws(h2s) >= 0 && h2s_mws(h2s) + inc < 0) { error = H2_ERR_FLOW_CONTROL_ERROR; goto strm_err; } - h2s->mws += inc; - if (h2s->mws > 0 && (h2s->flags & H2_SF_BLK_SFCTL)) { + h2s->sws += inc; + if (h2s_mws(h2s) > 0 && (h2s->flags & H2_SF_BLK_SFCTL)) { h2s->flags &= ~H2_SF_BLK_SFCTL; if (h2s->send_wait && !LIST_ADDED(&h2s->list)) LIST_ADDQ(&h2c->send_list, &h2s->list); @@ -2237,6 +2237,7 @@ static void h2_process_demux(struct h2c *h2c) struct h2s *h2s = NULL, *tmp_h2s; struct h2_fh hdr; unsigned int padlen = 0; + int32_t old_iw = h2c->miw; if (h2c->st0 >= H2_CS_ERROR) return; @@ -2625,6 +2626,9 @@ static void h2_process_demux(struct h2c *h2c) h2s_notify_recv(h2s); } + if (old_iw != h2c->miw) + h2c_unblock_sfctl(h2c); + h2c_restart_reading(h2c, 0); } @@ -4259,8 +4263,8 @@ static size_t h2s_frt_make_resp_data(struct h2s *h2s, const struct buffer *buf, if (size > max) size = max; - if (size > h2s->mws) - size = h2s->mws; + if (size > 
h2s_mws(h2s)) + size = h2s_mws(h2s); if (size <= 0) { h2s->flags |= H2_SF_BLK_SFCTL; @@ -4362,7 +4366,7 @@ static size_t h2s_frt_make_resp_data(struct h2s *h2s, const struct buffer *buf, ofs += size; total += size; h1m->curr_len -= size; - h2s->mws -= size; + h2s->sws -= size; h2c->mws -= size; if (size && !h1m->curr_len && (h1m->flags & H1_MF_CHNK)) { @@ -4390,7 +4394,7 @@ static size_t h2s_frt_make_resp_data(struct h2s *h2s, const struct buffer *buf, } end: - trace("[%d] sent simple H2 DATA response (sid=%d) = %d bytes out (%u in, st=%s, ep=%u, es=%s, h2cws=%d h2sws=%d) data=%u", h2c->st0, h2s->id, size+9, (unsigned int)total, h1m_state_str(h1m->state), h1m->err_pos, h1m_state_str(h1m->err_state), h2c->mws, h2s->mws, (unsigned int)b_data(buf)); + trace("[%d] sent simple H2 DATA response (sid=%d) = %d bytes out (%u in, st=%s, ep=%u, es=%s, h2cws=%d h2sws=%d) data=%u", h2c->st0, h2s->id, size+9, (unsigned int)total, h1m_state_str(h1m->state), h1m->err_pos, h1m_state_str(h1m->err_state), h2c->mws, h2s_mws(h2s), (unsigned int)b_data(buf)); return total; } @@ -4937,7 +4941,7 @@ static size_t h2s_htx_frt_make_resp_data(struct h2s *h2s, struct buffer *buf, si */ if (unlikely(fsize == count && htx->used == 1 && type == HTX_BLK_DATA && - fsize <= h2s->mws && fsize <= h2c->mws && fsize <= h2c->mfs)) { + fsize <= h2s_mws(h2s) && fsize <= h2c->mws && fsize <= h2c->mfs)) { void *old_area = mbuf->area; if (b_data(mbuf)) { @@ -4972,7 +4976,7 @@ static size_t h2s_htx_frt_make_resp_data(struct h2s *h2s, struct buffer *buf, si h2_set_frame_size(outbuf.area, fsize); /* update windows */ - h2s->mws -= fsize; + h2s->sws -= fsize; h2c->mws -= fsize; /* and exchange with our old area */ @@ -5024,7 +5028,7 @@ static size_t h2s_htx_frt_make_resp_data(struct h2s *h2s, struct buffer *buf, si if (!fsize) goto send_empty; - if (h2s->mws <= 0) { + if (h2s_mws(h2s) <= 0) { h2s->flags |= H2_SF_BLK_SFCTL; if (LIST_ADDED(&h2s->list)) LIST_DEL_INIT(&h2s->list); @@ -5034,8 +5038,8 @@ static size_t 
h2s_htx_frt_make_resp_data(struct h2s *h2s, struct buffer *buf, si if (fsize > count) fsize = count; - if (fsize > h2s->mws) - fsize = h2s->mws; // >0 + if (fsize > h2s_mws(h2s)) + fsize = h2s_mws(h2s); // >0 if (h2c->mfs && fsize > h2c->mfs) fsize = h2c->mfs; // >0 @@ -5071,7 +5075,7 @@ static size_t h2s_htx_frt_make_resp_data(struct h2s *h2s, struct buffer *buf, si /* now let's copy this this into the output buffer */ memcpy(outbuf.area + 9, htx_get_blk_ptr(htx, blk), fsize); - h2s->mws -= fsize; + h2s->sws -= fsize; h2c->mws -= fsize; count -= fsize;