diff --git a/net/haproxy/Makefile b/net/haproxy/Makefile
index 8c3a714e9..fb213260c 100644
--- a/net/haproxy/Makefile
+++ b/net/haproxy/Makefile
@@ -10,7 +10,7 @@ include $(TOPDIR)/rules.mk

 PKG_NAME:=haproxy
 PKG_VERSION:=1.8.8
-PKG_RELEASE:=04
+PKG_RELEASE:=05

 PKG_SOURCE:=haproxy-$(PKG_VERSION).tar.gz
 PKG_SOURCE_URL:=https://www.haproxy.org/download/1.8/src/
diff --git a/net/haproxy/patches/0017-BUG-MEDIUM-pollers-Use-a-global-list-for-fd-shared-between-threads.patch b/net/haproxy/patches/0017-BUG-MEDIUM-pollers-Use-a-global-list-for-fd-shared-between-threads.patch
new file mode 100644
index 000000000..b3ae30e51
--- /dev/null
+++ b/net/haproxy/patches/0017-BUG-MEDIUM-pollers-Use-a-global-list-for-fd-shared-between-threads.patch
@@ -0,0 +1,709 @@
+commit 954db1d01a3d706d4cacd288f28e8517a635d36e
+Author: Olivier Houchard
+Date: Thu May 17 18:34:02 2018 +0200
+
+ BUG/MEDIUM: pollers: Use a global list for fd shared between threads.
+
+ With the old model, any fd shared by multiple threads, such as listeners
+ or dns sockets, would only be updated on one thread, so that could lead
+ to missed events, or spurious wakeups.
+ To avoid this, add a global list for fd that are shared, and only remove
+ entries from this list when every thread has updated its poller.
+ This subtly changes the semantics of updt_fd_polling(), as it now unlocks
+ the FD_LOCK on exit.
+
+ This is similar in spirit to commit 6b96f7289c2f401deef4bdc6e20792360807dde4
+ (with the bugfix from c55b88ece616afe0b28dc81eb39bad37b5f9c33f) applied,
+ but had to be rewritten, because of the differences between 1.8 and master.
+
+ This should only be applied to 1.8.
+
+diff --git a/include/common/hathreads.h b/include/common/hathreads.h
+index 325a869a..86db4d5c 100644
+--- a/include/common/hathreads.h
++++ b/include/common/hathreads.h
+@@ -201,6 +201,8 @@ void thread_exit_sync(void);
+ int thread_no_sync(void);
+ int thread_need_sync(void);
+
++extern unsigned long all_threads_mask;
++
+ #if defined(DEBUG_THREAD) || defined(DEBUG_FULL)
+
+ /* WARNING!!! if you update this enum, please also keep lock_label() up to date below */
+@@ -209,6 +211,7 @@ enum lock_label {
+ FDTAB_LOCK,
+ FDCACHE_LOCK,
+ FD_LOCK,
++ FD_UPDATE_LOCK,
+ POLL_LOCK,
+ TASK_RQ_LOCK,
+ TASK_WQ_LOCK,
+@@ -330,6 +333,7 @@ static inline const char *lock_label(enum lock_label label)
+ case FDCACHE_LOCK: return "FDCACHE";
+ case FD_LOCK: return "FD";
+ case FDTAB_LOCK: return "FDTAB";
++ case FD_UPDATE_LOCK: return "FD_UPDATE";
+ case POLL_LOCK: return "POLL";
+ case TASK_RQ_LOCK: return "TASK_RQ";
+ case TASK_WQ_LOCK: return "TASK_WQ";
+diff --git a/include/proto/fd.h b/include/proto/fd.h
+index bb91bb2c..b6199ccf 100644
+--- a/include/proto/fd.h
++++ b/include/proto/fd.h
+@@ -43,6 +43,9 @@ extern THREAD_LOCAL int fd_nbupdt; // number of updates in the list
+ __decl_hathreads(extern HA_SPINLOCK_T __attribute__((aligned(64))) fdtab_lock); /* global lock to protect fdtab array */
+ __decl_hathreads(extern HA_RWLOCK_T __attribute__((aligned(64))) fdcache_lock); /* global lock to protect fd_cache array */
+ __decl_hathreads(extern HA_SPINLOCK_T __attribute__((aligned(64))) poll_lock); /* global lock to protect poll info */
++__decl_hathreads(extern HA_SPINLOCK_T __attribute__((aligned(64))) fd_updt_lock); /* global lock to protect the update list */
++
++extern struct fdlist update_list; // Global update list
+
+ /* Deletes an FD from the fdsets, and recomputes the maxfd limit.
+ * The file descriptor is also closed.
+@@ -96,14 +99,70 @@ void fd_process_cached_events();
+
+ /* Mark fd as updated for polling and allocate an entry in the update list
++ * for this if it was not already there. This can be done at any time.
++ * This function expects the FD lock to be locked, and returns with the
++ * FD lock unlocked.
+ */
+ static inline void updt_fd_polling(const int fd)
+ {
+- if (fdtab[fd].update_mask & tid_bit)
++ if ((fdtab[fd].update_mask & fdtab[fd].thread_mask) ==
++ fdtab[fd].thread_mask) {
++ HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
+ /* already scheduled for update */
+ return;
+- fdtab[fd].update_mask |= tid_bit;
+- fd_updt[fd_nbupdt++] = fd;
++ }
++ if (fdtab[fd].thread_mask == tid_bit) {
++ fdtab[fd].update_mask |= tid_bit;
++ fd_updt[fd_nbupdt++] = fd;
++ HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
++ } else {
++ /* This is ugly, but we can afford to unlock the FD lock
++ * before we acquire the fd_updt_lock, to prevent a
++ * lock order reversal, because this function is only called
++ * from fd_update_cache(), and all users of fd_update_cache()
++ * used to just unlock the fd lock just after, anyway.
++ */
++ HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
++ HA_SPIN_LOCK(FD_UPDATE_LOCK, &fd_updt_lock);
++ /* If update_mask is non-null, then it's already in the list
++ * so we don't have to add it.
++ */
++ if (fdtab[fd].update_mask == 0) {
++ if (update_list.first == -1) {
++ update_list.first = update_list.last = fd;
++ fdtab[fd].update.next = fdtab[fd].update.prev = -1;
++ } else {
++ fdtab[update_list.last].update.next = fd;
++ fdtab[fd].update.prev = update_list.last;
++ fdtab[fd].update.next = -1;
++ update_list.last = fd;
++ }
++ }
++ fdtab[fd].update_mask |= fdtab[fd].thread_mask;
++ HA_SPIN_UNLOCK(FD_UPDATE_LOCK, &fd_updt_lock);
++
++ }
++}
++
++/* Called from the poller to acknowledge we read an entry from the global
++ * update list, to remove our bit from the update_mask, and remove it from
++ * the list if we were the last one.
++ */
+/* Expects to be called with the FD lock and the FD update lock held */
++static inline void done_update_polling(int fd)
++{
++ fdtab[fd].update_mask &= ~tid_bit;
++ if ((fdtab[fd].update_mask & all_threads_mask) == 0) {
++ if (fdtab[fd].update.prev != -1)
++ fdtab[fdtab[fd].update.prev].update.next =
++ fdtab[fd].update.next;
++ else
++ update_list.first = fdtab[fd].update.next;
++ if (fdtab[fd].update.next != -1)
++ fdtab[fdtab[fd].update.next].update.prev =
++ fdtab[fd].update.prev;
++ else
++ update_list.last = fdtab[fd].update.prev;
++ }
+ }
+
+
+@@ -175,13 +234,6 @@ static inline int fd_compute_new_polled_status(int state)
+ */
+ static inline void fd_update_cache(int fd)
+ {
+- /* 3 states for each direction require a polling update */
+- if ((fdtab[fd].state & (FD_EV_POLLED_R | FD_EV_ACTIVE_R)) == FD_EV_POLLED_R ||
+- (fdtab[fd].state & (FD_EV_POLLED_R | FD_EV_READY_R | FD_EV_ACTIVE_R)) == FD_EV_ACTIVE_R ||
+- (fdtab[fd].state & (FD_EV_POLLED_W | FD_EV_ACTIVE_W)) == FD_EV_POLLED_W ||
+- (fdtab[fd].state & (FD_EV_POLLED_W | FD_EV_READY_W | FD_EV_ACTIVE_W)) == FD_EV_ACTIVE_W)
+- updt_fd_polling(fd);
+-
+ /* only READY and ACTIVE states (the two with both flags set) require a cache entry */
+ if (((fdtab[fd].state & (FD_EV_READY_R | FD_EV_ACTIVE_R)) == (FD_EV_READY_R | FD_EV_ACTIVE_R)) ||
+ ((fdtab[fd].state & (FD_EV_READY_W | FD_EV_ACTIVE_W)) == (FD_EV_READY_W | FD_EV_ACTIVE_W))) {
+@@ -190,6 +242,14 @@ static inline void fd_update_cache(int fd)
+ else {
+ fd_release_cache_entry(fd);
+ }
++ /* 3 states for each direction require a polling update */
++ if ((fdtab[fd].state & (FD_EV_POLLED_R | FD_EV_ACTIVE_R)) == FD_EV_POLLED_R ||
++ (fdtab[fd].state & (FD_EV_POLLED_R | FD_EV_READY_R | FD_EV_ACTIVE_R)) == FD_EV_ACTIVE_R ||
++ (fdtab[fd].state & (FD_EV_POLLED_W | FD_EV_ACTIVE_W)) == FD_EV_POLLED_W ||
++ (fdtab[fd].state & (FD_EV_POLLED_W | FD_EV_READY_W | FD_EV_ACTIVE_W)) == FD_EV_ACTIVE_W)
++ updt_fd_polling(fd);
++ else
++ HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
+ }
+
+ /*
+@@ -271,8 +331,9 @@ static inline void fd_stop_recv(int fd)
+ if (fd_recv_active(fd)) {
+ fdtab[fd].state &= ~FD_EV_ACTIVE_R;
+ fd_update_cache(fd); /* need an update entry to change the state */
+- }
+- HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
++ /* the FD lock is unlocked by fd_update_cache() */
++ } else
++ HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
+ }
+
+ /* Disable processing send events on fd */
+@@ -282,8 +343,9 @@ static inline void fd_stop_send(int fd)
+ if (fd_send_active(fd)) {
+ fdtab[fd].state &= ~FD_EV_ACTIVE_W;
+ fd_update_cache(fd); /* need an update entry to change the state */
+- }
+- HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
++ /* the FD lock is unlocked by fd_update_cache() */
++ } else
++ HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
+ }
+
+ /* Disable processing of events on fd for both directions. */
+@@ -293,8 +355,9 @@ static inline void fd_stop_both(int fd)
+ if (fd_active(fd)) {
+ fdtab[fd].state &= ~FD_EV_ACTIVE_RW;
+ fd_update_cache(fd); /* need an update entry to change the state */
+- }
+- HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
++ /* the FD lock is unlocked by fd_update_cache() */
++ } else
++ HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
+ }
+
+ /* Report that FD cannot receive anymore without polling (EAGAIN detected). */
+@@ -304,8 +367,9 @@ static inline void fd_cant_recv(const int fd)
+ if (fd_recv_ready(fd)) {
+ fdtab[fd].state &= ~FD_EV_READY_R;
+ fd_update_cache(fd); /* need an update entry to change the state */
+- }
+- HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
++ /* the FD lock is unlocked by fd_update_cache() */
++ } else
++ HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
+ }
+
+ /* Report that FD can receive anymore without polling. */
+@@ -315,8 +379,9 @@ static inline void fd_may_recv(const int fd)
+ if (!fd_recv_ready(fd)) {
+ fdtab[fd].state |= FD_EV_READY_R;
+ fd_update_cache(fd); /* need an update entry to change the state */
+- }
+- HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
++ /* the FD lock is unlocked by fd_update_cache() */
++ } else
++ HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
+ }
+
+ /* Disable readiness when polled. This is useful to interrupt reading when it
+@@ -330,8 +395,9 @@ static inline void fd_done_recv(const int fd)
+ if (fd_recv_polled(fd) && fd_recv_ready(fd)) {
+ fdtab[fd].state &= ~FD_EV_READY_R;
+ fd_update_cache(fd); /* need an update entry to change the state */
+- }
+- HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
++ /* the FD lock is unlocked by fd_update_cache() */
++ } else
++ HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
+ }
+
+ /* Report that FD cannot send anymore without polling (EAGAIN detected). */
+@@ -341,8 +407,9 @@ static inline void fd_cant_send(const int fd)
+ if (fd_send_ready(fd)) {
+ fdtab[fd].state &= ~FD_EV_READY_W;
+ fd_update_cache(fd); /* need an update entry to change the state */
+- }
+- HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
++ /* the FD lock is unlocked by fd_update_cache() */
++ } else
++ HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
+ }
+
+ /* Report that FD can send anymore without polling (EAGAIN detected). */
+@@ -352,8 +419,9 @@ static inline void fd_may_send(const int fd)
+ if (!fd_send_ready(fd)) {
+ fdtab[fd].state |= FD_EV_READY_W;
+ fd_update_cache(fd); /* need an update entry to change the state */
+- }
+- HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
++ /* the FD lock is unlocked by fd_update_cache() */
++ } else
++ HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
+ }
+
+ /* Prepare FD to try to receive */
+@@ -363,8 +431,9 @@ static inline void fd_want_recv(int fd)
+ if (!fd_recv_active(fd)) {
+ fdtab[fd].state |= FD_EV_ACTIVE_R;
+ fd_update_cache(fd); /* need an update entry to change the state */
+- }
+- HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
++ /* the FD lock is unlocked by fd_update_cache() */
++ } else
++ HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
+ }
+
+ /* Prepare FD to try to send */
+@@ -374,8 +443,9 @@ static inline void fd_want_send(int fd)
+ if (!fd_send_active(fd)) {
+ fdtab[fd].state |= FD_EV_ACTIVE_W;
+ fd_update_cache(fd); /* need an update entry to change the state */
+- }
+- HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
++ /* the FD lock is unlocked by fd_update_cache() */
++ } else
++ HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
+ }
+
+ /* Update events seen for FD and its state if needed. This should be called
+diff --git a/include/types/fd.h b/include/types/fd.h
+index 9f2c5fee..8e34c624 100644
+--- a/include/types/fd.h
++++ b/include/types/fd.h
+@@ -90,11 +90,24 @@ enum fd_states {
+ */
+ #define DEAD_FD_MAGIC 0xFDDEADFD
+
++struct fdlist_entry {
++ int next;
++ int prev;
++} __attribute__ ((aligned(8)));
++
++/* head of the fd list */
++struct fdlist {
++ int first;
++ int last;
++} __attribute__ ((aligned(8)));
++
++
+ /* info about one given fd */
+ struct fdtab {
+ __decl_hathreads(HA_SPINLOCK_T lock);
+ unsigned long thread_mask; /* mask of thread IDs authorized to process the task */
+ unsigned long polled_mask; /* mask of thread IDs currently polling this fd */
++ struct fdlist_entry update; /* Entry in the global update list */
+ unsigned long update_mask; /* mask of thread IDs having an update for fd */
+ void (*iocb)(int fd); /* I/O handler */
+ void *owner; /* the connection or listener associated with this fd, NULL if closed */
+diff --git a/src/ev_epoll.c b/src/ev_epoll.c
+index 124b8163..adc15acd 100644
+--- a/src/ev_epoll.c
++++ b/src/ev_epoll.c
+@@ -59,13 +59,51 @@ REGPRM1 static void __fd_clo(int fd)
+ }
+ }
+
++static void _update_fd(int fd)
++{
++ int en, opcode;
++
++ en = fdtab[fd].state;
++
++ if (fdtab[fd].polled_mask & tid_bit) {
++ if (!(fdtab[fd].thread_mask & tid_bit) || !(en & FD_EV_POLLED_RW)) {
++ /* fd removed from poll list */
++ opcode = EPOLL_CTL_DEL;
++ HA_ATOMIC_AND(&fdtab[fd].polled_mask, ~tid_bit);
++ }
++ else {
++ /* fd status changed */
++ opcode = EPOLL_CTL_MOD;
++ }
++ }
++ else if ((fdtab[fd].thread_mask & tid_bit) && (en & FD_EV_POLLED_RW)) {
++ /* new fd in the poll list */
++ opcode = EPOLL_CTL_ADD;
++ HA_ATOMIC_OR(&fdtab[fd].polled_mask, tid_bit);
++ }
++ else {
++ return;
++ }
++
++ /* construct the epoll events based on new state */
++ ev.events = 0;
++ if (en & FD_EV_POLLED_R)
++ ev.events |= EPOLLIN | EPOLLRDHUP;
++
++ if (en & FD_EV_POLLED_W)
++ ev.events |= EPOLLOUT;
++
++ ev.data.fd = fd;
++ epoll_ctl(epoll_fd[tid], opcode, fd, &ev);
++}
++
+ /*
+ * Linux epoll() poller
+ */
+ REGPRM2 static void _do_poll(struct poller *p, int exp)
+ {
+ int status, eo, en;
+- int fd, opcode;
++ int fd;
+ int count;
+ int updt_idx;
+ int wait_time;
+@@ -89,39 +127,31 @@ REGPRM2 static void _do_poll(struct poller *p, int exp)
+ en = fd_compute_new_polled_status(eo);
+ fdtab[fd].state = en;
+ HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
+-
+- if (fdtab[fd].polled_mask & tid_bit) {
+- if (!(fdtab[fd].thread_mask & tid_bit) || !(en & FD_EV_POLLED_RW)) {
+- /* fd removed from poll list */
+- opcode = EPOLL_CTL_DEL;
+- HA_ATOMIC_AND(&fdtab[fd].polled_mask, ~tid_bit);
+- }
+- else {
+- /* fd status changed */
+- opcode = EPOLL_CTL_MOD;
+- }
+- }
+- else if ((fdtab[fd].thread_mask & tid_bit) && (en & FD_EV_POLLED_RW)) {
+- /* new fd in the poll list */
+- opcode = EPOLL_CTL_ADD;
+- HA_ATOMIC_OR(&fdtab[fd].polled_mask, tid_bit);
+- }
++ _update_fd(fd);
++ }
++ fd_nbupdt = 0;
++ /* Scan the global update list */
++ HA_SPIN_LOCK(FD_UPDATE_LOCK, &fd_updt_lock);
++ for (fd = update_list.first; fd != -1; fd = fdtab[fd].update.next) {
++ HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock);
++ if (fdtab[fd].update_mask & tid_bit)
++ done_update_polling(fd);
+ else {
++ HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
+ continue;
+ }
++ fdtab[fd].new = 0;
+
+- /* construct the epoll events based on new state */
+- ev.events = 0;
+- if (en & FD_EV_POLLED_R)
+- ev.events |= EPOLLIN | EPOLLRDHUP;
+-
+- if (en & FD_EV_POLLED_W)
+- ev.events |= EPOLLOUT;
++ eo = fdtab[fd].state;
++ en = fd_compute_new_polled_status(eo);
++ fdtab[fd].state = en;
+
+- ev.data.fd = fd;
+- epoll_ctl(epoll_fd[tid], opcode, fd, &ev);
++ HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
++ if (!fdtab[fd].owner)
++ continue;
++ _update_fd(fd);
+ }
+- fd_nbupdt = 0;
++ HA_SPIN_UNLOCK(FD_UPDATE_LOCK, &fd_updt_lock);
+
+ /* compute the epoll_wait() timeout */
+ if (!exp)
+@@ -208,8 +238,10 @@ static int init_epoll_per_thread()
+ * fd for this thread. Let's just mark them as updated, the poller will
+ * do the rest.
+ */
+- for (fd = 0; fd < maxfd; fd++)
++ for (fd = 0; fd < maxfd; fd++) {
++ HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock);
+ updt_fd_polling(fd);
++ }
+
+ return 1;
+ fail_fd:
+diff --git a/src/ev_kqueue.c b/src/ev_kqueue.c
+index 8cd6dd84..642de8b3 100644
+--- a/src/ev_kqueue.c
++++ b/src/ev_kqueue.c
+@@ -33,6 +33,41 @@ static int kqueue_fd[MAX_THREADS]; // per-thread kqueue_fd
+ static THREAD_LOCAL struct kevent *kev = NULL;
+ static struct kevent *kev_out = NULL; // Trash buffer for kevent() to write the eventlist in
+
++static int _update_fd(int fd, int start)
++{
++ int en;
++ int changes = start;
++
++ en = fdtab[fd].state;
++
++ if (!(fdtab[fd].thread_mask & tid_bit) || !(en & FD_EV_POLLED_RW)) {
++ if (!(fdtab[fd].polled_mask & tid_bit)) {
++ /* fd was not watched, it's still not */
++ return 0;
++ }
++ /* fd totally removed from poll list */
++ EV_SET(&kev[changes++], fd, EVFILT_READ, EV_DELETE, 0, 0, NULL);
++ EV_SET(&kev[changes++], fd, EVFILT_WRITE, EV_DELETE, 0, 0, NULL);
++ HA_ATOMIC_AND(&fdtab[fd].polled_mask, ~tid_bit);
++ }
++ else {
++ /* OK fd has to be monitored, it was either added or changed */
++
++ if (en & FD_EV_POLLED_R)
++ EV_SET(&kev[changes++], fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
++ else if (fdtab[fd].polled_mask & tid_bit)
++ EV_SET(&kev[changes++], fd, EVFILT_READ, EV_DELETE, 0, 0, NULL);
++
++ if (en & FD_EV_POLLED_W)
++ EV_SET(&kev[changes++], fd, EVFILT_WRITE, EV_ADD, 0, 0, NULL);
++ else if (fdtab[fd].polled_mask & tid_bit)
++ EV_SET(&kev[changes++], fd, EVFILT_WRITE, EV_DELETE, 0, 0, NULL);
++
++ HA_ATOMIC_OR(&fdtab[fd].polled_mask, tid_bit);
++ }
++ return changes;
++}
++
+ /*
+ * kqueue() poller
+ */
+@@ -66,32 +101,32 @@ REGPRM2 static void _do_poll(struct poller *p, int exp)
+ fdtab[fd].state = en;
+ HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
+
+- if (!(fdtab[fd].thread_mask & tid_bit) || !(en & FD_EV_POLLED_RW)) {
+- if (!(fdtab[fd].polled_mask & tid_bit)) {
+- /* fd was not watched, it's still not */
+- continue;
+- }
+- /* fd totally removed from poll list */
+- EV_SET(&kev[changes++], fd, EVFILT_READ, EV_DELETE, 0, 0, NULL);
+- EV_SET(&kev[changes++], fd, EVFILT_WRITE, EV_DELETE, 0, 0, NULL);
+- HA_ATOMIC_AND(&fdtab[fd].polled_mask, ~tid_bit);
+- }
+- else {
+- /* OK fd has to be monitored, it was either added or changed */
++ changes = _update_fd(fd, changes);
++ }
+
+- if (en & FD_EV_POLLED_R)
+- EV_SET(&kev[changes++], fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
+- else if (fdtab[fd].polled_mask & tid_bit)
+- EV_SET(&kev[changes++], fd, EVFILT_READ, EV_DELETE, 0, 0, NULL);
++ /* Scan the global update list */
++ HA_SPIN_LOCK(FD_UPDATE_LOCK, &fd_updt_lock);
++ for (fd = update_list.first; fd != -1; fd = fdtab[fd].update.next) {
++ HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock);
++ if (fdtab[fd].update_mask & tid_bit)
++ done_update_polling(fd);
++ else {
++ HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
++ continue;
++ }
++ fdtab[fd].new = 0;
+
+- if (en & FD_EV_POLLED_W)
+- EV_SET(&kev[changes++], fd, EVFILT_WRITE, EV_ADD, 0, 0, NULL);
+- else if (fdtab[fd].polled_mask & tid_bit)
+- EV_SET(&kev[changes++], fd, EVFILT_WRITE, EV_DELETE, 0, 0, NULL);
++ eo = fdtab[fd].state;
++ en = fd_compute_new_polled_status(eo);
++ fdtab[fd].state = en;
+
+- HA_ATOMIC_OR(&fdtab[fd].polled_mask, tid_bit);
+- }
++ HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
++ if (!fdtab[fd].owner)
++ continue;
++ changes = _update_fd(fd, changes);
+ }
++ HA_SPIN_UNLOCK(FD_UPDATE_LOCK, &fd_updt_lock);
++
+ if (changes) {
+ #ifdef EV_RECEIPT
+ kev[0].flags |= EV_RECEIPT;
+@@ -189,8 +224,10 @@ static int init_kqueue_per_thread()
+ * fd for this thread. Let's just mark them as updated, the poller will
+ * do the rest.
+ */
+- for (fd = 0; fd < maxfd; fd++)
++ for (fd = 0; fd < maxfd; fd++) {
++ HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock);
+ updt_fd_polling(fd);
++ }
+
+ return 1;
+ fail_fd:
+diff --git a/src/ev_poll.c b/src/ev_poll.c
+index b7cc0bb3..c913ced2 100644
+--- a/src/ev_poll.c
++++ b/src/ev_poll.c
+@@ -104,6 +104,51 @@ REGPRM2 static void _do_poll(struct poller *p, int exp)
+ HA_SPIN_UNLOCK(POLL_LOCK, &poll_lock);
+ }
+ }
++ HA_SPIN_LOCK(FD_UPDATE_LOCK, &fd_updt_lock);
++ for (fd = update_list.first; fd != -1; fd = fdtab[fd].update.next) {
++ HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock);
++ if (fdtab[fd].update_mask & tid_bit) {
++ /* Cheat a bit, as the state is global to all pollers
++ * we don't need every thread to take care of the
++ * update.
++ */
++ fdtab[fd].update_mask &= ~all_threads_mask;
++ done_update_polling(fd);
++ } else {
++ HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
++ continue;
++ }
++
++ if (!fdtab[fd].owner) {
++ activity[tid].poll_drop++;
++ HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
++ continue;
++ }
++
++ fdtab[fd].new = 0;
++
++ eo = fdtab[fd].state;
++ en = fd_compute_new_polled_status(eo);
++ fdtab[fd].state = en;
++ HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
++
++ if ((eo ^ en) & FD_EV_POLLED_RW) {
++ /* poll status changed, update the lists */
++ HA_SPIN_LOCK(POLL_LOCK, &poll_lock);
++ if ((eo & ~en) & FD_EV_POLLED_R)
++ hap_fd_clr(fd, fd_evts[DIR_RD]);
++ else if ((en & ~eo) & FD_EV_POLLED_R)
++ hap_fd_set(fd, fd_evts[DIR_RD]);
++
++ if ((eo & ~en) & FD_EV_POLLED_W)
++ hap_fd_clr(fd, fd_evts[DIR_WR]);
++ else if ((en & ~eo) & FD_EV_POLLED_W)
++ hap_fd_set(fd, fd_evts[DIR_WR]);
++ HA_SPIN_UNLOCK(POLL_LOCK, &poll_lock);
++ }
++
++ }
++ HA_SPIN_UNLOCK(FD_UPDATE_LOCK, &fd_updt_lock);
+ fd_nbupdt = 0;
+
+ nbfd = 0;
+diff --git a/src/ev_select.c b/src/ev_select.c
+index 5f3486ed..bde923ea 100644
+--- a/src/ev_select.c
++++ b/src/ev_select.c
+@@ -70,7 +70,42 @@ REGPRM2 static void _do_poll(struct poller *p, int exp)
+ en = fd_compute_new_polled_status(eo);
+ fdtab[fd].state = en;
+ HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
++ if ((eo ^ en) & FD_EV_POLLED_RW) {
++ /* poll status changed, update the lists */
++ HA_SPIN_LOCK(POLL_LOCK, &poll_lock);
++ if ((eo & ~en) & FD_EV_POLLED_R)
++ FD_CLR(fd, fd_evts[DIR_RD]);
++ else if ((en & ~eo) & FD_EV_POLLED_R)
++ FD_SET(fd, fd_evts[DIR_RD]);
++
++ if ((eo & ~en) & FD_EV_POLLED_W)
++ FD_CLR(fd, fd_evts[DIR_WR]);
++ else if ((en & ~eo) & FD_EV_POLLED_W)
++ FD_SET(fd, fd_evts[DIR_WR]);
++ HA_SPIN_UNLOCK(POLL_LOCK, &poll_lock);
++ }
++ }
++ HA_SPIN_LOCK(FD_UPDATE_LOCK, &fd_updt_lock);
++ for (fd = update_list.first; fd != -1; fd = fdtab[fd].update.next) {
++ HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock);
++ if (fdtab[fd].update_mask & tid_bit) {
++ /* Cheat a bit, as the state is global to all pollers
++ * we don't need every thread to take care of the
++ * update.
++ */
++ fdtab[fd].update_mask &= ~all_threads_mask;
++ done_update_polling(fd);
++ } else {
++ HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
++ continue;
++ }
+
++ fdtab[fd].new = 0;
++
++ eo = fdtab[fd].state;
++ en = fd_compute_new_polled_status(eo);
++ fdtab[fd].state = en;
++ HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock);
+ if ((eo ^ en) & FD_EV_POLLED_RW) {
+ /* poll status changed, update the lists */
+ HA_SPIN_LOCK(POLL_LOCK, &poll_lock);
+@@ -85,7 +120,9 @@ REGPRM2 static void _do_poll(struct poller *p, int exp)
+ FD_SET(fd, fd_evts[DIR_WR]);
+ HA_SPIN_UNLOCK(POLL_LOCK, &poll_lock);
+ }
++
+ }
++ HA_SPIN_UNLOCK(FD_UPDATE_LOCK, &fd_updt_lock);
+ fd_nbupdt = 0;
+
+ /* let's restore fdset state */
+diff --git a/src/fd.c b/src/fd.c
+index b64130ed..a134e93e 100644
+--- a/src/fd.c
++++ b/src/fd.c
+@@ -175,9 +175,12 @@ unsigned long fd_cache_mask = 0; // Mask of threads with events in the cache
+ THREAD_LOCAL int *fd_updt = NULL; // FD updates list
+ THREAD_LOCAL int fd_nbupdt = 0; // number of updates in the list
+
++struct fdlist update_list; // Global update list
+ __decl_hathreads(HA_SPINLOCK_T fdtab_lock); /* global lock to protect fdtab array */
+ __decl_hathreads(HA_RWLOCK_T fdcache_lock); /* global lock to protect fd_cache array */
+ __decl_hathreads(HA_SPINLOCK_T poll_lock); /* global lock to protect poll info */
++__decl_hathreads(HA_SPINLOCK_T) fd_updt_lock; /* global lock to protect the update list */
++
+
+ /* Deletes an FD from the fdsets, and recomputes the maxfd limit.
+ * The file descriptor is also closed.
+@@ -341,6 +344,9 @@ int init_pollers()
+ HA_SPIN_INIT(&fdtab_lock);
+ HA_RWLOCK_INIT(&fdcache_lock);
+ HA_SPIN_INIT(&poll_lock);
++ HA_SPIN_INIT(&fd_updt_lock);
++ update_list.first = update_list.last = -1;
++
+ do {
+ bp = NULL;
+ for (p = 0; p < nbpollers; p++)
+diff --git a/src/hathreads.c b/src/hathreads.c
+index 944a0d5b..66ed482a 100644
+--- a/src/hathreads.c
++++ b/src/hathreads.c
+@@ -31,7 +31,7 @@ void thread_sync_io_handler(int fd)
+ static HA_SPINLOCK_T sync_lock;
+ static int threads_sync_pipe[2];
+ static unsigned long threads_want_sync = 0;
+-static unsigned long all_threads_mask = 0;
++unsigned long all_threads_mask = 0;
+
+ #if defined(DEBUG_THREAD) || defined(DEBUG_FULL)
+ struct lock_stat lock_stats[LOCK_LABELS];
diff --git a/net/haproxy/patches/0018-BUG-MEDIUM-ssl-properly-protect-SSL-cert-generation.patch b/net/haproxy/patches/0018-BUG-MEDIUM-ssl-properly-protect-SSL-cert-generation.patch
new file mode 100644
index 000000000..b94bce939
--- /dev/null
+++ b/net/haproxy/patches/0018-BUG-MEDIUM-ssl-properly-protect-SSL-cert-generation.patch
@@ -0,0 +1,45 @@
+commit f571613244e4c02ca7aada30c89a6244d09d58d4
+Author: Willy Tarreau
+Date: Thu May 17 10:56:47 2018 +0200
+
+ BUG/MEDIUM: ssl: properly protect SSL cert generation
+
+ Commit 821bb9b ("MAJOR: threads/ssl: Make SSL part thread-safe") added
+ insufficient locking to the cert lookup and generation code : it uses
+ lru64_lookup(), which will automatically remove and add a list element
+ to the LRU list. It cannot be simply read-locked.
+
+ A long-term improvement should consist in using a lockless mechanism
+ in lru64_lookup() to safely move the list element at the head. For now
+ let's simply use a write lock during the lookup. The effect will be
+ minimal since it's used only in conjunction with automatically generated
+ certificates, which are much more expensive and rarely used.
+
+ This fix must be backported to 1.8.
+
+ (cherry picked from commit 03f4ec47d9ffff629b07dcba9f0f134a7c7e44b2)
+ Signed-off-by: William Lallemand
+
+diff --git a/src/ssl_sock.c b/src/ssl_sock.c
+index 1196d111..9fb2bb15 100644
+--- a/src/ssl_sock.c
++++ b/src/ssl_sock.c
+@@ -1812,15 +1812,15 @@ ssl_sock_assign_generated_cert(unsigned int key, struct bind_conf *bind_conf, SS
+ struct lru64 *lru = NULL;
+
+ if (ssl_ctx_lru_tree) {
+- HA_RWLOCK_RDLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock);
++ HA_RWLOCK_WRLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock);
+ lru = lru64_lookup(key, ssl_ctx_lru_tree, bind_conf->ca_sign_cert, 0);
+ if (lru && lru->domain) {
+ if (ssl)
+ SSL_set_SSL_CTX(ssl, (SSL_CTX *)lru->data);
+- HA_RWLOCK_RDUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock);
++ HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock);
+ return (SSL_CTX *)lru->data;
+ }
+- HA_RWLOCK_RDUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock);
++ HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock);
+ }
+ return NULL;
+ }