You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

186 lines
4.9 KiB

  1. commit cd753064396f9563640fef940ce2a89e192042b1
  2. Author: Olivier Houchard <ohouchard@haproxy.com>
  3. Date: Thu Dec 21 17:13:05 2017 +0100
  4. MINOR: threads: Introduce double-width CAS on x86_64 and arm.
  5. Introduce double-width compare-and-swap on arches that support it, right now
  6. x86_64, arm, and aarch64.
  7. Also introduce functions to do memory barriers.
  8. (cherry picked from commit f61f0cb95ffbfe403219226d427cd292ca79965a)
  9. [wt: this is backported only to have the barriers for the new rdv point]
  10. Signed-off-by: Willy Tarreau <w@1wt.eu>
  11. diff --git a/include/common/hathreads.h b/include/common/hathreads.h
  12. index 25cadf10..543ab95c 100644
  13. --- a/include/common/hathreads.h
  14. +++ b/include/common/hathreads.h
  15. @@ -98,6 +98,19 @@ extern THREAD_LOCAL unsigned long tid_bit; /* The bit corresponding to the threa
  16. #define ha_sigmask(how, set, oldset) sigprocmask(how, set, oldset)
  17. +
  18. +static inline void __ha_barrier_load(void)
  19. +{
  20. +}
  21. +
  22. +static inline void __ha_barrier_store(void)
  23. +{
  24. +}
  25. +
  26. +static inline void __ha_barrier_full(void)
  27. +{
  28. +}
  29. +
  30. #else /* USE_THREAD */
  31. #include <stdio.h>
  32. @@ -694,8 +707,147 @@ static inline void __spin_unlock(enum lock_label lbl, struct ha_spinlock *l,
  33. #endif /* DEBUG_THREAD */
  34. +#ifdef __x86_64__
  35. +#define HA_HAVE_CAS_DW 1
  36. +#define HA_CAS_IS_8B
  37. +static __inline int
  38. +__ha_cas_dw(void *target, void *compare, const void *set)
  39. +{
  40. + char ret;
  41. +
  42. + __asm __volatile("lock cmpxchg16b %0; setz %3"
  43. + : "+m" (*(void **)target),
  44. + "=a" (((void **)compare)[0]),
  45. + "=d" (((void **)compare)[1]),
  46. + "=q" (ret)
  47. + : "a" (((void **)compare)[0]),
  48. + "d" (((void **)compare)[1]),
  49. + "b" (((const void **)set)[0]),
  50. + "c" (((const void **)set)[1])
  51. + : "memory", "cc");
  52. + return (ret);
  53. +}
  54. +
  55. +static __inline void
  56. +__ha_barrier_load(void)
  57. +{
  58. + __asm __volatile("lfence" ::: "memory");
  59. +}
  60. +
  61. +static __inline void
  62. +__ha_barrier_store(void)
  63. +{
  64. + __asm __volatile("sfence" ::: "memory");
  65. +}
  66. +
  67. +static __inline void
  68. +__ha_barrier_full(void)
  69. +{
  70. + __asm __volatile("mfence" ::: "memory");
  71. +}
  72. +
  73. +#elif defined(__arm__) && (defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__))
  74. +#define HA_HAVE_CAS_DW 1
  75. +static __inline void
  76. +__ha_barrier_load(void)
  77. +{
  78. + __asm __volatile("dmb" ::: "memory");
  79. +}
  80. +
  81. +static __inline void
  82. +__ha_barrier_store(void)
  83. +{
  84. + __asm __volatile("dsb" ::: "memory");
  85. +}
  86. +
  87. +static __inline void
  88. +__ha_barrier_full(void)
  89. +{
  90. + __asm __volatile("dmb" ::: "memory");
  91. +}
  92. +
  93. +static __inline int __ha_cas_dw(void *target, void *compare, void *set)
  94. +{
  95. + uint64_t previous;
  96. + int tmp;
  97. +
  98. + __asm __volatile("1:"
  99. + "ldrexd %0, [%4];"
  100. + "cmp %Q0, %Q2;"
  101. + "ittt eq;"
  102. + "cmpeq %R0, %R2;"
  103. + "strexdeq %1, %3, [%4];"
  104. + "cmpeq %1, #1;"
  105. + "beq 1b;"
  106. + : "=&r" (previous), "=&r" (tmp)
  107. + : "r" (compare), "r" (set), "r" (target)
  108. + : "memory", "cc");
  109. + tmp = (previous == *(uint64_t *)compare);
  110. + *(uint64_t *)compare = previous;
  111. + return (tmp);
  112. +}
  113. +
  114. +#elif defined (__aarch64__)
  115. +#define HA_HAVE_CAS_DW 1
  116. +#define HA_CAS_IS_8B
  117. +
  118. +static __inline void
  119. +__ha_barrier_load(void)
  120. +{
  121. + __asm __volatile("dmb ishld" ::: "memory");
  122. +}
  123. +
  124. +static __inline void
  125. +__ha_barrier_store(void)
  126. +{
  127. + __asm __volatile("dmb ishst" ::: "memory");
  128. +}
  129. +
  130. +static __inline void
  131. +__ha_barrier_full(void)
  132. +{
  133. + __asm __volatile("dmb ish" ::: "memory");
  134. +}
  135. +
  136. +static __inline int __ha_cas_dw(void *target, void *compare, void *set)
  137. +{
  138. + void *value[2];
  139. + uint64_t tmp1, tmp2;
  140. +
  141. + __asm__ __volatile__("1:"
  142. + "ldxp %0, %1, [%4];"
  143. + "mov %2, %0;"
  144. + "mov %3, %1;"
  145. + "eor %0, %0, %5;"
  146. + "eor %1, %1, %6;"
  147. + "orr %1, %0, %1;"
  148. + "mov %w0, #0;"
  149. + "cbnz %1, 2f;"
  150. + "stxp %w0, %7, %8, [%4];"
  151. + "cbnz %w0, 1b;"
  152. + "mov %w0, #1;"
  153. + "2:"
  154. + : "=&r" (tmp1), "=&r" (tmp2), "=&r" (value[0]), "=&r" (value[1])
  155. + : "r" (target), "r" (((void **)(compare))[0]), "r" (((void **)(compare))[1]), "r" (((void **)(set))[0]), "r" (((void **)(set))[1])
  156. + : "cc", "memory");
  157. +
  158. + memcpy(compare, &value, sizeof(value));
  159. + return (tmp1);
  160. +}
  161. +
  162. +#else
  163. +#define __ha_barrier_load __sync_synchronize
  164. +#define __ha_barrier_store __sync_synchronize
  165. +#define __ha_barrier_full __sync_synchronize
  166. +#endif
  167. +
  168. #endif /* USE_THREAD */
  169. +static inline void __ha_compiler_barrier(void)
  170. +{
  171. + __asm __volatile("" ::: "memory");
  172. +}
  173. +
  174. /* Dummy I/O handler used by the sync pipe.*/
  175. void thread_sync_io_handler(int fd);
  176. int parse_nbthread(const char *arg, char **err);