You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

642 lines
17 KiB

  1. Add support for use of the system timezone database, rather
  2. than embedding a copy. Discussed upstream but was not desired.
  3. History:
  4. r11: adapted to PHP 5.6.9
  5. r10: make timezone case insensitive
  6. r9: fix another compile error without --with-system-tzdata configured (Michael Heimpold)
  7. r8: fix compile error without --with-system-tzdata configured
  8. r7: improve check for valid timezone id to exclude directories
  9. r6: fix fd leak in r5, fix country code/BC flag use in
  10. timezone_identifiers_list() using system db,
  11. fix use of PECL timezonedb to override system db,
  12. r5: reverts addition of "System/Localtime" fake tzname.
  13. updated for 5.3.0, parses zone.tab to pick up mapping between
  14. timezone name, country code and long/lat coords
  15. r4: added "System/Localtime" tzname which uses /etc/localtime
  16. r3: fix a crash if /usr/share/zoneinfo doesn't exist (Raphael Geissert)
  17. r2: add filesystem trawl to set up name alias index
  18. r1: initial revision
  19. diff -Naur php-5.6.9.orig/ext/date/lib/parse_tz.c php-5.6.9/ext/date/lib/parse_tz.c
  20. --- php-5.6.9.orig/ext/date/lib/parse_tz.c 2015-05-14 01:13:33.000000000 +0200
  21. +++ php-5.6.9/ext/date/lib/parse_tz.c 2015-05-18 22:40:55.000000000 +0200
  22. @@ -18,8 +18,22 @@
  23. /* $Id$ */
  24. +#ifndef PATH_MAX
  25. +#define PATH_MAX 4096
  26. +#endif
  27. +
  28. #include "timelib.h"
  29. +#ifdef HAVE_SYSTEM_TZDATA
  30. +#include <sys/mman.h>
  31. +#include <sys/stat.h>
  32. +#include <limits.h>
  33. +#include <fcntl.h>
  34. +#include <unistd.h>
  35. +
  36. +#include "php_scandir.h"
  37. +#endif
  38. +
  39. #include <stdio.h>
  40. #ifdef HAVE_LOCALE_H
  41. @@ -31,7 +45,12 @@
  42. #else
  43. #include <strings.h>
  44. #endif
  45. +
  46. +#ifndef HAVE_SYSTEM_TZDATA
  47. #include "timezonedb.h"
  48. +#endif
  49. +
  50. +#include <ctype.h>
  51. #if (defined(__APPLE__) || defined(__APPLE_CC__)) && (defined(__BIG_ENDIAN__) || defined(__LITTLE_ENDIAN__))
  52. # if defined(__LITTLE_ENDIAN__)
  53. @@ -53,6 +72,11 @@
  54. {
  55. uint32_t version;
  56. + if (memcmp(tzf, "TZif", 4) == 0) {
  57. + *tzf += 20;
  58. + return -1;
  59. + }
  60. +
  61. /* read ID */
  62. version = (*tzf)[3] - '0';
  63. *tzf += 4;
  64. @@ -296,7 +320,406 @@
  65. }
  66. }
  67. -static int seek_to_tz_position(const unsigned char **tzf, char *timezone, const timelib_tzdb *tzdb)
  68. +#ifdef HAVE_SYSTEM_TZDATA
  69. +
  70. +#ifdef HAVE_SYSTEM_TZDATA_PREFIX
  71. +#define ZONEINFO_PREFIX HAVE_SYSTEM_TZDATA_PREFIX /* directory chosen at configure time */
  72. +#else
  73. +#define ZONEINFO_PREFIX "/usr/share/zoneinfo" /* default when no directory was configured */
  74. +#endif
  75. +
  76. +/* System timezone database pointer; NULL until timelib_builtin_db() builds it on its first call. */
  77. +static const timelib_tzdb *timezonedb_system;
  78. +
  79. +/* Hash table entry for the cache of the zone.tab mapping table; entries sharing a bucket are chained through 'next'. */
  80. +struct location_info {
  81. + char code[2]; /* two-letter country code; filled with memcpy, not NUL-terminated */
  82. + double latitude, longitude; /* as produced by parse_iso6709() */
  83. + char name[64]; /* timezone name; this is the key that gets hashed */
  84. + char *comment; /* strdup()ed copy of the zone.tab comment column */
  85. + struct location_info *next; /* next entry in the same bucket, or NULL */
  86. +};
  87. +
  88. +/* Cache of zone.tab: the table returned by create_location_table(), or NULL if it could not be built. */
  89. +static struct location_info **system_location_table;
  90. +
  91. +/* Number of buckets in the zone.tab hash table; a random-ish prime
  92. + * big enough to prevent too many collisions. */
  93. +#define LOCINFO_HASH_SIZE (1021)
  94. +
  95. +/* Compute a case-insensitive hash of the NUL-terminated string str; the result is a bucket index below LOCINFO_HASH_SIZE. */
  96. +static uint32_t tz_hash(const char *str)
  97. +{
  98. + const unsigned char *p = (const unsigned char *)str; /* unsigned, so tolower() never sees a negative value */
  99. + uint32_t hash = 5381;
  100. + int c;
  101. +
  102. + while ((c = tolower(*p++)) != '\0') { /* stop at the terminating NUL */
  103. + hash = (hash << 5) ^ hash ^ c; /* mix in the lower-cased byte */
  104. + }
  105. +
  106. + return hash % LOCINFO_HASH_SIZE;
  107. +}
  108. +
  109. +/* Parse one signed ISO-6709 coordinate (a latitude or a longitude)
  110. + * as used in zone.tab. Returns a pointer just past the parsed digits
  111. + * on success, or NULL on parse error. On success, writes the value in decimal degrees to *result. */
  112. +static char *parse_iso6709(char *p, double *result)
  113. +{
  114. + double v, sign;
  115. + char *pend;
  116. + size_t len;
  117. +
  118. + if (*p == '+') /* a leading sign is mandatory */
  119. + sign = 1.0;
  120. + else if (*p == '-')
  121. + sign = -1.0;
  122. + else
  123. + return NULL;
  124. +
  125. + p++;
  126. + for (pend = p; *pend >= '0' && *pend <= '9'; pend++) /* find the end of the digit run */
  127. + ;;
  128. +
  129. + /* Annoying encoding used by zone.tab has no decimal point, so use
  130. + * the length to determine the format:
  131. + *
  132. + * 4 = DDMM
  133. + * 5 = DDDMM
  134. + * 6 = DDMMSS
  135. + * 7 = DDDMMSS
  136. + */
  137. + len = pend - p;
  138. + if (len < 4 || len > 7) {
  139. + return NULL;
  140. + }
  141. +
  142. + /* p => [D]DD */
  143. + v = (p[0] - '0') * 10.0 + (p[1] - '0');
  144. + p += 2;
  145. + if (len == 5 || len == 7) /* odd lengths carry a third degrees digit */
  146. + v = v * 10.0 + (*p++ - '0');
  147. + /* p => MM[SS] */
  148. + v += (10.0 * (p[0] - '0')
  149. + + p[1] - '0') / 60.0;
  150. + p += 2;
  151. + /* p => [SS] */
  152. + if (len > 5) {
  153. + v += (10.0 * (p[0] - '0')
  154. + + p[1] - '0') / 3600.0;
  155. + p += 2;
  156. + }
  157. +
  158. + /* Round to five decimal places, not because it's a good idea,
  159. + * but because the builtin data uses rounded data, so match
  160. + * that. */
  161. + *result = round(v * sign * 100000.0) / 100000.0;
  162. +
  163. + return p;
  164. +}
  165. +
  166. +/* Parse the zone.tab file to build up the mapping of timezone name
  167. + * to country code and geographic location. Returns a hash table of
  168. + * LOCINFO_HASH_SIZE buckets indexed by tz_hash(timezone-name), or
  169. + * NULL if zone.tab cannot be opened or the table cannot be
  170. + * allocated. Lines that do not parse are skipped.
  171. + */
  172. +static struct location_info **create_location_table(void)
  173. +{
  174. + struct location_info **li, *i;
  175. + char zone_tab[PATH_MAX];
  176. + char line[512];
  177. + FILE *fp;
  178. +
  179. + snprintf(zone_tab, sizeof zone_tab, "%s", ZONEINFO_PREFIX "/zone.tab");
  180. +
  181. + fp = fopen(zone_tab, "r");
  182. + li = fp ? calloc(LOCINFO_HASH_SIZE, sizeof *li) : NULL;
  183. + if (!li) {
  184. + if (fp) fclose(fp);
  185. + return NULL;
  186. + }
  187. +
  188. + while (fgets(line, sizeof line, fp)) {
  189. + char *p = line, *code, *name, *comment;
  190. + uint32_t hash;
  191. + double latitude, longitude;
  192. +
  193. + while (isspace((unsigned char)*p))
  194. + p++;
  195. +
  196. + if (*p == '#' || *p == '\0' || *p == '\n')
  197. + continue;
  198. +
  199. + if (!isalpha((unsigned char)p[0]) || !isalpha((unsigned char)p[1]) || p[2] != '\t')
  200. + continue;
  201. +
  202. + /* code => AA */
  203. + code = p;
  204. + p[2] = 0;
  205. + p += 3;
  206. +
  207. + /* coords => [+-][D]DDMM[SS][+-][D]DDMM[SS] */
  208. + p = parse_iso6709(p, &latitude);
  209. + if (!p) {
  210. + continue;
  211. + }
  212. + p = parse_iso6709(p, &longitude);
  213. + if (!p) {
  214. + continue;
  215. + }
  216. +
  217. + if (*p != '\t') {
  218. + continue;
  219. + }
  220. +
  221. + /* name = string */
  222. + name = ++p;
  223. + while (*p != '\t' && *p && *p != '\n')
  224. + p++;
  225. +
  226. + if (*p) *p++ = '\0'; /* at end of input stay on the terminator: the comment is then empty */
  227. +
  228. + /* comment = string */
  229. + comment = p;
  230. + while (*p != '\t' && *p && *p != '\n')
  231. + p++;
  232. +
  233. + if (*p == '\n' || *p == '\t')
  234. + *p = '\0';
  235. +
  236. + /* Stop on allocation failure, keeping what was parsed so far. */
  237. + hash = tz_hash(name);
  238. + i = malloc(sizeof *i);
  239. + if (!i || !(i->comment = strdup(comment))) { free(i); break; }
  240. + memcpy(i->code, code, 2); /* two letters, no NUL terminator */
  241. + snprintf(i->name, sizeof i->name, "%s", name); /* always NUL-terminated */
  242. + i->longitude = longitude;
  243. + i->latitude = latitude;
  244. + i->next = li[hash];
  245. + li[hash] = i;
  246. + }
  247. +
  248. + fclose(fp);
  249. +
  250. + return li;
  251. +}
  252. +
  253. +/* Return location info from hash table li for the given timezone name, compared case-insensitively.
  254. + * Returns NULL if li is NULL or the name could not be found. */
  255. +const struct location_info *find_zone_info(struct location_info **li,
  256. + const char *name)
  257. +{
  258. + uint32_t hash = tz_hash(name); /* bucket index; computed even when li is NULL */
  259. + const struct location_info *l;
  260. +
  261. + if (!li) {
  262. + return NULL;
  263. + }
  264. +
  265. + for (l = li[hash]; l; l = l->next) { /* walk the bucket's chain */
  266. + if (strcasecmp(l->name, name) == 0)
  267. + return l;
  268. + }
  269. +
  270. + return NULL;
  271. +}
  272. +
  273. +/* Directory-scan filter passed to php_scandir() by create_zone_index(): returns zero for some
  274. + * non-tzdata files and the posix/right databases, non-zero for everything else. */
  275. +static int index_filter(const struct dirent *ent)
  276. +{
  277. + return strcmp(ent->d_name, ".") != 0
  278. + && strcmp(ent->d_name, "..") != 0
  279. + && strcmp(ent->d_name, "posix") != 0
  280. + && strcmp(ent->d_name, "posixrules") != 0
  281. + && strcmp(ent->d_name, "right") != 0
  282. + && strstr(ent->d_name, ".tab") == NULL; /* rejects any name containing ".tab", such as zone.tab */
  283. +}
  284. +
  285. +static int sysdbcmp(const void *first, const void *second) /* qsort() comparator for the zone index */
  286. +{
  287. + const timelib_tzdb_index_entry *alpha = first, *beta = second;
  288. +
  289. + return strcmp(alpha->id, beta->id); /* case-sensitive ordering by zone identifier */
  290. +}
  291. +
  292. +
  293. +/* Create the zone identifier index by trawling the filesystem below ZONEINFO_PREFIX; stores the sorted index and its size in db. */
  294. +static void create_zone_index(timelib_tzdb *db)
  295. +{
  296. + size_t dirstack_size, dirstack_top;
  297. + size_t index_size, index_next;
  298. + timelib_tzdb_index_entry *db_index;
  299. + char **dirstack;
  300. +
  301. + /* LIFO stack to hold directory entries to scan; each slot is a
  302. + * directory name relative to the zoneinfo prefix. */
  303. + dirstack_size = 32;
  304. + dirstack = malloc(dirstack_size * sizeof *dirstack); /* NOTE(review): malloc/realloc/strdup results in this function are used unchecked */
  305. + dirstack_top = 1;
  306. + dirstack[0] = strdup(""); /* start at the prefix directory itself */
  307. +
  308. + /* Index array; doubled in size whenever it fills up. */
  309. + index_size = 64;
  310. + db_index = malloc(index_size * sizeof *db_index);
  311. + index_next = 0;
  312. +
  313. + do {
  314. + struct dirent **ents;
  315. + char name[PATH_MAX], *top;
  316. + int count;
  317. +
  318. + /* Pop the top stack entry, and iterate through its contents. */
  319. + top = dirstack[--dirstack_top];
  320. + snprintf(name, sizeof name, ZONEINFO_PREFIX "/%s", top);
  321. +
  322. + count = php_scandir(name, &ents, index_filter, php_alphasort);
  323. +
  324. + while (count > 0) { /* entries are consumed from the last one backwards */
  325. + struct stat st;
  326. + const char *leaf = ents[count - 1]->d_name;
  327. +
  328. + snprintf(name, sizeof name, ZONEINFO_PREFIX "/%s/%s",
  329. + top, leaf);
  330. +
  331. + if (strlen(name) && stat(name, &st) == 0) { /* entries that cannot be stat()ed are ignored */
  332. + /* Name, relative to the zoneinfo prefix. */
  333. + const char *root = top;
  334. +
  335. + if (root[0] == '/') root++;
  336. +
  337. + snprintf(name, sizeof name, "%s%s%s", root,
  338. + *root ? "/": "", leaf);
  339. +
  340. + if (S_ISDIR(st.st_mode)) { /* directory: push it for a later scan */
  341. + if (dirstack_top == dirstack_size) {
  342. + dirstack_size *= 2;
  343. + dirstack = realloc(dirstack,
  344. + dirstack_size * sizeof *dirstack);
  345. + }
  346. + dirstack[dirstack_top++] = strdup(name);
  347. + }
  348. + else { /* anything else: record it as a zone identifier */
  349. + if (index_next == index_size) {
  350. + index_size *= 2;
  351. + db_index = realloc(db_index,
  352. + index_size * sizeof *db_index);
  353. + }
  354. +
  355. + db_index[index_next++].id = strdup(name); /* only id is set here; the other fields stay uninitialised */
  356. + }
  357. + }
  358. +
  359. + free(ents[--count]);
  360. + }
  361. +
  362. + if (count != -1) free(ents); /* NOTE(review): presumably -1 means the scan failed and ents was never set - confirm against php_scandir */
  363. + free(top);
  364. + } while (dirstack_top);
  365. +
  366. + qsort(db_index, index_next, sizeof *db_index, sysdbcmp);
  367. +
  368. + db->index = db_index;
  369. + db->index_size = index_next;
  370. +
  371. + free(dirstack);
  372. +}
  373. +
  374. +#define FAKE_HEADER "1234\0??\1??" /* 4 filler bytes, then a BC=0 record and a BC=1 record, each 3 bytes */
  375. +#define FAKE_UTC_POS (7 - 4) /* pos of the BC=1 header record: its offset less the 4 filler bytes */
  376. +
  377. +/* Create a fake data segment for database 'sysdb' and set the pos of every index entry; 'info' is the zone.tab table and may be NULL. */
  378. +static void fake_data_segment(timelib_tzdb *sysdb,
  379. + struct location_info **info)
  380. +{
  381. + size_t n;
  382. + char *data, *p;
  383. +
  384. + data = malloc(3 * sysdb->index_size + sizeof(FAKE_HEADER) - 1); /* whole header plus at most one 3-byte record per entry */
  385. + memcpy(data, FAKE_HEADER, sizeof(FAKE_HEADER) - 1);
  386. + p = data + sizeof(FAKE_HEADER) - 1; /* records are appended after the header */
  387. +
  388. + for (n = 0; n < sysdb->index_size; n++) {
  389. + const struct location_info *li;
  390. + timelib_tzdb_index_entry *ent;
  391. +
  392. + ent = (timelib_tzdb_index_entry *)&sysdb->index[n];
  393. +
  394. + /* UTC always uses the BC=1 record of the header. */
  395. + if (strcmp(ent->id, "UTC") == 0) {
  396. + ent->pos = FAKE_UTC_POS;
  397. + continue;
  398. + }
  399. +
  400. + li = find_zone_info(info, ent->id); /* look the timezone name up in the hash table */
  401. + if (li) {
  402. + /* If found, append the BC byte and the
  403. + * country code; set the position for this
  404. + * section of timezone data. */
  405. + ent->pos = (p - data) - 4;
  406. + *p++ = '\1';
  407. + *p++ = li->code[0];
  408. + *p++ = li->code[1];
  409. + }
  410. + else {
  411. + /* If not found, the timezone data can
  412. + * point at the header. */
  413. + ent->pos = 0;
  414. + }
  415. + }
  416. +
  417. + sysdb->data = (unsigned char *)data;
  418. +}
  419. +
  420. +/* Returns true if the passed-in stat structure describes a
  421. + * probably-valid timezone file: a regular file larger than 20 bytes. */
  422. +static int is_valid_tzfile(const struct stat *st)
  423. +{
  424. + return S_ISREG(st->st_mode) && st->st_size > 20; /* NOTE(review): 20 presumably matches the TZif header bytes skipped by read_preamble - confirm */
  425. +}
  426. +
  427. +/* Return the mmap()ed tzfile if found, else NULL. On success, the
  428. + * length of the mapped data is placed in *length; the caller is responsible for unmapping it. */
  429. +static char *map_tzfile(const char *timezone, size_t *length)
  430. +{
  431. + char fname[PATH_MAX];
  432. + struct stat st;
  433. + char *p;
  434. + int fd;
  435. +
  436. + if (timezone[0] == '\0' || strstr(timezone, "..") != NULL) { /* reject empty names and anything that could climb out of the prefix */
  437. + return NULL;
  438. + }
  439. +
  440. + if (system_location_table) {
  441. + const struct location_info *li;
  442. + if ((li = find_zone_info(system_location_table, timezone)) != NULL) {
  443. + /* Use the stored name so the path has the case recorded in zone.tab */
  444. + timezone = li->name;
  445. + }
  446. + }
  447. +
  448. + snprintf(fname, sizeof fname, ZONEINFO_PREFIX "/%s", timezone);
  449. +
  450. + fd = open(fname, O_RDONLY);
  451. + if (fd == -1) {
  452. + return NULL;
  453. + } else if (fstat(fd, &st) != 0 || !is_valid_tzfile(&st)) { /* e.g. a directory or a too-small file */
  454. + close(fd);
  455. + return NULL;
  456. + }
  457. +
  458. + *length = st.st_size; /* written before mmap(), so it is set even if the mapping fails */
  459. + p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
  460. + close(fd); /* the descriptor is closed whether or not mmap() succeeded */
  461. +
  462. + return p != MAP_FAILED ? p : NULL;
  463. +}
  464. +
  465. +#endif
  466. +
  467. +static int inmem_seek_to_tz_position(const unsigned char **tzf, char *timezone, const timelib_tzdb *tzdb)
  468. {
  469. int left = 0, right = tzdb->index_size - 1;
  470. #ifdef HAVE_SETLOCALE
  471. @@ -335,21 +758,90 @@
  472. return 0;
  473. }
  474. +static int seek_to_tz_position(const unsigned char **tzf, char *timezone, /* returns 1 and sets *tzf when the zone is found */
  475. + char **map, size_t *maplen, /* receive the mapping and its length for the system db */
  476. + const timelib_tzdb *tzdb)
  477. +{
  478. +#ifdef HAVE_SYSTEM_TZDATA
  479. + if (tzdb == timezonedb_system) { /* system db: read the zone file from disk */
  480. + char *orig;
  481. +
  482. + orig = map_tzfile(timezone, maplen);
  483. + if (orig == NULL) {
  484. + return 0;
  485. + }
  486. +
  487. + (*tzf) = (unsigned char *)orig;
  488. + *map = orig; /* left untouched on every other path */
  489. +
  490. + return 1;
  491. + }
  492. + else
  493. +#endif
  494. + {
  495. + return inmem_seek_to_tz_position(tzf, timezone, tzdb); /* any other db: search its in-memory index */
  496. + }
  497. +}
  498. +
  499. const timelib_tzdb *timelib_builtin_db(void)
  500. {
  501. +#ifdef HAVE_SYSTEM_TZDATA
  502. + if (timezonedb_system == NULL) { /* first call: build the system database */
  503. + timelib_tzdb *tmp = malloc(sizeof *tmp); /* NOTE(review): result is used unchecked */
  504. +
  505. + tmp->version = "0.system";
  506. + tmp->data = NULL;
  507. + create_zone_index(tmp); /* sets tmp->index and tmp->index_size */
  508. + system_location_table = create_location_table(); /* may be NULL */
  509. + fake_data_segment(tmp, system_location_table); /* sets tmp->data and each entry's pos */
  510. + timezonedb_system = tmp; /* kept for all later calls */
  511. + }
  512. +
  513. +
  514. + return timezonedb_system;
  515. +#else
  516. return &timezonedb_builtin;
  517. +#endif
  518. }
  519. const timelib_tzdb_index_entry *timelib_timezone_builtin_identifiers_list(int *count)
  520. {
  521. +#ifdef HAVE_SYSTEM_TZDATA
  522. + *count = timelib_builtin_db()->index_size; /* builds the system db on first use, so this never reads a NULL pointer */
  523. + return timelib_builtin_db()->index;
  524. +#else
  525. *count = sizeof(timezonedb_idx_builtin) / sizeof(*timezonedb_idx_builtin);
  526. return timezonedb_idx_builtin;
  527. +#endif
  528. }
  529. int timelib_timezone_id_is_valid(char *timezone, const timelib_tzdb *tzdb)
  530. {
  531. const unsigned char *tzf;
  532. - return (seek_to_tz_position(&tzf, timezone, tzdb));
  533. +
  534. +#ifdef HAVE_SYSTEM_TZDATA
  535. + if (tzdb == timezonedb_system) { /* system db: validate against zone.tab and the filesystem, without mapping the file */
  536. + char fname[PATH_MAX];
  537. + struct stat st;
  538. +
  539. + if (timezone[0] == '\0' || strstr(timezone, "..") != NULL) { /* same name checks as map_tzfile() */
  540. + return 0;
  541. + }
  542. +
  543. + if (system_location_table) {
  544. + if (find_zone_info(system_location_table, timezone) != NULL) {
  545. + /* found in cache: accepted without touching the filesystem */
  546. + return 1;
  547. + }
  548. + }
  549. +
  550. + snprintf(fname, sizeof fname, ZONEINFO_PREFIX "/%s", timezone);
  551. +
  552. + return stat(fname, &st) == 0 && is_valid_tzfile(&st); /* not in zone.tab: this path is case-sensitive */
  553. + }
  554. +#endif
  555. +
  556. + return (inmem_seek_to_tz_position(&tzf, timezone, tzdb));
  557. }
  558. static void skip_64bit_preamble(const unsigned char **tzf, timelib_tzinfo *tz)
  559. @@ -374,10 +866,12 @@
  560. timelib_tzinfo *timelib_parse_tzfile(char *timezone, const timelib_tzdb *tzdb)
  561. {
  562. const unsigned char *tzf;
  563. + char *memmap = NULL;
  564. + size_t maplen;
  565. timelib_tzinfo *tmp;
  566. int version;
  567. - if (seek_to_tz_position(&tzf, timezone, tzdb)) {
  568. + if (seek_to_tz_position(&tzf, timezone, &memmap, &maplen, tzdb)) {
  569. tmp = timelib_tzinfo_ctor(timezone);
  570. version = read_preamble(&tzf, tmp);
  571. @@ -391,7 +885,34 @@
  572. skip_64bit_types(&tzf, tmp);
  573. skip_posix_string(&tzf, tmp);
  574. }
  575. - read_location(&tzf, tmp);
  576. +
  577. +#ifdef HAVE_SYSTEM_TZDATA
  578. + if (memmap) {
  579. + const struct location_info *li;
  580. +
  581. + /* TZif-style - grok the location info from the system database,
  582. + * if possible. */
  583. +
  584. + if ((li = find_zone_info(system_location_table, timezone)) != NULL) {
  585. + tmp->location.comments = strdup(li->comment);
  586. + strncpy(tmp->location.country_code, li->code, 2);
  587. + tmp->location.longitude = li->longitude;
  588. + tmp->location.latitude = li->latitude;
  589. + tmp->bc = 1;
  590. + } else {
  591. + strcpy(tmp->location.country_code, "??");
  592. + tmp->bc = 0;
  593. + tmp->location.comments = strdup("");
  594. + }
  595. +
  596. + /* Now done with the mmap segment - discard it. */
  597. + munmap(memmap, maplen);
  598. + } else
  599. +#endif
  600. + {
  601. + /* PHP-style - use the embedded info. */
  602. + read_location(&tzf, tmp);
  603. + }
  604. } else {
  605. tmp = NULL;
  606. }
  607. diff -Naur php-5.6.9.orig/ext/date/lib/timelib.m4 php-5.6.9/ext/date/lib/timelib.m4
  608. --- php-5.6.9.orig/ext/date/lib/timelib.m4 2015-05-14 01:13:33.000000000 +0200
  609. +++ php-5.6.9/ext/date/lib/timelib.m4 2015-05-18 22:31:36.000000000 +0200
  610. @@ -78,3 +78,17 @@
  611. dnl Check for strtoll, atoll
  612. AC_CHECK_FUNCS(strtoll atoll strftime)
  613. +
  614. +PHP_ARG_WITH(system-tzdata, for use of system timezone data,
  615. +[ --with-system-tzdata[=DIR] to specify use of system timezone data],
  616. +no, no)
  617. +
  618. +if test "$PHP_SYSTEM_TZDATA" != "no"; then # option given with or without a directory
  619. + AC_DEFINE(HAVE_SYSTEM_TZDATA, 1, [Define if system timezone data is used])
  620. +
  621. + if test "$PHP_SYSTEM_TZDATA" != "yes"; then # a directory was given so record it as the zoneinfo prefix
  622. + AC_DEFINE_UNQUOTED(HAVE_SYSTEM_TZDATA_PREFIX, "$PHP_SYSTEM_TZDATA",
  623. + [Define for location of system timezone data])
  624. + fi
  625. +fi
  626. +