You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

640 lines
17 KiB

  1. Add support for using the system timezone database, rather
  2. than embedding a copy. Discussed upstream, but it was not wanted there.
  3. History:
  4. r11: adapted to PHP 5.6.9
  5. r10: make timezone case insensitive
  6. r9: fix another compile error without --with-system-tzdata configured (Michael Heimpold)
  7. r8: fix compile error without --with-system-tzdata configured
  8. r7: improve check for valid timezone id to exclude directories
  9. r6: fix fd leak in r5, fix country code/BC flag use in
  10. timezone_identifiers_list() using system db,
  11. fix use of PECL timezonedb to override system db,
  12. r5: reverts addition of "System/Localtime" fake tzname.
  13. updated for 5.3.0, parses zone.tab to pick up mapping between
  14. timezone name, country code and long/lat coords
  15. r4: added "System/Localtime" tzname which uses /etc/localtime
  16. r3: fix a crash if /usr/share/zoneinfo doesn't exist (Raphael Geissert)
  17. r2: add filesystem trawl to set up name alias index
  18. r1: initial revision
  19. --- a/ext/date/lib/parse_tz.c
  20. +++ b/ext/date/lib/parse_tz.c
  21. @@ -18,8 +18,22 @@
  22. /* $Id$ */
  23. +#ifndef PATH_MAX
  24. +#define PATH_MAX 4096
  25. +#endif
  26. +
  27. #include "timelib.h"
  28. +#ifdef HAVE_SYSTEM_TZDATA
  29. +#include <sys/mman.h>
  30. +#include <sys/stat.h>
  31. +#include <limits.h>
  32. +#include <fcntl.h>
  33. +#include <unistd.h>
  34. +
  35. +#include "php_scandir.h"
  36. +#endif
  37. +
  38. #include <stdio.h>
  39. #ifdef HAVE_LOCALE_H
  40. @@ -32,8 +46,12 @@
  41. #include <strings.h>
  42. #endif
  43. +#ifndef HAVE_SYSTEM_TZDATA
  44. #define TIMELIB_SUPPORTS_V2DATA
  45. #include "timezonedb.h"
  46. +#endif
  47. +
  48. +#include <ctype.h>
  49. #if (defined(__APPLE__) || defined(__APPLE_CC__)) && (defined(__BIG_ENDIAN__) || defined(__LITTLE_ENDIAN__))
  50. # if defined(__LITTLE_ENDIAN__)
  51. @@ -55,6 +73,11 @@ static int read_preamble(const unsigned
  52. {
  53. uint32_t version;
  54. + if (memcmp(tzf, "TZif", 4) == 0) {
  55. + *tzf += 20;
  56. + return -1;
  57. + }
  58. +
  59. /* read ID */
  60. version = (*tzf)[3] - '0';
  61. *tzf += 4;
  62. @@ -298,7 +321,406 @@ void timelib_dump_tzinfo(timelib_tzinfo
  63. }
  64. }
  65. -static int seek_to_tz_position(const unsigned char **tzf, char *timezone, const timelib_tzdb *tzdb)
  66. +#ifdef HAVE_SYSTEM_TZDATA
  67. +
  68. +#ifdef HAVE_SYSTEM_TZDATA_PREFIX
  69. +#define ZONEINFO_PREFIX HAVE_SYSTEM_TZDATA_PREFIX
  70. +#else
  71. +#define ZONEINFO_PREFIX "/usr/share/zoneinfo"
  72. +#endif
  73. +
  74. +/* System timezone database pointer. */
  75. +static const timelib_tzdb *timezonedb_system;
  76. +
  77. +/* Hash table entry for the cache of the zone.tab mapping table. */
  78. +struct location_info {
  79. + char code[2];
  80. + double latitude, longitude;
  81. + char name[64];
  82. + char *comment;
  83. + struct location_info *next;
  84. +};
  85. +
  86. +/* Cache of zone.tab. */
  87. +static struct location_info **system_location_table;
  88. +
  89. +/* Size of the zone.tab hash table; a random-ish prime big enough to
  90. + * prevent too many collisions. */
  91. +#define LOCINFO_HASH_SIZE (1021)
  92. +
  93. +/* Compute a case-insensitive hash of str, reduced modulo LOCINFO_HASH_SIZE. */
  94. +static uint32_t tz_hash(const char *str)
  95. +{
  96. + const unsigned char *p = (const unsigned char *)str;
  97. + uint32_t hash = 5381;
  98. + int c;
  99. +
  100. + while ((c = tolower(*p++)) != '\0') {
  101. + hash = (hash << 5) ^ hash ^ c;
  102. + }
  103. +
  104. + return hash % LOCINFO_HASH_SIZE;
  105. +}
  106. +
  107. +/* Parse a signed ISO-6709 coordinate as used in zone.tab. Returns the end
  108. + * of the parsed string on success, or NULL on parse error. On success,
  109. + * writes the value, in decimal degrees, to *result. */
  110. +static char *parse_iso6709(char *p, double *result)
  111. +{
  112. + double v, sign;
  113. + char *pend;
  114. + size_t len;
  115. +
  116. + if (*p == '+')
  117. + sign = 1.0;
  118. + else if (*p == '-')
  119. + sign = -1.0;
  120. + else
  121. + return NULL;
  122. +
  123. + p++;
  124. + for (pend = p; *pend >= '0' && *pend <= '9'; pend++)
  125. + ;;
  126. +
  127. + /* Annoying encoding used by zone.tab has no decimal point, so use
  128. + * the length to determine the format:
  129. + *
  130. + * 4 = DDMM
  131. + * 5 = DDDMM
  132. + * 6 = DDMMSS
  133. + * 7 = DDDMMSS
  134. + */
  135. + len = pend - p;
  136. + if (len < 4 || len > 7) {
  137. + return NULL;
  138. + }
  139. +
  140. + /* p => [D]DD */
  141. + v = (p[0] - '0') * 10.0 + (p[1] - '0');
  142. + p += 2;
  143. + if (len == 5 || len == 7)
  144. + v = v * 10.0 + (*p++ - '0');
  145. + /* p => MM[SS] */
  146. + v += (10.0 * (p[0] - '0')
  147. + + p[1] - '0') / 60.0;
  148. + p += 2;
  149. + /* p => [SS] */
  150. + if (len > 5) {
  151. + v += (10.0 * (p[0] - '0')
  152. + + p[1] - '0') / 3600.0;
  153. + p += 2;
  154. + }
  155. +
  156. + /* Round to five decimal places, not because it's a good idea,
  157. + * but because the builtin data uses rounded values, so match
  158. + * that. */
  159. + *result = round(v * sign * 100000.0) / 100000.0;
  160. +
  161. + return p;
  162. +}
  163. +
  164. +/* Parse the zone.tab file to build the mapping from timezone name to
  165. + * country code and geographic location, and return it as a hash table
  166. + * (NULL if zone.tab cannot be opened). The table is indexed by:
  167. + *
  168. + * tz_hash(timezone-name)
  169. + */
  170. +static struct location_info **create_location_table(void)
  171. +{
  172. + struct location_info **li, *i;
  173. + char zone_tab[PATH_MAX];
  174. + char line[512];
  175. + FILE *fp;
  176. +
  177. + strncpy(zone_tab, ZONEINFO_PREFIX "/zone.tab", sizeof zone_tab);
  178. +
  179. + fp = fopen(zone_tab, "r");
  180. + if (!fp) {
  181. + return NULL;
  182. + }
  183. +
  184. + li = calloc(LOCINFO_HASH_SIZE, sizeof *li);
  185. +
  186. + while (fgets(line, sizeof line, fp)) {
  187. + char *p = line, *code, *name, *comment;
  188. + uint32_t hash;
  189. + double latitude, longitude;
  190. +
  191. + while (isspace(*p))
  192. + p++;
  193. +
  194. + if (*p == '#' || *p == '\0' || *p == '\n')
  195. + continue;
  196. +
  197. + if (!isalpha(p[0]) || !isalpha(p[1]) || p[2] != '\t')
  198. + continue;
  199. +
  200. + /* code => AA */
  201. + code = p;
  202. + p[2] = 0;
  203. + p += 3;
  204. +
  205. + /* coords => [+-][D]DDMM[SS][+-][D]DDMM[SS] */
  206. + p = parse_iso6709(p, &latitude);
  207. + if (!p) {
  208. + continue;
  209. + }
  210. + p = parse_iso6709(p, &longitude);
  211. + if (!p) {
  212. + continue;
  213. + }
  214. +
  215. + if (!p || *p != '\t') {
  216. + continue;
  217. + }
  218. +
  219. + /* name = string */
  220. + name = ++p;
  221. + while (*p != '\t' && *p && *p != '\n')
  222. + p++;
  223. +
  224. + *p++ = '\0';
  225. +
  226. + /* comment = string */
  227. + comment = p;
  228. + while (*p != '\t' && *p && *p != '\n')
  229. + p++;
  230. +
  231. + if (*p == '\n' || *p == '\t')
  232. + *p = '\0';
  233. +
  234. + hash = tz_hash(name);
  235. + i = malloc(sizeof *i);
  236. + memcpy(i->code, code, 2);
  237. + strncpy(i->name, name, sizeof i->name);
  238. + i->comment = strdup(comment);
  239. + i->longitude = longitude;
  240. + i->latitude = latitude;
  241. + i->next = li[hash];
  242. + li[hash] = i;
  243. + /* printf("%s [%u, %f, %f]\n", name, hash, latitude, longitude); */
  244. + }
  245. +
  246. + fclose(fp);
  247. +
  248. + return li;
  249. +}
  250. +
  251. +/* Return location info from the hash table for the given timezone name,
  252. + * matched case-insensitively. Returns NULL if li is NULL or the name is not found. */
  253. +const struct location_info *find_zone_info(struct location_info **li,
  254. + const char *name)
  255. +{
  256. + uint32_t hash = tz_hash(name);
  257. + const struct location_info *l;
  258. +
  259. + if (!li) {
  260. + return NULL;
  261. + }
  262. +
  263. + for (l = li[hash]; l; l = l->next) {
  264. + if (strcasecmp(l->name, name) == 0)
  265. + return l;
  266. + }
  267. +
  268. + return NULL;
  269. +}
  270. +
  271. +/* Filter out some non-tzdata files and the posix/right databases, if
  272. + * present. */
  273. +static int index_filter(const struct dirent *ent)
  274. +{
  275. + return strcmp(ent->d_name, ".") != 0
  276. + && strcmp(ent->d_name, "..") != 0
  277. + && strcmp(ent->d_name, "posix") != 0
  278. + && strcmp(ent->d_name, "posixrules") != 0
  279. + && strcmp(ent->d_name, "right") != 0
  280. + && strstr(ent->d_name, ".tab") == NULL;
  281. +}
  282. +
  283. +static int sysdbcmp(const void *first, const void *second)
  284. +{
  285. + const timelib_tzdb_index_entry *alpha = first, *beta = second;
  286. +
  287. + return strcmp(alpha->id, beta->id);
  288. +}
  289. +
  290. +
  291. +/* Create the zone identifier index by trawling the filesystem. */
  292. +static void create_zone_index(timelib_tzdb *db)
  293. +{
  294. + size_t dirstack_size, dirstack_top;
  295. + size_t index_size, index_next;
  296. + timelib_tzdb_index_entry *db_index;
  297. + char **dirstack;
  298. +
  299. + /* LIFO stack to hold directory entries to scan; each slot is a
  300. + * directory name relative to the zoneinfo prefix. */
  301. + dirstack_size = 32;
  302. + dirstack = malloc(dirstack_size * sizeof *dirstack);
  303. + dirstack_top = 1;
  304. + dirstack[0] = strdup("");
  305. +
  306. + /* Index array. */
  307. + index_size = 64;
  308. + db_index = malloc(index_size * sizeof *db_index);
  309. + index_next = 0;
  310. +
  311. + do {
  312. + struct dirent **ents;
  313. + char name[PATH_MAX], *top;
  314. + int count;
  315. +
  316. + /* Pop the top stack entry, and iterate through its contents. */
  317. + top = dirstack[--dirstack_top];
  318. + snprintf(name, sizeof name, ZONEINFO_PREFIX "/%s", top);
  319. +
  320. + count = php_scandir(name, &ents, index_filter, php_alphasort);
  321. +
  322. + while (count > 0) {
  323. + struct stat st;
  324. + const char *leaf = ents[count - 1]->d_name;
  325. +
  326. + snprintf(name, sizeof name, ZONEINFO_PREFIX "/%s/%s",
  327. + top, leaf);
  328. +
  329. + if (strlen(name) && stat(name, &st) == 0) {
  330. + /* Name, relative to the zoneinfo prefix. */
  331. + const char *root = top;
  332. +
  333. + if (root[0] == '/') root++;
  334. +
  335. + snprintf(name, sizeof name, "%s%s%s", root,
  336. + *root ? "/": "", leaf);
  337. +
  338. + if (S_ISDIR(st.st_mode)) {
  339. + if (dirstack_top == dirstack_size) {
  340. + dirstack_size *= 2;
  341. + dirstack = realloc(dirstack,
  342. + dirstack_size * sizeof *dirstack);
  343. + }
  344. + dirstack[dirstack_top++] = strdup(name);
  345. + }
  346. + else {
  347. + if (index_next == index_size) {
  348. + index_size *= 2;
  349. + db_index = realloc(db_index,
  350. + index_size * sizeof *db_index);
  351. + }
  352. +
  353. + db_index[index_next++].id = strdup(name);
  354. + }
  355. + }
  356. +
  357. + free(ents[--count]);
  358. + }
  359. +
  360. + if (count != -1) free(ents);
  361. + free(top);
  362. + } while (dirstack_top);
  363. +
  364. + qsort(db_index, index_next, sizeof *db_index, sysdbcmp);
  365. +
  366. + db->index = db_index;
  367. + db->index_size = index_next;
  368. +
  369. + free(dirstack);
  370. +}
  371. +
  372. +#define FAKE_HEADER "1234\0??\1??"
  373. +#define FAKE_UTC_POS (7 - 4)
  374. +
  375. +/* Create a fake data segment for database 'sysdb'. */
  376. +static void fake_data_segment(timelib_tzdb *sysdb,
  377. + struct location_info **info)
  378. +{
  379. + size_t n;
  380. + char *data, *p;
  381. +
  382. + data = malloc(3 * sysdb->index_size + 7);
  383. +
  384. + p = mempcpy(data, FAKE_HEADER, sizeof(FAKE_HEADER) - 1);
  385. +
  386. + for (n = 0; n < sysdb->index_size; n++) {
  387. + const struct location_info *li;
  388. + timelib_tzdb_index_entry *ent;
  389. +
  390. + ent = (timelib_tzdb_index_entry *)&sysdb->index[n];
  391. +
  392. + /* Lookup the timezone name in the hash table. */
  393. + if (strcmp(ent->id, "UTC") == 0) {
  394. + ent->pos = FAKE_UTC_POS;
  395. + continue;
  396. + }
  397. +
  398. + li = find_zone_info(info, ent->id);
  399. + if (li) {
  400. + /* If found, append the BC byte and the
  401. + * country code; set the position for this
  402. + * section of timezone data. */
  403. + ent->pos = (p - data) - 4;
  404. + *p++ = '\1';
  405. + *p++ = li->code[0];
  406. + *p++ = li->code[1];
  407. + }
  408. + else {
  409. + /* If not found, the timezone data can
  410. + * point at the header. */
  411. + ent->pos = 0;
  412. + }
  413. + }
  414. +
  415. + sysdb->data = (unsigned char *)data;
  416. +}
  417. +
  418. +/* Returns true if the passed-in stat structure describes a
  419. + * probably-valid timezone file: a regular file larger than 20 bytes. */
  420. +static int is_valid_tzfile(const struct stat *st)
  421. +{
  422. + return S_ISREG(st->st_mode) && st->st_size > 20;
  423. +}
  424. +
  425. +/* Return the mmap()ed tzfile if found, else NULL. On success, the
  426. + * length of the mapped data is placed in *length. */
  427. +static char *map_tzfile(const char *timezone, size_t *length)
  428. +{
  429. + char fname[PATH_MAX];
  430. + struct stat st;
  431. + char *p;
  432. + int fd;
  433. +
  434. + if (timezone[0] == '\0' || strstr(timezone, "..") != NULL) {
  435. + return NULL;
  436. + }
  437. +
  438. + if (system_location_table) {
  439. + const struct location_info *li;
  440. + if ((li = find_zone_info(system_location_table, timezone)) != NULL) {
  441. + /* Use the stored name so the path is built with the table's letter case. */
  442. + timezone = li->name;
  443. + }
  444. + }
  445. +
  446. + snprintf(fname, sizeof fname, ZONEINFO_PREFIX "/%s", timezone);
  447. +
  448. + fd = open(fname, O_RDONLY);
  449. + if (fd == -1) {
  450. + return NULL;
  451. + } else if (fstat(fd, &st) != 0 || !is_valid_tzfile(&st)) {
  452. + close(fd);
  453. + return NULL;
  454. + }
  455. +
  456. + *length = st.st_size;
  457. + p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
  458. + close(fd);
  459. +
  460. + return p != MAP_FAILED ? p : NULL;
  461. +}
  462. +
  463. +#endif
  464. +
  465. +static int inmem_seek_to_tz_position(const unsigned char **tzf, char *timezone, const timelib_tzdb *tzdb)
  466. {
  467. int left = 0, right = tzdb->index_size - 1;
  468. #ifdef HAVE_SETLOCALE
  469. @@ -337,21 +759,90 @@ static int seek_to_tz_position(const uns
  470. return 0;
  471. }
  472. +static int seek_to_tz_position(const unsigned char **tzf, char *timezone,
  473. + char **map, size_t *maplen,
  474. + const timelib_tzdb *tzdb)
  475. +{
  476. +#ifdef HAVE_SYSTEM_TZDATA
  477. + if (tzdb == timezonedb_system) {
  478. + char *orig;
  479. +
  480. + orig = map_tzfile(timezone, maplen);
  481. + if (orig == NULL) {
  482. + return 0;
  483. + }
  484. +
  485. + (*tzf) = (unsigned char *)orig;
  486. + *map = orig;
  487. +
  488. + return 1;
  489. + }
  490. + else
  491. +#endif
  492. + {
  493. + return inmem_seek_to_tz_position(tzf, timezone, tzdb);
  494. + }
  495. +}
  496. +
  497. const timelib_tzdb *timelib_builtin_db(void)
  498. {
  499. +#ifdef HAVE_SYSTEM_TZDATA
  500. + if (timezonedb_system == NULL) {
  501. + timelib_tzdb *tmp = malloc(sizeof *tmp);
  502. +
  503. + tmp->version = "0.system";
  504. + tmp->data = NULL;
  505. + create_zone_index(tmp);
  506. + system_location_table = create_location_table();
  507. + fake_data_segment(tmp, system_location_table);
  508. + timezonedb_system = tmp;
  509. + }
  510. +
  511. +
  512. + return timezonedb_system;
  513. +#else
  514. return &timezonedb_builtin;
  515. +#endif
  516. }
  517. const timelib_tzdb_index_entry *timelib_timezone_builtin_identifiers_list(int *count)
  518. {
  519. +#ifdef HAVE_SYSTEM_TZDATA
  520. + *count = timezonedb_system->index_size;
  521. + return timezonedb_system->index;
  522. +#else
  523. *count = sizeof(timezonedb_idx_builtin) / sizeof(*timezonedb_idx_builtin);
  524. return timezonedb_idx_builtin;
  525. +#endif
  526. }
  527. int timelib_timezone_id_is_valid(char *timezone, const timelib_tzdb *tzdb)
  528. {
  529. const unsigned char *tzf;
  530. - return (seek_to_tz_position(&tzf, timezone, tzdb));
  531. +
  532. +#ifdef HAVE_SYSTEM_TZDATA
  533. + if (tzdb == timezonedb_system) {
  534. + char fname[PATH_MAX];
  535. + struct stat st;
  536. +
  537. + if (timezone[0] == '\0' || strstr(timezone, "..") != NULL) {
  538. + return 0;
  539. + }
  540. +
  541. + if (system_location_table) {
  542. + if (find_zone_info(system_location_table, timezone) != NULL) {
  543. + /* found in cache */
  544. + return 1;
  545. + }
  546. + }
  547. +
  548. + snprintf(fname, sizeof fname, ZONEINFO_PREFIX "/%s", timezone);
  549. +
  550. + return stat(fname, &st) == 0 && is_valid_tzfile(&st);
  551. + }
  552. +#endif
  553. +
  554. + return (inmem_seek_to_tz_position(&tzf, timezone, tzdb));
  555. }
  556. static void skip_64bit_preamble(const unsigned char **tzf, timelib_tzinfo *tz)
  557. @@ -376,10 +867,12 @@ static void read_64bit_header(const unsi
  558. timelib_tzinfo *timelib_parse_tzfile(char *timezone, const timelib_tzdb *tzdb)
  559. {
  560. const unsigned char *tzf;
  561. + char *memmap = NULL;
  562. + size_t maplen;
  563. timelib_tzinfo *tmp;
  564. int version;
  565. - if (seek_to_tz_position(&tzf, timezone, tzdb)) {
  566. + if (seek_to_tz_position(&tzf, timezone, &memmap, &maplen, tzdb)) {
  567. tmp = timelib_tzinfo_ctor(timezone);
  568. version = read_preamble(&tzf, tmp);
  569. @@ -393,7 +886,34 @@ timelib_tzinfo *timelib_parse_tzfile(cha
  570. skip_64bit_types(&tzf, tmp);
  571. skip_posix_string(&tzf, tmp);
  572. }
  573. - read_location(&tzf, tmp);
  574. +
  575. +#ifdef HAVE_SYSTEM_TZDATA
  576. + if (memmap) {
  577. + const struct location_info *li;
  578. +
  579. + /* TZif-style - grok the location info from the system database,
  580. + * if possible. */
  581. +
  582. + if ((li = find_zone_info(system_location_table, timezone)) != NULL) {
  583. + tmp->location.comments = strdup(li->comment);
  584. + strncpy(tmp->location.country_code, li->code, 2);
  585. + tmp->location.longitude = li->longitude;
  586. + tmp->location.latitude = li->latitude;
  587. + tmp->bc = 1;
  588. + } else {
  589. + strcpy(tmp->location.country_code, "??");
  590. + tmp->bc = 0;
  591. + tmp->location.comments = strdup("");
  592. + }
  593. +
  594. + /* Now done with the mmap segment - discard it. */
  595. + munmap(memmap, maplen);
  596. + } else
  597. +#endif
  598. + {
  599. + /* PHP-style - use the embedded info. */
  600. + read_location(&tzf, tmp);
  601. + }
  602. } else {
  603. tmp = NULL;
  604. }
  605. --- a/ext/date/lib/timelib.m4
  606. +++ b/ext/date/lib/timelib.m4
  607. @@ -78,3 +78,17 @@ stdlib.h
  608. dnl Check for strtoll, atoll
  609. AC_CHECK_FUNCS(strtoll atoll strftime)
  610. +
  611. +PHP_ARG_WITH(system-tzdata, for use of system timezone data,
  612. +[ --with-system-tzdata[=DIR] to specify use of system timezone data],
  613. +no, no)
  614. +
  615. +if test "$PHP_SYSTEM_TZDATA" != "no"; then
  616. + AC_DEFINE(HAVE_SYSTEM_TZDATA, 1, [Define if system timezone data is used])
  617. +
  618. + if test "$PHP_SYSTEM_TZDATA" != "yes"; then
  619. + AC_DEFINE_UNQUOTED(HAVE_SYSTEM_TZDATA_PREFIX, "$PHP_SYSTEM_TZDATA",
  620. + [Define for location of system timezone data])
  621. + fi
  622. +fi
  623. +