You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

620 lines
16 KiB

  1. Add support for using the system timezone database rather
  2. than embedding a copy. This was discussed upstream but was not wanted there.
  3. History:
  4. r9: fix another compile error without --with-system-tzdata configured
  5. r8: fix compile error without --with-system-tzdata configured
  6. r7: improve check for valid timezone id to exclude directories
  7. r6: fix fd leak in r5, fix country code/BC flag use in
  8. timezone_identifiers_list() using system db,
  9. fix use of PECL timezonedb to override system db.
  10. r5: reverts addition of "System/Localtime" fake tzname;
  11. updated for 5.3.0; parses zone.tab to pick up the mapping between
  12. timezone name, country code and long/lat coords
  13. r4: added "System/Localtime" tzname which uses /etc/localtime
  14. r3: fix a crash if /usr/share/zoneinfo doesn't exist (Raphael Geissert)
  15. r2: add filesystem trawl to set up name alias index
  16. r1: initial revision
  17. --- a/ext/date/lib/parse_tz.c
  18. +++ b/ext/date/lib/parse_tz.c
  19. @@ -20,6 +20,16 @@
  20. #include "timelib.h"
  21. +#ifdef HAVE_SYSTEM_TZDATA
  22. +#include <sys/mman.h>
  23. +#include <sys/stat.h>
  24. +#include <limits.h>
  25. +#include <fcntl.h>
  26. +#include <unistd.h>
  27. +
  28. +#include "php_scandir.h"
  29. +#endif
  30. +
  31. #include <stdio.h>
  32. #ifdef HAVE_LOCALE_H
  33. @@ -31,7 +41,12 @@
  34. #else
  35. #include <strings.h>
  36. #endif
  37. +
  38. +#ifndef HAVE_SYSTEM_TZDATA
  39. #include "timezonedb.h"
  40. +#endif
  41. +
  42. +#include <ctype.h>
  43. #if (defined(__APPLE__) || defined(__APPLE_CC__)) && (defined(__BIG_ENDIAN__) || defined(__LITTLE_ENDIAN__))
  44. # if defined(__LITTLE_ENDIAN__)
  45. @@ -51,9 +66,14 @@
  46. static void read_preamble(const unsigned char **tzf, timelib_tzinfo *tz)
  47. {
  48. - /* skip ID */
  49. - *tzf += 4;
  50. -
  51. + if (memcmp(tzf, "TZif", 4) == 0) {
  52. + *tzf += 20;
  53. + return;
  54. + }
  55. +
  56. + /* skip ID */
  57. + *tzf += 4;
  58. +
  59. /* read BC flag */
  60. tz->bc = (**tzf == '\1');
  61. *tzf += 1;
  62. @@ -256,7 +276,397 @@ void timelib_dump_tzinfo(timelib_tzinfo
  63. }
  64. }
  65. -static int seek_to_tz_position(const unsigned char **tzf, char *timezone, const timelib_tzdb *tzdb)
  66. +#ifdef HAVE_SYSTEM_TZDATA
  67. +
  68. +#ifdef HAVE_SYSTEM_TZDATA_PREFIX
  69. +#define ZONEINFO_PREFIX HAVE_SYSTEM_TZDATA_PREFIX
  70. +#else
  71. +#define ZONEINFO_PREFIX "/usr/share/zoneinfo"
  72. +#endif
  73. +
  74. +/* System timezone database pointer. */
  75. +static const timelib_tzdb *timezonedb_system = NULL;
  76. +
  77. +/* Hash table entry for the cache of the zone.tab mapping table. */
  78. +struct location_info {
  79. + char code[2];
  80. + double latitude, longitude;
  81. + char name[64];
  82. + char *comment;
  83. + struct location_info *next;
  84. +};
  85. +
  86. +/* Cache of zone.tab. */
  87. +static struct location_info **system_location_table;
  88. +
  89. +/* Size of the zone.tab hash table; a random-ish prime big enough to
  90. + * prevent too many collisions. */
  91. +#define LOCINFO_HASH_SIZE (1021)
  92. +
  93. +static uint32_t tz_hash(const char *str)
  94. +{
  95. + const unsigned char *p = (const unsigned char *)str;
  96. + uint32_t hash = 5381;
  97. + int c;
  98. +
  99. + while ((c = *p++) != '\0') {
  100. + hash = (hash << 5) ^ hash ^ c;
  101. + }
  102. +
  103. + return hash % LOCINFO_HASH_SIZE;
  104. +}
  105. +
  106. +/* Parse an ISO-6709 date as used in zone.tab. Returns end of the
  107. + * parsed string on success, or NULL on parse error. On success,
  108. + * writes the parsed number to *result. */
  109. +static char *parse_iso6709(char *p, double *result)
  110. +{
  111. + double v, sign;
  112. + char *pend;
  113. + size_t len;
  114. +
  115. + if (*p == '+')
  116. + sign = 1.0;
  117. + else if (*p == '-')
  118. + sign = -1.0;
  119. + else
  120. + return NULL;
  121. +
  122. + p++;
  123. + for (pend = p; *pend >= '0' && *pend <= '9'; pend++)
  124. + ;;
  125. +
  126. + /* Annoying encoding used by zone.tab has no decimal point, so use
  127. + * the length to determine the format:
  128. + *
  129. + * 4 = DDMM
  130. + * 5 = DDDMM
  131. + * 6 = DDMMSS
  132. + * 7 = DDDMMSS
  133. + */
  134. + len = pend - p;
  135. + if (len < 4 || len > 7) {
  136. + return NULL;
  137. + }
  138. +
  139. + /* p => [D]DD */
  140. + v = (p[0] - '0') * 10.0 + (p[1] - '0');
  141. + p += 2;
  142. + if (len == 5 || len == 7)
  143. + v = v * 10.0 + (*p++ - '0');
  144. + /* p => MM[SS] */
  145. + v += (10.0 * (p[0] - '0')
  146. + + p[1] - '0') / 60.0;
  147. + p += 2;
  148. + /* p => [SS] */
  149. + if (len > 5) {
  150. + v += (10.0 * (p[0] - '0')
  151. + + p[1] - '0') / 3600.0;
  152. + p += 2;
  153. + }
  154. +
  155. + /* Round to five decimal place, not because it's a good idea,
  156. + * but, because the builtin data uses rounded data, so, match
  157. + * that. */
  158. + *result = round(v * sign * 100000.0) / 100000.0;
  159. +
  160. + return p;
  161. +}
  162. +
  163. +/* This function parses the zone.tab file to build up the mapping of
  164. + * timezone to country code and geographic location, and returns a
  165. + * hash table. The hash table is indexed by the function:
  166. + *
  167. + * tz_hash(timezone-name)
  168. + */
  169. +static struct location_info **create_location_table(void)
  170. +{
  171. + struct location_info **li, *i;
  172. + char zone_tab[PATH_MAX];
  173. + char line[512];
  174. + FILE *fp;
  175. +
  176. + strncpy(zone_tab, ZONEINFO_PREFIX "/zone.tab", sizeof zone_tab);
  177. +
  178. + fp = fopen(zone_tab, "r");
  179. + if (!fp) {
  180. + return NULL;
  181. + }
  182. +
  183. + li = calloc(LOCINFO_HASH_SIZE, sizeof *li);
  184. +
  185. + while (fgets(line, sizeof line, fp)) {
  186. + char *p = line, *code, *name, *comment;
  187. + uint32_t hash;
  188. + double latitude, longitude;
  189. +
  190. + while (isspace(*p))
  191. + p++;
  192. +
  193. + if (*p == '#' || *p == '\0' || *p == '\n')
  194. + continue;
  195. +
  196. + if (!isalpha(p[0]) || !isalpha(p[1]) || p[2] != '\t')
  197. + continue;
  198. +
  199. + /* code => AA */
  200. + code = p;
  201. + p[2] = 0;
  202. + p += 3;
  203. +
  204. + /* coords => [+-][D]DDMM[SS][+-][D]DDMM[SS] */
  205. + p = parse_iso6709(p, &latitude);
  206. + if (!p) {
  207. + continue;
  208. + }
  209. + p = parse_iso6709(p, &longitude);
  210. + if (!p) {
  211. + continue;
  212. + }
  213. +
  214. + if (!p || *p != '\t') {
  215. + continue;
  216. + }
  217. +
  218. + /* name = string */
  219. + name = ++p;
  220. + while (*p != '\t' && *p && *p != '\n')
  221. + p++;
  222. +
  223. + *p++ = '\0';
  224. +
  225. + /* comment = string */
  226. + comment = p;
  227. + while (*p != '\t' && *p && *p != '\n')
  228. + p++;
  229. +
  230. + if (*p == '\n' || *p == '\t')
  231. + *p = '\0';
  232. +
  233. + hash = tz_hash(name);
  234. + i = malloc(sizeof *i);
  235. + memcpy(i->code, code, 2);
  236. + strncpy(i->name, name, sizeof i->name);
  237. + i->comment = strdup(comment);
  238. + i->longitude = longitude;
  239. + i->latitude = latitude;
  240. + i->next = li[hash];
  241. + li[hash] = i;
  242. + /* printf("%s [%u, %f, %f]\n", name, hash, latitude, longitude); */
  243. + }
  244. +
  245. + fclose(fp);
  246. +
  247. + return li;
  248. +}
  249. +
  250. +/* Return location info from hash table, using given timezone name.
  251. + * Returns NULL if the name could not be found. */
  252. +const struct location_info *find_zone_info(struct location_info **li,
  253. + const char *name)
  254. +{
  255. + uint32_t hash = tz_hash(name);
  256. + const struct location_info *l;
  257. +
  258. + if (!li) {
  259. + return NULL;
  260. + }
  261. +
  262. + for (l = li[hash]; l; l = l->next) {
  263. + if (strcasecmp(l->name, name) == 0)
  264. + return l;
  265. + }
  266. +
  267. + return NULL;
  268. +}
  269. +
  270. +/* Filter out some non-tzdata files and the posix/right databases, if
  271. + * present. */
  272. +static int index_filter(const struct dirent *ent)
  273. +{
  274. + return strcmp(ent->d_name, ".") != 0
  275. + && strcmp(ent->d_name, "..") != 0
  276. + && strcmp(ent->d_name, "posix") != 0
  277. + && strcmp(ent->d_name, "posixrules") != 0
  278. + && strcmp(ent->d_name, "right") != 0
  279. + && strstr(ent->d_name, ".tab") == NULL;
  280. +}
  281. +
  282. +static int sysdbcmp(const void *first, const void *second)
  283. +{
  284. + const timelib_tzdb_index_entry *alpha = first, *beta = second;
  285. +
  286. + return strcmp(alpha->id, beta->id);
  287. +}
  288. +
  289. +
  290. +/* Create the zone identifier index by trawling the filesystem. */
  291. +static void create_zone_index(timelib_tzdb *db)
  292. +{
  293. + size_t dirstack_size, dirstack_top;
  294. + size_t index_size, index_next;
  295. + timelib_tzdb_index_entry *db_index;
  296. + char **dirstack;
  297. +
  298. + /* LIFO stack to hold directory entries to scan; each slot is a
  299. + * directory name relative to the zoneinfo prefix. */
  300. + dirstack_size = 32;
  301. + dirstack = malloc(dirstack_size * sizeof *dirstack);
  302. + dirstack_top = 1;
  303. + dirstack[0] = strdup("");
  304. +
  305. + /* Index array. */
  306. + index_size = 64;
  307. + db_index = malloc(index_size * sizeof *db_index);
  308. + index_next = 0;
  309. +
  310. + do {
  311. + struct dirent **ents;
  312. + char name[PATH_MAX], *top;
  313. + int count;
  314. +
  315. + /* Pop the top stack entry, and iterate through its contents. */
  316. + top = dirstack[--dirstack_top];
  317. + snprintf(name, sizeof name, ZONEINFO_PREFIX "/%s", top);
  318. +
  319. + count = php_scandir(name, &ents, index_filter, php_alphasort);
  320. +
  321. + while (count > 0) {
  322. + struct stat st;
  323. + const char *leaf = ents[count - 1]->d_name;
  324. +
  325. + snprintf(name, sizeof name, ZONEINFO_PREFIX "/%s/%s",
  326. + top, leaf);
  327. +
  328. + if (strlen(name) && stat(name, &st) == 0) {
  329. + /* Name, relative to the zoneinfo prefix. */
  330. + const char *root = top;
  331. +
  332. + if (root[0] == '/') root++;
  333. +
  334. + snprintf(name, sizeof name, "%s%s%s", root,
  335. + *root ? "/": "", leaf);
  336. +
  337. + if (S_ISDIR(st.st_mode)) {
  338. + if (dirstack_top == dirstack_size) {
  339. + dirstack_size *= 2;
  340. + dirstack = realloc(dirstack,
  341. + dirstack_size * sizeof *dirstack);
  342. + }
  343. + dirstack[dirstack_top++] = strdup(name);
  344. + }
  345. + else {
  346. + if (index_next == index_size) {
  347. + index_size *= 2;
  348. + db_index = realloc(db_index,
  349. + index_size * sizeof *db_index);
  350. + }
  351. +
  352. + db_index[index_next++].id = strdup(name);
  353. + }
  354. + }
  355. +
  356. + free(ents[--count]);
  357. + }
  358. +
  359. + if (count != -1) free(ents);
  360. + free(top);
  361. + } while (dirstack_top);
  362. +
  363. + qsort(db_index, index_next, sizeof *db_index, sysdbcmp);
  364. +
  365. + db->index = db_index;
  366. + db->index_size = index_next;
  367. +
  368. + free(dirstack);
  369. +}
  370. +
  371. +#define FAKE_HEADER "1234\0??\1??"
  372. +#define FAKE_UTC_POS (7 - 4)
  373. +
  374. +/* Create a fake data segment for database 'sysdb'. */
  375. +static void fake_data_segment(timelib_tzdb *sysdb,
  376. + struct location_info **info)
  377. +{
  378. + size_t n;
  379. + char *data, *p;
  380. +
  381. + data = malloc(3 * sysdb->index_size + 7);
  382. +
  383. + p = mempcpy(data, FAKE_HEADER, sizeof(FAKE_HEADER) - 1);
  384. +
  385. + for (n = 0; n < sysdb->index_size; n++) {
  386. + const struct location_info *li;
  387. + timelib_tzdb_index_entry *ent;
  388. +
  389. + ent = (timelib_tzdb_index_entry *)&sysdb->index[n];
  390. +
  391. + /* Lookup the timezone name in the hash table. */
  392. + if (strcmp(ent->id, "UTC") == 0) {
  393. + ent->pos = FAKE_UTC_POS;
  394. + continue;
  395. + }
  396. +
  397. + li = find_zone_info(info, ent->id);
  398. + if (li) {
  399. + /* If found, append the BC byte and the
  400. + * country code; set the position for this
  401. + * section of timezone data. */
  402. + ent->pos = (p - data) - 4;
  403. + *p++ = '\1';
  404. + *p++ = li->code[0];
  405. + *p++ = li->code[1];
  406. + }
  407. + else {
  408. + /* If not found, the timezone data can
  409. + * point at the header. */
  410. + ent->pos = 0;
  411. + }
  412. + }
  413. +
  414. + sysdb->data = (unsigned char *)data;
  415. +}
  416. +
  417. +/* Returns true if the passed-in stat structure describes a
  418. + * probably-valid timezone file. */
  419. +static int is_valid_tzfile(const struct stat *st)
  420. +{
  421. + return S_ISREG(st->st_mode) && st->st_size > 20;
  422. +}
  423. +
  424. +/* Return the mmap()ed tzfile if found, else NULL. On success, the
  425. + * length of the mapped data is placed in *length. */
  426. +static char *map_tzfile(const char *timezone, size_t *length)
  427. +{
  428. + char fname[PATH_MAX];
  429. + struct stat st;
  430. + char *p;
  431. + int fd;
  432. +
  433. + if (timezone[0] == '\0' || strstr(timezone, "..") != NULL) {
  434. + return NULL;
  435. + }
  436. +
  437. + snprintf(fname, sizeof fname, ZONEINFO_PREFIX "/%s", timezone);
  438. +
  439. + fd = open(fname, O_RDONLY);
  440. + if (fd == -1) {
  441. + return NULL;
  442. + } else if (fstat(fd, &st) != 0 || !is_valid_tzfile(&st)) {
  443. + close(fd);
  444. + return NULL;
  445. + }
  446. +
  447. + *length = st.st_size;
  448. + p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
  449. + close(fd);
  450. +
  451. + return p != MAP_FAILED ? p : NULL;
  452. +}
  453. +
  454. +#endif
  455. +
  456. +static int inmem_seek_to_tz_position(const unsigned char **tzf, char *timezone, const timelib_tzdb *tzdb)
  457. {
  458. int left = 0, right = tzdb->index_size - 1;
  459. #ifdef HAVE_SETLOCALE
  460. @@ -295,36 +705,128 @@ static int seek_to_tz_position(const uns
  461. return 0;
  462. }
  463. +static int seek_to_tz_position(const unsigned char **tzf, char *timezone,
  464. + char **map, size_t *maplen,
  465. + const timelib_tzdb *tzdb)
  466. +{
  467. +#ifdef HAVE_SYSTEM_TZDATA
  468. + if (tzdb == timezonedb_system) {
  469. + char *orig;
  470. +
  471. + orig = map_tzfile(timezone, maplen);
  472. + if (orig == NULL) {
  473. + return 0;
  474. + }
  475. +
  476. + (*tzf) = (unsigned char *)orig ;
  477. + *map = orig;
  478. +
  479. + return 1;
  480. + }
  481. + else
  482. +#endif
  483. + {
  484. + return inmem_seek_to_tz_position(tzf, timezone, tzdb);
  485. + }
  486. +}
  487. +
  488. const timelib_tzdb *timelib_builtin_db(void)
  489. {
  490. +#ifdef HAVE_SYSTEM_TZDATA
  491. + if (timezonedb_system == NULL) {
  492. + timelib_tzdb *tmp = malloc(sizeof *tmp);
  493. +
  494. + tmp->version = "0.system";
  495. + tmp->data = NULL;
  496. + create_zone_index(tmp);
  497. + system_location_table = create_location_table();
  498. + fake_data_segment(tmp, system_location_table);
  499. + timezonedb_system = tmp;
  500. + }
  501. +
  502. +
  503. + return timezonedb_system;
  504. +#else
  505. return &timezonedb_builtin;
  506. +#endif
  507. }
  508. const timelib_tzdb_index_entry *timelib_timezone_builtin_identifiers_list(int *count)
  509. {
  510. +#ifdef HAVE_SYSTEM_TZDATA
  511. + *count = timezonedb_system->index_size;
  512. + return timezonedb_system->index;
  513. +#else
  514. *count = sizeof(timezonedb_idx_builtin) / sizeof(*timezonedb_idx_builtin);
  515. return timezonedb_idx_builtin;
  516. +#endif
  517. }
  518. int timelib_timezone_id_is_valid(char *timezone, const timelib_tzdb *tzdb)
  519. {
  520. const unsigned char *tzf;
  521. - return (seek_to_tz_position(&tzf, timezone, tzdb));
  522. +
  523. +#ifdef HAVE_SYSTEM_TZDATA
  524. + if (tzdb == timezonedb_system) {
  525. + char fname[PATH_MAX];
  526. + struct stat st;
  527. +
  528. + if (timezone[0] == '\0' || strstr(timezone, "..") != NULL) {
  529. + return 0;
  530. + }
  531. +
  532. + snprintf(fname, sizeof fname, ZONEINFO_PREFIX "/%s", timezone);
  533. +
  534. + return stat(fname, &st) == 0 && is_valid_tzfile(&st);
  535. + }
  536. +#endif
  537. +
  538. + return (inmem_seek_to_tz_position(&tzf, timezone, tzdb));
  539. }
  540. timelib_tzinfo *timelib_parse_tzfile(char *timezone, const timelib_tzdb *tzdb)
  541. {
  542. const unsigned char *tzf;
  543. + char *memmap = NULL;
  544. + size_t maplen;
  545. timelib_tzinfo *tmp;
  546. - if (seek_to_tz_position(&tzf, timezone, tzdb)) {
  547. + if (seek_to_tz_position(&tzf, timezone, &memmap, &maplen, tzdb)) {
  548. tmp = timelib_tzinfo_ctor(timezone);
  549. read_preamble(&tzf, tmp);
  550. read_header(&tzf, tmp);
  551. read_transistions(&tzf, tmp);
  552. read_types(&tzf, tmp);
  553. - read_location(&tzf, tmp);
  554. +
  555. +#ifdef HAVE_SYSTEM_TZDATA
  556. + if (memmap) {
  557. + const struct location_info *li;
  558. +
  559. + /* TZif-style - grok the location info from the system database,
  560. + * if possible. */
  561. +
  562. + if ((li = find_zone_info(system_location_table, timezone)) != NULL) {
  563. + tmp->location.comments = strdup(li->comment);
  564. + strncpy(tmp->location.country_code, li->code, 2);
  565. + tmp->location.longitude = li->longitude;
  566. + tmp->location.latitude = li->latitude;
  567. + tmp->bc = 1;
  568. + }
  569. + else {
  570. + strcpy(tmp->location.country_code, "??");
  571. + tmp->bc = 0;
  572. + tmp->location.comments = strdup("");
  573. + }
  574. +
  575. + /* Now done with the mmap segment - discard it. */
  576. + munmap(memmap, maplen);
  577. + } else
  578. +#endif
  579. + {
  580. + /* PHP-style - use the embedded info. */
  581. + read_location(&tzf, tmp);
  582. + }
  583. } else {
  584. tmp = NULL;
  585. }
  586. --- a/ext/date/lib/timelib.m4
  587. +++ b/ext/date/lib/timelib.m4
  588. @@ -78,3 +78,17 @@ stdlib.h
  589. dnl Check for strtoll, atoll
  590. AC_CHECK_FUNCS(strtoll atoll strftime)
  591. +
dnl Optional use of the system timezone database instead of the
dnl embedded copy; disabled ("no") by default.
PHP_ARG_WITH(system-tzdata, for use of system timezone data,
[ --with-system-tzdata[=DIR] to specify use of system timezone data],
no, no)

if test "$PHP_SYSTEM_TZDATA" != "no"; then
  AC_DEFINE(HAVE_SYSTEM_TZDATA, 1, [Define if system timezone data is used])

  dnl Any value other than a plain "yes" is taken as the directory
  dnl holding the timezone files.
  if test "$PHP_SYSTEM_TZDATA" != "yes"; then
    AC_DEFINE_UNQUOTED(HAVE_SYSTEM_TZDATA_PREFIX, "$PHP_SYSTEM_TZDATA",
      [Define for location of system timezone data])
  fi
fi
  604. +