You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

673 lines
18 KiB

From: Joe Orton <jorton@redhat.com>
Date: Sun, 18 Oct 2015 02:15:17 +0200
Subject: Add support for use of the system timezone database
Add support for use of the system timezone database, rather
than embedding a copy. Discussed upstream but was not desired.
History:
r13: adapt for upstream changes to use PHP allocator
r12: adapt for upstream changes for new zic
r11: use canonical names to avoid more case sensitivity issues
round lat/long from zone.tab towards zero per builtin db
r10: make timezone case insensitive
r9: fix another compile error without --with-system-tzdata configured (Michael Heimpold)
r8: fix compile error without --with-system-tzdata configured
r7: improve check for valid timezone id to exclude directories
r6: fix fd leak in r5, fix country code/BC flag use in
timezone_identifiers_list() using system db,
fix use of PECL timezonedb to override system db,
r5: reverts addition of "System/Localtime" fake tzname.
updated for 5.3.0, parses zone.tab to pick up mapping between
timezone name, country code and long/lat coords
r4: added "System/Localtime" tzname which uses /etc/localtime
r3: fix a crash if /usr/share/zoneinfo doesn't exist (Raphael Geissert)
r2: add filesystem trawl to set up name alias index
r1: initial revision
---
ext/date/lib/parse_tz.c | 549 +++++++++++++++++++++++++++++++++++++++++++++++-
ext/date/lib/timelib.m4 | 14 ++
2 files changed, 552 insertions(+), 11 deletions(-)
diff --git a/ext/date/lib/parse_tz.c b/ext/date/lib/parse_tz.c
index 20d7eea..6301dc5 100644
--- a/ext/date/lib/parse_tz.c
+++ b/ext/date/lib/parse_tz.c
@@ -24,6 +24,16 @@
#include "timelib.h"
+#ifdef HAVE_SYSTEM_TZDATA
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "php_scandir.h"
+#endif
+
#include <stdio.h>
#ifdef HAVE_LOCALE_H
@@ -36,8 +46,12 @@
#include <strings.h>
#endif
+#ifndef HAVE_SYSTEM_TZDATA
#define TIMELIB_SUPPORTS_V2DATA
#include "timezonedb.h"
+#endif
+
+#include <ctype.h>
#if (defined(__APPLE__) || defined(__APPLE_CC__)) && (defined(__BIG_ENDIAN__) || defined(__LITTLE_ENDIAN__))
# if defined(__LITTLE_ENDIAN__)
@@ -59,6 +73,11 @@ static int read_preamble(const unsigned char **tzf, timelib_tzinfo *tz)
{
uint32_t version;
+ if (memcmp(*tzf, "TZif", 4) == 0) {
+ *tzf += 20;
+ return 0;
+ }
+
/* read ID */
version = (*tzf)[3] - '0';
*tzf += 4;
@@ -302,7 +321,418 @@ void timelib_dump_tzinfo(timelib_tzinfo *tz)
}
}
-static int seek_to_tz_position(const unsigned char **tzf, char *timezone, const timelib_tzdb *tzdb)
+#ifdef HAVE_SYSTEM_TZDATA
+
+#ifdef HAVE_SYSTEM_TZDATA_PREFIX
+#define ZONEINFO_PREFIX HAVE_SYSTEM_TZDATA_PREFIX
+#else
+#define ZONEINFO_PREFIX "/usr/share/zoneinfo"
+#endif
+
+/* System timezone database pointer. */
+static const timelib_tzdb *timezonedb_system;
+
+/* Hash table entry for the cache of the zone.tab mapping table. */
+struct location_info {
+ char code[2];
+ double latitude, longitude;
+ char name[64];
+ char *comment;
+ struct location_info *next;
+};
+
+/* Cache of zone.tab. */
+static struct location_info **system_location_table;
+
+/* Size of the zone.tab hash table; a random-ish prime big enough to
+ * prevent too many collisions. */
+#define LOCINFO_HASH_SIZE (1021)
+
+/* Compute a case insensitive hash of str */
+static uint32_t tz_hash(const char *str)
+{
+ const unsigned char *p = (const unsigned char *)str;
+ uint32_t hash = 5381;
+ int c;
+
+ while ((c = tolower(*p++)) != '\0') {
+ hash = (hash << 5) ^ hash ^ c;
+ }
+
+ return hash % LOCINFO_HASH_SIZE;
+}
+
+/* Parse an ISO-6709 date as used in zone.tab. Returns end of the
+ * parsed string on success, or NULL on parse error. On success,
+ * writes the parsed number to *result. */
+static char *parse_iso6709(char *p, double *result)
+{
+ double v, sign;
+ char *pend;
+ size_t len;
+
+ if (*p == '+')
+ sign = 1.0;
+ else if (*p == '-')
+ sign = -1.0;
+ else
+ return NULL;
+
+ p++;
+ for (pend = p; *pend >= '0' && *pend <= '9'; pend++)
+ ;;
+
+ /* Annoying encoding used by zone.tab has no decimal point, so use
+ * the length to determine the format:
+ *
+ * 4 = DDMM
+ * 5 = DDDMM
+ * 6 = DDMMSS
+ * 7 = DDDMMSS
+ */
+ len = pend - p;
+ if (len < 4 || len > 7) {
+ return NULL;
+ }
+
+ /* p => [D]DD */
+ v = (p[0] - '0') * 10.0 + (p[1] - '0');
+ p += 2;
+ if (len == 5 || len == 7)
+ v = v * 10.0 + (*p++ - '0');
+ /* p => MM[SS] */
+ v += (10.0 * (p[0] - '0')
+ + p[1] - '0') / 60.0;
+ p += 2;
+ /* p => [SS] */
+ if (len > 5) {
+ v += (10.0 * (p[0] - '0')
+ + p[1] - '0') / 3600.0;
+ p += 2;
+ }
+
+ /* Round to five decimal place, not because it's a good idea,
+ * but, because the builtin data uses rounded data, so, match
+ * that. */
+ *result = trunc(v * sign * 100000.0) / 100000.0;
+
+ return p;
+}
+
+/* This function parses the zone.tab file to build up the mapping of
+ * timezone to country code and geographic location, and returns a
+ * hash table. The hash table is indexed by the function:
+ *
+ * tz_hash(timezone-name)
+ */
+static struct location_info **create_location_table(void)
+{
+ struct location_info **li, *i;
+ char zone_tab[PATH_MAX];
+ char line[512];
+ FILE *fp;
+
+ strncpy(zone_tab, ZONEINFO_PREFIX "/zone.tab", sizeof zone_tab);
+
+ fp = fopen(zone_tab, "r");
+ if (!fp) {
+ return NULL;
+ }
+
+ li = calloc(LOCINFO_HASH_SIZE, sizeof *li);
+
+ while (fgets(line, sizeof line, fp)) {
+ char *p = line, *code, *name, *comment;
+ uint32_t hash;
+ double latitude, longitude;
+
+ while (isspace(*p))
+ p++;
+
+ if (*p == '#' || *p == '\0' || *p == '\n')
+ continue;
+
+ if (!isalpha(p[0]) || !isalpha(p[1]) || p[2] != '\t')
+ continue;
+
+ /* code => AA */
+ code = p;
+ p[2] = 0;
+ p += 3;
+
+ /* coords => [+-][D]DDMM[SS][+-][D]DDMM[SS] */
+ p = parse_iso6709(p, &latitude);
+ if (!p) {
+ continue;
+ }
+ p = parse_iso6709(p, &longitude);
+ if (!p) {
+ continue;
+ }
+
+ if (!p || *p != '\t') {
+ continue;
+ }
+
+ /* name = string */
+ name = ++p;
+ while (*p != '\t' && *p && *p != '\n')
+ p++;
+
+ *p++ = '\0';
+
+ /* comment = string */
+ comment = p;
+ while (*p != '\t' && *p && *p != '\n')
+ p++;
+
+ if (*p == '\n' || *p == '\t')
+ *p = '\0';
+
+ hash = tz_hash(name);
+ i = malloc(sizeof *i);
+ memcpy(i->code, code, 2);
+ strncpy(i->name, name, sizeof i->name);
+ i->comment = strdup(comment);
+ i->longitude = longitude;
+ i->latitude = latitude;
+ i->next = li[hash];
+ li[hash] = i;
+ /* printf("%s [%u, %f, %f]\n", name, hash, latitude, longitude); */
+ }
+
+ fclose(fp);
+
+ return li;
+}
+
+/* Return location info from hash table, using given timezone name.
+ * Returns NULL if the name could not be found. */
+const struct location_info *find_zone_info(struct location_info **li,
+ const char *name)
+{
+ uint32_t hash = tz_hash(name);
+ const struct location_info *l;
+
+ if (!li) {
+ return NULL;
+ }
+
+ for (l = li[hash]; l; l = l->next) {
+ if (strcasecmp(l->name, name) == 0)
+ return l;
+ }
+
+ return NULL;
+}
+
+/* Filter out some non-tzdata files and the posix/right databases, if
+ * present. */
+static int index_filter(const struct dirent *ent)
+{
+ return strcmp(ent->d_name, ".") != 0
+ && strcmp(ent->d_name, "..") != 0
+ && strcmp(ent->d_name, "posix") != 0
+ && strcmp(ent->d_name, "posixrules") != 0
+ && strcmp(ent->d_name, "right") != 0
+ && strstr(ent->d_name, ".tab") == NULL;
+}
+
+static int sysdbcmp(const void *first, const void *second)
+{
+ const timelib_tzdb_index_entry *alpha = first, *beta = second;
+
+ return strcasecmp(alpha->id, beta->id);
+}
+
+
+/* Create the zone identifier index by trawling the filesystem. */
+static void create_zone_index(timelib_tzdb *db)
+{
+ size_t dirstack_size, dirstack_top;
+ size_t index_size, index_next;
+ timelib_tzdb_index_entry *db_index;
+ char **dirstack;
+
+ /* LIFO stack to hold directory entries to scan; each slot is a
+ * directory name relative to the zoneinfo prefix. */
+ dirstack_size = 32;
+ dirstack = malloc(dirstack_size * sizeof *dirstack);
+ dirstack_top = 1;
+ dirstack[0] = strdup("");
+
+ /* Index array. */
+ index_size = 64;
+ db_index = malloc(index_size * sizeof *db_index);
+ index_next = 0;
+
+ do {
+ struct dirent **ents;
+ char name[PATH_MAX], *top;
+ int count;
+
+ /* Pop the top stack entry, and iterate through its contents. */
+ top = dirstack[--dirstack_top];
+ snprintf(name, sizeof name, ZONEINFO_PREFIX "/%s", top);
+
+ count = php_scandir(name, &ents, index_filter, php_alphasort);
+
+ while (count > 0) {
+ struct stat st;
+ const char *leaf = ents[count - 1]->d_name;
+
+ snprintf(name, sizeof name, ZONEINFO_PREFIX "/%s/%s",
+ top, leaf);
+
+ if (strlen(name) && stat(name, &st) == 0) {
+ /* Name, relative to the zoneinfo prefix. */
+ const char *root = top;
+
+ if (root[0] == '/') root++;
+
+ snprintf(name, sizeof name, "%s%s%s", root,
+ *root ? "/": "", leaf);
+
+ if (S_ISDIR(st.st_mode)) {
+ if (dirstack_top == dirstack_size) {
+ dirstack_size *= 2;
+ dirstack = realloc(dirstack,
+ dirstack_size * sizeof *dirstack);
+ }
+ dirstack[dirstack_top++] = strdup(name);
+ }
+ else {
+ if (index_next == index_size) {
+ index_size *= 2;
+ db_index = realloc(db_index,
+ index_size * sizeof *db_index);
+ }
+
+ db_index[index_next++].id = strdup(name);
+ }
+ }
+
+ free(ents[--count]);
+ }
+
+ if (count != -1) free(ents);
+ free(top);
+ } while (dirstack_top);
+
+ qsort(db_index, index_next, sizeof *db_index, sysdbcmp);
+
+ db->index = db_index;
+ db->index_size = index_next;
+
+ free(dirstack);
+}
+
+#define FAKE_HEADER "1234\0??\1??"
+#define FAKE_UTC_POS (7 - 4)
+
+/* Create a fake data segment for database 'sysdb'. */
+static void fake_data_segment(timelib_tzdb *sysdb,
+ struct location_info **info)
+{
+ size_t n;
+ char *data, *p;
+
+ data = malloc(3 * sysdb->index_size + 7);
+
+ p = mempcpy(data, FAKE_HEADER, sizeof(FAKE_HEADER) - 1);
+
+ for (n = 0; n < sysdb->index_size; n++) {
+ const struct location_info *li;
+ timelib_tzdb_index_entry *ent;
+
+ ent = (timelib_tzdb_index_entry *)&sysdb->index[n];
+
+ /* Lookup the timezone name in the hash table. */
+ if (strcmp(ent->id, "UTC") == 0) {
+ ent->pos = FAKE_UTC_POS;
+ continue;
+ }
+
+ li = find_zone_info(info, ent->id);
+ if (li) {
+ /* If found, append the BC byte and the
+ * country code; set the position for this
+ * section of timezone data. */
+ ent->pos = (p - data) - 4;
+ *p++ = '\1';
+ *p++ = li->code[0];
+ *p++ = li->code[1];
+ }
+ else {
+ /* If not found, the timezone data can
+ * point at the header. */
+ ent->pos = 0;
+ }
+ }
+
+ sysdb->data = (unsigned char *)data;
+}
+
+/* Returns true if the passed-in stat structure describes a
+ * probably-valid timezone file. */
+static int is_valid_tzfile(const struct stat *st)
+{
+ return S_ISREG(st->st_mode) && st->st_size > 20;
+}
+
+/* To allow timezone names to be used case-insensitively, find the
+ * canonical name for this timezone, if possible. */
+static const char *canonical_tzname(const char *timezone)
+{
+ if (timezonedb_system) {
+ timelib_tzdb_index_entry *ent, lookup;
+
+ lookup.id = (char *)timezone;
+
+ ent = bsearch(&lookup, timezonedb_system->index,
+ timezonedb_system->index_size, sizeof lookup,
+ sysdbcmp);
+ if (ent) {
+ return ent->id;
+ }
+ }
+
+ return timezone;
+}
+
+/* Return the mmap()ed tzfile if found, else NULL. On success, the
+ * length of the mapped data is placed in *length. */
+static char *map_tzfile(const char *timezone, size_t *length)
+{
+ char fname[PATH_MAX];
+ struct stat st;
+ char *p;
+ int fd;
+
+ if (timezone[0] == '\0' || strstr(timezone, "..") != NULL) {
+ return NULL;
+ }
+
+ snprintf(fname, sizeof fname, ZONEINFO_PREFIX "/%s", canonical_tzname(timezone));
+
+ fd = open(fname, O_RDONLY);
+ if (fd == -1) {
+ return NULL;
+ } else if (fstat(fd, &st) != 0 || !is_valid_tzfile(&st)) {
+ close(fd);
+ return NULL;
+ }
+
+ *length = st.st_size;
+ p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
+ close(fd);
+
+ return p != MAP_FAILED ? p : NULL;
+}
+
+#endif
+
+static int inmem_seek_to_tz_position(const unsigned char **tzf, char *timezone, const timelib_tzdb *tzdb)
{
int left = 0, right = tzdb->index_size - 1;
#ifdef HAVE_SETLOCALE
@@ -341,21 +771,88 @@ static int seek_to_tz_position(const unsigned char **tzf, char *timezone, const
return 0;
}
+static int seek_to_tz_position(const unsigned char **tzf, char *timezone,
+ char **map, size_t *maplen,
+ const timelib_tzdb *tzdb)
+{
+#ifdef HAVE_SYSTEM_TZDATA
+ if (tzdb == timezonedb_system) {
+ char *orig;
+
+ orig = map_tzfile(timezone, maplen);
+ if (orig == NULL) {
+ return 0;
+ }
+
+ (*tzf) = (unsigned char *)orig;
+ *map = orig;
+ return 1;
+ }
+ else
+#endif
+ {
+ return inmem_seek_to_tz_position(tzf, timezone, tzdb);
+ }
+}
+
const timelib_tzdb *timelib_builtin_db(void)
{
+#ifdef HAVE_SYSTEM_TZDATA
+ if (timezonedb_system == NULL) {
+ timelib_tzdb *tmp = malloc(sizeof *tmp);
+
+ tmp->version = "0.system";
+ tmp->data = NULL;
+ create_zone_index(tmp);
+ system_location_table = create_location_table();
+ fake_data_segment(tmp, system_location_table);
+ timezonedb_system = tmp;
+ }
+
+ return timezonedb_system;
+#else
return &timezonedb_builtin;
+#endif
}
const timelib_tzdb_index_entry *timelib_timezone_builtin_identifiers_list(int *count)
{
+#ifdef HAVE_SYSTEM_TZDATA
+ *count = timezonedb_system->index_size;
+ return timezonedb_system->index;
+#else
*count = sizeof(timezonedb_idx_builtin) / sizeof(*timezonedb_idx_builtin);
return timezonedb_idx_builtin;
+#endif
}
int timelib_timezone_id_is_valid(char *timezone, const timelib_tzdb *tzdb)
{
const unsigned char *tzf;
- return (seek_to_tz_position(&tzf, timezone, tzdb));
+
+#ifdef HAVE_SYSTEM_TZDATA
+ if (tzdb == timezonedb_system) {
+ char fname[PATH_MAX];
+ struct stat st;
+
+ if (timezone[0] == '\0' || strstr(timezone, "..") != NULL) {
+ return 0;
+ }
+
+ if (system_location_table) {
+ if (find_zone_info(system_location_table, timezone) != NULL) {
+ /* found in cache */
+ return 1;
+ }
+ }
+
+ snprintf(fname, sizeof fname, ZONEINFO_PREFIX "/%s", canonical_tzname(timezone));
+
+ return stat(fname, &st) == 0 && is_valid_tzfile(&st);
+ }
+#endif
+
+ return (inmem_seek_to_tz_position(&tzf, timezone, tzdb));
}
static void skip_64bit_preamble(const unsigned char **tzf, timelib_tzinfo *tz)
@@ -380,24 +877,54 @@ static void read_64bit_header(const unsigned char **tzf, timelib_tzinfo *tz)
timelib_tzinfo *timelib_parse_tzfile(char *timezone, const timelib_tzdb *tzdb)
{
const unsigned char *tzf;
+ char *memmap = NULL;
+ size_t maplen;
timelib_tzinfo *tmp;
int version;
- if (seek_to_tz_position(&tzf, timezone, tzdb)) {
+ if (seek_to_tz_position(&tzf, timezone, &memmap, &maplen, tzdb)) {
tmp = timelib_tzinfo_ctor(timezone);
version = read_preamble(&tzf, tmp);
read_header(&tzf, tmp);
read_transistions(&tzf, tmp);
read_types(&tzf, tmp);
- if (version == 2) {
- skip_64bit_preamble(&tzf, tmp);
- read_64bit_header(&tzf, tmp);
- skip_64bit_transistions(&tzf, tmp);
- skip_64bit_types(&tzf, tmp);
- skip_posix_string(&tzf, tmp);
- }
- read_location(&tzf, tmp);
+
+#ifdef HAVE_SYSTEM_TZDATA
+ if (memmap) {
+ const struct location_info *li;
+
+ /* TZif-style - grok the location info from the system database,
+ * if possible. */
+
+ if ((li = find_zone_info(system_location_table, timezone)) != NULL) {
+ tmp->location.comments = timelib_strdup(li->comment);
+ strncpy(tmp->location.country_code, li->code, 2);
+ tmp->location.longitude = li->longitude;
+ tmp->location.latitude = li->latitude;
+ tmp->bc = 1;
+ }
+ else {
+ strcpy(tmp->location.country_code, "??");
+ tmp->bc = 0;
+ tmp->location.comments = timelib_strdup("");
+ }
+
+ /* Now done with the mmap segment - discard it. */
+ munmap(memmap, maplen);
+ } else
+#endif
+ {
+ /* PHP-style - use the embedded info. */
+ if (version == 2) {
+ skip_64bit_preamble(&tzf, tmp);
+ read_64bit_header(&tzf, tmp);
+ skip_64bit_transistions(&tzf, tmp);
+ skip_64bit_types(&tzf, tmp);
+ skip_posix_string(&tzf, tmp);
+ }
+ read_location(&tzf, tmp);
+ }
} else {
tmp = NULL;
}
diff --git a/ext/date/lib/timelib.m4 b/ext/date/lib/timelib.m4
index c725572..4c837c7 100644
--- a/ext/date/lib/timelib.m4
+++ b/ext/date/lib/timelib.m4
@@ -78,3 +78,17 @@ stdlib.h
dnl Check for strtoll, atoll
AC_CHECK_FUNCS(strtoll atoll strftime)
+
+PHP_ARG_WITH(system-tzdata, for use of system timezone data,
+[ --with-system-tzdata[=DIR] to specify use of system timezone data],
+no, no)
+
+if test "$PHP_SYSTEM_TZDATA" != "no"; then
+ AC_DEFINE(HAVE_SYSTEM_TZDATA, 1, [Define if system timezone data is used])
+
+ if test "$PHP_SYSTEM_TZDATA" != "yes"; then
+ AC_DEFINE_UNQUOTED(HAVE_SYSTEM_TZDATA_PREFIX, "$PHP_SYSTEM_TZDATA",
+ [Define for location of system timezone data])
+ fi
+fi
+