From 544fd974ebbd885b3eb3b583f6e5af366db322a4 Mon Sep 17 00:00:00 2001
From: jim <jim@rybarski.com>
Date: Sat, 26 Sep 2015 17:49:57 -0500
Subject: [PATCH] #66 more refactoring

---
 nd2reader/driver/driver.py                 |  11 -
 nd2reader/driver/v3.py                     | 397 ---------------------
 nd2reader/{driver => imreader}/__init__.py |   0
 nd2reader/{driver => imreader}/v2.py       |   0
 nd2reader/imreader/v3.py                   |  96 +++++
 nd2reader/interface.py                     |   6 +-
 nd2reader/model/metadata.py                |  62 ++++
 nd2reader/parser/__init__.py               |   1 +
 nd2reader/parser/parser.py                 |   2 +
 nd2reader/{parser.py => parser/v2.py}      |   0
 nd2reader/parser/v3.py                     | 295 +++++++++++++++
 11 files changed, 459 insertions(+), 411 deletions(-)
 delete mode 100644 nd2reader/driver/driver.py
 delete mode 100644 nd2reader/driver/v3.py
 rename nd2reader/{driver => imreader}/__init__.py (100%)
 rename nd2reader/{driver => imreader}/v2.py (100%)
 create mode 100644 nd2reader/imreader/v3.py
 create mode 100644 nd2reader/model/metadata.py
 create mode 100644 nd2reader/parser/__init__.py
 create mode 100644 nd2reader/parser/parser.py
 rename nd2reader/{parser.py => parser/v2.py} (100%)
 create mode 100644 nd2reader/parser/v3.py

diff --git a/nd2reader/driver/driver.py b/nd2reader/driver/driver.py
deleted file mode 100644
index 8358870..0000000
--- a/nd2reader/driver/driver.py
+++ /dev/null
@@ -1,11 +0,0 @@
-def get_driver(filename, version):
-    """
-    Instantiates the correct driver for the ND2, which allows us to parse metadata and access images.
-
-    :param filename:    the path to the ND2
-    :type filename:     str
-    :param version:     the version of the ND2. Note that this is different than the version of NIS Elements used to create the ND2.
-    :type version:      tuple
-
-    """
-    return 1
diff --git a/nd2reader/driver/v3.py b/nd2reader/driver/v3.py
deleted file mode 100644
index c5a095b..0000000
--- a/nd2reader/driver/v3.py
+++ /dev/null
@@ -1,397 +0,0 @@
-# -*- coding: utf-8 -*-
-
-import array
-from datetime import datetime
-import numpy as np
-import re
-import struct
-import six
-from nd2reader.model.image import Image
-
-
-class Nd2Parser(object):
-    """
-    Reads .nd2 files, provides an interface to the metadata, and generates numpy arrays from the image data.
-    You should not ever need to instantiate this class manually unless you're a developer.
-
-    """
-    CHUNK_HEADER = 0xabeceda
-    CHUNK_MAP_START = six.b("ND2 FILEMAP SIGNATURE NAME 0001!")
-    CHUNK_MAP_END = six.b("ND2 CHUNK MAP SIGNATURE 0000001!")
-
-    def __init__(self, filename):
-        self._absolute_start = None
-        self._filename = filename
-        self._fh = None
-        self._channels = None
-        self._channel_count = None
-        self._chunk_map_start_location = None
-        self._cursor_position = 0
-        self._dimension_text = None
-        self._fields_of_view = None
-        self._label_map = {}
-        self._metadata = {}
-        self._read_map()
-        self._time_indexes = None
-        self._parse_metadata()
-        self._z_levels = None
-
-    def get_image(self, index):
-        channel_offset = index % len(self._metadata.channels)
-        fov = self._calculate_field_of_view(index)
-        channel = self._calculate_channel(index)
-        z_level = self._calculate_z_level(index)
-        image_group_number = int(index / len(self._metadata.channels))
-        frame_number = self._calculate_frame_number(image_group_number, fov, z_level)
-        timestamp, image = self._get_raw_image_data(image_group_number, channel_offset, self._metadata.height, self._metadata.width)
-        image.add_params(timestamp, frame_number, fov, channel, z_level)
-
-    @property
-    def absolute_start(self):
-        """
-        The date and time when acquisition began.
-
-        :rtype: datetime.datetime()
-
-        """
-        if self._absolute_start is None:
-            for line in self.metadata[six.b('ImageTextInfo')][six.b('SLxImageTextInfo')].values():
-                line = line.decode("utf8")
-                absolute_start_12 = None
-                absolute_start_24 = None
-                # ND2s seem to randomly switch between 12- and 24-hour representations.
-                try:
-                    absolute_start_24 = datetime.strptime(line, "%m/%d/%Y  %H:%M:%S")
-                except (TypeError, ValueError):
-                    pass
-                try:
-                    absolute_start_12 = datetime.strptime(line, "%m/%d/%Y  %I:%M:%S %p")
-                except (TypeError, ValueError):
-                    pass
-                if not absolute_start_12 and not absolute_start_24:
-                    continue
-                return absolute_start_12 if absolute_start_12 else absolute_start_24
-            raise ValueError("This ND2 has no recorded start time. This is probably a bug.")
-        return self._absolute_start
-
-    @property
-    def channels(self):
-        """
-        These are labels created by the NIS Elements user. Typically they may a short description of the filter cube
-        used (e.g. "bright field", "GFP", etc.)
-
-        :rtype: list
-
-        """
-        if not self._channels:
-            self._channels = []
-            metadata = self.metadata[six.b('ImageMetadataSeq')][six.b('SLxPictureMetadata')][six.b('sPicturePlanes')]
-            try:
-                validity = self.metadata[six.b('ImageMetadata')][six.b('SLxExperiment')][six.b('ppNextLevelEx')][six.b('')][0][six.b('ppNextLevelEx')][six.b('')][0][six.b('pItemValid')]
-            except KeyError:
-                # If none of the channels have been deleted, there is no validity list, so we just make one
-                validity = [True for _ in metadata]
-            # Channel information is contained in dictionaries with the keys a0, a1...an where the number
-            # indicates the order in which the channel is stored. So by sorting the dicts alphabetically
-            # we get the correct order.
-            for (label, chan), valid in zip(sorted(metadata[six.b('sPlaneNew')].items()), validity):
-                if not valid:
-                    continue
-                self._channels.append(chan[six.b('sDescription')].decode("utf8"))
-        return self._channels
-
-    @property
-    def fields_of_view(self):
-        """
-        The metadata contains information about fields of view, but it contains it even if some fields
-        of view were cropped. We can't find anything that states which fields of view are actually
-        in the image data, so we have to calculate it. There probably is something somewhere, since
-        NIS Elements can figure it out, but we haven't found it yet.
-
-        :rtype: list
-
-        """
-        if self._fields_of_view is None:
-            self._fields_of_view = self._parse_dimension_text(r""".*?XY\((\d+)\).*?""")
-        return self._fields_of_view
-
-    @property
-    def frames(self):
-        """
-        The number of cycles.
-
-        :rtype:     list
-
-        """
-        if self._time_indexes is None:
-            self._time_indexes = self._parse_dimension_text(r""".*?T'\((\d+)\).*?""")
-        return self._time_indexes
-
-    @property
-    def z_levels(self):
-        """
-        The different levels in the Z-plane. Just a sequence from 0 to n.
-
-        :rtype: list
-
-        """
-        if self._z_levels is None:
-            self._z_levels = self._parse_dimension_text(r""".*?Z\((\d+)\).*?""")
-        return self._z_levels
-
-    def _calculate_field_of_view(self, frame_number):
-        images_per_cycle = len(self.z_levels) * len(self.channels)
-        return int((frame_number - (frame_number % images_per_cycle)) / images_per_cycle) % len(self.fields_of_view)
-
-    def _calculate_channel(self, frame_number):
-        return self.channels[frame_number % len(self.channels)]
-
-    def _calculate_z_level(self, frame_number):
-        return self.z_levels[int(((frame_number - (frame_number % len(self.channels))) / len(self.channels)) % len(self.z_levels))]
-
-    @property
-    def _file_handle(self):
-        if self._fh is None:
-            self._fh = open(self._filename, "rb")
-        return self._fh
-
-    def _get_raw_image_data(self, image_group_number, channel_offset, height, width):
-        """
-        Reads the raw bytes and the timestamp of an image.
-
-        :param image_group_number: groups are made of images with the same time index, field of view and z-level.
-        :type image_group_number: int
-        :param channel_offset: the offset in the array where the bytes for this image are found.
-        :type channel_offset: int
-
-        :return: (int, array.array()) or None
-
-        """
-        chunk = self._label_map[six.b("ImageDataSeq|%d!" % image_group_number)]
-        data = self._read_chunk(chunk)
-        # All images in the same image group share the same timestamp! So if you have complicated image data,
-        # your timestamps may not be entirely accurate. Practically speaking though, they'll only be off by a few
-        # seconds unless you're doing something super weird.
-        timestamp = struct.unpack("d", data[:8])[0]
-        image_group_data = array.array("H", data)
-        image_data_start = 4 + channel_offset
-        # The images for the various channels are interleaved within the same array. For example, the second image
-        # of a four image group will be composed of bytes 2, 6, 10, etc. If you understand why someone would design
-        # a data structure that way, please send the author of this library a message.
-        image_data = np.reshape(image_group_data[image_data_start::len(self.channels)], (height, width))
-        # Skip images that are all zeros! This is important, since NIS Elements creates blank "gap" images if you
-        # don't have the same number of images each cycle. We discovered this because we only took GFP images every
-        # other cycle to reduce phototoxicity, but NIS Elements still allocated memory as if we were going to take
-        # them every cycle.
-        if np.any(image_data):
-            return timestamp, Image(image_data)
-        return None
-
-    @property
-    def _dimensions(self):
-        """
-        While there are metadata values that represent a lot of what we want to capture, they seem to be unreliable.
-        Sometimes certain elements don't exist, or change their data type randomly. However, the human-readable text
-        is always there and in the same exact format, so we just parse that instead.
-
-        :rtype: str
-
-        """
-        if self._dimension_text is None:
-            for line in self.metadata[six.b('ImageTextInfo')][six.b('SLxImageTextInfo')].values():
-                if six.b("Dimensions:") in line:
-                    metadata = line
-                    break
-            else:
-                raise ValueError("Could not parse metadata dimensions!")
-            for line in metadata.split(six.b("\r\n")):
-                if line.startswith(six.b("Dimensions:")):
-                    self._dimension_text = line
-                    break
-            else:
-                raise ValueError("Could not parse metadata dimensions!")
-        return self._dimension_text
-
-    def _calculate_image_group_number(self, time_index, fov, z_level):
-        """
-        Images are grouped together if they share the same time index, field of view, and z-level.
-
-        :type time_index: int
-        :type fov: int
-        :type z_level: int
-
-        :rtype: int
-
-        """
-        return time_index * len(self.fields_of_view) * len(self.z_levels) + (fov * len(self.z_levels) + z_level)
-
-    def _calculate_frame_number(self, image_group_number, fov, z_level):
-        return (image_group_number - (fov * len(self.z_levels) + z_level)) / (len(self.fields_of_view) * len(self.z_levels))
-
-    @property
-    def _channel_offset(self):
-        """
-        Image data is interleaved for each image set. That is, if there are four images in a set, the first image
-        will consist of pixels 1, 5, 9, etc, the second will be pixels 2, 6, 10, and so forth.
-
-        :rtype: dict
-
-        """
-        channel_offset = {}
-        for n, channel in enumerate(self._channels):
-            channel_offset[channel] = n
-        return channel_offset
-
-    def _parse_dimension_text(self, pattern):
-        try:
-            count = int(re.match(pattern, self._dimensions).group(1))
-        except AttributeError:
-            return [0]
-        except TypeError:
-            match = re.match(pattern, self._dimensions.decode("utf8"))
-            if not match:
-                return [0]
-            return list(range(int(match.group(1))))
-        else:
-            return list(range(count))
-
-    @property
-    def _total_images_per_channel(self):
-        """
-        The total number of images per channel. Warning: this may be inaccurate as it includes "gap" images.
-
-        :rtype: int
-
-        """
-        return self.metadata[six.b('ImageAttributes')][six.b('SLxImageAttributes')][six.b('uiSequenceCount')]
-
-    def _parse_metadata(self):
-        """
-        Reads all metadata.
-
-        """
-        for label in self._label_map.keys():
-            if label.endswith(six.b("LV!")) or six.b("LV|") in label:
-                data = self._read_chunk(self._label_map[label])
-                stop = label.index(six.b("LV"))
-                self.metadata[label[:stop]] = self._read_metadata(data, 1)
-
-    def _read_map(self):
-        """
-        Every label ends with an exclamation point, however, we can't directly search for those to find all the labels
-        as some of the bytes contain the value 33, which is the ASCII code for "!". So we iteratively find each label,
-        grab the subsequent data (always 16 bytes long), advance to the next label and repeat.
-
-        """
-        self._file_handle.seek(-8, 2)
-        chunk_map_start_location = struct.unpack("Q", self._file_handle.read(8))[0]
-        self._file_handle.seek(chunk_map_start_location)
-        raw_text = self._file_handle.read(-1)
-        label_start = raw_text.index(Nd2Parser.CHUNK_MAP_START) + 32
-
-        while True:
-            data_start = raw_text.index(six.b("!"), label_start) + 1
-            key = raw_text[label_start: data_start]
-            location, length = struct.unpack("QQ", raw_text[data_start: data_start + 16])
-            if key == Nd2Parser.CHUNK_MAP_END:
-                # We've reached the end of the chunk map
-                break
-            self._label_map[key] = location
-            label_start = data_start + 16
-
-    def _read_chunk(self, chunk_location):
-        """
-        Gets the data for a given chunk pointer
-
-        """
-        self._file_handle.seek(chunk_location)
-        # The chunk metadata is always 16 bytes long
-        chunk_metadata = self._file_handle.read(16)
-        header, relative_offset, data_length = struct.unpack("IIQ", chunk_metadata)
-        if header != Nd2Parser.CHUNK_HEADER:
-            raise ValueError("The ND2 file seems to be corrupted.")
-        # We start at the location of the chunk metadata, skip over the metadata, and then proceed to the
-        # start of the actual data field, which is at some arbitrary place after the metadata.
-        self._file_handle.seek(chunk_location + 16 + relative_offset)
-        return self._file_handle.read(data_length)
-
-    def _parse_unsigned_char(self, data):
-        return struct.unpack("B", data.read(1))[0]
-
-    def _parse_unsigned_int(self, data):
-        return struct.unpack("I", data.read(4))[0]
-
-    def _parse_unsigned_long(self, data):
-        return struct.unpack("Q", data.read(8))[0]
-
-    def _parse_double(self, data):
-        return struct.unpack("d", data.read(8))[0]
-
-    def _parse_string(self, data):
-        value = data.read(2)
-        while not value.endswith(six.b("\x00\x00")):
-            # the string ends at the first instance of \x00\x00
-            value += data.read(2)
-        return value.decode("utf16")[:-1].encode("utf8")
-
-    def _parse_char_array(self, data):
-        array_length = struct.unpack("Q", data.read(8))[0]
-        return array.array("B", data.read(array_length))
-
-    def _parse_metadata_item(self, data):
-        """
-        Reads hierarchical data, analogous to a Python dict.
-
-        """
-        new_count, length = struct.unpack("<IQ", data.read(12))
-        length -= data.tell() - self._cursor_position
-        next_data_length = data.read(length)
-        value = self._read_metadata(next_data_length, new_count)
-        # Skip some offsets
-        data.read(new_count * 8)
-        return value
-
-    def _get_value(self, data, data_type):
-        """
-        ND2s use various codes to indicate different data types, which we translate here.
-
-        """
-        parser = {1: self._parse_unsigned_char,
-                  2: self._parse_unsigned_int,
-                  3: self._parse_unsigned_int,
-                  5: self._parse_unsigned_long,
-                  6: self._parse_double,
-                  8: self._parse_string,
-                  9: self._parse_char_array,
-                  11: self._parse_metadata_item}
-        return parser[data_type](data)
-
-    def _read_metadata(self, data, count):
-        """
-        Iterates over each element some section of the metadata and parses it.
-
-        """
-        data = six.BytesIO(data)
-        metadata = {}
-        for _ in range(count):
-            self._cursor_position = data.tell()
-            header = data.read(2)
-            if not header:
-                # We've reached the end of some hierarchy of data
-                break
-            if six.PY3:
-                header = header.decode("utf8")
-            data_type, name_length = map(ord, header)
-            name = data.read(name_length * 2).decode("utf16")[:-1].encode("utf8")
-            value = self._get_value(data, data_type)
-            if name not in metadata.keys():
-                metadata[name] = value
-            else:
-                if not isinstance(metadata[name], list):
-                    # We have encountered this key exactly once before. Since we're seeing it again, we know we
-                    # need to convert it to a list before proceeding.
-                    metadata[name] = [metadata[name]]
-                # We've encountered this key before so we're guaranteed to be dealing with a list. Thus we append
-                # the value to the already-existing list.
-                metadata[name].append(value)
-        return metadata
diff --git a/nd2reader/driver/__init__.py b/nd2reader/imreader/__init__.py
similarity index 100%
rename from nd2reader/driver/__init__.py
rename to nd2reader/imreader/__init__.py
diff --git a/nd2reader/driver/v2.py b/nd2reader/imreader/v2.py
similarity index 100%
rename from nd2reader/driver/v2.py
rename to nd2reader/imreader/v2.py
diff --git a/nd2reader/imreader/v3.py b/nd2reader/imreader/v3.py
new file mode 100644
index 0000000..28f448a
--- /dev/null
+++ b/nd2reader/imreader/v3.py
@@ -0,0 +1,96 @@
+# -*- coding: utf-8 -*-
+
+import array
+from datetime import datetime
+import numpy as np
+import re
+import struct
+import six
+from nd2reader.model.image import Image
+
+
+class V3ImageReader(object):
+    def __init__(self, metadata):
+        self._metadata = metadata
+
+    def _calculate_field_of_view(self, frame_number):
+        images_per_cycle = len(self._metadata.z_levels) * len(self._metadata.channels)
+        return int((frame_number - (frame_number % images_per_cycle)) / images_per_cycle) % len(self._metadata.fields_of_view)
+
+    def _calculate_channel(self, frame_number):
+        return self._metadata.channels[frame_number % len(self._metadata.channels)]
+
+    def _calculate_z_level(self, frame_number):
+        return self._metadata.z_levels[int(((frame_number - (frame_number % len(self._metadata.channels))) / len(self._metadata.channels)) % len(self._metadata.z_levels))]
+
+    def _calculate_image_group_number(self, time_index, fov, z_level):
+        """
+        Images are grouped together if they share the same time index, field of view, and z-level.
+
+        :type time_index: int
+        :type fov: int
+        :type z_level: int
+
+        :rtype: int
+
+        """
+        return time_index * len(self._metadata.fields_of_view) * len(self._metadata.z_levels) + (fov * len(self._metadata.z_levels) + z_level)
+
+    def _calculate_frame_number(self, image_group_number, fov, z_level):
+        return (image_group_number - (fov * len(self._metadata.z_levels) + z_level)) / (len(self._metadata.fields_of_view) * len(self._metadata.z_levels))
+
+    def get_image(self, index):
+        channel_offset = index % len(self._metadata.channels)
+        fov = self._calculate_field_of_view(index)
+        channel = self._calculate_channel(index)
+        z_level = self._calculate_z_level(index)
+        image_group_number = int(index / len(self._metadata.channels))
+        frame_number = self._calculate_frame_number(image_group_number, fov, z_level)
+        timestamp, image = self._get_raw_image_data(image_group_number, channel_offset, self._metadata.height, self._metadata.width)
+        image.add_params(timestamp, frame_number, fov, channel, z_level)
+
+    @property
+    def _channel_offset(self):
+        """
+        Image data is interleaved for each image set. That is, if there are four images in a set, the first image
+        will consist of pixels 1, 5, 9, etc, the second will be pixels 2, 6, 10, and so forth.
+
+        :rtype: dict
+
+        """
+        channel_offset = {}
+        for n, channel in enumerate(self._channels):
+            channel_offset[channel] = n
+        return channel_offset
+
+    def _get_raw_image_data(self, image_group_number, channel_offset, height, width):
+        """
+        Reads the raw bytes and the timestamp of an image.
+
+        :param image_group_number: groups are made of images with the same time index, field of view and z-level.
+        :type image_group_number: int
+        :param channel_offset: the offset in the array where the bytes for this image are found.
+        :type channel_offset: int
+
+        :return: (float, Image) or None if the image data is entirely zeros
+
+        """
+        chunk = self._label_map[six.b("ImageDataSeq|%d!" % image_group_number)]
+        data = self._read_chunk(chunk)
+        # All images in the same image group share the same timestamp! So if you have complicated image data,
+        # your timestamps may not be entirely accurate. Practically speaking though, they'll only be off by a few
+        # seconds unless you're doing something super weird.
+        timestamp = struct.unpack("d", data[:8])[0]
+        image_group_data = array.array("H", data)
+        image_data_start = 4 + channel_offset
+        # The images for the various channels are interleaved within the same array. For example, the second image
+        # of a four image group will be composed of array elements 2, 6, 10, etc. If you understand why someone would design
+        # a data structure that way, please send the author of this library a message.
+        image_data = np.reshape(image_group_data[image_data_start::len(self.channels)], (height, width))
+        # Skip images that are all zeros! This is important, since NIS Elements creates blank "gap" images if you
+        # don't have the same number of images each cycle. We discovered this because we only took GFP images every
+        # other cycle to reduce phototoxicity, but NIS Elements still allocated memory as if we were going to take
+        # them every cycle.
+        if np.any(image_data):
+            return timestamp, Image(image_data)
+        return None
diff --git a/nd2reader/interface.py b/nd2reader/interface.py
index fa48a92..a54b5dd 100644
--- a/nd2reader/interface.py
+++ b/nd2reader/interface.py
@@ -12,8 +12,8 @@ class Nd2(object):
 
     """
     def __init__(self, filename):
-        version = get_version(filename)
-        parser = get_parser(filename, version)
+        major_version, minor_version = get_version(filename)
+        parser = get_parser(filename, major_version, minor_version)
         self._driver = parser.driver
         self._metadata = parser.metadata
         self._filename = filename
@@ -89,7 +89,7 @@ class Nd2(object):
         :return: model.ImageSet()
 
         """
-        warnings.warn("nd2.image_sets will be removed from the nd2reader library in the near future.", DeprecationWarning)
+        warnings.warn("Nd2.image_sets will be removed from the nd2reader library in the near future.", DeprecationWarning)
 
         for frame in self.frames:
             image_group = ImageGroup()
diff --git a/nd2reader/model/metadata.py b/nd2reader/model/metadata.py
new file mode 100644
index 0000000..cf7d140
--- /dev/null
+++ b/nd2reader/model/metadata.py
@@ -0,0 +1,62 @@
+class Metadata(object):
+    """ A simple container for ND2 metadata. """
+    def __init__(self, channels, date, fields_of_view, frames, z_levels):
+        self._channels = channels
+        self._date = date
+        self._fields_of_view = fields_of_view
+        self._frames = frames
+        self._z_levels = z_levels
+
+    @property
+    def date(self):
+        """
+        The date and time when acquisition began.
+
+        :rtype: datetime.datetime()
+
+        """
+        return self._date
+
+    @property
+    def channels(self):
+        """
+        These are labels created by the NIS Elements user. Typically they are a short description of the filter cube
+        used (e.g. "bright field", "GFP", etc.)
+
+        :rtype: list
+
+        """
+        return self._channels
+
+    @property
+    def fields_of_view(self):
+        """
+        The metadata contains information about fields of view, but it contains it even if some fields
+        of view were cropped. We can't find anything that states which fields of view are actually
+        in the image data, so we have to calculate it. There probably is something somewhere, since
+        NIS Elements can figure it out, but we haven't found it yet.
+
+        :rtype: list
+
+        """
+        return self._fields_of_view
+
+    @property
+    def frames(self):
+        """
+        The number of cycles.
+
+        :rtype:     list
+
+        """
+        return self._frames
+
+    @property
+    def z_levels(self):
+        """
+        The different levels in the Z-plane. Just a sequence from 0 to n.
+
+        :rtype: list
+
+        """
+        return self._z_levels
diff --git a/nd2reader/parser/__init__.py b/nd2reader/parser/__init__.py
new file mode 100644
index 0000000..24ce42d
--- /dev/null
+++ b/nd2reader/parser/__init__.py
@@ -0,0 +1 @@
+from . parser import get_parser
diff --git a/nd2reader/parser/parser.py b/nd2reader/parser/parser.py
new file mode 100644
index 0000000..cc09374
--- /dev/null
+++ b/nd2reader/parser/parser.py
@@ -0,0 +1,2 @@
+def get_parser(filename, major_version, minor_version):
+    parsers = {}
diff --git a/nd2reader/parser.py b/nd2reader/parser/v2.py
similarity index 100%
rename from nd2reader/parser.py
rename to nd2reader/parser/v2.py
diff --git a/nd2reader/parser/v3.py b/nd2reader/parser/v3.py
new file mode 100644
index 0000000..e17dbb0
--- /dev/null
+++ b/nd2reader/parser/v3.py
@@ -0,0 +1,295 @@
+# -*- coding: utf-8 -*-
+
+import array
+from datetime import datetime
+from nd2reader.model.metadata import Metadata
+import re
+import six
+import struct
+
+
+class V3Parser(object):
+    """ Parses the metadata chunks of an ND2 file and builds a Metadata object from them. """
+    CHUNK_HEADER = 0xabeceda  # magic number that _read_chunk expects at the start of every chunk
+    CHUNK_MAP_START = six.b("ND2 FILEMAP SIGNATURE NAME 0001!")  # label that comes right before the first map entry
+    CHUNK_MAP_END = six.b("ND2 CHUNK MAP SIGNATURE 0000001!")  # label that marks the end of the chunk map
+
+    def __init__(self, filename):
+        self._filename = filename  # path of the ND2; it is only opened once _file_handle is first accessed
+        self._fh = None  # cache for the open file handle, filled in by _file_handle
+        self._metadata = None  # set by _parse_metadata
+
+    def _parse_date(self, metadata_dict):
+        """
+        Finds the date and time when acquisition began, by looking for a timestamp among the text info entries.
+        Both the 12-hour and the 24-hour representation are understood.
+
+        :param metadata_dict: the parsed metadata chunks
+        :type metadata_dict: dict
+        :raises ValueError: if none of the entries is a timestamp
+        :rtype: datetime.datetime()
+
+        """
+        # ND2s seem to randomly switch between 12- and 24-hour representations.
+        # An entry can only ever fit one of the two, so the order in which they are tried makes no difference.
+        timestamp_formats = ("%m/%d/%Y  %I:%M:%S %p", "%m/%d/%Y  %H:%M:%S")
+        text_info = metadata_dict[six.b('ImageTextInfo')][six.b('SLxImageTextInfo')]
+        for raw_entry in text_info.values():
+            entry = raw_entry.decode("utf8")
+            for timestamp_format in timestamp_formats:
+                try:
+                    return datetime.strptime(entry, timestamp_format)
+                except (TypeError, ValueError):
+                    # Not a timestamp in this representation, so try the next one (or the next entry)
+                    continue
+        raise ValueError("This ND2 has no recorded start time. This is probably a bug.")
+
+    def _parse_channels(self, metadata_dict):
+        """
+        Gets the labels created by the NIS Elements user for each channel. Typically they are a short description
+        of the filter cube used (e.g. "bright field", "GFP", etc.) Channels flagged as invalid are left out.
+
+        :param metadata_dict: the parsed metadata chunks
+        :type metadata_dict: dict
+        :rtype: list
+
+        """
+        picture_planes = metadata_dict[six.b('ImageMetadataSeq')][six.b('SLxPictureMetadata')][six.b('sPicturePlanes')]
+        # Channel information is contained in dictionaries with the keys a0, a1...an where the number
+        # indicates the order in which the channel is stored. So by sorting the dicts alphabetically
+        # we get the correct order. NOTE(review): alphabetically, a10 sorts before a2 -- confirm n stays below 10.
+        planes = [plane for _, plane in sorted(picture_planes[six.b('sPlaneNew')].items())]
+        try:
+            validity = metadata_dict[six.b('ImageMetadata')][six.b('SLxExperiment')][six.b('ppNextLevelEx')][six.b('')][0][six.b('ppNextLevelEx')][six.b('')][0][six.b('pItemValid')]
+        except KeyError:
+            # If none of the channels have been deleted, there is no validity list, so we just make one. It needs
+            # one entry per channel: sizing it by anything else would let zip() silently drop channels.
+            validity = [True] * len(planes)
+        return [plane[six.b('sDescription')].decode("utf8")
+                for plane, valid in zip(planes, validity) if valid]
+
+    def _parse_fields_of_view(self, metadata_dict):
+        """
+        Gets the fields of view from the XY(n) part of the dimension text. The metadata also describes fields
+        of view, but it keeps describing them even if they were cropped, and we haven't found anything that
+        states which ones are actually in the image data, so they are derived from the text instead.
+
+        :rtype: list
+
+        """
+        field_of_view_pattern = r""".*?XY\((\d+)\).*?"""
+        return self._parse_dimension(field_of_view_pattern, metadata_dict)
+
+    def _parse_frames(self, metadata_dict):
+        """
+        Gets the frames (cycles) from the T'(n) part of the dimension text.
+
+        :rtype:     list
+        """
+        frame_pattern = r""".*?T'\((\d+)\).*?"""
+        return self._parse_dimension(frame_pattern, metadata_dict)
+
+    def _parse_z_levels(self, metadata_dict):
+        """
+        Gets the levels in the Z-plane from the Z(n) part of the dimension text. Just a sequence from 0 to n.
+
+        :rtype: list
+        """
+        z_level_pattern = r""".*?Z\((\d+)\).*?"""
+        return self._parse_dimension(z_level_pattern, metadata_dict)
+
+    @property
+    def _file_handle(self):  # the ND2 opened for binary reading; opened on first access, then reused
+        if self._fh is None:
+            self._fh = open(self._filename, "rb")  # NOTE(review): nothing in this class ever closes the handle
+        return self._fh
+
+    def _parse_dimension_text(self, metadata_dict):
+        """
+        Gets the line of human-readable text that describes the dimensions. There are metadata values for a lot of
+        what we want to capture, but they seem to be unreliable: sometimes certain elements don't exist, or change
+        their data type randomly. The text is always there and in the same exact format, so we parse that instead.
+
+        :param metadata_dict: the parsed metadata chunks
+        :type metadata_dict: dict
+        :raises ValueError: if no line starting with "Dimensions:" is found
+        :rtype: str
+
+        """
+        marker = six.b("Dimensions:")
+        for entry in metadata_dict[six.b('ImageTextInfo')][six.b('SLxImageTextInfo')].values():
+            if marker not in entry:
+                continue
+            # Only the first entry that mentions the marker is searched, one line at a time
+            for text_line in entry.split(six.b("\r\n")):
+                if text_line.startswith(marker):
+                    return text_line
+            break
+        raise ValueError("Could not parse metadata dimensions!")
+
+    def _parse_dimension(self, pattern, metadata_dict):
+        """
+        Reads the size of one dimension out of the dimension text and returns the indexes from 0 up to that size,
+        or [0] if the text does not mention the dimension. The first group of `pattern` must capture the size.
+
+        :rtype: list
+
+        """
+        dimension_text = self._parse_dimension_text(metadata_dict)
+        if not isinstance(dimension_text, str):  # bytes on Python 3, which a str pattern cannot be matched against
+            dimension_text = dimension_text.decode("utf8")
+        match = re.match(pattern, dimension_text)
+        return list(range(int(match.group(1)))) if match else [0]
+
+    def _parse_total_images_per_channel(self, metadata_dict):
+        """
+        Gets the total number of images per channel. Warning: this may be inaccurate as it includes "gap" images.
+
+        :rtype: int
+        """
+        image_attributes = metadata_dict[six.b('ImageAttributes')][six.b('SLxImageAttributes')]
+        return image_attributes[six.b('uiSequenceCount')]
+
+    def _parse_metadata(self):
+        """
+        Reads all metadata chunks and stores what was parsed from them as a Metadata object in self._metadata.
+
+        """
+        metadata_dict = {}
+        for label, location in self._build_label_map().items():
+            # Only chunks whose label carries the LV marker are read; the key is the label up to that marker.
+            if label.endswith(six.b("LV!")) or six.b("LV|") in label:
+                data = self._read_chunk(location)
+                stop = label.index(six.b("LV"))
+                metadata_dict[label[:stop]] = self._read_metadata(data, 1)
+
+        channels = self._parse_channels(metadata_dict)
+        date = self._parse_date(metadata_dict)
+        fields_of_view = self._parse_fields_of_view(metadata_dict)
+        frames = self._parse_frames(metadata_dict)
+        z_levels = self._parse_z_levels(metadata_dict)
+        self._metadata = Metadata(channels, date, fields_of_view, frames, z_levels)
+
+    def _build_label_map(self):
+        """
+        Maps the label of every chunk to its location in the file. The last 8 bytes of the file say where the chunk
+        map starts. Every label ends with an exclamation point, but we can't just search for those, as the 16 bytes
+        of data after each label may contain the value 33 ("!") too. So we find a label, skip its data and repeat.
+
+        :rtype: dict
+
+        """
+        label_map = {}
+        self._file_handle.seek(-8, 2)
+        chunk_map_start_location = struct.unpack("<Q", self._file_handle.read(8))[0]
+        self._file_handle.seek(chunk_map_start_location)
+        raw_text = self._file_handle.read(-1)
+        label_start = raw_text.index(V3Parser.CHUNK_MAP_START) + len(V3Parser.CHUNK_MAP_START)
+        while True:
+            data_start = raw_text.index(six.b("!"), label_start) + 1
+            key = raw_text[label_start: data_start]
+            if key == V3Parser.CHUNK_MAP_END:
+                # We've reached the end of the chunk map. This is checked before any data is unpacked, so it
+                # does not matter how many bytes come after the end marker.
+                break
+            location, _length = struct.unpack("<QQ", raw_text[data_start: data_start + 16])
+            label_map[key] = location
+            label_start = data_start + 16
+        return label_map
+
+    def _read_chunk(self, chunk_location):
+        """
+        Gets the data of the chunk that starts at the given location in the file.
+
+        :raises ValueError: if the chunk does not start with the expected magic number
+        """
+        self._file_handle.seek(chunk_location)
+        # The chunk metadata is always 16 bytes long: the magic number, the offset of the data and its length.
+        # "<" fixes the byte order and field sizes, so they do not depend on the machine reading the file.
+        header, relative_offset, data_length = struct.unpack("<IIQ", self._file_handle.read(16))
+        if header != V3Parser.CHUNK_HEADER:
+            raise ValueError("The ND2 file seems to be corrupted.")
+        # The data field starts at some arbitrary place after the chunk metadata, given by the relative offset.
+        self._file_handle.seek(chunk_location + 16 + relative_offset)
+        return self._file_handle.read(data_length)
+
+    def _parse_unsigned_char(self, data):  # one byte, as an integer
+        return struct.unpack("B", data.read(1))[0]
+
+    def _parse_unsigned_int(self, data):  # 4 bytes, little-endian like the "<IQ" read in _parse_metadata_item
+        return struct.unpack("<I", data.read(4))[0]
+
+    def _parse_unsigned_long(self, data):  # 8 bytes, little-endian
+        return struct.unpack("<Q", data.read(8))[0]
+
+    def _parse_double(self, data):  # 8 byte float, little-endian
+        return struct.unpack("<d", data.read(8))[0]
+
+    def _parse_string(self, data):
+        value = pair = data.read(2)
+        while len(pair) == 2 and pair != six.b("\x00\x00"):  # ends at the first \x00\x00, or when data runs out
+            pair = data.read(2)
+            value += pair
+        return value.decode("utf16").rstrip("\x00").encode("utf8")  # the terminator is not part of the string
+
+    def _parse_char_array(self, data):  # an 8 byte little-endian length, followed by that many bytes
+        array_length = struct.unpack("<Q", data.read(8))[0]
+        return array.array("B", data.read(array_length))
+
+    def _parse_metadata_item(self, data):
+        """
+        Reads hierarchical data, analogous to a Python dict. Relies on self._cursor_position, which _read_metadata
+        sets to where the current element started, just before calling down to here.
+        """
+        new_count, length = struct.unpack("<IQ", data.read(12))  # number of child elements, then a byte length
+        length -= data.tell() - self._cursor_position  # take off what was already read since the element started
+        next_data_length = data.read(length)  # despite the name, these are the raw bytes of the child elements
+        value = self._read_metadata(next_data_length, new_count)
+        # Skip some offsets (8 bytes for each child element)
+        data.read(new_count * 8)
+        return value
+
+    def _get_value(self, data, data_type):
+        """ Reads the next value from `data`, using the reader that matches the ND2 type code `data_type`. """
+        # ND2s use various codes to indicate different data types; a code that is not listed raises KeyError.
+        readers = {
+            1: self._parse_unsigned_char,
+            2: self._parse_unsigned_int,
+            3: self._parse_unsigned_int,
+            5: self._parse_unsigned_long,
+            6: self._parse_double,
+            8: self._parse_string,
+            9: self._parse_char_array,
+            11: self._parse_metadata_item,
+        }
+        return readers[data_type](data)
+
+    def _read_metadata(self, data, count):
+        """
+        Parses up to `count` named elements from a section of the metadata and returns them as a dict. A name that
+        occurs more than once has all of its values collected in a list, in the order they were read.
+
+        """
+        data = six.BytesIO(data)
+        metadata = {}
+        for _ in range(count):
+            # _parse_metadata_item needs to know where the element it is reading began
+            self._cursor_position = data.tell()
+            header = data.read(2)
+            if not header:
+                # We've reached the end of some hierarchy of data
+                break
+            # The header is two raw bytes: the type code, then the length of the name in 2-byte units. They are
+            # unpacked as integers, since decoding them as text fails for any value above 127.
+            data_type, name_length = struct.unpack("BB", header)
+            name = data.read(name_length * 2).decode("utf16")[:-1].encode("utf8")
+            value = self._get_value(data, data_type)
+            if name not in metadata:
+                metadata[name] = value
+                continue
+            if not isinstance(metadata[name], list):
+                # We have encountered this key exactly once before. Since we're seeing it again, we know we
+                # need to convert it to a list before proceeding.
+                metadata[name] = [metadata[name]]
+            metadata[name].append(value)
+        return metadata