From 949d0612d49ae3de3dac82d3339f164aa5c863fb Mon Sep 17 00:00:00 2001 From: jim Date: Fri, 18 Sep 2015 21:12:08 -0500 Subject: [PATCH 1/8] #66 began enormous refactor to support multiple versions of ND2s in an elegant way --- nd2reader/__init__.py | 153 +------------- nd2reader/driver/__init__.py | 1 + nd2reader/driver/driver.py | 11 + nd2reader/driver/v2.py | 0 nd2reader/driver/v3.py | 386 +++++++++++++++++++++++++++++++++++ nd2reader/driver/version.py | 36 ++++ nd2reader/interface.py | 154 ++++++++++++++ nd2reader/model/__init__.py | 157 +------------- nd2reader/model/group.py | 37 ++++ nd2reader/model/image.py | 115 +++++++++++ nd2reader/parser.py | 386 ----------------------------------- tests/__init__.py | 366 ++++++++++++++++----------------- tests/driver/__init__.py | 0 tests/driver/driver.py | 7 + tests/driver/version.py | 16 ++ 15 files changed, 949 insertions(+), 876 deletions(-) create mode 100644 nd2reader/driver/__init__.py create mode 100644 nd2reader/driver/driver.py create mode 100644 nd2reader/driver/v2.py create mode 100644 nd2reader/driver/v3.py create mode 100644 nd2reader/driver/version.py create mode 100644 nd2reader/interface.py create mode 100644 nd2reader/model/group.py create mode 100644 nd2reader/model/image.py create mode 100644 tests/driver/__init__.py create mode 100644 tests/driver/driver.py create mode 100644 tests/driver/version.py diff --git a/nd2reader/__init__.py b/nd2reader/__init__.py index cf19770..d845fad 100644 --- a/nd2reader/__init__.py +++ b/nd2reader/__init__.py @@ -1,152 +1 @@ -# -*- coding: utf-8 -*- - -from nd2reader.model import Image, ImageSet -from nd2reader.parser import Nd2Parser -import six - - -class Nd2(Nd2Parser): - """ - Allows easy access to NIS Elements .nd2 image files. - - """ - def __init__(self, filename): - super(Nd2, self).__init__(filename) - self._filename = filename - - def __repr__(self): - return "\n".join(["" % self._filename, - "Created: %s" % self.absolute_start.strftime("%Y-%m-%d %H:%M:%S"), - "Image size: %sx%s (HxW)" % (self.height, self.width), - "Image cycles: %s" % len(self.time_indexes), - "Channels: %s" % ", ".join(["'%s'" % str(channel) for channel in self.channels]), - "Fields of View: %s" % len(self.fields_of_view), - "Z-Levels: %s" % len(self.z_levels) - ]) - - def __len__(self): - """ - This should be the total number of images in the ND2, but it may be inaccurate. If the ND2 contains a - different number of images in a cycle (i.e. there are "gap" images) it will be higher than reality. - - :rtype: int - - """ - return self._total_images_per_channel * len(self.channels) - - def __getitem__(self, item): - """ - Allows slicing ND2s. - - >>> nd2 = Nd2("my_images.nd2") - >>> image = nd2[16] # gets 17th frame - >>> for image in nd2[100:200]: # iterate over the 100th to 200th images - >>> do_something(image.data) - >>> for image in nd2[::-1]: # iterate backwards - >>> do_something(image.data) - >>> for image in nd2[37:422:17]: # do something super weird if you really want to - >>> do_something(image.data) - - :type item: int or slice - :rtype: nd2reader.model.Image() or generator - - """ - if isinstance(item, int): - try: - channel_offset = item % len(self.channels) - fov = self._calculate_field_of_view(item) - channel = self._calculate_channel(item) - z_level = self._calculate_z_level(item) - image_group_number = int(item / len(self.channels)) - frame_number = self._calculate_frame_number(image_group_number, fov, z_level) - timestamp, raw_image_data = self._get_raw_image_data(image_group_number, channel_offset) - image = Image(timestamp, frame_number, raw_image_data, fov, channel, z_level, self.height, self.width) - except (TypeError, ValueError): - return None - except KeyError: - raise IndexError("Invalid frame number.") - else: - return image - elif isinstance(item, slice): - return self._slice(item.start, item.stop, item.step) - raise IndexError - - def _slice(self, start, stop, step): - """ - Allows for iteration over a selection of the entire dataset. - - :type start: int - :type stop: int - :type step: int - :rtype: nd2reader.model.Image() or None - - """ - start = start if start is not None else 0 - step = step if step is not None else 1 - stop = stop if stop is not None else len(self) - # This weird thing with the step allows you to iterate backwards over the images - for i in range(start, stop)[::step]: - yield self[i] - - @property - def image_sets(self): - """ - Iterates over groups of related images. This is useful if your ND2 contains multiple fields of view. - A typical use case might be that you have, say, four areas of interest that you're monitoring, and every - minute you take a bright field and GFP image of each one. For each cycle, this method would produce four - ImageSet objects, each containing one bright field and one GFP image. - - :return: model.ImageSet() - - """ - for time_index in self.time_indexes: - image_set = ImageSet() - for fov in self.fields_of_view: - for channel_name in self.channels: - for z_level in self.z_levels: - image = self.get_image(time_index, fov, channel_name, z_level) - if image is not None: - image_set.add(image) - yield image_set - - @property - def height(self): - """ - :return: height of each image, in pixels - :rtype: int - - """ - return self.metadata[six.b('ImageAttributes')][six.b('SLxImageAttributes')][six.b('uiHeight')] - - @property - def width(self): - """ - :return: width of each image, in pixels - :rtype: int - - """ - return self.metadata[six.b('ImageAttributes')][six.b('SLxImageAttributes')][six.b('uiWidth')] - - def get_image(self, frame_number, field_of_view, channel_name, z_level): - """ - Returns an Image if data exists for the given parameters, otherwise returns None. In general, you should avoid - using this method unless you're very familiar with the structure of ND2 files. - - :type frame_number: int - :param field_of_view: the label for the place in the XY-plane where this image was taken. - :type field_of_view: int - :param channel_name: the name of the color of this image - :type channel_name: str - :param z_level: the label for the location in the Z-plane where this image was taken. - :type z_level: int - :rtype: nd2reader.model.Image() or None - - """ - image_group_number = self._calculate_image_group_number(frame_number, field_of_view, z_level) - try: - timestamp, raw_image_data = self._get_raw_image_data(image_group_number, self._channel_offset[channel_name]) - image = Image(timestamp, frame_number, raw_image_data, field_of_view, channel_name, z_level, self.height, self.width) - except TypeError: - return None - else: - return image +from nd2reader.interface import Nd2 diff --git a/nd2reader/driver/__init__.py b/nd2reader/driver/__init__.py new file mode 100644 index 0000000..16b2735 --- /dev/null +++ b/nd2reader/driver/__init__.py @@ -0,0 +1 @@ +from nd2reader.driver.driver import get_driver diff --git a/nd2reader/driver/driver.py b/nd2reader/driver/driver.py new file mode 100644 index 0000000..8358870 --- /dev/null +++ b/nd2reader/driver/driver.py @@ -0,0 +1,11 @@ +def get_driver(filename, version): + """ + Instantiates the correct driver for the ND2, which allows us to parse metadata and access images. + + :param filename: the path to the ND2 + :type filename: str + :param version: the version of the ND2. Note that this is different than the version of NIS Elements used to create the ND2. + :type version: tuple + + """ + return 1 diff --git a/nd2reader/driver/v2.py b/nd2reader/driver/v2.py new file mode 100644 index 0000000..e69de29 diff --git a/nd2reader/driver/v3.py b/nd2reader/driver/v3.py new file mode 100644 index 0000000..0dca2aa --- /dev/null +++ b/nd2reader/driver/v3.py @@ -0,0 +1,386 @@ +# -*- coding: utf-8 -*- + +import array +from datetime import datetime +import numpy as np +import re +import struct +import six + + +class Nd2Parser(object): + """ + Reads .nd2 files, provides an interface to the metadata, and generates numpy arrays from the image data. + You should not ever need to instantiate this class manually unless you're a developer. + + """ + CHUNK_HEADER = 0xabeceda + CHUNK_MAP_START = six.b("ND2 FILEMAP SIGNATURE NAME 0001!") + CHUNK_MAP_END = six.b("ND2 CHUNK MAP SIGNATURE 0000001!") + + def __init__(self, filename): + self._absolute_start = None + self._filename = filename + self._fh = None + self._channels = None + self._channel_count = None + self._chunk_map_start_location = None + self._cursor_position = 0 + self._dimension_text = None + self._fields_of_view = None + self._label_map = {} + self.metadata = {} + self._read_map() + self._time_indexes = None + self._parse_metadata() + self._z_levels = None + + @property + def absolute_start(self): + """ + The date and time when acquisition began. + + :rtype: datetime.datetime() + + """ + if self._absolute_start is None: + for line in self.metadata[six.b('ImageTextInfo')][six.b('SLxImageTextInfo')].values(): + line = line.decode("utf8") + absolute_start_12 = None + absolute_start_24 = None + # ND2s seem to randomly switch between 12- and 24-hour representations. + try: + absolute_start_24 = datetime.strptime(line, "%m/%d/%Y %H:%M:%S") + except (TypeError, ValueError): + pass + try: + absolute_start_12 = datetime.strptime(line, "%m/%d/%Y %I:%M:%S %p") + except (TypeError, ValueError): + pass + if not absolute_start_12 and not absolute_start_24: + continue + return absolute_start_12 if absolute_start_12 else absolute_start_24 + raise ValueError("This ND2 has no recorded start time. This is probably a bug.") + return self._absolute_start + + @property + def channels(self): + """ + These are labels created by the NIS Elements user. Typically they may a short description of the filter cube + used (e.g. "bright field", "GFP", etc.) + + :rtype: list + + """ + if not self._channels: + self._channels = [] + metadata = self.metadata[six.b('ImageMetadataSeq')][six.b('SLxPictureMetadata')][six.b('sPicturePlanes')] + try: + validity = self.metadata[six.b('ImageMetadata')][six.b('SLxExperiment')][six.b('ppNextLevelEx')][six.b('')][0][six.b('ppNextLevelEx')][six.b('')][0][six.b('pItemValid')] + except KeyError: + # If none of the channels have been deleted, there is no validity list, so we just make one + validity = [True for _ in metadata] + # Channel information is contained in dictionaries with the keys a0, a1...an where the number + # indicates the order in which the channel is stored. So by sorting the dicts alphabetically + # we get the correct order. + for (label, chan), valid in zip(sorted(metadata[six.b('sPlaneNew')].items()), validity): + if not valid: + continue + self._channels.append(chan[six.b('sDescription')].decode("utf8")) + return self._channels + + @property + def fields_of_view(self): + """ + The metadata contains information about fields of view, but it contains it even if some fields + of view were cropped. We can't find anything that states which fields of view are actually + in the image data, so we have to calculate it. There probably is something somewhere, since + NIS Elements can figure it out, but we haven't found it yet. + + :rtype: list + + """ + if self._fields_of_view is None: + self._fields_of_view = self._parse_dimension_text(r""".*?XY\((\d+)\).*?""") + return self._fields_of_view + + @property + def frames(self): + """ + The number of cycles. + + :rtype: list + + """ + if self._time_indexes is None: + self._time_indexes = self._parse_dimension_text(r""".*?T'\((\d+)\).*?""") + return self._time_indexes + + @property + def z_levels(self): + """ + The different levels in the Z-plane. Just a sequence from 0 to n. + + :rtype: list + + """ + if self._z_levels is None: + self._z_levels = self._parse_dimension_text(r""".*?Z\((\d+)\).*?""") + return self._z_levels + + def _calculate_field_of_view(self, frame_number): + images_per_cycle = len(self.z_levels) * len(self.channels) + return int((frame_number - (frame_number % images_per_cycle)) / images_per_cycle) % len(self.fields_of_view) + + def _calculate_channel(self, frame_number): + return self.channels[frame_number % len(self.channels)] + + def _calculate_z_level(self, frame_number): + return self.z_levels[int(((frame_number - (frame_number % len(self.channels))) / len(self.channels)) % len(self.z_levels))] + + @property + def _file_handle(self): + if self._fh is None: + self._fh = open(self._filename, "rb") + return self._fh + + def _get_raw_image_data(self, image_group_number, channel_offset): + """ + Reads the raw bytes and the timestamp of an image. + + :param image_group_number: groups are made of images with the same time index, field of view and z-level. + :type image_group_number: int + :param channel_offset: the offset in the array where the bytes for this image are found. + :type channel_offset: int + + :return: (int, array.array()) or None + + """ + chunk = self._label_map[six.b("ImageDataSeq|%d!" % image_group_number)] + data = self._read_chunk(chunk) + # All images in the same image group share the same timestamp! So if you have complicated image data, + # your timestamps may not be entirely accurate. Practically speaking though, they'll only be off by a few + # seconds unless you're doing something super weird. + timestamp = struct.unpack("d", data[:8])[0] + image_group_data = array.array("H", data) + image_data_start = 4 + channel_offset + # The images for the various channels are interleaved within the same array. For example, the second image + # of a four image group will be composed of bytes 2, 6, 10, etc. If you understand why someone would design + # a data structure that way, please send the author of this library a message. + image_data = image_group_data[image_data_start::len(self.channels)] + # Skip images that are all zeros! This is important, since NIS Elements creates blank "gap" images if you + # don't have the same number of images each cycle. We discovered this because we only took GFP images every + # other cycle to reduce phototoxicity, but NIS Elements still allocated memory as if we were going to take + # them every cycle. + if np.any(image_data): + return timestamp, image_data + return None + + @property + def _dimensions(self): + """ + While there are metadata values that represent a lot of what we want to capture, they seem to be unreliable. + Sometimes certain elements don't exist, or change their data type randomly. However, the human-readable text + is always there and in the same exact format, so we just parse that instead. + + :rtype: str + + """ + if self._dimension_text is None: + for line in self.metadata[six.b('ImageTextInfo')][six.b('SLxImageTextInfo')].values(): + if six.b("Dimensions:") in line: + metadata = line + break + else: + raise ValueError("Could not parse metadata dimensions!") + for line in metadata.split(six.b("\r\n")): + if line.startswith(six.b("Dimensions:")): + self._dimension_text = line + break + else: + raise ValueError("Could not parse metadata dimensions!") + return self._dimension_text + + def _calculate_image_group_number(self, time_index, fov, z_level): + """ + Images are grouped together if they share the same time index, field of view, and z-level. + + :type time_index: int + :type fov: int + :type z_level: int + + :rtype: int + + """ + return time_index * len(self.fields_of_view) * len(self.z_levels) + (fov * len(self.z_levels) + z_level) + + def _calculate_frame_number(self, image_group_number, fov, z_level): + return (image_group_number - (fov * len(self.z_levels) + z_level)) / (len(self.fields_of_view) * len(self.z_levels)) + + @property + def _channel_offset(self): + """ + Image data is interleaved for each image set. That is, if there are four images in a set, the first image + will consist of pixels 1, 5, 9, etc, the second will be pixels 2, 6, 10, and so forth. + + :rtype: dict + + """ + channel_offset = {} + for n, channel in enumerate(self._channels): + channel_offset[channel] = n + return channel_offset + + def _parse_dimension_text(self, pattern): + try: + count = int(re.match(pattern, self._dimensions).group(1)) + except AttributeError: + return [0] + except TypeError: + match = re.match(pattern, self._dimensions.decode("utf8")) + if not match: + return [0] + return list(range(int(match.group(1)))) + else: + return list(range(count)) + + @property + def _total_images_per_channel(self): + """ + The total number of images per channel. Warning: this may be inaccurate as it includes "gap" images. + + :rtype: int + + """ + return self.metadata[six.b('ImageAttributes')][six.b('SLxImageAttributes')][six.b('uiSequenceCount')] + + def _parse_metadata(self): + """ + Reads all metadata. + + """ + for label in self._label_map.keys(): + if label.endswith(six.b("LV!")) or six.b("LV|") in label: + data = self._read_chunk(self._label_map[label]) + stop = label.index(six.b("LV")) + self.metadata[label[:stop]] = self._read_metadata(data, 1) + + def _read_map(self): + """ + Every label ends with an exclamation point, however, we can't directly search for those to find all the labels + as some of the bytes contain the value 33, which is the ASCII code for "!". So we iteratively find each label, + grab the subsequent data (always 16 bytes long), advance to the next label and repeat. + + """ + self._file_handle.seek(-8, 2) + chunk_map_start_location = struct.unpack("Q", self._file_handle.read(8))[0] + self._file_handle.seek(chunk_map_start_location) + raw_text = self._file_handle.read(-1) + label_start = raw_text.index(Nd2Parser.CHUNK_MAP_START) + 32 + + while True: + data_start = raw_text.index(six.b("!"), label_start) + 1 + key = raw_text[label_start: data_start] + location, length = struct.unpack("QQ", raw_text[data_start: data_start + 16]) + if key == Nd2Parser.CHUNK_MAP_END: + # We've reached the end of the chunk map + break + self._label_map[key] = location + label_start = data_start + 16 + + def _read_chunk(self, chunk_location): + """ + Gets the data for a given chunk pointer + + """ + self._file_handle.seek(chunk_location) + # The chunk metadata is always 16 bytes long + chunk_metadata = self._file_handle.read(16) + header, relative_offset, data_length = struct.unpack("IIQ", chunk_metadata) + if header != Nd2Parser.CHUNK_HEADER: + raise ValueError("The ND2 file seems to be corrupted.") + # We start at the location of the chunk metadata, skip over the metadata, and then proceed to the + # start of the actual data field, which is at some arbitrary place after the metadata. + self._file_handle.seek(chunk_location + 16 + relative_offset) + return self._file_handle.read(data_length) + + def _parse_unsigned_char(self, data): + return struct.unpack("B", data.read(1))[0] + + def _parse_unsigned_int(self, data): + return struct.unpack("I", data.read(4))[0] + + def _parse_unsigned_long(self, data): + return struct.unpack("Q", data.read(8))[0] + + def _parse_double(self, data): + return struct.unpack("d", data.read(8))[0] + + def _parse_string(self, data): + value = data.read(2) + while not value.endswith(six.b("\x00\x00")): + # the string ends at the first instance of \x00\x00 + value += data.read(2) + return value.decode("utf16")[:-1].encode("utf8") + + def _parse_char_array(self, data): + array_length = struct.unpack("Q", data.read(8))[0] + return array.array("B", data.read(array_length)) + + def _parse_metadata_item(self, data): + """ + Reads hierarchical data, analogous to a Python dict. + + """ + new_count, length = struct.unpack("\d)\.(?P\d)$""", data) + if match: + # We haven't seen a lot of ND2s but the ones we have seen conform to this + return int(match.group('major')), int(match.group('minor')) + raise InvalidVersionError("The version of the ND2 you specified is not supported.") diff --git a/nd2reader/interface.py b/nd2reader/interface.py new file mode 100644 index 0000000..0ff6c90 --- /dev/null +++ b/nd2reader/interface.py @@ -0,0 +1,154 @@ +# -*- coding: utf-8 -*- + +from nd2reader.model import Image, ImageGroup +from nd2reader.driver import get_driver +from nd2reader.driver.version import get_version +import six + + +class Nd2(object): + """ + Allows easy access to NIS Elements .nd2 image files. + + """ + def __init__(self, filename): + version = get_version(filename) + self._driver = get_driver(filename, version) + self._metadata = self._driver.get_metadata() + + def __repr__(self): + return "\n".join(["" % self._driver._filename, + "Created: %s" % self._driver.absolute_start, + "Image size: %sx%s (HxW)" % (self.height, self.width), + "Frames: %s" % len(self.frames), + "Channels: %s" % ", ".join(["'%s'" % str(channel) for channel in self.channels]), + "Fields of View: %s" % len(self.fields_of_view), + "Z-Levels: %s" % len(self.z_levels) + ]) + + def __len__(self): + """ + This should be the total number of images in the ND2, but it may be inaccurate. If the ND2 contains a + different number of images in a cycle (i.e. there are "gap" images) it will be higher than reality. + + :rtype: int + + """ + return self._driver.total_images_per_channel * len(self.channels) + + def __getitem__(self, item): + """ + Allows slicing ND2s. + + >>> nd2 = Nd2("my_images.nd2") + >>> image = nd2[16] # gets 17th frame + >>> for image in nd2[100:200]: # iterate over the 100th to 200th images + >>> do_something(image.data) + >>> for image in nd2[::-1]: # iterate backwards + >>> do_something(image.data) + >>> for image in nd2[37:422:17]: # do something super weird if you really want to + >>> do_something(image.data) + + :type item: int or slice + :rtype: nd2reader.model.Image() or generator + + """ + if isinstance(item, int): + try: + channel_offset = item % len(self.channels) + fov = self._calculate_field_of_view(item) + channel = self._calculate_channel(item) + z_level = self._calculate_z_level(item) + image_group_number = int(item / len(self.channels)) + frame_number = self._calculate_frame_number(image_group_number, fov, z_level) + timestamp, raw_image_data = self._get_raw_image_data(image_group_number, channel_offset) + image = Image(timestamp, frame_number, raw_image_data, fov, channel, z_level, self.height, self.width) + except (TypeError, ValueError): + return None + except KeyError: + raise IndexError("Invalid frame number.") + else: + return image + elif isinstance(item, slice): + return self._slice(item.start, item.stop, item.step) + raise IndexError + + def _slice(self, start, stop, step): + """ + Allows for iteration over a selection of the entire dataset. + + :type start: int + :type stop: int + :type step: int + :rtype: nd2reader.model.Image() or None + + """ + start = start if start is not None else 0 + step = step if step is not None else 1 + stop = stop if stop is not None else len(self) + # This weird thing with the step allows you to iterate backwards over the images + for i in range(start, stop)[::step]: + yield self[i] + + @property + def image_sets(self): + """ + Iterates over groups of related images. This is useful if your ND2 contains multiple fields of view. + A typical use case might be that you have, say, four areas of interest that you're monitoring, and every + minute you take a bright field and GFP image of each one. For each cycle, this method would produce four + ImageSet objects, each containing one bright field and one GFP image. + + :return: model.ImageSet() + + """ + for time_index in self.time_indexes: + image_set = ImageGroup() + for fov in self.fields_of_view: + for channel_name in self.channels: + for z_level in self.z_levels: + image = self.get_image(time_index, fov, channel_name, z_level) + if image is not None: + image_set.add(image) + yield image_set + + @property + def height(self): + """ + :return: height of each image, in pixels + :rtype: int + + """ + return self.metadata[six.b('ImageAttributes')][six.b('SLxImageAttributes')][six.b('uiHeight')] + + @property + def width(self): + """ + :return: width of each image, in pixels + :rtype: int + + """ + return self.metadata[six.b('ImageAttributes')][six.b('SLxImageAttributes')][six.b('uiWidth')] + + def get_image(self, frame_number, field_of_view, channel_name, z_level): + """ + Returns an Image if data exists for the given parameters, otherwise returns None. In general, you should avoid + using this method unless you're very familiar with the structure of ND2 files. + + :type frame_number: int + :param field_of_view: the label for the place in the XY-plane where this image was taken. + :type field_of_view: int + :param channel_name: the name of the color of this image + :type channel_name: str + :param z_level: the label for the location in the Z-plane where this image was taken. + :type z_level: int + :rtype: nd2reader.model.Image() or None + + """ + image_group_number = self._calculate_image_group_number(frame_number, field_of_view, z_level) + try: + timestamp, raw_image_data = self._get_raw_image_data(image_group_number, self._channel_offset[channel_name]) + image = Image(timestamp, frame_number, raw_image_data, field_of_view, channel_name, z_level, self.height, self.width) + except TypeError: + return None + else: + return image diff --git a/nd2reader/model/__init__.py b/nd2reader/model/__init__.py index df1ff5f..f7d3fa8 100644 --- a/nd2reader/model/__init__.py +++ b/nd2reader/model/__init__.py @@ -1,155 +1,2 @@ -# -*- coding: utf-8 -*- - -import collections -import numpy as np -import logging - -log = logging.getLogger(__name__) - - -class Image(object): - def __init__(self, timestamp, frame_number, raw_array, field_of_view, channel, z_level, height, width): - """ - A wrapper around the raw pixel data of an image. - - :param timestamp: The frame number relative to the . - :type timestamp: int - :param timestamp: The number of milliseconds after the beginning of the acquisition that this image was taken. - :type timestamp: int - :param raw_array: The raw sequence of bytes that represents the image. - :type raw_array: array.array() - :param field_of_view: The label for the place in the XY-plane where this image was taken. - :type field_of_view: int - :param channel: The name of the color of this image - :type channel: str - :param z_level: The label for the location in the Z-plane where this image was taken. - :type z_level: int - :param height: The height of the image in pixels. - :type height: int - :param width: The width of the image in pixels. - :type width: int - - """ - self._timestamp = timestamp - self._frame_number = int(frame_number) - self._raw_data = raw_array - self._field_of_view = field_of_view - self._channel = channel - self._z_level = z_level - self._height = height - self._width = width - self._data = None - - def __repr__(self): - return "\n".join(["", - "%sx%s (HxW)" % (self._height, self._width), - "Timestamp: %s" % self.timestamp, - "Frame: %s" % self._frame_number, - "Field of View: %s" % self.field_of_view, - "Channel: %s" % self.channel, - "Z-Level: %s" % self.z_level, - ]) - - @property - def data(self): - """ - The actual image data. - - :rtype np.array() - - """ - if self._data is None: - # The data is just a 1-dimensional array originally. - # We convert it to a 2D image here. - self._data = np.reshape(self._raw_data, (self._height, self._width)) - return self._data - - @property - def field_of_view(self): - """ - Which of the fixed locations this image was taken at. - - :rtype int: - - """ - return self._field_of_view - - @property - def timestamp(self): - """ - The number of seconds after the beginning of the acquisition that the image was taken. Note that for a given - field of view and z-level offset, if you have images of multiple channels, they will all be given the same - timestamp. No, this doesn't make much sense. But that's how ND2s are structured, so if your experiment depends - on millisecond accuracy, you need to find an alternative imaging system. - - :rtype float: - - """ - return self._timestamp / 1000.0 - - @property - def frame_number(self): - return self._frame_number - - @property - def channel(self): - """ - The name of the filter used to acquire this image. These are user-supplied in NIS Elements. - - :rtype str: - - """ - return self._channel - - @property - def z_level(self): - """ - The vertical offset of the image. These are simple integers starting from 0, where the 0 is the lowest - z-level and each subsequent level incremented by 1. - - For example, if you acquired images at -3 µm, 0 µm, and +3 µm, your z-levels would be: - - -3 µm: 0 - 0 µm: 1 - +3 µm: 2 - - :rtype int: - - """ - return self._z_level - - -class ImageSet(object): - """ - A group of images that were taken at roughly the same time. - - """ - def __init__(self): - self._images = collections.defaultdict(dict) - - def __len__(self): - """ The number of images in the image set. """ - return sum([len(channel) for channel in self._images.values()]) - - def __repr__(self): - return "\n".join(["", - "Image count: %s" % len(self)]) - - def get(self, channel, z_level=0): - """ - Retrieve an image with a given channel and z-level. For most users, z_level will always be 0. - - :type channel: str - :type z_level: int - - """ - return self._images.get(channel).get(z_level) - - def add(self, image): - """ - Stores an image. - - :type image: nd2reader.model.Image() - - """ - self._images[image.channel][image.z_level] = image +from nd2reader.model.image import Image +from nd2reader.model.group import ImageGroup diff --git a/nd2reader/model/group.py b/nd2reader/model/group.py new file mode 100644 index 0000000..8d6bf04 --- /dev/null +++ b/nd2reader/model/group.py @@ -0,0 +1,37 @@ +import collections + + +class ImageGroup(object): + """ + A group of images that were taken at roughly the same time and in the same field of view. + + """ + def __init__(self): + self._images = collections.defaultdict(dict) + + def __len__(self): + """ The number of images in the image set. """ + return sum([len(channel) for channel in self._images.values()]) + + def __repr__(self): + return "\n".join(["", + "Image count: %s" % len(self)]) + + def get(self, channel, z_level=0): + """ + Retrieve an image with a given channel and z-level. For most users, z_level will always be 0. + + :type channel: str + :type z_level: int + + """ + return self._images.get(channel).get(z_level) + + def add(self, image): + """ + Stores an image. + + :type image: nd2reader.model.Image() + + """ + self._images[image.channel][image.z_level] = image diff --git a/nd2reader/model/image.py b/nd2reader/model/image.py new file mode 100644 index 0000000..e81c8d1 --- /dev/null +++ b/nd2reader/model/image.py @@ -0,0 +1,115 @@ +# -*- coding: utf-8 -*- + +import numpy as np + + +class Image(object): + def __init__(self, timestamp, frame_number, raw_array, field_of_view, channel, z_level, height, width): + """ + A wrapper around the raw pixel data of an image. + + :param timestamp: The frame number relative to the . + :type timestamp: int + :param timestamp: The number of milliseconds after the beginning of the acquisition that this image was taken. + :type timestamp: int + :param raw_array: The raw sequence of bytes that represents the image. + :type raw_array: array.array() + :param field_of_view: The label for the place in the XY-plane where this image was taken. + :type field_of_view: int + :param channel: The name of the color of this image + :type channel: str + :param z_level: The label for the location in the Z-plane where this image was taken. + :type z_level: int + :param height: The height of the image in pixels. + :type height: int + :param width: The width of the image in pixels. + :type width: int + + """ + self._timestamp = timestamp + self._frame_number = int(frame_number) + self._raw_data = raw_array + self._field_of_view = field_of_view + self._channel = channel + self._z_level = z_level + self._height = height + self._width = width + self._data = None + + def __repr__(self): + return "\n".join(["", + "%sx%s (HxW)" % (self._height, self._width), + "Timestamp: %s" % self.timestamp, + "Frame: %s" % self._frame_number, + "Field of View: %s" % self.field_of_view, + "Channel: %s" % self.channel, + "Z-Level: %s" % self.z_level, + ]) + + @property + def data(self): + """ + The actual image data. + + :rtype np.array() + + """ + if self._data is None: + # The data is just a 1-dimensional array originally. + # We convert it to a 2D image here. + self._data = np.reshape(self._raw_data, (self._height, self._width)) + return self._data + + @property + def field_of_view(self): + """ + Which of the fixed locations this image was taken at. + + :rtype int: + + """ + return self._field_of_view + + @property + def timestamp(self): + """ + The number of seconds after the beginning of the acquisition that the image was taken. Note that for a given + field of view and z-level offset, if you have images of multiple channels, they will all be given the same + timestamp. No, this doesn't make much sense. But that's how ND2s are structured, so if your experiment depends + on millisecond accuracy, you need to find an alternative imaging system. + + :rtype float: + + """ + return self._timestamp / 1000.0 + + @property + def frame_number(self): + return self._frame_number + + @property + def channel(self): + """ + The name of the filter used to acquire this image. These are user-supplied in NIS Elements. + + :rtype str: + + """ + return self._channel + + @property + def z_level(self): + """ + The vertical offset of the image. These are simple integers starting from 0, where the 0 is the lowest + z-level and each subsequent level incremented by 1. + + For example, if you acquired images at -3 µm, 0 µm, and +3 µm, your z-levels would be: + + -3 µm: 0 + 0 µm: 1 + +3 µm: 2 + + :rtype int: + + """ + return self._z_level diff --git a/nd2reader/parser.py b/nd2reader/parser.py index 08d41ee..e69de29 100644 --- a/nd2reader/parser.py +++ b/nd2reader/parser.py @@ -1,386 +0,0 @@ -# -*- coding: utf-8 -*- - -import array -from datetime import datetime -import numpy as np -import re -import struct -import six - - -class Nd2Parser(object): - """ - Reads .nd2 files, provides an interface to the metadata, and generates numpy arrays from the image data. - You should not ever need to instantiate this class manually unless you're a developer. - - """ - CHUNK_HEADER = 0xabeceda - CHUNK_MAP_START = six.b("ND2 FILEMAP SIGNATURE NAME 0001!") - CHUNK_MAP_END = six.b("ND2 CHUNK MAP SIGNATURE 0000001!") - - def __init__(self, filename): - self._absolute_start = None - self._filename = filename - self._fh = None - self._channels = None - self._channel_count = None - self._chunk_map_start_location = None - self._cursor_position = 0 - self._dimension_text = None - self._fields_of_view = None - self._label_map = {} - self.metadata = {} - self._read_map() - self._time_indexes = None - self._parse_metadata() - self._z_levels = None - - @property - def absolute_start(self): - """ - The date and time when acquisition began. - - :rtype: datetime.datetime() - - """ - if self._absolute_start is None: - for line in self.metadata[six.b('ImageTextInfo')][six.b('SLxImageTextInfo')].values(): - line = line.decode("utf8") - absolute_start_12 = None - absolute_start_24 = None - # ND2s seem to randomly switch between 12- and 24-hour representations. - try: - absolute_start_24 = datetime.strptime(line, "%m/%d/%Y %H:%M:%S") - except (TypeError, ValueError): - pass - try: - absolute_start_12 = datetime.strptime(line, "%m/%d/%Y %I:%M:%S %p") - except (TypeError, ValueError): - pass - if not absolute_start_12 and not absolute_start_24: - continue - return absolute_start_12 if absolute_start_12 else absolute_start_24 - raise ValueError("This ND2 has no recorded start time. This is probably a bug.") - return self._absolute_start - - @property - def channels(self): - """ - These are labels created by the NIS Elements user. Typically they may a short description of the filter cube - used (e.g. "bright field", "GFP", etc.) - - :rtype: list - - """ - if not self._channels: - self._channels = [] - metadata = self.metadata[six.b('ImageMetadataSeq')][six.b('SLxPictureMetadata')][six.b('sPicturePlanes')] - try: - validity = self.metadata[six.b('ImageMetadata')][six.b('SLxExperiment')][six.b('ppNextLevelEx')][six.b('')][0][six.b('ppNextLevelEx')][six.b('')][0][six.b('pItemValid')] - except KeyError: - # If none of the channels have been deleted, there is no validity list, so we just make one - validity = [True for _ in metadata] - # Channel information is contained in dictionaries with the keys a0, a1...an where the number - # indicates the order in which the channel is stored. So by sorting the dicts alphabetically - # we get the correct order. - for (label, chan), valid in zip(sorted(metadata[six.b('sPlaneNew')].items()), validity): - if not valid: - continue - self._channels.append(chan[six.b('sDescription')].decode("utf8")) - return self._channels - - @property - def fields_of_view(self): - """ - The metadata contains information about fields of view, but it contains it even if some fields - of view were cropped. We can't find anything that states which fields of view are actually - in the image data, so we have to calculate it. There probably is something somewhere, since - NIS Elements can figure it out, but we haven't found it yet. - - :rtype: list - - """ - if self._fields_of_view is None: - self._fields_of_view = self._parse_dimension_text(r""".*?XY\((\d+)\).*?""") - return self._fields_of_view - - @property - def time_indexes(self): - """ - The number of cycles. - - :rtype: list - - """ - if self._time_indexes is None: - self._time_indexes = self._parse_dimension_text(r""".*?T'\((\d+)\).*?""") - return self._time_indexes - - @property - def z_levels(self): - """ - The different levels in the Z-plane. Just a sequence from 0 to n. - - :rtype: list - - """ - if self._z_levels is None: - self._z_levels = self._parse_dimension_text(r""".*?Z\((\d+)\).*?""") - return self._z_levels - - def _calculate_field_of_view(self, frame_number): - images_per_cycle = len(self.z_levels) * len(self.channels) - return int((frame_number - (frame_number % images_per_cycle)) / images_per_cycle) % len(self.fields_of_view) - - def _calculate_channel(self, frame_number): - return self.channels[frame_number % len(self.channels)] - - def _calculate_z_level(self, frame_number): - return self.z_levels[int(((frame_number - (frame_number % len(self.channels))) / len(self.channels)) % len(self.z_levels))] - - @property - def _file_handle(self): - if self._fh is None: - self._fh = open(self._filename, "rb") - return self._fh - - def _get_raw_image_data(self, image_group_number, channel_offset): - """ - Reads the raw bytes and the timestamp of an image. - - :param image_group_number: groups are made of images with the same time index, field of view and z-level. - :type image_group_number: int - :param channel_offset: the offset in the array where the bytes for this image are found. - :type channel_offset: int - - :return: (int, array.array()) or None - - """ - chunk = self._label_map[six.b("ImageDataSeq|%d!" % image_group_number)] - data = self._read_chunk(chunk) - # All images in the same image group share the same timestamp! So if you have complicated image data, - # your timestamps may not be entirely accurate. Practically speaking though, they'll only be off by a few - # seconds unless you're doing something super weird. - timestamp = struct.unpack("d", data[:8])[0] - image_group_data = array.array("H", data) - image_data_start = 4 + channel_offset - # The images for the various channels are interleaved within the same array. For example, the second image - # of a four image group will be composed of bytes 2, 6, 10, etc. If you understand why someone would design - # a data structure that way, please send the author of this library a message. - image_data = image_group_data[image_data_start::len(self.channels)] - # Skip images that are all zeros! This is important, since NIS Elements creates blank "gap" images if you - # don't have the same number of images each cycle. We discovered this because we only took GFP images every - # other cycle to reduce phototoxicity, but NIS Elements still allocated memory as if we were going to take - # them every cycle. - if np.any(image_data): - return timestamp, image_data - return None - - @property - def _dimensions(self): - """ - While there are metadata values that represent a lot of what we want to capture, they seem to be unreliable. - Sometimes certain elements don't exist, or change their data type randomly. However, the human-readable text - is always there and in the same exact format, so we just parse that instead. - - :rtype: str - - """ - if self._dimension_text is None: - for line in self.metadata[six.b('ImageTextInfo')][six.b('SLxImageTextInfo')].values(): - if six.b("Dimensions:") in line: - metadata = line - break - else: - raise ValueError("Could not parse metadata dimensions!") - for line in metadata.split(six.b("\r\n")): - if line.startswith(six.b("Dimensions:")): - self._dimension_text = line - break - else: - raise ValueError("Could not parse metadata dimensions!") - return self._dimension_text - - def _calculate_image_group_number(self, time_index, fov, z_level): - """ - Images are grouped together if they share the same time index, field of view, and z-level. - - :type time_index: int - :type fov: int - :type z_level: int - - :rtype: int - - """ - return time_index * len(self.fields_of_view) * len(self.z_levels) + (fov * len(self.z_levels) + z_level) - - def _calculate_frame_number(self, image_group_number, fov, z_level): - return (image_group_number - (fov * len(self.z_levels) + z_level)) / (len(self.fields_of_view) * len(self.z_levels)) - - @property - def _channel_offset(self): - """ - Image data is interleaved for each image set. That is, if there are four images in a set, the first image - will consist of pixels 1, 5, 9, etc, the second will be pixels 2, 6, 10, and so forth. - - :rtype: dict - - """ - channel_offset = {} - for n, channel in enumerate(self._channels): - channel_offset[channel] = n - return channel_offset - - def _parse_dimension_text(self, pattern): - try: - count = int(re.match(pattern, self._dimensions).group(1)) - except AttributeError: - return [0] - except TypeError: - match = re.match(pattern, self._dimensions.decode("utf8")) - if not match: - return [0] - return list(range(int(match.group(1)))) - else: - return list(range(count)) - - @property - def _total_images_per_channel(self): - """ - The total number of images per channel. Warning: this may be inaccurate as it includes "gap" images. - - :rtype: int - - """ - return self.metadata[six.b('ImageAttributes')][six.b('SLxImageAttributes')][six.b('uiSequenceCount')] - - def _parse_metadata(self): - """ - Reads all metadata. - - """ - for label in self._label_map.keys(): - if label.endswith(six.b("LV!")) or six.b("LV|") in label: - data = self._read_chunk(self._label_map[label]) - stop = label.index(six.b("LV")) - self.metadata[label[:stop]] = self._read_metadata(data, 1) - - def _read_map(self): - """ - Every label ends with an exclamation point, however, we can't directly search for those to find all the labels - as some of the bytes contain the value 33, which is the ASCII code for "!". So we iteratively find each label, - grab the subsequent data (always 16 bytes long), advance to the next label and repeat. - - """ - self._file_handle.seek(-8, 2) - chunk_map_start_location = struct.unpack("Q", self._file_handle.read(8))[0] - self._file_handle.seek(chunk_map_start_location) - raw_text = self._file_handle.read(-1) - label_start = raw_text.index(Nd2Parser.CHUNK_MAP_START) + 32 - - while True: - data_start = raw_text.index(six.b("!"), label_start) + 1 - key = raw_text[label_start: data_start] - location, length = struct.unpack("QQ", raw_text[data_start: data_start + 16]) - if key == Nd2Parser.CHUNK_MAP_END: - # We've reached the end of the chunk map - break - self._label_map[key] = location - label_start = data_start + 16 - - def _read_chunk(self, chunk_location): - """ - Gets the data for a given chunk pointer - - """ - self._file_handle.seek(chunk_location) - # The chunk metadata is always 16 bytes long - chunk_metadata = self._file_handle.read(16) - header, relative_offset, data_length = struct.unpack("IIQ", chunk_metadata) - if header != Nd2Parser.CHUNK_HEADER: - raise ValueError("The ND2 file seems to be corrupted.") - # We start at the location of the chunk metadata, skip over the metadata, and then proceed to the - # start of the actual data field, which is at some arbitrary place after the metadata. - self._file_handle.seek(chunk_location + 16 + relative_offset) - return self._file_handle.read(data_length) - - def _parse_unsigned_char(self, data): - return struct.unpack("B", data.read(1))[0] - - def _parse_unsigned_int(self, data): - return struct.unpack("I", data.read(4))[0] - - def _parse_unsigned_long(self, data): - return struct.unpack("Q", data.read(8))[0] - - def _parse_double(self, data): - return struct.unpack("d", data.read(8))[0] - - def _parse_string(self, data): - value = data.read(2) - while not value.endswith(six.b("\x00\x00")): - # the string ends at the first instance of \x00\x00 - value += data.read(2) - return value.decode("utf16")[:-1].encode("utf8") - - def _parse_char_array(self, data): - array_length = struct.unpack("Q", data.read(8))[0] - return array.array("B", data.read(array_length)) - - def _parse_metadata_item(self, data): - """ - Reads hierarchical data, analogous to a Python dict. - - """ - new_count, length = struct.unpack(" Date: Sun, 20 Sep 2015 18:45:03 -0500 Subject: [PATCH 2/8] #66 interface refactoring is complete except for comments --- nd2reader/driver/v3.py | 19 +++++++-- nd2reader/interface.py | 84 ++++++++++++++++++++-------------------- nd2reader/model/image.py | 29 +++----------- 3 files changed, 64 insertions(+), 68 deletions(-) diff --git a/nd2reader/driver/v3.py b/nd2reader/driver/v3.py index 0dca2aa..c5a095b 100644 --- a/nd2reader/driver/v3.py +++ b/nd2reader/driver/v3.py @@ -6,6 +6,7 @@ import numpy as np import re import struct import six +from nd2reader.model.image import Image class Nd2Parser(object): @@ -29,12 +30,22 @@ class Nd2Parser(object): self._dimension_text = None self._fields_of_view = None self._label_map = {} - self.metadata = {} + self._metadata = {} self._read_map() self._time_indexes = None self._parse_metadata() self._z_levels = None + def get_image(self, index): + channel_offset = index % len(self._metadata.channels) + fov = self._calculate_field_of_view(index) + channel = self._calculate_channel(index) + z_level = self._calculate_z_level(index) + image_group_number = int(index / len(self._metadata.channels)) + frame_number = self._calculate_frame_number(image_group_number, fov, z_level) + timestamp, image = self._get_raw_image_data(image_group_number, channel_offset, self._metadata.height, self._metadata.width) + image.add_params(timestamp, frame_number, fov, channel, z_level) + @property def absolute_start(self): """ @@ -144,7 +155,7 @@ class Nd2Parser(object): self._fh = open(self._filename, "rb") return self._fh - def _get_raw_image_data(self, image_group_number, channel_offset): + def _get_raw_image_data(self, image_group_number, channel_offset, height, width): """ Reads the raw bytes and the timestamp of an image. @@ -167,13 +178,13 @@ class Nd2Parser(object): # The images for the various channels are interleaved within the same array. For example, the second image # of a four image group will be composed of bytes 2, 6, 10, etc. If you understand why someone would design # a data structure that way, please send the author of this library a message. - image_data = image_group_data[image_data_start::len(self.channels)] + image_data = np.reshape(image_group_data[image_data_start::len(self.channels)], (height, width)) # Skip images that are all zeros! This is important, since NIS Elements creates blank "gap" images if you # don't have the same number of images each cycle. We discovered this because we only took GFP images every # other cycle to reduce phototoxicity, but NIS Elements still allocated memory as if we were going to take # them every cycle. if np.any(image_data): - return timestamp, image_data + return timestamp, Image(image_data) return None @property diff --git a/nd2reader/interface.py b/nd2reader/interface.py index 0ff6c90..b02c4c0 100644 --- a/nd2reader/interface.py +++ b/nd2reader/interface.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- -from nd2reader.model import Image, ImageGroup +from nd2reader.model import ImageGroup from nd2reader.driver import get_driver from nd2reader.driver.version import get_version -import six +import warnings class Nd2(object): @@ -12,13 +12,14 @@ class Nd2(object): """ def __init__(self, filename): + self._filename = filename version = get_version(filename) self._driver = get_driver(filename, version) self._metadata = self._driver.get_metadata() def __repr__(self): - return "\n".join(["" % self._driver._filename, - "Created: %s" % self._driver.absolute_start, + return "\n".join(["" % self._filename, + "Created: %s" % self.date, "Image size: %sx%s (HxW)" % (self.height, self.width), "Frames: %s" % len(self.frames), "Channels: %s" % ", ".join(["'%s'" % str(channel) for channel in self.channels]), @@ -43,32 +44,18 @@ class Nd2(object): >>> nd2 = Nd2("my_images.nd2") >>> image = nd2[16] # gets 17th frame >>> for image in nd2[100:200]: # iterate over the 100th to 200th images - >>> do_something(image.data) + >>> do_something(image) >>> for image in nd2[::-1]: # iterate backwards - >>> do_something(image.data) + >>> do_something(image) >>> for image in nd2[37:422:17]: # do something super weird if you really want to - >>> do_something(image.data) + >>> do_something(image) :type item: int or slice :rtype: nd2reader.model.Image() or generator """ if isinstance(item, int): - try: - channel_offset = item % len(self.channels) - fov = self._calculate_field_of_view(item) - channel = self._calculate_channel(item) - z_level = self._calculate_z_level(item) - image_group_number = int(item / len(self.channels)) - frame_number = self._calculate_frame_number(image_group_number, fov, z_level) - timestamp, raw_image_data = self._get_raw_image_data(image_group_number, channel_offset) - image = Image(timestamp, frame_number, raw_image_data, fov, channel, z_level, self.height, self.width) - except (TypeError, ValueError): - return None - except KeyError: - raise IndexError("Invalid frame number.") - else: - return image + return self._driver.get_image(item) elif isinstance(item, slice): return self._slice(item.start, item.stop, item.step) raise IndexError @@ -80,7 +67,7 @@ class Nd2(object): :type start: int :type stop: int :type step: int - :rtype: nd2reader.model.Image() or None + :rtype: nd2reader.model.Image() """ start = start if start is not None else 0 @@ -101,15 +88,37 @@ class Nd2(object): :return: model.ImageSet() """ - for time_index in self.time_indexes: - image_set = ImageGroup() + warnings.warn("nd2.image_sets will be removed from the nd2reader library in the near future.", DeprecationWarning) + + for frame in self.frames: + image_group = ImageGroup() for fov in self.fields_of_view: for channel_name in self.channels: for z_level in self.z_levels: - image = self.get_image(time_index, fov, channel_name, z_level) + image = self.get_image(frame, fov, channel_name, z_level) if image is not None: - image_set.add(image) - yield image_set + image_group.add(image) + yield image_group + + @property + def date(self): + return self._metadata.date + + @property + def z_levels(self): + return self._metadata.z_levels + + @property + def fields_of_view(self): + return self._metadata.fields_of_view + + @property + def channels(self): + return self._metadata.channels + + @property + def frames(self): + return self._metadata.frames @property def height(self): @@ -118,7 +127,7 @@ class Nd2(object): :rtype: int """ - return self.metadata[six.b('ImageAttributes')][six.b('SLxImageAttributes')][six.b('uiHeight')] + return self._metadata.height @property def width(self): @@ -127,12 +136,11 @@ class Nd2(object): :rtype: int """ - return self.metadata[six.b('ImageAttributes')][six.b('SLxImageAttributes')][six.b('uiWidth')] + return self._metadata.width def get_image(self, frame_number, field_of_view, channel_name, z_level): """ - Returns an Image if data exists for the given parameters, otherwise returns None. In general, you should avoid - using this method unless you're very familiar with the structure of ND2 files. + Returns an Image if data exists for the given parameters, otherwise returns None. :type frame_number: int :param field_of_view: the label for the place in the XY-plane where this image was taken. @@ -141,14 +149,8 @@ class Nd2(object): :type channel_name: str :param z_level: the label for the location in the Z-plane where this image was taken. :type z_level: int - :rtype: nd2reader.model.Image() or None + + :rtype: nd2reader.model.Image() """ - image_group_number = self._calculate_image_group_number(frame_number, field_of_view, z_level) - try: - timestamp, raw_image_data = self._get_raw_image_data(image_group_number, self._channel_offset[channel_name]) - image = Image(timestamp, frame_number, raw_image_data, field_of_view, channel_name, z_level, self.height, self.width) - except TypeError: - return None - else: - return image + return self._driver.get_image_by_attributes(frame_number, field_of_view, channel_name, z_level) diff --git a/nd2reader/model/image.py b/nd2reader/model/image.py index e81c8d1..31ad0c6 100644 --- a/nd2reader/model/image.py +++ b/nd2reader/model/image.py @@ -3,8 +3,11 @@ import numpy as np -class Image(object): - def __init__(self, timestamp, frame_number, raw_array, field_of_view, channel, z_level, height, width): +class Image(np.ndarray): + def __new__(cls, array): + return np.asarray(array).view(cls) + + def add_params(self, timestamp, frame_number, field_of_view, channel, z_level): """ A wrapper around the raw pixel data of an image. @@ -12,8 +15,6 @@ class Image(object): :type timestamp: int :param timestamp: The number of milliseconds after the beginning of the acquisition that this image was taken. :type timestamp: int - :param raw_array: The raw sequence of bytes that represents the image. - :type raw_array: array.array() :param field_of_view: The label for the place in the XY-plane where this image was taken. :type field_of_view: int :param channel: The name of the color of this image @@ -28,17 +29,13 @@ class Image(object): """ self._timestamp = timestamp self._frame_number = int(frame_number) - self._raw_data = raw_array self._field_of_view = field_of_view self._channel = channel self._z_level = z_level - self._height = height - self._width = width - self._data = None def __repr__(self): return "\n".join(["", - "%sx%s (HxW)" % (self._height, self._width), + "%sx%s (HxW)" % (self.height, self.width), "Timestamp: %s" % self.timestamp, "Frame: %s" % self._frame_number, "Field of View: %s" % self.field_of_view, @@ -46,20 +43,6 @@ class Image(object): "Z-Level: %s" % self.z_level, ]) - @property - def data(self): - """ - The actual image data. - - :rtype np.array() - - """ - if self._data is None: - # The data is just a 1-dimensional array originally. - # We convert it to a 2D image here. - self._data = np.reshape(self._raw_data, (self._height, self._width)) - return self._data - @property def field_of_view(self): """ From 177530d65785d39dcb4df8ddb16a61597c92e775 Mon Sep 17 00:00:00 2001 From: jim Date: Thu, 24 Sep 2015 18:45:49 -0500 Subject: [PATCH 3/8] #66 refactored init --- nd2reader/interface.py | 11 ++++++----- nd2reader/{driver => }/version.py | 0 2 files changed, 6 insertions(+), 5 deletions(-) rename nd2reader/{driver => }/version.py (100%) diff --git a/nd2reader/interface.py b/nd2reader/interface.py index b02c4c0..fa48a92 100644 --- a/nd2reader/interface.py +++ b/nd2reader/interface.py @@ -1,8 +1,8 @@ # -*- coding: utf-8 -*- from nd2reader.model import ImageGroup -from nd2reader.driver import get_driver -from nd2reader.driver.version import get_version +from nd2reader.parser import get_parser +from nd2reader.version import get_version import warnings @@ -12,10 +12,11 @@ class Nd2(object): """ def __init__(self, filename): - self._filename = filename version = get_version(filename) - self._driver = get_driver(filename, version) - self._metadata = self._driver.get_metadata() + parser = get_parser(filename, version) + self._driver = parser.driver + self._metadata = parser.metadata + self._filename = filename def __repr__(self): return "\n".join(["" % self._filename, diff --git a/nd2reader/driver/version.py b/nd2reader/version.py similarity index 100% rename from nd2reader/driver/version.py rename to nd2reader/version.py From 544fd974ebbd885b3eb3b583f6e5af366db322a4 Mon Sep 17 00:00:00 2001 From: jim Date: Sat, 26 Sep 2015 17:49:57 -0500 Subject: [PATCH 4/8] #66 more refactoring --- nd2reader/driver/driver.py | 11 - nd2reader/driver/v3.py | 397 --------------------- nd2reader/{driver => imreader}/__init__.py | 0 nd2reader/{driver => imreader}/v2.py | 0 nd2reader/imreader/v3.py | 96 +++++ nd2reader/interface.py | 6 +- nd2reader/model/metadata.py | 62 ++++ nd2reader/parser/__init__.py | 1 + nd2reader/parser/parser.py | 2 + nd2reader/{parser.py => parser/v2.py} | 0 nd2reader/parser/v3.py | 295 +++++++++++++++ 11 files changed, 459 insertions(+), 411 deletions(-) delete mode 100644 nd2reader/driver/driver.py delete mode 100644 nd2reader/driver/v3.py rename nd2reader/{driver => imreader}/__init__.py (100%) rename nd2reader/{driver => imreader}/v2.py (100%) create mode 100644 nd2reader/imreader/v3.py create mode 100644 nd2reader/model/metadata.py create mode 100644 nd2reader/parser/__init__.py create mode 100644 nd2reader/parser/parser.py rename nd2reader/{parser.py => parser/v2.py} (100%) create mode 100644 nd2reader/parser/v3.py diff --git a/nd2reader/driver/driver.py b/nd2reader/driver/driver.py deleted file mode 100644 index 8358870..0000000 --- a/nd2reader/driver/driver.py +++ /dev/null @@ -1,11 +0,0 @@ -def get_driver(filename, version): - """ - Instantiates the correct driver for the ND2, which allows us to parse metadata and access images. - - :param filename: the path to the ND2 - :type filename: str - :param version: the version of the ND2. Note that this is different than the version of NIS Elements used to create the ND2. - :type version: tuple - - """ - return 1 diff --git a/nd2reader/driver/v3.py b/nd2reader/driver/v3.py deleted file mode 100644 index c5a095b..0000000 --- a/nd2reader/driver/v3.py +++ /dev/null @@ -1,397 +0,0 @@ -# -*- coding: utf-8 -*- - -import array -from datetime import datetime -import numpy as np -import re -import struct -import six -from nd2reader.model.image import Image - - -class Nd2Parser(object): - """ - Reads .nd2 files, provides an interface to the metadata, and generates numpy arrays from the image data. - You should not ever need to instantiate this class manually unless you're a developer. - - """ - CHUNK_HEADER = 0xabeceda - CHUNK_MAP_START = six.b("ND2 FILEMAP SIGNATURE NAME 0001!") - CHUNK_MAP_END = six.b("ND2 CHUNK MAP SIGNATURE 0000001!") - - def __init__(self, filename): - self._absolute_start = None - self._filename = filename - self._fh = None - self._channels = None - self._channel_count = None - self._chunk_map_start_location = None - self._cursor_position = 0 - self._dimension_text = None - self._fields_of_view = None - self._label_map = {} - self._metadata = {} - self._read_map() - self._time_indexes = None - self._parse_metadata() - self._z_levels = None - - def get_image(self, index): - channel_offset = index % len(self._metadata.channels) - fov = self._calculate_field_of_view(index) - channel = self._calculate_channel(index) - z_level = self._calculate_z_level(index) - image_group_number = int(index / len(self._metadata.channels)) - frame_number = self._calculate_frame_number(image_group_number, fov, z_level) - timestamp, image = self._get_raw_image_data(image_group_number, channel_offset, self._metadata.height, self._metadata.width) - image.add_params(timestamp, frame_number, fov, channel, z_level) - - @property - def absolute_start(self): - """ - The date and time when acquisition began. - - :rtype: datetime.datetime() - - """ - if self._absolute_start is None: - for line in self.metadata[six.b('ImageTextInfo')][six.b('SLxImageTextInfo')].values(): - line = line.decode("utf8") - absolute_start_12 = None - absolute_start_24 = None - # ND2s seem to randomly switch between 12- and 24-hour representations. - try: - absolute_start_24 = datetime.strptime(line, "%m/%d/%Y %H:%M:%S") - except (TypeError, ValueError): - pass - try: - absolute_start_12 = datetime.strptime(line, "%m/%d/%Y %I:%M:%S %p") - except (TypeError, ValueError): - pass - if not absolute_start_12 and not absolute_start_24: - continue - return absolute_start_12 if absolute_start_12 else absolute_start_24 - raise ValueError("This ND2 has no recorded start time. This is probably a bug.") - return self._absolute_start - - @property - def channels(self): - """ - These are labels created by the NIS Elements user. Typically they may a short description of the filter cube - used (e.g. "bright field", "GFP", etc.) - - :rtype: list - - """ - if not self._channels: - self._channels = [] - metadata = self.metadata[six.b('ImageMetadataSeq')][six.b('SLxPictureMetadata')][six.b('sPicturePlanes')] - try: - validity = self.metadata[six.b('ImageMetadata')][six.b('SLxExperiment')][six.b('ppNextLevelEx')][six.b('')][0][six.b('ppNextLevelEx')][six.b('')][0][six.b('pItemValid')] - except KeyError: - # If none of the channels have been deleted, there is no validity list, so we just make one - validity = [True for _ in metadata] - # Channel information is contained in dictionaries with the keys a0, a1...an where the number - # indicates the order in which the channel is stored. So by sorting the dicts alphabetically - # we get the correct order. - for (label, chan), valid in zip(sorted(metadata[six.b('sPlaneNew')].items()), validity): - if not valid: - continue - self._channels.append(chan[six.b('sDescription')].decode("utf8")) - return self._channels - - @property - def fields_of_view(self): - """ - The metadata contains information about fields of view, but it contains it even if some fields - of view were cropped. We can't find anything that states which fields of view are actually - in the image data, so we have to calculate it. There probably is something somewhere, since - NIS Elements can figure it out, but we haven't found it yet. - - :rtype: list - - """ - if self._fields_of_view is None: - self._fields_of_view = self._parse_dimension_text(r""".*?XY\((\d+)\).*?""") - return self._fields_of_view - - @property - def frames(self): - """ - The number of cycles. - - :rtype: list - - """ - if self._time_indexes is None: - self._time_indexes = self._parse_dimension_text(r""".*?T'\((\d+)\).*?""") - return self._time_indexes - - @property - def z_levels(self): - """ - The different levels in the Z-plane. Just a sequence from 0 to n. - - :rtype: list - - """ - if self._z_levels is None: - self._z_levels = self._parse_dimension_text(r""".*?Z\((\d+)\).*?""") - return self._z_levels - - def _calculate_field_of_view(self, frame_number): - images_per_cycle = len(self.z_levels) * len(self.channels) - return int((frame_number - (frame_number % images_per_cycle)) / images_per_cycle) % len(self.fields_of_view) - - def _calculate_channel(self, frame_number): - return self.channels[frame_number % len(self.channels)] - - def _calculate_z_level(self, frame_number): - return self.z_levels[int(((frame_number - (frame_number % len(self.channels))) / len(self.channels)) % len(self.z_levels))] - - @property - def _file_handle(self): - if self._fh is None: - self._fh = open(self._filename, "rb") - return self._fh - - def _get_raw_image_data(self, image_group_number, channel_offset, height, width): - """ - Reads the raw bytes and the timestamp of an image. - - :param image_group_number: groups are made of images with the same time index, field of view and z-level. - :type image_group_number: int - :param channel_offset: the offset in the array where the bytes for this image are found. - :type channel_offset: int - - :return: (int, array.array()) or None - - """ - chunk = self._label_map[six.b("ImageDataSeq|%d!" % image_group_number)] - data = self._read_chunk(chunk) - # All images in the same image group share the same timestamp! So if you have complicated image data, - # your timestamps may not be entirely accurate. Practically speaking though, they'll only be off by a few - # seconds unless you're doing something super weird. - timestamp = struct.unpack("d", data[:8])[0] - image_group_data = array.array("H", data) - image_data_start = 4 + channel_offset - # The images for the various channels are interleaved within the same array. For example, the second image - # of a four image group will be composed of bytes 2, 6, 10, etc. If you understand why someone would design - # a data structure that way, please send the author of this library a message. - image_data = np.reshape(image_group_data[image_data_start::len(self.channels)], (height, width)) - # Skip images that are all zeros! This is important, since NIS Elements creates blank "gap" images if you - # don't have the same number of images each cycle. We discovered this because we only took GFP images every - # other cycle to reduce phototoxicity, but NIS Elements still allocated memory as if we were going to take - # them every cycle. - if np.any(image_data): - return timestamp, Image(image_data) - return None - - @property - def _dimensions(self): - """ - While there are metadata values that represent a lot of what we want to capture, they seem to be unreliable. - Sometimes certain elements don't exist, or change their data type randomly. However, the human-readable text - is always there and in the same exact format, so we just parse that instead. - - :rtype: str - - """ - if self._dimension_text is None: - for line in self.metadata[six.b('ImageTextInfo')][six.b('SLxImageTextInfo')].values(): - if six.b("Dimensions:") in line: - metadata = line - break - else: - raise ValueError("Could not parse metadata dimensions!") - for line in metadata.split(six.b("\r\n")): - if line.startswith(six.b("Dimensions:")): - self._dimension_text = line - break - else: - raise ValueError("Could not parse metadata dimensions!") - return self._dimension_text - - def _calculate_image_group_number(self, time_index, fov, z_level): - """ - Images are grouped together if they share the same time index, field of view, and z-level. - - :type time_index: int - :type fov: int - :type z_level: int - - :rtype: int - - """ - return time_index * len(self.fields_of_view) * len(self.z_levels) + (fov * len(self.z_levels) + z_level) - - def _calculate_frame_number(self, image_group_number, fov, z_level): - return (image_group_number - (fov * len(self.z_levels) + z_level)) / (len(self.fields_of_view) * len(self.z_levels)) - - @property - def _channel_offset(self): - """ - Image data is interleaved for each image set. That is, if there are four images in a set, the first image - will consist of pixels 1, 5, 9, etc, the second will be pixels 2, 6, 10, and so forth. - - :rtype: dict - - """ - channel_offset = {} - for n, channel in enumerate(self._channels): - channel_offset[channel] = n - return channel_offset - - def _parse_dimension_text(self, pattern): - try: - count = int(re.match(pattern, self._dimensions).group(1)) - except AttributeError: - return [0] - except TypeError: - match = re.match(pattern, self._dimensions.decode("utf8")) - if not match: - return [0] - return list(range(int(match.group(1)))) - else: - return list(range(count)) - - @property - def _total_images_per_channel(self): - """ - The total number of images per channel. Warning: this may be inaccurate as it includes "gap" images. - - :rtype: int - - """ - return self.metadata[six.b('ImageAttributes')][six.b('SLxImageAttributes')][six.b('uiSequenceCount')] - - def _parse_metadata(self): - """ - Reads all metadata. - - """ - for label in self._label_map.keys(): - if label.endswith(six.b("LV!")) or six.b("LV|") in label: - data = self._read_chunk(self._label_map[label]) - stop = label.index(six.b("LV")) - self.metadata[label[:stop]] = self._read_metadata(data, 1) - - def _read_map(self): - """ - Every label ends with an exclamation point, however, we can't directly search for those to find all the labels - as some of the bytes contain the value 33, which is the ASCII code for "!". So we iteratively find each label, - grab the subsequent data (always 16 bytes long), advance to the next label and repeat. - - """ - self._file_handle.seek(-8, 2) - chunk_map_start_location = struct.unpack("Q", self._file_handle.read(8))[0] - self._file_handle.seek(chunk_map_start_location) - raw_text = self._file_handle.read(-1) - label_start = raw_text.index(Nd2Parser.CHUNK_MAP_START) + 32 - - while True: - data_start = raw_text.index(six.b("!"), label_start) + 1 - key = raw_text[label_start: data_start] - location, length = struct.unpack("QQ", raw_text[data_start: data_start + 16]) - if key == Nd2Parser.CHUNK_MAP_END: - # We've reached the end of the chunk map - break - self._label_map[key] = location - label_start = data_start + 16 - - def _read_chunk(self, chunk_location): - """ - Gets the data for a given chunk pointer - - """ - self._file_handle.seek(chunk_location) - # The chunk metadata is always 16 bytes long - chunk_metadata = self._file_handle.read(16) - header, relative_offset, data_length = struct.unpack("IIQ", chunk_metadata) - if header != Nd2Parser.CHUNK_HEADER: - raise ValueError("The ND2 file seems to be corrupted.") - # We start at the location of the chunk metadata, skip over the metadata, and then proceed to the - # start of the actual data field, which is at some arbitrary place after the metadata. - self._file_handle.seek(chunk_location + 16 + relative_offset) - return self._file_handle.read(data_length) - - def _parse_unsigned_char(self, data): - return struct.unpack("B", data.read(1))[0] - - def _parse_unsigned_int(self, data): - return struct.unpack("I", data.read(4))[0] - - def _parse_unsigned_long(self, data): - return struct.unpack("Q", data.read(8))[0] - - def _parse_double(self, data): - return struct.unpack("d", data.read(8))[0] - - def _parse_string(self, data): - value = data.read(2) - while not value.endswith(six.b("\x00\x00")): - # the string ends at the first instance of \x00\x00 - value += data.read(2) - return value.decode("utf16")[:-1].encode("utf8") - - def _parse_char_array(self, data): - array_length = struct.unpack("Q", data.read(8))[0] - return array.array("B", data.read(array_length)) - - def _parse_metadata_item(self, data): - """ - Reads hierarchical data, analogous to a Python dict. - - """ - new_count, length = struct.unpack(" Date: Sun, 27 Sep 2015 19:50:16 -0500 Subject: [PATCH 5/8] #66 even more refactoring --- nd2reader/exc.py | 2 ++ nd2reader/imreader/v3.py | 6 ++---- nd2reader/model/image.py | 14 +++++++++----- nd2reader/parser/parser.py | 10 +++++++++- nd2reader/version.py | 5 +---- 5 files changed, 23 insertions(+), 14 deletions(-) create mode 100644 nd2reader/exc.py diff --git a/nd2reader/exc.py b/nd2reader/exc.py new file mode 100644 index 0000000..eda1f2d --- /dev/null +++ b/nd2reader/exc.py @@ -0,0 +1,2 @@ +class InvalidVersionError(Exception): + pass diff --git a/nd2reader/imreader/v3.py b/nd2reader/imreader/v3.py index 28f448a..9285e1b 100644 --- a/nd2reader/imreader/v3.py +++ b/nd2reader/imreader/v3.py @@ -1,9 +1,7 @@ # -*- coding: utf-8 -*- import array -from datetime import datetime import numpy as np -import re import struct import six from nd2reader.model.image import Image @@ -59,7 +57,7 @@ class V3ImageReader(object): """ channel_offset = {} - for n, channel in enumerate(self._channels): + for n, channel in enumerate(self._metadata.channels): channel_offset[channel] = n return channel_offset @@ -86,7 +84,7 @@ class V3ImageReader(object): # The images for the various channels are interleaved within the same array. For example, the second image # of a four image group will be composed of bytes 2, 6, 10, etc. If you understand why someone would design # a data structure that way, please send the author of this library a message. - image_data = np.reshape(image_group_data[image_data_start::len(self.channels)], (height, width)) + image_data = np.reshape(image_group_data[image_data_start::len(self._metadata.channels)], (height, width)) # Skip images that are all zeros! This is important, since NIS Elements creates blank "gap" images if you # don't have the same number of images each cycle. We discovered this because we only took GFP images every # other cycle to reduce phototoxicity, but NIS Elements still allocated memory as if we were going to take diff --git a/nd2reader/model/image.py b/nd2reader/model/image.py index 31ad0c6..5637870 100644 --- a/nd2reader/model/image.py +++ b/nd2reader/model/image.py @@ -21,10 +21,6 @@ class Image(np.ndarray): :type channel: str :param z_level: The label for the location in the Z-plane where this image was taken. :type z_level: int - :param height: The height of the image in pixels. - :type height: int - :param width: The width of the image in pixels. - :type width: int """ self._timestamp = timestamp @@ -37,12 +33,20 @@ class Image(np.ndarray): return "\n".join(["", "%sx%s (HxW)" % (self.height, self.width), "Timestamp: %s" % self.timestamp, - "Frame: %s" % self._frame_number, + "Frame: %s" % self.frame_number, "Field of View: %s" % self.field_of_view, "Channel: %s" % self.channel, "Z-Level: %s" % self.z_level, ]) + @property + def height(self): + return self.shape[1] + + @property + def width(self): + return self.shape[0] + @property def field_of_view(self): """ diff --git a/nd2reader/parser/parser.py b/nd2reader/parser/parser.py index cc09374..d3e7488 100644 --- a/nd2reader/parser/parser.py +++ b/nd2reader/parser/parser.py @@ -1,2 +1,10 @@ +from nd2reader.parser.v3 import V3Parser +from nd2reader.exc import InvalidVersionError + + def get_parser(filename, major_version, minor_version): - parsers = {} + parsers = {(3, None): V3Parser} + parser = parsers.get((major_version, minor_version)) or parsers.get((major_version, None)) + if not parser: + raise InvalidVersionError("No parser is available for that version.") + return parser(filename) diff --git a/nd2reader/version.py b/nd2reader/version.py index d4a02fc..9434d2f 100644 --- a/nd2reader/version.py +++ b/nd2reader/version.py @@ -1,8 +1,5 @@ import re - - -class InvalidVersionError(Exception): - pass +from nd2reader.exc import InvalidVersionError def get_version(filename): From 12512a5a1a3fa7a77efeb6afb1f84d3f380d3dac Mon Sep 17 00:00:00 2001 From: jim Date: Sun, 4 Oct 2015 15:48:45 -0500 Subject: [PATCH 6/8] #66 lol --- nd2reader/driver/__init__.py | 0 nd2reader/parser/parser.py | 11 +++++ nd2reader/parser/v3.py | 93 ++++++++++++++++++++---------------- 3 files changed, 62 insertions(+), 42 deletions(-) create mode 100644 nd2reader/driver/__init__.py diff --git a/nd2reader/driver/__init__.py b/nd2reader/driver/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/nd2reader/parser/parser.py b/nd2reader/parser/parser.py index d3e7488..e43ddfb 100644 --- a/nd2reader/parser/parser.py +++ b/nd2reader/parser/parser.py @@ -1,5 +1,6 @@ from nd2reader.parser.v3 import V3Parser from nd2reader.exc import InvalidVersionError +from abc import abstractproperty def get_parser(filename, major_version, minor_version): @@ -8,3 +9,13 @@ def get_parser(filename, major_version, minor_version): if not parser: raise InvalidVersionError("No parser is available for that version.") return parser(filename) + + +class BaseParser(object): + @abstractproperty + def metadata(self): + raise NotImplementedError + + @abstractproperty + def driver(self): + raise NotImplementedError diff --git a/nd2reader/parser/v3.py b/nd2reader/parser/v3.py index e17dbb0..59e1cc2 100644 --- a/nd2reader/parser/v3.py +++ b/nd2reader/parser/v3.py @@ -3,12 +3,13 @@ import array from datetime import datetime from nd2reader.model.metadata import Metadata +from nd2reader.parser.parser import BaseParser import re import six import struct -class V3Parser(object): +class V3Parser(BaseParser): """ Parses ND2 files and creates a Metadata and ImageReader object. """ CHUNK_HEADER = 0xabeceda CHUNK_MAP_START = six.b("ND2 FILEMAP SIGNATURE NAME 0001!") @@ -19,6 +20,36 @@ class V3Parser(object): self._fh = None self._metadata = None + @property + def metadata(self): + if not self._metadata: + self._parse_metadata() + return self.metadata + + @property + def driver(self): + raise NotImplementedError + + def _parse_metadata(self): + """ + Reads all metadata. + + """ + metadata_dict = {} + label_map = self._build_label_map() + for label in label_map.keys(): + if label.endswith(six.b("LV!")) or six.b("LV|") in label: + data = self._read_chunk(label_map[label]) + stop = label.index(six.b("LV")) + metadata_dict[label[:stop]] = self._read_metadata(data, 1) + + channels = self._parse_channels(metadata_dict) + date = self._parse_fields_of_view(metadata_dict) + fields_of_view = self._parse_fields_of_view(metadata_dict) + frames = self._parse_frames(metadata_dict) + z_levels = self._parse_z_levels(metadata_dict) + self._metadata = Metadata(channels, date, fields_of_view, frames, z_levels) + def _parse_date(self, metadata_dict): """ The date and time when acquisition began. @@ -98,8 +129,7 @@ class V3Parser(object): """ return self._parse_dimension(r""".*?Z\((\d+)\).*?""", metadata_dict) - @property - def _file_handle(self): + def _get_file_handle(self): if self._fh is None: self._fh = open(self._filename, "rb") return self._fh @@ -128,18 +158,14 @@ class V3Parser(object): return dimension_text def _parse_dimension(self, pattern, metadata_dict): - try: - dimension_text = self._parse_dimension_text(metadata_dict) - count = int(re.match(pattern, dimension_text).group(1)) - except AttributeError: + dimension_text = self._parse_dimension_text(metadata_dict) + if six.PY3: + dimension_text = dimension_text.decode("utf8") + match = re.match(pattern, dimension_text) + if not match: return [0] - except TypeError: - match = re.match(pattern, dimension_text.decode("utf8")) - if not match: - return [0] - return list(range(int(match.group(1)))) - else: - return list(range(count)) + count = int(match.group(1)) + return list(range(count)) def _parse_total_images_per_channel(self, metadata_dict): """ @@ -150,26 +176,6 @@ class V3Parser(object): """ return metadata_dict[six.b('ImageAttributes')][six.b('SLxImageAttributes')][six.b('uiSequenceCount')] - def _parse_metadata(self): - """ - Reads all metadata. - - """ - metadata_dict = {} - label_map = self._build_label_map() - for label in label_map.keys(): - if label.endswith(six.b("LV!")) or six.b("LV|") in label: - data = self._read_chunk(label_map[label]) - stop = label.index(six.b("LV")) - metadata_dict[label[:stop]] = self._read_metadata(data, 1) - - channels = self._parse_channels(metadata_dict) - date = self._parse_fields_of_view(metadata_dict) - fields_of_view = self._parse_fields_of_view(metadata_dict) - frames = self._parse_frames(metadata_dict) - z_levels = self._parse_z_levels(metadata_dict) - self._metadata = Metadata(channels, date, fields_of_view, frames, z_levels) - def _build_label_map(self): """ Every label ends with an exclamation point, however, we can't directly search for those to find all the labels @@ -180,10 +186,10 @@ class V3Parser(object): """ label_map = {} - self._file_handle.seek(-8, 2) - chunk_map_start_location = struct.unpack("Q", self._file_handle.read(8))[0] - self._file_handle.seek(chunk_map_start_location) - raw_text = self._file_handle.read(-1) + self._get_file_handle().seek(-8, 2) + chunk_map_start_location = struct.unpack("Q", self._get_file_handle().read(8))[0] + self._get_file_handle().seek(chunk_map_start_location) + raw_text = self._get_file_handle().read(-1) label_start = raw_text.index(V3Parser.CHUNK_MAP_START) + 32 while True: @@ -202,16 +208,19 @@ class V3Parser(object): Gets the data for a given chunk pointer """ - self._file_handle.seek(chunk_location) + self._get_file_handle().seek(chunk_location) # The chunk metadata is always 16 bytes long - chunk_metadata = self._file_handle.read(16) + chunk_metadata = self._get_file_handle().read(16) header, relative_offset, data_length = struct.unpack("IIQ", chunk_metadata) if header != V3Parser.CHUNK_HEADER: raise ValueError("The ND2 file seems to be corrupted.") # We start at the location of the chunk metadata, skip over the metadata, and then proceed to the # start of the actual data field, which is at some arbitrary place after the metadata. - self._file_handle.seek(chunk_location + 16 + relative_offset) - return self._file_handle.read(data_length) + self._get_file_handle().seek(chunk_location + 16 + relative_offset) + val = self._get_file_handle().read(data_length) + print("**************************") + print(type(val)) + return val def _parse_unsigned_char(self, data): return struct.unpack("B", data.read(1))[0] From 4ec54183d1132b934c4e6d98579a03000d932519 Mon Sep 17 00:00:00 2001 From: jim Date: Sun, 4 Oct 2015 16:03:35 -0500 Subject: [PATCH 7/8] #66 fixed import problems --- nd2reader/parser/base.py | 11 +++++++++++ nd2reader/parser/parser.py | 11 ----------- nd2reader/parser/v3.py | 11 +++++------ 3 files changed, 16 insertions(+), 17 deletions(-) create mode 100644 nd2reader/parser/base.py diff --git a/nd2reader/parser/base.py b/nd2reader/parser/base.py new file mode 100644 index 0000000..20b40d1 --- /dev/null +++ b/nd2reader/parser/base.py @@ -0,0 +1,11 @@ +from abc import abstractproperty + + +class BaseParser(object): + @abstractproperty + def metadata(self): + raise NotImplementedError + + @abstractproperty + def driver(self): + raise NotImplementedError diff --git a/nd2reader/parser/parser.py b/nd2reader/parser/parser.py index e43ddfb..d3e7488 100644 --- a/nd2reader/parser/parser.py +++ b/nd2reader/parser/parser.py @@ -1,6 +1,5 @@ from nd2reader.parser.v3 import V3Parser from nd2reader.exc import InvalidVersionError -from abc import abstractproperty def get_parser(filename, major_version, minor_version): @@ -9,13 +8,3 @@ def get_parser(filename, major_version, minor_version): if not parser: raise InvalidVersionError("No parser is available for that version.") return parser(filename) - - -class BaseParser(object): - @abstractproperty - def metadata(self): - raise NotImplementedError - - @abstractproperty - def driver(self): - raise NotImplementedError diff --git a/nd2reader/parser/v3.py b/nd2reader/parser/v3.py index 59e1cc2..c1f9a0f 100644 --- a/nd2reader/parser/v3.py +++ b/nd2reader/parser/v3.py @@ -3,7 +3,7 @@ import array from datetime import datetime from nd2reader.model.metadata import Metadata -from nd2reader.parser.parser import BaseParser +from nd2reader.parser.base import BaseParser import re import six import struct @@ -24,7 +24,7 @@ class V3Parser(BaseParser): def metadata(self): if not self._metadata: self._parse_metadata() - return self.metadata + return self._metadata @property def driver(self): @@ -207,6 +207,8 @@ class V3Parser(BaseParser): """ Gets the data for a given chunk pointer + :rtype: bytes + """ self._get_file_handle().seek(chunk_location) # The chunk metadata is always 16 bytes long @@ -217,10 +219,7 @@ class V3Parser(BaseParser): # We start at the location of the chunk metadata, skip over the metadata, and then proceed to the # start of the actual data field, which is at some arbitrary place after the metadata. self._get_file_handle().seek(chunk_location + 16 + relative_offset) - val = self._get_file_handle().read(data_length) - print("**************************") - print(type(val)) - return val + return self._get_file_handle().read(data_length) def _parse_unsigned_char(self, data): return struct.unpack("B", data.read(1))[0] From 89d78030f1ef273f1456fb0a3cbb04c8184b12a5 Mon Sep 17 00:00:00 2001 From: jim Date: Fri, 9 Oct 2015 17:45:33 -0500 Subject: [PATCH 8/8] #66 resolves v3 driver refactor --- nd2reader/{imreader => driver}/v2.py | 0 nd2reader/{imreader => driver}/v3.py | 29 +++++++++++++++++++++++++--- nd2reader/imreader/__init__.py | 1 - nd2reader/model/image.py | 17 +++++++--------- nd2reader/model/metadata.py | 12 +++++++++++- nd2reader/parser/v3.py | 16 +++++++++------ 6 files changed, 54 insertions(+), 21 deletions(-) rename nd2reader/{imreader => driver}/v2.py (100%) rename nd2reader/{imreader => driver}/v3.py (81%) delete mode 100644 nd2reader/imreader/__init__.py diff --git a/nd2reader/imreader/v2.py b/nd2reader/driver/v2.py similarity index 100% rename from nd2reader/imreader/v2.py rename to nd2reader/driver/v2.py diff --git a/nd2reader/imreader/v3.py b/nd2reader/driver/v3.py similarity index 81% rename from nd2reader/imreader/v3.py rename to nd2reader/driver/v3.py index 9285e1b..ea00527 100644 --- a/nd2reader/imreader/v3.py +++ b/nd2reader/driver/v3.py @@ -7,10 +7,14 @@ import six from nd2reader.model.image import Image -class V3ImageReader(object): - def __init__(self, metadata): - self._metadata = metadata +class V3Driver(object): + CHUNK_HEADER = 0xabeceda + def __init__(self, metadata, label_map, file_handle): + self._metadata = metadata + self._label_map = label_map + self._file_handle = file_handle + def _calculate_field_of_view(self, frame_number): images_per_cycle = len(self._metadata.z_levels) * len(self._metadata.channels) return int((frame_number - (frame_number % images_per_cycle)) / images_per_cycle) % len(self._metadata.fields_of_view) @@ -46,6 +50,7 @@ class V3ImageReader(object): frame_number = self._calculate_frame_number(image_group_number, fov, z_level) timestamp, image = self._get_raw_image_data(image_group_number, channel_offset, self._metadata.height, self._metadata.width) image.add_params(timestamp, frame_number, fov, channel, z_level) + return image @property def _channel_offset(self): @@ -92,3 +97,21 @@ class V3ImageReader(object): if np.any(image_data): return timestamp, Image(image_data) return None + + def _read_chunk(self, chunk_location): + """ + Gets the data for a given chunk pointer + + :rtype: bytes + + """ + self._file_handle.seek(chunk_location) + # The chunk metadata is always 16 bytes long + chunk_metadata = self._file_handle.read(16) + header, relative_offset, data_length = struct.unpack("IIQ", chunk_metadata) + if header != V3Driver.CHUNK_HEADER: + raise ValueError("The ND2 file seems to be corrupted.") + # We start at the location of the chunk metadata, skip over the metadata, and then proceed to the + # start of the actual data field, which is at some arbitrary place after the metadata. + self._file_handle.seek(chunk_location + 16 + relative_offset) + return self._file_handle.read(data_length) diff --git a/nd2reader/imreader/__init__.py b/nd2reader/imreader/__init__.py deleted file mode 100644 index 16b2735..0000000 --- a/nd2reader/imreader/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from nd2reader.driver.driver import get_driver diff --git a/nd2reader/model/image.py b/nd2reader/model/image.py index 5637870..f51d586 100644 --- a/nd2reader/model/image.py +++ b/nd2reader/model/image.py @@ -7,6 +7,13 @@ class Image(np.ndarray): def __new__(cls, array): return np.asarray(array).view(cls) + def __init__(self, array): + self._timestamp = None + self._frame_number = None + self._field_of_view = None + self._channel = None + self._z_level = None + def add_params(self, timestamp, frame_number, field_of_view, channel, z_level): """ A wrapper around the raw pixel data of an image. @@ -29,16 +36,6 @@ class Image(np.ndarray): self._channel = channel self._z_level = z_level - def __repr__(self): - return "\n".join(["", - "%sx%s (HxW)" % (self.height, self.width), - "Timestamp: %s" % self.timestamp, - "Frame: %s" % self.frame_number, - "Field of View: %s" % self.field_of_view, - "Channel: %s" % self.channel, - "Z-Level: %s" % self.z_level, - ]) - @property def height(self): return self.shape[1] diff --git a/nd2reader/model/metadata.py b/nd2reader/model/metadata.py index cf7d140..838dcd7 100644 --- a/nd2reader/model/metadata.py +++ b/nd2reader/model/metadata.py @@ -1,12 +1,22 @@ class Metadata(object): """ A simple container for ND2 metadata. """ - def __init__(self, channels, date, fields_of_view, frames, z_levels): + def __init__(self, height, width, channels, date, fields_of_view, frames, z_levels): + self._height = height + self._width = width self._channels = channels self._date = date self._fields_of_view = fields_of_view self._frames = frames self._z_levels = z_levels + @property + def height(self): + return self._height + + @property + def width(self): + return self._width + @property def date(self): """ diff --git a/nd2reader/parser/v3.py b/nd2reader/parser/v3.py index c1f9a0f..fa1cf89 100644 --- a/nd2reader/parser/v3.py +++ b/nd2reader/parser/v3.py @@ -4,6 +4,7 @@ import array from datetime import datetime from nd2reader.model.metadata import Metadata from nd2reader.parser.base import BaseParser +from nd2reader.driver.v3 import V3Driver import re import six import struct @@ -19,6 +20,7 @@ class V3Parser(BaseParser): self._filename = filename self._fh = None self._metadata = None + self._label_map = None @property def metadata(self): @@ -28,7 +30,7 @@ class V3Parser(BaseParser): @property def driver(self): - raise NotImplementedError + return V3Driver(self.metadata, self._label_map, self._get_file_handle()) def _parse_metadata(self): """ @@ -36,19 +38,21 @@ class V3Parser(BaseParser): """ metadata_dict = {} - label_map = self._build_label_map() - for label in label_map.keys(): + self._label_map = self._build_label_map() + for label in self._label_map.keys(): if label.endswith(six.b("LV!")) or six.b("LV|") in label: - data = self._read_chunk(label_map[label]) + data = self._read_chunk(self._label_map[label]) stop = label.index(six.b("LV")) metadata_dict[label[:stop]] = self._read_metadata(data, 1) + height = metadata_dict[six.b('ImageAttributes')][six.b('SLxImageAttributes')][six.b('uiHeight')] + width = metadata_dict[six.b('ImageAttributes')][six.b('SLxImageAttributes')][six.b('uiWidth')] channels = self._parse_channels(metadata_dict) - date = self._parse_fields_of_view(metadata_dict) + date = self._parse_date(metadata_dict) fields_of_view = self._parse_fields_of_view(metadata_dict) frames = self._parse_frames(metadata_dict) z_levels = self._parse_z_levels(metadata_dict) - self._metadata = Metadata(channels, date, fields_of_view, frames, z_levels) + self._metadata = Metadata(height, width, channels, date, fields_of_view, frames, z_levels) def _parse_date(self, metadata_dict): """