diff --git a/nd2reader/__init__.py b/nd2reader/__init__.py index 06e8709..26a6f1b 100644 --- a/nd2reader/__init__.py +++ b/nd2reader/__init__.py @@ -1,84 +1,33 @@ # -*- coding: utf-8 -*- -import array -from datetime import datetime -import logging from nd2reader.model import Image, ImageSet from nd2reader.parser import Nd2Parser -import re -import struct - -log = logging.getLogger(__name__) -log.addHandler(logging.StreamHandler()) -log.setLevel(logging.WARNING) class Nd2(Nd2Parser): + """ + Allows easy access to NIS Elements .nd2 image files. + + """ def __init__(self, filename): super(Nd2, self).__init__(filename) self._filename = filename def __repr__(self): - return "\n".join(["ND2: %s" % self._filename, - "Created: %s" % self.absolute_start.strftime("%Y-%m-%d %H:%M:%S"), + return "\n".join(["" % self._filename, + "Created: %s" % self._absolute_start.strftime("%Y-%m-%d %H:%M:%S"), "Image size: %sx%s (HxW)" % (self.height, self.width), - "Image cycles: %s" % self.time_index_count, - "Channels: %s" % ", ".join(["'%s'" % channel for channel in self.channels]), - "Fields of View: %s" % self.field_of_view_count, - "Z-Levels: %s" % self.z_level_count + "Image cycles: %s" % self._time_index_count, + "Channels: %s" % ", ".join(["'%s'" % channel for channel in self._channels]), + "Fields of View: %s" % self._field_of_view_count, + "Z-Levels: %s" % self._z_level_count ]) - def __iter__(self): - for i in range(self._image_count): - for fov in range(self.field_of_view_count): - for z_level in range(self.z_level_count): - for channel_name in self.channels: - image = self.get_image(i, fov, channel_name, z_level) - if image is not None: - yield image - - @property - def image_sets(self): - for time_index in xrange(self.time_index_count): - image_set = ImageSet() - for fov in range(self.field_of_view_count): - for channel_name in self.channels: - for z_level in xrange(self.z_level_count): - image = self.get_image(time_index, fov, channel_name, z_level) - if image is not None: - image_set.add(image) - yield image_set - - def get_image(self, time_index, fov, channel_name, z_level): - image_set_number = self._calculate_image_set_number(time_index, fov, z_level) - try: - timestamp, raw_image_data = self._get_raw_image_data(image_set_number, self._channel_offset[channel_name]) - image = Image(timestamp, raw_image_data, fov, channel_name, z_level, self.height, self.width) - except TypeError: - return None - else: - return image - - @property - def channels(self): - metadata = self.metadata['ImageMetadataSeq']['SLxPictureMetadata']['sPicturePlanes'] - try: - validity = self.metadata['ImageMetadata']['SLxExperiment']['ppNextLevelEx'][''][0]['ppNextLevelEx'][''][0]['pItemValid'] - except KeyError: - # If none of the channels have been deleted, there is no validity list, so we just make one - validity = [True for _ in metadata] - # Channel information is contained in dictionaries with the keys a0, a1...an where the number - # indicates the order in which the channel is stored. So by sorting the dicts alphabetically - # we get the correct order. - for (label, chan), valid in zip(sorted(metadata['sPlaneNew'].items()), validity): - if not valid: - continue - yield chan['sDescription'] - @property def height(self): """ - :return: height of each image, in pixels + :return: height of each image, in pixels + :rtype: int """ return self.metadata['ImageAttributes']['SLxImageAttributes']['uiHeight'] @@ -86,106 +35,70 @@ class Nd2(Nd2Parser): @property def width(self): """ - :return: width of each image, in pixels + :return: width of each image, in pixels + :rtype: int """ return self.metadata['ImageAttributes']['SLxImageAttributes']['uiWidth'] - @property - def absolute_start(self): - for line in self.metadata['ImageTextInfo']['SLxImageTextInfo'].values(): - absolute_start_12 = None - absolute_start_24 = None - # ND2s seem to randomly switch between 12- and 24-hour representations. - try: - absolute_start_24 = datetime.strptime(line, "%m/%d/%Y %H:%M:%S") - except ValueError: - pass - try: - absolute_start_12 = datetime.strptime(line, "%m/%d/%Y %I:%M:%S %p") - except ValueError: - pass - if not absolute_start_12 and not absolute_start_24: - continue - return absolute_start_12 if absolute_start_12 else absolute_start_24 - raise ValueError("This ND2 has no recorded start time. This is probably a bug.") - - @property - def channel_count(self): - pattern = r""".*?λ\((\d+)\).*?""" - try: - count = int(re.match(pattern, self._dimensions).group(1)) - except AttributeError: - return 1 - else: - return count - - @property - def field_of_view_count(self): + def __iter__(self): """ - The metadata contains information about fields of view, but it contains it even if some fields - of view were cropped. We can't find anything that states which fields of view are actually - in the image data, so we have to calculate it. There probably is something somewhere, since - NIS Elements can figure it out, but we haven't found it yet. + Iterates over every image, in the order they were taken. + + :return: model.Image() """ - pattern = r""".*?XY\((\d+)\).*?""" - try: - count = int(re.match(pattern, self._dimensions).group(1)) - except AttributeError: - return 1 - else: - return count + for i in range(self._image_count): + for fov in range(self._field_of_view_count): + for z_level in range(self._z_level_count): + for channel_name in self._channels: + image = self.get_image(i, fov, channel_name, z_level) + if image is not None: + yield image @property - def time_index_count(self): + def image_sets(self): """ - The number of cycles. + Iterates over groups of related images. This is useful if your ND2 contains multiple fields of view. + A typical use case might be that you have, say, four areas of interest that you're monitoring, and every + minute you take a bright field and GFP image of each one. For each cycle, this method would produce four + ImageSet objects, each containing one bright field and one GFP image. - :rtype: int + :return: model.ImageSet() """ - pattern = r""".*?T'\((\d+)\).*?""" - try: - count = int(re.match(pattern, self._dimensions).group(1)) - except AttributeError: - return 1 - else: - return count - - @property - def z_level_count(self): - pattern = r""".*?Z\((\d+)\).*?""" - try: - count = int(re.match(pattern, self._dimensions).group(1)) - except AttributeError: - return 1 - else: - return count + for time_index in xrange(self._time_index_count): + image_set = ImageSet() + for fov in range(self._field_of_view_count): + for channel_name in self._channels: + for z_level in xrange(self._z_level_count): + image = self.get_image(time_index, fov, channel_name, z_level) + if image is not None: + image_set.add(image) + yield image_set - @property - def _channel_offset(self): + def get_image(self, time_index, field_of_view, channel_name, z_level): """ - Image data is interleaved for each image set. That is, if there are four images in a set, the first image - will consist of pixels 1, 5, 9, etc, the second will be pixels 2, 6, 10, and so forth. Why this would be the - case is beyond me, but that's how it works. + Returns an Image if data exists for the given parameters, otherwise returns None. In general, you should avoid + using this method unless you're very familiar with the structure of ND2 files. If you have a use case that + cannot be met by the `__iter__` or `image_sets` methods above, please create an issue on Github. + + :param time_index: the frame number + :type time_index: int + :param field_of_view: the label for the place in the XY-plane where this image was taken. + :type field_of_view: int + :param channel_name: the name of the color of this image + :type channel_name: str + :param z_level: the label for the location in the Z-plane where this image was taken. + :type z_level: int + :rtype: nd2reader.model.Image() or None """ - channel_offset = {} - for n, channel in enumerate(self.channels): - channel_offset[channel] = n - return channel_offset - - def _get_raw_image_data(self, image_set_number, channel_offset): - chunk = self._label_map["ImageDataSeq|%d!" % image_set_number] - data = self._read_chunk(chunk) - timestamp = struct.unpack("d", data[:8])[0] - # The images for the various channels are interleaved within each other. - image_data = array.array("H", data) - image_data_start = 4 + channel_offset - if any(image_data): - return timestamp, image_data[image_data_start::self.channel_count] - return None - - def _calculate_image_set_number(self, time_index, fov, z_level): - return time_index * self.field_of_view_count * self.z_level_count + (fov * self.z_level_count + z_level) \ No newline at end of file + image_set_number = self._calculate_image_group_number(time_index, field_of_view, z_level) + try: + timestamp, raw_image_data = self._get_raw_image_data(image_set_number, self._channel_offset[channel_name]) + image = Image(timestamp, raw_image_data, field_of_view, channel_name, z_level, self.height, self.width) + except TypeError: + return None + else: + return image \ No newline at end of file diff --git a/nd2reader/model/__init__.py b/nd2reader/model/__init__.py index 796b96e..bd547a1 100644 --- a/nd2reader/model/__init__.py +++ b/nd2reader/model/__init__.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + import collections import numpy as np import logging @@ -7,6 +9,25 @@ log = logging.getLogger(__name__) class Image(object): def __init__(self, timestamp, raw_array, field_of_view, channel, z_level, height, width): + """ + A wrapper around the raw pixel data of an image. + + :param timestamp: The number of milliseconds after the beginning of the acquisition that this image was taken. + :type timestamp: int + :param raw_array: The raw sequence of bytes that represents the image. + :type raw_array: array.array() + :param field_of_view: The label for the place in the XY-plane where this image was taken. + :type field_of_view: int + :param channel: The name of the color of this image + :type channel: str + :param z_level: The label for the location in the Z-plane where this image was taken. + :type z_level: int + :param height: The height of the image in pixels. + :type height: int + :param width: The width of the image in pixels. + :type width: int + + """ self._timestamp = timestamp self._raw_data = raw_array self._field_of_view = field_of_view @@ -16,8 +37,37 @@ class Image(object): self._width = width self._data = None + def __repr__(self): + return "\n".join(["", + "%sx%s (HxW)" % (self._height, self._width), + "Timestamp: %s" % self.timestamp, + "Field of View: %s" % self.field_of_view, + "Channel: %s" % self.channel, + "Z-Level: %s" % self.z_level, + ]) + + @property + def data(self): + """ + The actual image data. + + :rtype np.array() + + """ + if self._data is None: + # The data is just a 1-dimensional array originally. + # We convert it to a 2D image here. + self._data = np.reshape(self._raw_data, (self._height, self._width)) + return self._data + @property def field_of_view(self): + """ + Which of the fixed locations this image was taken at. + + :rtype int: + + """ return self._field_of_view @property @@ -28,52 +78,69 @@ class Image(object): timestamp. No, this doesn't make much sense. But that's how ND2s are structured, so if your experiment depends on millisecond accuracy, you need to find an alternative imaging system. + :rtype float: + """ return self._timestamp / 1000.0 @property def channel(self): + """ + The name of the filter used to acquire this image. These are user-supplied in NIS Elements. + + :rtype str: + + """ return self._channel @property def z_level(self): - return self._z_level + """ + The vertical offset of the image. These are simple integers starting from 0, where the 0 is the lowest + z-level and each subsequent level incremented by 1. - @property - def data(self): - if self._data is None: - # The data is just a flat, 1-dimensional array. We convert it to a 2D image here. - self._data = np.reshape(self._raw_data, (self._height, self._width)) - return self._data + For example, if you acquired images at -3 µm, 0 µm, and +3 µm, your z-levels would be: + + -3 µm: 0 + 0 µm: 1 + +3 µm: 2 + + :rtype int: + + """ + return self._z_level class ImageSet(object): """ - A group of images that share the same timestamp. NIS Elements doesn't store a unique timestamp for every - image, rather, it stores one for each set of images that share the same field of view and z-axis level. + A group of images that were taken at roughly the same time. """ def __init__(self): self._images = collections.defaultdict(dict) - def get(self, channel="", z_level=0): + def __len__(self): + """ The number of images in the image set. """ + return sum([len(channel) for channel in self._images.values()]) + + def __repr__(self): + return "\n".join(["", + "Image count: %s" % len(self)]) + + def get(self, channel, z_level=0): """ Retrieve an image with a given channel and z-level. For most users, z_level will always be 0. - """ - try: - image = self._images[channel][z_level] - except KeyError: - return None - else: - return image + :type channel: str + :type z_level: int - def __len__(self): - """ The number of images in the image set. """ - return sum([len(channel) for channel in self._images.values()]) + """ + return self._images.get(channel).get(z_level) def add(self, image): """ + Stores an image. + :type image: nd2reader.model.Image() """ diff --git a/nd2reader/parser.py b/nd2reader/parser.py index ee49d53..650e751 100644 --- a/nd2reader/parser.py +++ b/nd2reader/parser.py @@ -2,6 +2,9 @@ import array from collections import namedtuple +from datetime import datetime +import numpy as np +import re import struct from StringIO import StringIO @@ -11,6 +14,7 @@ field_of_view = namedtuple('FOV', ['number', 'x', 'y', 'z', 'pfs_offset']) class Nd2Parser(object): """ Reads .nd2 files, provides an interface to the metadata, and generates numpy arrays from the image data. + You should not ever need to instantiate this class manually unless you're a developer. """ CHUNK_HEADER = 0xabeceda @@ -18,7 +22,6 @@ class Nd2Parser(object): CHUNK_MAP_END = "ND2 CHUNK MAP SIGNATURE 0000001!" def __init__(self, filename): - self._absolute_start = None self._filename = filename self._fh = None self._chunk_map_start_location = None @@ -35,8 +38,48 @@ class Nd2Parser(object): self._fh = open(self._filename, "rb") return self._fh + def _get_raw_image_data(self, image_group_number, channel_offset): + """ + Reads the raw bytes and the timestamp of an image. + + :param image_group_number: groups are made of images with the same time index, field of view and z-level. + :type image_group_number: int + :param channel_offset: the offset in the array where the bytes for this image are found. + :type channel_offset: int + + :return: (int, array.array()) or None + + """ + chunk = self._label_map["ImageDataSeq|%d!" % image_group_number] + data = self._read_chunk(chunk) + # All images in the same image group share the same timestamp! So if you have complicated image data, + # your timestamps may not be entirely accurate. Practically speaking though, they'll only be off by a few + # seconds unless you're doing something super weird. + timestamp = struct.unpack("d", data[:8])[0] + image_group_data = array.array("H", data) + image_data_start = 4 + channel_offset + # The images for the various channels are interleaved within the same array. For example, the second image + # of a four image group will be composed of bytes 2, 6, 10, etc. If you understand why someone would design + # a data structure that way, please send the author of this library a message. + image_data = image_group_data[image_data_start::self._channel_count] + # Skip images that are all zeros! This is important, since NIS Elements creates blank "gap" images if you + # don't have the same number of images each cycle. We discovered this because we only took GFP images every + # other cycle to reduce phototoxicity, but NIS Elements still allocated memory as if we were going to take + # them every cycle. + if np.any(image_data): + return timestamp, image_data + return None + @property def _dimensions(self): + """ + While there are metadata values that represent a lot of what we want to capture, they seem to be unreliable. + Sometimes certain elements don't exist, or change their data type randomly. However, the human-readable text + is always there and in the same exact format, so we just parse that instead. + + :rtype: str + + """ if self._dimension_text is None: for line in self.metadata['ImageTextInfo']['SLxImageTextInfo'].values(): if "Dimensions:" in line: @@ -53,14 +96,162 @@ class Nd2Parser(object): return self._dimension_text @property - def _image_count(self): - return self.metadata['ImageAttributes']['SLxImageAttributes']['uiSequenceCount'] + def _channels(self): + """ + These are labels created by the NIS Elements user. Typically they may a short description of the filter cube + used (e.g. "bright field", "GFP", etc.) + + :rtype: str + + """ + metadata = self.metadata['ImageMetadataSeq']['SLxPictureMetadata']['sPicturePlanes'] + try: + validity = self.metadata['ImageMetadata']['SLxExperiment']['ppNextLevelEx'][''][0]['ppNextLevelEx'][''][0]['pItemValid'] + except KeyError: + # If none of the channels have been deleted, there is no validity list, so we just make one + validity = [True for _ in metadata] + # Channel information is contained in dictionaries with the keys a0, a1...an where the number + # indicates the order in which the channel is stored. So by sorting the dicts alphabetically + # we get the correct order. + for (label, chan), valid in zip(sorted(metadata['sPlaneNew'].items()), validity): + if not valid: + continue + yield chan['sDescription'] + + def _calculate_image_group_number(self, time_index, fov, z_level): + """ + Images are grouped together if they share the same time index, field of view, and z-level. + + :type time_index: int + :type fov: int + :type z_level: int + + :rtype: int + + """ + return time_index * self._field_of_view_count * self._z_level_count + (fov * self._z_level_count + z_level) + + @property + def _channel_offset(self): + """ + Image data is interleaved for each image set. That is, if there are four images in a set, the first image + will consist of pixels 1, 5, 9, etc, the second will be pixels 2, 6, 10, and so forth. + + :rtype: dict + + """ + channel_offset = {} + for n, channel in enumerate(self._channels): + channel_offset[channel] = n + return channel_offset + + @property + def _absolute_start(self): + """ + The date and time when acquisition began. + + :rtype: datetime.datetime() + + """ + for line in self.metadata['ImageTextInfo']['SLxImageTextInfo'].values(): + absolute_start_12 = None + absolute_start_24 = None + # ND2s seem to randomly switch between 12- and 24-hour representations. + try: + absolute_start_24 = datetime.strptime(line, "%m/%d/%Y %H:%M:%S") + except ValueError: + pass + try: + absolute_start_12 = datetime.strptime(line, "%m/%d/%Y %I:%M:%S %p") + except ValueError: + pass + if not absolute_start_12 and not absolute_start_24: + continue + return absolute_start_12 if absolute_start_12 else absolute_start_24 + raise ValueError("This ND2 has no recorded start time. This is probably a bug.") @property - def _sequence_count(self): - return self.metadata['ImageEvents']['RLxExperimentRecord']['uiCount'] + def _channel_count(self): + """ + The number of different channels used, including bright field. + + :rtype: int + + """ + pattern = r""".*?λ\((\d+)\).*?""" + try: + count = int(re.match(pattern, self._dimensions).group(1)) + except AttributeError: + return 1 + else: + return count + + @property + def _field_of_view_count(self): + """ + The metadata contains information about fields of view, but it contains it even if some fields + of view were cropped. We can't find anything that states which fields of view are actually + in the image data, so we have to calculate it. There probably is something somewhere, since + NIS Elements can figure it out, but we haven't found it yet. + + :rtype: int + + """ + pattern = r""".*?XY\((\d+)\).*?""" + try: + count = int(re.match(pattern, self._dimensions).group(1)) + except AttributeError: + return 1 + else: + return count + + @property + def _time_index_count(self): + """ + The number of cycles. + + :rtype: int + + """ + pattern = r""".*?T'\((\d+)\).*?""" + try: + count = int(re.match(pattern, self._dimensions).group(1)) + except AttributeError: + return 1 + else: + return count + + @property + def _z_level_count(self): + """ + The number of different levels in the Z-plane. + + :rtype: int + + """ + pattern = r""".*?Z\((\d+)\).*?""" + try: + count = int(re.match(pattern, self._dimensions).group(1)) + except AttributeError: + return 1 + else: + return count + + @property + def _image_count(self): + """ + The total number of images in the ND2. Warning: this may be inaccurate as it includes "gap" images. + + :rtype: int + + """ + return self.metadata['ImageAttributes']['SLxImageAttributes']['uiSequenceCount'] def _parse_metadata(self): + """ + Reads all metadata. + + """ for label in self._label_map.keys(): if label.endswith("LV!") or "LV|" in label: data = self._read_chunk(self._label_map[label]) @@ -106,10 +297,6 @@ class Nd2Parser(object): self._file_handle.seek(chunk_location + 16 + relative_offset) return self._file_handle.read(data_length) - def _z_level_count(self): - st = self._read_chunk(self._label_map["CustomData|Z!"]) - return len(array.array("d", st)) - def _parse_unsigned_char(self, data): return struct.unpack("B", data.read(1))[0] @@ -134,6 +321,10 @@ class Nd2Parser(object): return array.array("B", data.read(array_length)) def _parse_metadata_item(self, data): + """ + Reads hierarchical data, analogous to a Python dict. + + """ new_count, length = struct.unpack("