Merge pull request #31 from jimrybarski/2-rename-things

2 rename things
9 years ago · c9abc0017f
--- a/nd2reader/init.py
+++ b/nd2reader/init.py
@ -1,84 +1,33 @@
 # -*- coding: utf-8 -*-

 import array
 from datetime import datetime
 import logging
 from nd2reader.model import Image, ImageSet
 from nd2reader.parser import Nd2Parser
 import re
 import struct

 log = logging.getLogger(__name__)
 log.addHandler(logging.StreamHandler())
 log.setLevel(logging.WARNING)


 class Nd2(Nd2Parser):
    """
    Allows easy access to NIS Elements .nd2 image files.

    """
    def __init__(self, filename):
        super(Nd2, self).__init__(filename)
        self._filename = filename

    def __repr__(self):
        return "\n".join(["ND2: %s" % self._filename,
                          "Created: %s" % self.absolute_start.strftime("%Y-%m-%d %H:%M:%S"),
        return "\n".join(["<ND2 %s>" % self._filename,
                          "Created: %s" % self._absolute_start.strftime("%Y-%m-%d %H:%M:%S"),
                          "Image size: %sx%s (HxW)" % (self.height, self.width),
                          "Image cycles: %s" % self.time_index_count,
                          "Channels: %s" % ", ".join(["'%s'" % channel for channel in self.channels]),
                          "Fields of View: %s" % self.field_of_view_count,
                          "Z-Levels: %s" % self.z_level_count
                          "Image cycles: %s" % self._time_index_count,
                          "Channels: %s" % ", ".join(["'%s'" % channel for channel in self._channels]),
                          "Fields of View: %s" % self._field_of_view_count,
                          "Z-Levels: %s" % self._z_level_count
                          ])

    def __iter__(self):
        for i in range(self._image_count):
            for fov in range(self.field_of_view_count):
                for z_level in range(self.z_level_count):
                    for channel_name in self.channels:
                        image = self.get_image(i, fov, channel_name, z_level)
                        if image is not None:
                            yield image

    @property
    def image_sets(self):
        for time_index in xrange(self.time_index_count):
            image_set = ImageSet()
            for fov in range(self.field_of_view_count):
                for channel_name in self.channels:
                    for z_level in xrange(self.z_level_count):
                        image = self.get_image(time_index, fov, channel_name, z_level)
                        if image is not None:
                            image_set.add(image)
                yield image_set

    def get_image(self, time_index, fov, channel_name, z_level):
        image_set_number = self._calculate_image_set_number(time_index, fov, z_level)
        try:
            timestamp, raw_image_data = self._get_raw_image_data(image_set_number, self._channel_offset[channel_name])
            image = Image(timestamp, raw_image_data, fov, channel_name, z_level, self.height, self.width)
        except TypeError:
            return None
        else:
            return image

    @property
    def channels(self):
        metadata = self.metadata['ImageMetadataSeq']['SLxPictureMetadata']['sPicturePlanes']
        try:
            validity = self.metadata['ImageMetadata']['SLxExperiment']['ppNextLevelEx'][''][0]['ppNextLevelEx'][''][0]['pItemValid']
        except KeyError:
            # If none of the channels have been deleted, there is no validity list, so we just make one
            validity = [True for _ in metadata]
        # Channel information is contained in dictionaries with the keys a0, a1...an where the number
        # indicates the order in which the channel is stored. So by sorting the dicts alphabetically
        # we get the correct order.
        for (label, chan), valid in zip(sorted(metadata['sPlaneNew'].items()), validity):
            if not valid:
                continue
            yield chan['sDescription']

    @property
    def height(self):
        """
        :return:    height of each image, in pixels
        :return: height of each image, in pixels
        :rtype: int

        """
        return self.metadata['ImageAttributes']['SLxImageAttributes']['uiHeight']
@ -86,106 +35,70 @@ class Nd2(Nd2Parser):
    @property
    def width(self):
        """
        :return:    width of each image, in pixels
        :return: width of each image, in pixels
        :rtype: int

        """
        return self.metadata['ImageAttributes']['SLxImageAttributes']['uiWidth']

    @property
    def absolute_start(self):
        for line in self.metadata['ImageTextInfo']['SLxImageTextInfo'].values():
            absolute_start_12 = None
            absolute_start_24 = None
            # ND2s seem to randomly switch between 12- and 24-hour representations.
            try:
                absolute_start_24 = datetime.strptime(line, "%m/%d/%Y  %H:%M:%S")
            except ValueError:
                pass
            try:
                absolute_start_12 = datetime.strptime(line, "%m/%d/%Y  %I:%M:%S %p")
            except ValueError:
                pass
            if not absolute_start_12 and not absolute_start_24:
                continue
            return absolute_start_12 if absolute_start_12 else absolute_start_24
        raise ValueError("This ND2 has no recorded start time. This is probably a bug.")

    @property
    def channel_count(self):
        pattern = r""".*?λ\((\d+)\).*?"""
        try:
            count = int(re.match(pattern, self._dimensions).group(1))
        except AttributeError:
            return 1
        else:
            return count

    @property
    def field_of_view_count(self):
    def __iter__(self):
        """
        The metadata contains information about fields of view, but it contains it even if some fields
        of view were cropped. We can't find anything that states which fields of view are actually
        in the image data, so we have to calculate it. There probably is something somewhere, since
        NIS Elements can figure it out, but we haven't found it yet.
        Iterates over every image, in the order they were taken.

        :return: model.Image()

        """
        pattern = r""".*?XY\((\d+)\).*?"""
        try:
            count = int(re.match(pattern, self._dimensions).group(1))
        except AttributeError:
            return 1
        else:
            return count
        for i in range(self._image_count):
            for fov in range(self._field_of_view_count):
                for z_level in range(self._z_level_count):
                    for channel_name in self._channels:
                        image = self.get_image(i, fov, channel_name, z_level)
                        if image is not None:
                            yield image

    @property
    def time_index_count(self):
    def image_sets(self):
        """
        The number of cycles.
        Iterates over groups of related images. This is useful if your ND2 contains multiple fields of view.
        A typical use case might be that you have, say, four areas of interest that you're monitoring, and every
        minute you take a bright field and GFP image of each one. For each cycle, this method would produce four
        ImageSet objects, each containing one bright field and one GFP image.

        :rtype:     int
        :return: model.ImageSet()

        """
        pattern = r""".*?T'\((\d+)\).*?"""
        try:
            count = int(re.match(pattern, self._dimensions).group(1))
        except AttributeError:
            return 1
        else:
            return count

    @property
    def z_level_count(self):
        pattern = r""".*?Z\((\d+)\).*?"""
        try:
            count = int(re.match(pattern, self._dimensions).group(1))
        except AttributeError:
            return 1
        else:
            return count
        for time_index in xrange(self._time_index_count):
            image_set = ImageSet()
            for fov in range(self._field_of_view_count):
                for channel_name in self._channels:
                    for z_level in xrange(self._z_level_count):
                        image = self.get_image(time_index, fov, channel_name, z_level)
                        if image is not None:
                            image_set.add(image)
                yield image_set

    @property
    def _channel_offset(self):
    def get_image(self, time_index, field_of_view, channel_name, z_level):
        """
        Image data is interleaved for each image set. That is, if there are four images in a set, the first image
        will consist of pixels 1, 5, 9, etc, the second will be pixels 2, 6, 10, and so forth. Why this would be the
        case is beyond me, but that's how it works.
        Returns an Image if data exists for the given parameters, otherwise returns None. In general, you should avoid
        using this method unless you're very familiar with the structure of ND2 files. If you have a use case that
        cannot be met by the `__iter__` or `image_sets` methods above, please create an issue on Github.

        :param time_index: the frame number
        :type time_index: int
        :param field_of_view: the label for the place in the XY-plane where this image was taken.
        :type field_of_view: int
        :param channel_name: the name of the color of this image
        :type channel_name: str
        :param z_level: the label for the location in the Z-plane where this image was taken.
        :type z_level: int
        :rtype: nd2reader.model.Image() or None

        """
        channel_offset = {}
        for n, channel in enumerate(self.channels):
            channel_offset[channel] = n
        return channel_offset

    def _get_raw_image_data(self, image_set_number, channel_offset):
        chunk = self._label_map["ImageDataSeq|%d!" % image_set_number]
        data = self._read_chunk(chunk)
        timestamp = struct.unpack("d", data[:8])[0]
        # The images for the various channels are interleaved within each other.
        image_data = array.array("H", data)
        image_data_start = 4 + channel_offset
        if any(image_data):
            return timestamp, image_data[image_data_start::self.channel_count]
        return None

    def _calculate_image_set_number(self, time_index, fov, z_level):
        return time_index * self.field_of_view_count * self.z_level_count + (fov * self.z_level_count + z_level)
        image_set_number = self._calculate_image_group_number(time_index, field_of_view, z_level)
        try:
            timestamp, raw_image_data = self._get_raw_image_data(image_set_number, self._channel_offset[channel_name])
            image = Image(timestamp, raw_image_data, field_of_view, channel_name, z_level, self.height, self.width)
        except TypeError:
            return None
        else:
            return image
--- a/nd2reader/model/init.py
+++ b/nd2reader/model/init.py
@ -1,3 +1,5 @@
 # -*- coding: utf-8 -*-

 import collections
 import numpy as np
 import logging
@ -7,6 +9,25 @@ log = logging.getLogger(__name__)

 class Image(object):
    def __init__(self, timestamp, raw_array, field_of_view, channel, z_level, height, width):
        """
        A wrapper around the raw pixel data of an image.

        :param timestamp: The number of milliseconds after the beginning of the acquisition that this image was taken.
        :type timestamp: int
        :param raw_array: The raw sequence of bytes that represents the image.
        :type raw_array: array.array()
        :param field_of_view: The label for the place in the XY-plane where this image was taken.
        :type field_of_view: int
        :param channel: The name of the color of this image
        :type channel: str
        :param z_level: The label for the location in the Z-plane where this image was taken.
        :type z_level: int
        :param height: The height of the image in pixels.
        :type height: int
        :param width: The width of the image in pixels.
        :type width: int

        """
        self._timestamp = timestamp
        self._raw_data = raw_array
        self._field_of_view = field_of_view
@ -16,8 +37,37 @@ class Image(object):
        self._width = width
        self._data = None

    def __repr__(self):
        return "\n".join(["<ND2 Image>",
                          "%sx%s (HxW)" % (self._height, self._width),
                          "Timestamp: %s" % self.timestamp,
                          "Field of View: %s" % self.field_of_view,
                          "Channel: %s" % self.channel,
                          "Z-Level: %s" % self.z_level,
                          ])

    @property
    def data(self):
        """
        The actual image data.

        :rtype np.array()

        """
        if self._data is None:
            # The data is just a 1-dimensional array originally.
            # We convert it to a 2D image here.
            self._data = np.reshape(self._raw_data, (self._height, self._width))
        return self._data

    @property
    def field_of_view(self):
        """
        Which of the fixed locations this image was taken at.

        :rtype int:

        """
        return self._field_of_view

    @property
@ -28,52 +78,69 @@ class Image(object):
        timestamp. No, this doesn't make much sense. But that's how ND2s are structured, so if your experiment depends
        on millisecond accuracy, you need to find an alternative imaging system.

        :rtype float:

        """
        return self._timestamp / 1000.0

    @property
    def channel(self):
        """
        The name of the filter used to acquire this image. These are user-supplied in NIS Elements.

        :rtype str:

        """
        return self._channel

    @property
    def z_level(self):
        return self._z_level
        """
        The vertical offset of the image. These are simple integers starting from 0, where the 0 is the lowest
        z-level and each subsequent level incremented by 1.

    @property
    def data(self):
        if self._data is None:
            # The data is just a flat, 1-dimensional array. We convert it to a 2D image here.
            self._data = np.reshape(self._raw_data, (self._height, self._width))
        return self._data
        For example, if you acquired images at -3 µm, 0 µm, and +3 µm, your z-levels would be:

        -3 µm: 0
        0 µm: 1
        +3 µm: 2

        :rtype int:

        """
        return self._z_level


 class ImageSet(object):
    """
    A group of images that share the same timestamp. NIS Elements doesn't store a unique timestamp for every
    image, rather, it stores one for each set of images that share the same field of view and z-axis level.
    A group of images that were taken at roughly the same time.

    """
    def __init__(self):
        self._images = collections.defaultdict(dict)

    def get(self, channel="", z_level=0):
    def __len__(self):
        """ The number of images in the image set. """
        return sum([len(channel) for channel in self._images.values()])

    def __repr__(self):
        return "\n".join(["<ND2 Image Set>",
                          "Image count: %s" % len(self)])

    def get(self, channel, z_level=0):
        """
        Retrieve an image with a given channel and z-level. For most users, z_level will always be 0.

        """
        try:
            image = self._images[channel][z_level]
        except KeyError:
            return None
        else:
            return image
        :type channel:  str
        :type z_level:  int

    def __len__(self):
        """ The number of images in the image set. """
        return sum([len(channel) for channel in self._images.values()])
        """
        return self._images.get(channel).get(z_level)

    def add(self, image):
        """
        Stores an image.

        :type image:    nd2reader.model.Image()

        """
--- a/nd2reader/parser.py
+++ b/nd2reader/parser.py
@ -2,6 +2,9 @@

 import array
 from collections import namedtuple
 from datetime import datetime
 import numpy as np
 import re
 import struct
 from StringIO import StringIO

@ -11,6 +14,7 @@ field_of_view = namedtuple('FOV', ['number', 'x', 'y', 'z', 'pfs_offset'])
 class Nd2Parser(object):
    """
    Reads .nd2 files, provides an interface to the metadata, and generates numpy arrays from the image data.
    You should not ever need to instantiate this class manually unless you're a developer.

    """
    CHUNK_HEADER = 0xabeceda
@ -18,7 +22,6 @@ class Nd2Parser(object):
    CHUNK_MAP_END = "ND2 CHUNK MAP SIGNATURE 0000001!"
    
    def __init__(self, filename):
        self._absolute_start = None
        self._filename = filename
        self._fh = None
        self._chunk_map_start_location = None
@ -35,8 +38,48 @@ class Nd2Parser(object):
            self._fh = open(self._filename, "rb")
        return self._fh

    def _get_raw_image_data(self, image_group_number, channel_offset):
        """
        Reads the raw bytes and the timestamp of an image.

        :param image_group_number: groups are made of images with the same time index, field of view and z-level.
        :type image_group_number: int
        :param channel_offset: the offset in the array where the bytes for this image are found.
        :type channel_offset: int

        :return: (int, array.array()) or None

        """
        chunk = self._label_map["ImageDataSeq|%d!" % image_group_number]
        data = self._read_chunk(chunk)
        # All images in the same image group share the same timestamp! So if you have complicated image data,
        # your timestamps may not be entirely accurate. Practically speaking though, they'll only be off by a few
        # seconds unless you're doing something super weird.
        timestamp = struct.unpack("d", data[:8])[0]
        image_group_data = array.array("H", data)
        image_data_start = 4 + channel_offset
        # The images for the various channels are interleaved within the same array. For example, the second image
        # of a four image group will be composed of bytes 2, 6, 10, etc. If you understand why someone would design
        # a data structure that way, please send the author of this library a message.
        image_data = image_group_data[image_data_start::self._channel_count]
        # Skip images that are all zeros! This is important, since NIS Elements creates blank "gap" images if you
        # don't have the same number of images each cycle. We discovered this because we only took GFP images every
        # other cycle to reduce phototoxicity, but NIS Elements still allocated memory as if we were going to take
        # them every cycle.
        if np.any(image_data):
            return timestamp, image_data
        return None

    @property
    def _dimensions(self):
        """
        While there are metadata values that represent a lot of what we want to capture, they seem to be unreliable.
        Sometimes certain elements don't exist, or change their data type randomly. However, the human-readable text
        is always there and in the same exact format, so we just parse that instead.

        :rtype: str

        """
        if self._dimension_text is None:
            for line in self.metadata['ImageTextInfo']['SLxImageTextInfo'].values():
                if "Dimensions:" in line:
@ -53,14 +96,162 @@ class Nd2Parser(object):
        return self._dimension_text

    @property
    def _image_count(self):
        return self.metadata['ImageAttributes']['SLxImageAttributes']['uiSequenceCount']
    def _channels(self):
        """
        These are labels created by the NIS Elements user. Typically they may a short description of the filter cube
        used (e.g. "bright field", "GFP", etc.)

        :rtype: str

        """
        metadata = self.metadata['ImageMetadataSeq']['SLxPictureMetadata']['sPicturePlanes']
        try:
            validity = self.metadata['ImageMetadata']['SLxExperiment']['ppNextLevelEx'][''][0]['ppNextLevelEx'][''][0]['pItemValid']
        except KeyError:
            # If none of the channels have been deleted, there is no validity list, so we just make one
            validity = [True for _ in metadata]
        # Channel information is contained in dictionaries with the keys a0, a1...an where the number
        # indicates the order in which the channel is stored. So by sorting the dicts alphabetically
        # we get the correct order.
        for (label, chan), valid in zip(sorted(metadata['sPlaneNew'].items()), validity):
            if not valid:
                continue
            yield chan['sDescription']

    def _calculate_image_group_number(self, time_index, fov, z_level):
        """
        Images are grouped together if they share the same time index, field of view, and z-level.

        :type time_index: int
        :type fov: int
        :type z_level: int

        :rtype: int

        """
        return time_index * self._field_of_view_count * self._z_level_count + (fov * self._z_level_count + z_level)

    @property
    def _channel_offset(self):
        """
        Image data is interleaved for each image set. That is, if there are four images in a set, the first image
        will consist of pixels 1, 5, 9, etc, the second will be pixels 2, 6, 10, and so forth.

        :rtype: dict

        """
        channel_offset = {}
        for n, channel in enumerate(self._channels):
            channel_offset[channel] = n
        return channel_offset

    @property
    def _absolute_start(self):
        """
        The date and time when acquisition began.

        :rtype: datetime.datetime()

        """
        for line in self.metadata['ImageTextInfo']['SLxImageTextInfo'].values():
            absolute_start_12 = None
            absolute_start_24 = None
            # ND2s seem to randomly switch between 12- and 24-hour representations.
            try:
                absolute_start_24 = datetime.strptime(line, "%m/%d/%Y  %H:%M:%S")
            except ValueError:
                pass
            try:
                absolute_start_12 = datetime.strptime(line, "%m/%d/%Y  %I:%M:%S %p")
            except ValueError:
                pass
            if not absolute_start_12 and not absolute_start_24:
                continue
            return absolute_start_12 if absolute_start_12 else absolute_start_24
        raise ValueError("This ND2 has no recorded start time. This is probably a bug.")

    @property
    def _sequence_count(self):
        return self.metadata['ImageEvents']['RLxExperimentRecord']['uiCount']
    def _channel_count(self):
        """
        The number of different channels used, including bright field.

        :rtype: int

        """
        pattern = r""".*?λ\((\d+)\).*?"""
        try:
            count = int(re.match(pattern, self._dimensions).group(1))
        except AttributeError:
            return 1
        else:
            return count

    @property
    def _field_of_view_count(self):
        """
        The metadata contains information about fields of view, but it contains it even if some fields
        of view were cropped. We can't find anything that states which fields of view are actually
        in the image data, so we have to calculate it. There probably is something somewhere, since
        NIS Elements can figure it out, but we haven't found it yet.

        :rtype: int

        """
        pattern = r""".*?XY\((\d+)\).*?"""
        try:
            count = int(re.match(pattern, self._dimensions).group(1))
        except AttributeError:
            return 1
        else:
            return count

    @property
    def _time_index_count(self):
        """
        The number of cycles.

        :rtype:     int

        """
        pattern = r""".*?T'\((\d+)\).*?"""
        try:
            count = int(re.match(pattern, self._dimensions).group(1))
        except AttributeError:
            return 1
        else:
            return count

    @property
    def _z_level_count(self):
        """
        The number of different levels in the Z-plane.

        :rtype: int

        """
        pattern = r""".*?Z\((\d+)\).*?"""
        try:
            count = int(re.match(pattern, self._dimensions).group(1))
        except AttributeError:
            return 1
        else:
            return count

    @property
    def _image_count(self):
        """
        The total number of images in the ND2. Warning: this may be inaccurate as it includes "gap" images.

        :rtype: int

        """
        return self.metadata['ImageAttributes']['SLxImageAttributes']['uiSequenceCount']

    def _parse_metadata(self):
        """
        Reads all metadata.

        """
        for label in self._label_map.keys():
            if label.endswith("LV!") or "LV|" in label:
                data = self._read_chunk(self._label_map[label])
@ -106,10 +297,6 @@ class Nd2Parser(object):
        self._file_handle.seek(chunk_location + 16 + relative_offset)
        return self._file_handle.read(data_length)

    def _z_level_count(self):
        st = self._read_chunk(self._label_map["CustomData|Z!"])
        return len(array.array("d", st))

    def _parse_unsigned_char(self, data):
        return struct.unpack("B", data.read(1))[0]

@ -134,6 +321,10 @@ class Nd2Parser(object):
        return array.array("B", data.read(array_length))

    def _parse_metadata_item(self, data):
        """
        Reads hierarchical data, analogous to a Python dict.

        """
        new_count, length = struct.unpack("<IQ", data.read(12))
        length -= data.tell() - self._cursor_position
        next_data_length = data.read(length)
@ -143,6 +334,10 @@ class Nd2Parser(object):
        return value

    def _get_value(self, data, data_type):
        """
        ND2s use various codes to indicate different data types, which we translate here.

        """
        parser = {1: self._parse_unsigned_char,
                  2: self._parse_unsigned_int,
                  3: self._parse_unsigned_int,
@ -154,12 +349,17 @@ class Nd2Parser(object):
        return parser[data_type](data)

    def _read_metadata(self, data, count):
        """
        Iterates over each element some section of the metadata and parses it.

        """
        data = StringIO(data)
        metadata = {}
        for _ in xrange(count):
            self._cursor_position = data.tell()
            header = data.read(2)
            if not header:
                # We've reached the end of some hierarchy of data
                break
            data_type, name_length = map(ord, header)
            name = data.read(name_length * 2).decode("utf16")[:-1].encode("utf8")