Browse Source

Merge pull request #31 from jimrybarski/2-rename-things

2 rename things
master
Jim Rybarski 10 years ago
parent
commit
c9abc0017f
3 changed files with 358 additions and 178 deletions
  1. +62
    -149
      nd2reader/__init__.py
  2. +87
    -20
      nd2reader/model/__init__.py
  3. +209
    -9
      nd2reader/parser.py

+ 62
- 149
nd2reader/__init__.py View File

@ -1,84 +1,33 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import array
from datetime import datetime
import logging
from nd2reader.model import Image, ImageSet from nd2reader.model import Image, ImageSet
from nd2reader.parser import Nd2Parser from nd2reader.parser import Nd2Parser
import re
import struct
log = logging.getLogger(__name__)
log.addHandler(logging.StreamHandler())
log.setLevel(logging.WARNING)
class Nd2(Nd2Parser): class Nd2(Nd2Parser):
"""
Allows easy access to NIS Elements .nd2 image files.
"""
def __init__(self, filename): def __init__(self, filename):
super(Nd2, self).__init__(filename) super(Nd2, self).__init__(filename)
self._filename = filename self._filename = filename
def __repr__(self): def __repr__(self):
return "\n".join(["ND2: %s" % self._filename,
"Created: %s" % self.absolute_start.strftime("%Y-%m-%d %H:%M:%S"),
return "\n".join(["<ND2 %s>" % self._filename,
"Created: %s" % self._absolute_start.strftime("%Y-%m-%d %H:%M:%S"),
"Image size: %sx%s (HxW)" % (self.height, self.width), "Image size: %sx%s (HxW)" % (self.height, self.width),
"Image cycles: %s" % self.time_index_count,
"Channels: %s" % ", ".join(["'%s'" % channel for channel in self.channels]),
"Fields of View: %s" % self.field_of_view_count,
"Z-Levels: %s" % self.z_level_count
"Image cycles: %s" % self._time_index_count,
"Channels: %s" % ", ".join(["'%s'" % channel for channel in self._channels]),
"Fields of View: %s" % self._field_of_view_count,
"Z-Levels: %s" % self._z_level_count
]) ])
def __iter__(self):
for i in range(self._image_count):
for fov in range(self.field_of_view_count):
for z_level in range(self.z_level_count):
for channel_name in self.channels:
image = self.get_image(i, fov, channel_name, z_level)
if image is not None:
yield image
@property
def image_sets(self):
for time_index in xrange(self.time_index_count):
image_set = ImageSet()
for fov in range(self.field_of_view_count):
for channel_name in self.channels:
for z_level in xrange(self.z_level_count):
image = self.get_image(time_index, fov, channel_name, z_level)
if image is not None:
image_set.add(image)
yield image_set
def get_image(self, time_index, fov, channel_name, z_level):
image_set_number = self._calculate_image_set_number(time_index, fov, z_level)
try:
timestamp, raw_image_data = self._get_raw_image_data(image_set_number, self._channel_offset[channel_name])
image = Image(timestamp, raw_image_data, fov, channel_name, z_level, self.height, self.width)
except TypeError:
return None
else:
return image
@property
def channels(self):
metadata = self.metadata['ImageMetadataSeq']['SLxPictureMetadata']['sPicturePlanes']
try:
validity = self.metadata['ImageMetadata']['SLxExperiment']['ppNextLevelEx'][''][0]['ppNextLevelEx'][''][0]['pItemValid']
except KeyError:
# If none of the channels have been deleted, there is no validity list, so we just make one
validity = [True for _ in metadata]
# Channel information is contained in dictionaries with the keys a0, a1...an where the number
# indicates the order in which the channel is stored. So by sorting the dicts alphabetically
# we get the correct order.
for (label, chan), valid in zip(sorted(metadata['sPlaneNew'].items()), validity):
if not valid:
continue
yield chan['sDescription']
@property @property
def height(self): def height(self):
""" """
:return: height of each image, in pixels
:return: height of each image, in pixels
:rtype: int
""" """
return self.metadata['ImageAttributes']['SLxImageAttributes']['uiHeight'] return self.metadata['ImageAttributes']['SLxImageAttributes']['uiHeight']
@ -86,106 +35,70 @@ class Nd2(Nd2Parser):
@property @property
def width(self): def width(self):
""" """
:return: width of each image, in pixels
:return: width of each image, in pixels
:rtype: int
""" """
return self.metadata['ImageAttributes']['SLxImageAttributes']['uiWidth'] return self.metadata['ImageAttributes']['SLxImageAttributes']['uiWidth']
@property
def absolute_start(self):
for line in self.metadata['ImageTextInfo']['SLxImageTextInfo'].values():
absolute_start_12 = None
absolute_start_24 = None
# ND2s seem to randomly switch between 12- and 24-hour representations.
try:
absolute_start_24 = datetime.strptime(line, "%m/%d/%Y %H:%M:%S")
except ValueError:
pass
try:
absolute_start_12 = datetime.strptime(line, "%m/%d/%Y %I:%M:%S %p")
except ValueError:
pass
if not absolute_start_12 and not absolute_start_24:
continue
return absolute_start_12 if absolute_start_12 else absolute_start_24
raise ValueError("This ND2 has no recorded start time. This is probably a bug.")
@property
def channel_count(self):
pattern = r""".*?λ\((\d+)\).*?"""
try:
count = int(re.match(pattern, self._dimensions).group(1))
except AttributeError:
return 1
else:
return count
@property
def field_of_view_count(self):
def __iter__(self):
""" """
The metadata contains information about fields of view, but it contains it even if some fields
of view were cropped. We can't find anything that states which fields of view are actually
in the image data, so we have to calculate it. There probably is something somewhere, since
NIS Elements can figure it out, but we haven't found it yet.
Iterates over every image, in the order they were taken.
:return: model.Image()
""" """
pattern = r""".*?XY\((\d+)\).*?"""
try:
count = int(re.match(pattern, self._dimensions).group(1))
except AttributeError:
return 1
else:
return count
for i in range(self._image_count):
for fov in range(self._field_of_view_count):
for z_level in range(self._z_level_count):
for channel_name in self._channels:
image = self.get_image(i, fov, channel_name, z_level)
if image is not None:
yield image
@property @property
def time_index_count(self):
def image_sets(self):
""" """
The number of cycles.
Iterates over groups of related images. This is useful if your ND2 contains multiple fields of view.
A typical use case might be that you have, say, four areas of interest that you're monitoring, and every
minute you take a bright field and GFP image of each one. For each cycle, this method would produce four
ImageSet objects, each containing one bright field and one GFP image.
:rtype: int
:return: model.ImageSet()
""" """
pattern = r""".*?T'\((\d+)\).*?"""
try:
count = int(re.match(pattern, self._dimensions).group(1))
except AttributeError:
return 1
else:
return count
@property
def z_level_count(self):
pattern = r""".*?Z\((\d+)\).*?"""
try:
count = int(re.match(pattern, self._dimensions).group(1))
except AttributeError:
return 1
else:
return count
for time_index in xrange(self._time_index_count):
image_set = ImageSet()
for fov in range(self._field_of_view_count):
for channel_name in self._channels:
for z_level in xrange(self._z_level_count):
image = self.get_image(time_index, fov, channel_name, z_level)
if image is not None:
image_set.add(image)
yield image_set
@property
def _channel_offset(self):
def get_image(self, time_index, field_of_view, channel_name, z_level):
""" """
Image data is interleaved for each image set. That is, if there are four images in a set, the first image
will consist of pixels 1, 5, 9, etc, the second will be pixels 2, 6, 10, and so forth. Why this would be the
case is beyond me, but that's how it works.
Returns an Image if data exists for the given parameters, otherwise returns None. In general, you should avoid
using this method unless you're very familiar with the structure of ND2 files. If you have a use case that
cannot be met by the `__iter__` or `image_sets` methods above, please create an issue on Github.
:param time_index: the frame number
:type time_index: int
:param field_of_view: the label for the place in the XY-plane where this image was taken.
:type field_of_view: int
:param channel_name: the name of the color of this image
:type channel_name: str
:param z_level: the label for the location in the Z-plane where this image was taken.
:type z_level: int
:rtype: nd2reader.model.Image() or None
""" """
channel_offset = {}
for n, channel in enumerate(self.channels):
channel_offset[channel] = n
return channel_offset
def _get_raw_image_data(self, image_set_number, channel_offset):
chunk = self._label_map["ImageDataSeq|%d!" % image_set_number]
data = self._read_chunk(chunk)
timestamp = struct.unpack("d", data[:8])[0]
# The images for the various channels are interleaved within each other.
image_data = array.array("H", data)
image_data_start = 4 + channel_offset
if any(image_data):
return timestamp, image_data[image_data_start::self.channel_count]
return None
def _calculate_image_set_number(self, time_index, fov, z_level):
return time_index * self.field_of_view_count * self.z_level_count + (fov * self.z_level_count + z_level)
image_set_number = self._calculate_image_group_number(time_index, field_of_view, z_level)
try:
timestamp, raw_image_data = self._get_raw_image_data(image_set_number, self._channel_offset[channel_name])
image = Image(timestamp, raw_image_data, field_of_view, channel_name, z_level, self.height, self.width)
except TypeError:
return None
else:
return image

+ 87
- 20
nd2reader/model/__init__.py View File

@ -1,3 +1,5 @@
# -*- coding: utf-8 -*-
import collections import collections
import numpy as np import numpy as np
import logging import logging
@ -7,6 +9,25 @@ log = logging.getLogger(__name__)
class Image(object): class Image(object):
def __init__(self, timestamp, raw_array, field_of_view, channel, z_level, height, width): def __init__(self, timestamp, raw_array, field_of_view, channel, z_level, height, width):
"""
A wrapper around the raw pixel data of an image.
:param timestamp: The number of milliseconds after the beginning of the acquisition that this image was taken.
:type timestamp: int
:param raw_array: The raw sequence of bytes that represents the image.
:type raw_array: array.array()
:param field_of_view: The label for the place in the XY-plane where this image was taken.
:type field_of_view: int
:param channel: The name of the color of this image
:type channel: str
:param z_level: The label for the location in the Z-plane where this image was taken.
:type z_level: int
:param height: The height of the image in pixels.
:type height: int
:param width: The width of the image in pixels.
:type width: int
"""
self._timestamp = timestamp self._timestamp = timestamp
self._raw_data = raw_array self._raw_data = raw_array
self._field_of_view = field_of_view self._field_of_view = field_of_view
@ -16,8 +37,37 @@ class Image(object):
self._width = width self._width = width
self._data = None self._data = None
def __repr__(self):
return "\n".join(["<ND2 Image>",
"%sx%s (HxW)" % (self._height, self._width),
"Timestamp: %s" % self.timestamp,
"Field of View: %s" % self.field_of_view,
"Channel: %s" % self.channel,
"Z-Level: %s" % self.z_level,
])
@property
def data(self):
"""
The actual image data.
:rtype np.array()
"""
if self._data is None:
# The data is just a 1-dimensional array originally.
# We convert it to a 2D image here.
self._data = np.reshape(self._raw_data, (self._height, self._width))
return self._data
@property @property
def field_of_view(self): def field_of_view(self):
"""
Which of the fixed locations this image was taken at.
:rtype int:
"""
return self._field_of_view return self._field_of_view
@property @property
@ -28,52 +78,69 @@ class Image(object):
timestamp. No, this doesn't make much sense. But that's how ND2s are structured, so if your experiment depends timestamp. No, this doesn't make much sense. But that's how ND2s are structured, so if your experiment depends
on millisecond accuracy, you need to find an alternative imaging system. on millisecond accuracy, you need to find an alternative imaging system.
:rtype float:
""" """
return self._timestamp / 1000.0 return self._timestamp / 1000.0
@property @property
def channel(self): def channel(self):
"""
The name of the filter used to acquire this image. These are user-supplied in NIS Elements.
:rtype str:
"""
return self._channel return self._channel
@property @property
def z_level(self): def z_level(self):
return self._z_level
"""
The vertical offset of the image. These are simple integers starting from 0, where the 0 is the lowest
z-level and each subsequent level incremented by 1.
@property
def data(self):
if self._data is None:
# The data is just a flat, 1-dimensional array. We convert it to a 2D image here.
self._data = np.reshape(self._raw_data, (self._height, self._width))
return self._data
For example, if you acquired images at -3 µm, 0 µm, and +3 µm, your z-levels would be:
-3 µm: 0
0 µm: 1
+3 µm: 2
:rtype int:
"""
return self._z_level
class ImageSet(object): class ImageSet(object):
""" """
A group of images that share the same timestamp. NIS Elements doesn't store a unique timestamp for every
image, rather, it stores one for each set of images that share the same field of view and z-axis level.
A group of images that were taken at roughly the same time.
""" """
def __init__(self): def __init__(self):
self._images = collections.defaultdict(dict) self._images = collections.defaultdict(dict)
def get(self, channel="", z_level=0):
def __len__(self):
""" The number of images in the image set. """
return sum([len(channel) for channel in self._images.values()])
def __repr__(self):
return "\n".join(["<ND2 Image Set>",
"Image count: %s" % len(self)])
def get(self, channel, z_level=0):
""" """
Retrieve an image with a given channel and z-level. For most users, z_level will always be 0. Retrieve an image with a given channel and z-level. For most users, z_level will always be 0.
"""
try:
image = self._images[channel][z_level]
except KeyError:
return None
else:
return image
:type channel: str
:type z_level: int
def __len__(self):
""" The number of images in the image set. """
return sum([len(channel) for channel in self._images.values()])
"""
return self._images.get(channel).get(z_level)
def add(self, image): def add(self, image):
""" """
Stores an image.
:type image: nd2reader.model.Image() :type image: nd2reader.model.Image()
""" """

+ 209
- 9
nd2reader/parser.py View File

@ -2,6 +2,9 @@
import array import array
from collections import namedtuple from collections import namedtuple
from datetime import datetime
import numpy as np
import re
import struct import struct
from StringIO import StringIO from StringIO import StringIO
@ -11,6 +14,7 @@ field_of_view = namedtuple('FOV', ['number', 'x', 'y', 'z', 'pfs_offset'])
class Nd2Parser(object): class Nd2Parser(object):
""" """
Reads .nd2 files, provides an interface to the metadata, and generates numpy arrays from the image data. Reads .nd2 files, provides an interface to the metadata, and generates numpy arrays from the image data.
You should not ever need to instantiate this class manually unless you're a developer.
""" """
CHUNK_HEADER = 0xabeceda CHUNK_HEADER = 0xabeceda
@ -18,7 +22,6 @@ class Nd2Parser(object):
CHUNK_MAP_END = "ND2 CHUNK MAP SIGNATURE 0000001!" CHUNK_MAP_END = "ND2 CHUNK MAP SIGNATURE 0000001!"
def __init__(self, filename): def __init__(self, filename):
self._absolute_start = None
self._filename = filename self._filename = filename
self._fh = None self._fh = None
self._chunk_map_start_location = None self._chunk_map_start_location = None
@ -35,8 +38,48 @@ class Nd2Parser(object):
self._fh = open(self._filename, "rb") self._fh = open(self._filename, "rb")
return self._fh return self._fh
def _get_raw_image_data(self, image_group_number, channel_offset):
"""
Reads the raw bytes and the timestamp of an image.
:param image_group_number: groups are made of images with the same time index, field of view and z-level.
:type image_group_number: int
:param channel_offset: the offset in the array where the bytes for this image are found.
:type channel_offset: int
:return: (int, array.array()) or None
"""
chunk = self._label_map["ImageDataSeq|%d!" % image_group_number]
data = self._read_chunk(chunk)
# All images in the same image group share the same timestamp! So if you have complicated image data,
# your timestamps may not be entirely accurate. Practically speaking though, they'll only be off by a few
# seconds unless you're doing something super weird.
timestamp = struct.unpack("d", data[:8])[0]
image_group_data = array.array("H", data)
image_data_start = 4 + channel_offset
# The images for the various channels are interleaved within the same array. For example, the second image
# of a four image group will be composed of bytes 2, 6, 10, etc. If you understand why someone would design
# a data structure that way, please send the author of this library a message.
image_data = image_group_data[image_data_start::self._channel_count]
# Skip images that are all zeros! This is important, since NIS Elements creates blank "gap" images if you
# don't have the same number of images each cycle. We discovered this because we only took GFP images every
# other cycle to reduce phototoxicity, but NIS Elements still allocated memory as if we were going to take
# them every cycle.
if np.any(image_data):
return timestamp, image_data
return None
@property @property
def _dimensions(self): def _dimensions(self):
"""
While there are metadata values that represent a lot of what we want to capture, they seem to be unreliable.
Sometimes certain elements don't exist, or change their data type randomly. However, the human-readable text
is always there and in the same exact format, so we just parse that instead.
:rtype: str
"""
if self._dimension_text is None: if self._dimension_text is None:
for line in self.metadata['ImageTextInfo']['SLxImageTextInfo'].values(): for line in self.metadata['ImageTextInfo']['SLxImageTextInfo'].values():
if "Dimensions:" in line: if "Dimensions:" in line:
@ -53,14 +96,162 @@ class Nd2Parser(object):
return self._dimension_text return self._dimension_text
@property @property
def _image_count(self):
return self.metadata['ImageAttributes']['SLxImageAttributes']['uiSequenceCount']
def _channels(self):
"""
These are labels created by the NIS Elements user. Typically they may a short description of the filter cube
used (e.g. "bright field", "GFP", etc.)
:rtype: str
"""
metadata = self.metadata['ImageMetadataSeq']['SLxPictureMetadata']['sPicturePlanes']
try:
validity = self.metadata['ImageMetadata']['SLxExperiment']['ppNextLevelEx'][''][0]['ppNextLevelEx'][''][0]['pItemValid']
except KeyError:
# If none of the channels have been deleted, there is no validity list, so we just make one
validity = [True for _ in metadata]
# Channel information is contained in dictionaries with the keys a0, a1...an where the number
# indicates the order in which the channel is stored. So by sorting the dicts alphabetically
# we get the correct order.
for (label, chan), valid in zip(sorted(metadata['sPlaneNew'].items()), validity):
if not valid:
continue
yield chan['sDescription']
def _calculate_image_group_number(self, time_index, fov, z_level):
"""
Images are grouped together if they share the same time index, field of view, and z-level.
:type time_index: int
:type fov: int
:type z_level: int
:rtype: int
"""
return time_index * self._field_of_view_count * self._z_level_count + (fov * self._z_level_count + z_level)
@property
def _channel_offset(self):
"""
Image data is interleaved for each image set. That is, if there are four images in a set, the first image
will consist of pixels 1, 5, 9, etc, the second will be pixels 2, 6, 10, and so forth.
:rtype: dict
"""
channel_offset = {}
for n, channel in enumerate(self._channels):
channel_offset[channel] = n
return channel_offset
@property
def _absolute_start(self):
"""
The date and time when acquisition began.
:rtype: datetime.datetime()
"""
for line in self.metadata['ImageTextInfo']['SLxImageTextInfo'].values():
absolute_start_12 = None
absolute_start_24 = None
# ND2s seem to randomly switch between 12- and 24-hour representations.
try:
absolute_start_24 = datetime.strptime(line, "%m/%d/%Y %H:%M:%S")
except ValueError:
pass
try:
absolute_start_12 = datetime.strptime(line, "%m/%d/%Y %I:%M:%S %p")
except ValueError:
pass
if not absolute_start_12 and not absolute_start_24:
continue
return absolute_start_12 if absolute_start_12 else absolute_start_24
raise ValueError("This ND2 has no recorded start time. This is probably a bug.")
@property @property
def _sequence_count(self):
return self.metadata['ImageEvents']['RLxExperimentRecord']['uiCount']
def _channel_count(self):
"""
The number of different channels used, including bright field.
:rtype: int
"""
pattern = r""".*?λ\((\d+)\).*?"""
try:
count = int(re.match(pattern, self._dimensions).group(1))
except AttributeError:
return 1
else:
return count
@property
def _field_of_view_count(self):
"""
The metadata contains information about fields of view, but it contains it even if some fields
of view were cropped. We can't find anything that states which fields of view are actually
in the image data, so we have to calculate it. There probably is something somewhere, since
NIS Elements can figure it out, but we haven't found it yet.
:rtype: int
"""
pattern = r""".*?XY\((\d+)\).*?"""
try:
count = int(re.match(pattern, self._dimensions).group(1))
except AttributeError:
return 1
else:
return count
@property
def _time_index_count(self):
"""
The number of cycles.
:rtype: int
"""
pattern = r""".*?T'\((\d+)\).*?"""
try:
count = int(re.match(pattern, self._dimensions).group(1))
except AttributeError:
return 1
else:
return count
@property
def _z_level_count(self):
"""
The number of different levels in the Z-plane.
:rtype: int
"""
pattern = r""".*?Z\((\d+)\).*?"""
try:
count = int(re.match(pattern, self._dimensions).group(1))
except AttributeError:
return 1
else:
return count
@property
def _image_count(self):
"""
The total number of images in the ND2. Warning: this may be inaccurate as it includes "gap" images.
:rtype: int
"""
return self.metadata['ImageAttributes']['SLxImageAttributes']['uiSequenceCount']
def _parse_metadata(self): def _parse_metadata(self):
"""
Reads all metadata.
"""
for label in self._label_map.keys(): for label in self._label_map.keys():
if label.endswith("LV!") or "LV|" in label: if label.endswith("LV!") or "LV|" in label:
data = self._read_chunk(self._label_map[label]) data = self._read_chunk(self._label_map[label])
@ -106,10 +297,6 @@ class Nd2Parser(object):
self._file_handle.seek(chunk_location + 16 + relative_offset) self._file_handle.seek(chunk_location + 16 + relative_offset)
return self._file_handle.read(data_length) return self._file_handle.read(data_length)
def _z_level_count(self):
st = self._read_chunk(self._label_map["CustomData|Z!"])
return len(array.array("d", st))
def _parse_unsigned_char(self, data): def _parse_unsigned_char(self, data):
return struct.unpack("B", data.read(1))[0] return struct.unpack("B", data.read(1))[0]
@ -134,6 +321,10 @@ class Nd2Parser(object):
return array.array("B", data.read(array_length)) return array.array("B", data.read(array_length))
def _parse_metadata_item(self, data): def _parse_metadata_item(self, data):
"""
Reads hierarchical data, analogous to a Python dict.
"""
new_count, length = struct.unpack("<IQ", data.read(12)) new_count, length = struct.unpack("<IQ", data.read(12))
length -= data.tell() - self._cursor_position length -= data.tell() - self._cursor_position
next_data_length = data.read(length) next_data_length = data.read(length)
@ -143,6 +334,10 @@ class Nd2Parser(object):
return value return value
def _get_value(self, data, data_type): def _get_value(self, data, data_type):
"""
ND2s use various codes to indicate different data types, which we translate here.
"""
parser = {1: self._parse_unsigned_char, parser = {1: self._parse_unsigned_char,
2: self._parse_unsigned_int, 2: self._parse_unsigned_int,
3: self._parse_unsigned_int, 3: self._parse_unsigned_int,
@ -154,12 +349,17 @@ class Nd2Parser(object):
return parser[data_type](data) return parser[data_type](data)
def _read_metadata(self, data, count): def _read_metadata(self, data, count):
"""
Iterates over each element some section of the metadata and parses it.
"""
data = StringIO(data) data = StringIO(data)
metadata = {} metadata = {}
for _ in xrange(count): for _ in xrange(count):
self._cursor_position = data.tell() self._cursor_position = data.tell()
header = data.read(2) header = data.read(2)
if not header: if not header:
# We've reached the end of some hierarchy of data
break break
data_type, name_length = map(ord, header) data_type, name_length = map(ord, header)
name = data.read(name_length * 2).decode("utf16")[:-1].encode("utf8") name = data.read(name_length * 2).decode("utf16")[:-1].encode("utf8")


Loading…
Cancel
Save