From d06153fa0dfd97a8c3fd24316c0b07e2907e3e22 Mon Sep 17 00:00:00 2001
From: Jim Rybarski
Date: Mon, 11 May 2015 12:50:03 +0000
Subject: [PATCH] much refactor. very change

---
 nd2reader/__init__.py       | 167 +++++++++++++++-----
 nd2reader/model/__init__.py |  19 ---
 nd2reader/parser.py         | 179 ++++++++++++++++++++++
 nd2reader/reader.py         | 297 ------------------------------------
 4 files changed, 305 insertions(+), 357 deletions(-)
 create mode 100644 nd2reader/parser.py
 delete mode 100644 nd2reader/reader.py

diff --git a/nd2reader/__init__.py b/nd2reader/__init__.py
index 00648d9..e5d4839 100644
--- a/nd2reader/__init__.py
+++ b/nd2reader/__init__.py
@@ -1,27 +1,31 @@
 # -*- coding: utf-8 -*-
 
-from nd2reader.model import Channel
+import array
 from datetime import datetime
 import logging
 from nd2reader.model import Image, ImageSet
-from nd2reader.reader import Nd2FileReader
-
+from nd2reader.parser import Nd2Parser
+import numpy as np
+import re
+import struct
 
 log = logging.getLogger(__name__)
 log.addHandler(logging.StreamHandler())
 log.setLevel(logging.WARN)
 
 
-class Nd2(Nd2FileReader):
-    def __init__(self, filename):
+class Nd2(Nd2Parser):
+    def __init__(self, filename, image_sets=False):
         super(Nd2, self).__init__(filename)
+        self._use_image_sets = image_sets
 
     def __iter__(self):
-        """
-        Just return every image in order (might not be exactly the order that the images were physically taken, but it will
-        be within a few seconds). A better explanation is probably needed here.
+        if self._use_image_sets:
+            return self.image_sets()
+        else:
+            return self.images()
 
-        """
+    def images(self):
         for i in range(self._image_count):
             for fov in range(self.field_of_view_count):
                 for z_level in range(self.z_level_count):
@@ -30,34 +34,27 @@ class Nd2(Nd2FileReader):
                         if image.is_valid:
                             yield image
 
-    def image_sets(self, field_of_view, time_indices=None, channels=None, z_levels=None):
-        """
-        Gets all the images for a given field of view and
-        """
-        timepoint_set = xrange(self.time_index_count) if time_indices is None else time_indices
-        channel_set = [channel.name for channel in self.channels] if channels is None else channels
-        z_level_set = xrange(self.z_level_count) if z_levels is None else z_levels
-
-        for timepoint in timepoint_set:
+    def image_sets(self):
+        for time_index in xrange(self.time_index_count):
             image_set = ImageSet()
-            for channel_name in channel_set:
-                for z_level in z_level_set:
-                    image = self.get_image(timepoint, field_of_view, channel_name, z_level)
-                    if image.is_valid:
-                        image_set.add(image)
+            for fov in range(self.field_of_view_count):
+                for channel_name in self.channels:
+                    for z_level in xrange(self.z_level_count):
+                        image = self.get_image(time_index, fov, channel_name, z_level)
+                        if image.is_valid:
+                            image_set.add(image)
             yield image_set
 
-        self._channel_offset = None
-
-    @property
-    def metadata(self):
-        return self._metadata
+    def get_image(self, time_index, fov, channel_name, z_level):
+        image_set_number = self._calculate_image_set_number(time_index, fov, z_level)
+        timestamp, raw_image_data = self._get_raw_image_data(image_set_number, self._channel_offset[channel_name])
+        return Image(timestamp, raw_image_data, fov, channel_name, z_level, self.height, self.width)
 
     @property
     def channels(self):
-        metadata = self._metadata['ImageMetadataSeq']['SLxPictureMetadata']['sPicturePlanes']
+        metadata = self.metadata['ImageMetadataSeq']['SLxPictureMetadata']['sPicturePlanes']
         try:
-            validity = self._metadata['ImageMetadata']['SLxExperiment']['ppNextLevelEx'][''][0]['ppNextLevelEx'][''][0]['pItemValid']
+            validity = self.metadata['ImageMetadata']['SLxExperiment']['ppNextLevelEx'][''][0]['ppNextLevelEx'][''][0]['pItemValid']
         except KeyError:
             # If none of the channels have been deleted, there is no validity list, so we just make one
             validity = [True for i in metadata]
@@ -67,26 +64,28 @@ class Nd2(Nd2FileReader):
         for (label, chan), valid in zip(sorted(metadata['sPlaneNew'].items()), validity):
             if not valid:
                 continue
-            name = chan['sDescription']
-            exposure_time = metadata['sSampleSetting'][label]['dExposureTime']
-            camera = metadata['sSampleSetting'][label]['pCameraSetting']['CameraUserName']
-            yield Channel(name, camera, exposure_time)
+            yield chan['sDescription']
 
     @property
-    def channel_names(self):
+    def height(self):
         """
-        A convenience method for getting an alphabetized list of channel names.
+        :return: height of each image, in pixels
 
-        :return: list[str]
+        """
+        return self.metadata['ImageAttributes']['SLxImageAttributes']['uiHeight']
 
+    @property
+    def width(self):
         """
-        for channel in sorted(self.channels, key=lambda x: x.name):
-            yield channel.name
+        :return: width of each image, in pixels
+
+        """
+        return self.metadata['ImageAttributes']['SLxImageAttributes']['uiWidth']
 
     @property
     def absolute_start(self):
         if self._absolute_start is None:
-            for line in self._metadata['ImageTextInfo']['SLxImageTextInfo'].values():
+            for line in self.metadata['ImageTextInfo']['SLxImageTextInfo'].values():
                 absolute_start_12 = None
                 absolute_start_24 = None
                 # ND2s seem to randomly switch between 12- and 24-hour representations.
@@ -101,4 +100,90 @@
                 if not absolute_start_12 and not absolute_start_24:
                     continue
                 self._absolute_start = absolute_start_12 if absolute_start_12 else absolute_start_24
-        return self._absolute_start
\ No newline at end of file
+        return self._absolute_start
+
+    @property
+    def channel_count(self):
+        pattern = r""".*?λ\((\d+)\).*?"""
+        try:
+            count = int(re.match(pattern, self._dimensions).group(1))
+        except AttributeError:
+            return 1
+        else:
+            return count
+
+    @property
+    def field_of_view_count(self):
+        """
+        The metadata contains information about fields of view, but it includes fields of view that were
+        cropped out of the dataset. We can't find anything that states which fields of view are actually
+        present in the image data, so we have to calculate it. There probably is something somewhere, since
+        NIS Elements can figure it out, but we haven't found it yet.
+
+        """
+        pattern = r""".*?XY\((\d+)\).*?"""
+        try:
+            count = int(re.match(pattern, self._dimensions).group(1))
+        except AttributeError:
+            return 1
+        else:
+            return count
+
+    @property
+    def time_index_count(self):
+        """
+        The number of image sets. If images were acquired using some kind of cycle, all images at each step in the
+        program will have the same timestamp (even though they may have varied by a few seconds in reality). For example,
+        if you have four fields of view that you're constantly monitoring, and you take a bright field and GFP image of
+        each, and you repeat that process 100 times, you'll have 800 individual images. But there will only be 100
+        time indexes.
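+
+        As a rough illustration of that relationship (the file path and the counts are hypothetical, simply
+        restating the example above)::
+
+            nd2 = Nd2("/path/to/experiment.nd2")
+            nd2.field_of_view_count  # 4
+            nd2.channel_count        # 2 (bright field and GFP)
+            nd2.time_index_count     # 100, even though the file holds 800 individual images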
+
+        :rtype: int
+
+        """
+        pattern = r""".*?T'\((\d+)\).*?"""
+        try:
+            count = int(re.match(pattern, self._dimensions).group(1))
+        except AttributeError:
+            return 1
+        else:
+            return count
+
+    @property
+    def z_level_count(self):
+        pattern = r""".*?Z\((\d+)\).*?"""
+        try:
+            count = int(re.match(pattern, self._dimensions).group(1))
+        except AttributeError:
+            return 1
+        else:
+            return count
+
+    @staticmethod
+    def as_numpy_array(arr):
+        return np.frombuffer(arr)
+
+    @property
+    def _channel_offset(self):
+        """
+        Image data is interleaved for each image set. That is, if there are four images in a set, the first image
+        will consist of pixels 1, 5, 9, etc., the second will be pixels 2, 6, 10, and so forth. Why this would be the
+        case is beyond me, but that's how it works.
+
+        """
+        channel_offset = {}
+        for n, channel in enumerate(self.channels):
+            channel_offset[channel] = n
+        return channel_offset
+
+    def _get_raw_image_data(self, image_set_number, channel_offset):
+        chunk = self._label_map["ImageDataSeq|%d!" % image_set_number]
+        data = self._read_chunk(chunk)
+        timestamp = struct.unpack("d", data[:8])[0]
+        # The images for the various channels are interleaved within each other.
+        image_data = array.array("H", data)
+        image_data_start = 4 + channel_offset
+        return timestamp, image_data[image_data_start::self.channel_count]
+
+    def _calculate_image_set_number(self, time_index, fov, z_level):
+        return time_index * self.field_of_view_count * self.z_level_count + (fov * self.z_level_count + z_level)
diff --git a/nd2reader/model/__init__.py b/nd2reader/model/__init__.py
index 39227a1..800cb7b 100644
--- a/nd2reader/model/__init__.py
+++ b/nd2reader/model/__init__.py
@@ -5,25 +5,6 @@ import logging
 log = logging.getLogger(__name__)
 
 
-class Channel(object):
-    def __init__(self, name, camera, exposure_time):
-        self._name = name
-        self._camera = camera
-        self._exposure_time = exposure_time
-
-    @property
-    def name(self):
-        return self._name
-
-    @property
-    def camera(self):
-        return self._camera
-
-    @property
-    def exposure_time(self):
-        return self._exposure_time
-
-
 class ImageSet(object):
     """
     A group of images that share the same timestamp. NIS Elements doesn't store a unique timestamp for every
diff --git a/nd2reader/parser.py b/nd2reader/parser.py
new file mode 100644
index 0000000..e8e3d96
--- /dev/null
+++ b/nd2reader/parser.py
@@ -0,0 +1,179 @@
+# -*- coding: utf-8 -*-
+
+import array
+from collections import namedtuple
+import struct
+from StringIO import StringIO
+
+field_of_view = namedtuple('FOV', ['number', 'x', 'y', 'z', 'pfs_offset'])
+
+
+class Nd2Parser(object):
+    """
+    Reads .nd2 files, provides an interface to the metadata, and generates numpy arrays from the image data.
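+
+    A minimal usage sketch (the path here is hypothetical, and in practice this class is normally used through
+    the Nd2 subclass rather than instantiated directly)::
+
+        parser = Nd2Parser("/path/to/experiment.nd2")
+        attributes = parser.metadata['ImageAttributes']['SLxImageAttributes']
+        width, height = attributes['uiWidth'], attributes['uiHeight']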
+
+    """
+    CHUNK_HEADER = 0xabeceda
+    CHUNK_MAP_START = "ND2 FILEMAP SIGNATURE NAME 0001!"
+    CHUNK_MAP_END = "ND2 CHUNK MAP SIGNATURE 0000001!"
+
+    def __init__(self, filename):
+        self._absolute_start = None
+        self._filename = filename
+        self._fh = None
+        self._chunk_map_start_location = None
+        self._cursor_position = None
+        self._dimension_text = None
+        self._label_map = {}
+        self.metadata = {}
+        self._read_map()
+        self._parse_metadata()
+
+    @property
+    def _file_handle(self):
+        if self._fh is None:
+            self._fh = open(self._filename, "rb")
+        return self._fh
+
+    @property
+    def _dimensions(self):
+        if self._dimension_text is None:
+            for line in self.metadata['ImageTextInfo']['SLxImageTextInfo'].values():
+                if "Dimensions:" in line:
+                    metadata = line
+                    break
+            else:
+                raise ValueError("Could not parse metadata dimensions!")
+            for line in metadata.split("\r\n"):
+                if line.startswith("Dimensions:"):
+                    self._dimension_text = line
+                    break
+            else:
+                raise ValueError("Could not parse metadata dimensions!")
+        return self._dimension_text
+
+    @property
+    def _image_count(self):
+        return self.metadata['ImageAttributes']['SLxImageAttributes']['uiSequenceCount']
+
+    @property
+    def _sequence_count(self):
+        return self.metadata['ImageEvents']['RLxExperimentRecord']['uiCount']
+
+    def _parse_metadata(self):
+        for label in self._label_map.keys():
+            if not label.endswith("LV!") or "LV|" in label:
+                continue
+            data = self._read_chunk(self._label_map[label])
+            stop = label.index("LV")
+            self.metadata[label[:stop]] = self._read_metadata(data, 1)
+
+    def _read_map(self):
+        """
+        Every label ends with an exclamation point; however, we can't simply search for exclamation points to find
+        all the labels, since some of the data bytes also contain the value 33, which is the ASCII code for "!".
+        So we iteratively find each label, grab the subsequent data (always 16 bytes long), advance to the next
+        label, and repeat.
+
+        """
+        self._file_handle.seek(-8, 2)
+        chunk_map_start_location = struct.unpack("Q", self._file_handle.read(8))[0]
+        self._file_handle.seek(chunk_map_start_location)
+        raw_text = self._file_handle.read(-1)
+        label_start = raw_text.index(Nd2Parser.CHUNK_MAP_START) + 32
+
+        while True:
+            data_start = raw_text.index("!", label_start) + 1
+            key = raw_text[label_start: data_start]
+            location, length = struct.unpack("QQ", raw_text[data_start: data_start + 16])
+            if key == Nd2Parser.CHUNK_MAP_END:
+                # We've reached the end of the chunk map
+                break
+            self._label_map[key] = location
+            label_start = data_start + 16
+
+    def _read_chunk(self, chunk_location):
+        """
+        Gets the data for a given chunk pointer.
+
+        """
+        self._file_handle.seek(chunk_location)
+        # The chunk metadata is always 16 bytes long.
+        chunk_metadata = self._file_handle.read(16)
+        header, relative_offset, data_length = struct.unpack("IIQ", chunk_metadata)
+        if header != Nd2Parser.CHUNK_HEADER:
+            raise ValueError("The ND2 file seems to be corrupted.")
+        # We start at the location of the chunk metadata, skip over the metadata, and then proceed to the
+        # start of the actual data field, which is at some arbitrary place after the metadata.
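+        # A sketch of the layout this implies, relative to chunk_location (the names reuse the local variables
+        # above; the byte offsets are inferred from the "IIQ" unpack rather than from any official specification):
+        #     bytes 0-3:   CHUNK_HEADER magic number
+        #     bytes 4-7:   relative_offset, counted from the end of this 16-byte header to the start of the data
+        #     bytes 8-15:  data_length, the number of bytes of actual data to read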
+        self._file_handle.seek(chunk_location + 16 + relative_offset)
+        return self._file_handle.read(data_length)
+
+    def _z_level_count(self):
+        st = self._read_chunk(self._label_map["CustomData|Z!"])
+        return len(array.array("d", st))
+
+    def _parse_unsigned_char(self, data):
+        return struct.unpack("B", data.read(1))[0]
+
+    def _parse_unsigned_int(self, data):
+        return struct.unpack("I", data.read(4))[0]
+
+    def _parse_unsigned_long(self, data):
+        return struct.unpack("Q", data.read(8))[0]
+
+    def _parse_double(self, data):
+        return struct.unpack("d", data.read(8))[0]
+
+    def _parse_string(self, data):
+        value = data.read(2)
+        while not value.endswith("\x00\x00"):
+            # the string ends at the first instance of \x00\x00
+            value += data.read(2)
+        return value.decode("utf16")[:-1].encode("utf8")
+
+    def _parse_char_array(self, data):
+        array_length = struct.unpack("Q", data.read(8))[0]
+        return array.array("B", data.read(array_length))
+
+    def _parse_metadata_item(self, args):
+        data, cursor_position = args
+        new_count, length = struct.unpack("