From 7fcf2a5740ece7589b65a7fe96ba22fff1fa8195 Mon Sep 17 00:00:00 2001 From: jim Date: Sun, 26 Apr 2015 18:51:41 -0500 Subject: [PATCH 1/8] #11 - partial refactor --- nd2reader/__init__.py | 84 ++++++++- nd2reader/model/__init__.py | 174 +----------------- nd2reader/{service/__init__.py => reader.py} | 182 ++----------------- 3 files changed, 94 insertions(+), 346 deletions(-) rename nd2reader/{service/__init__.py => reader.py} (60%) diff --git a/nd2reader/__init__.py b/nd2reader/__init__.py index 264eb55..92281c7 100644 --- a/nd2reader/__init__.py +++ b/nd2reader/__init__.py @@ -1,18 +1,25 @@ +# -*- coding: utf-8 -*- + +from collections import namedtuple +from nd2reader.model import Channel import logging -from nd2reader.service import BaseNd2 from nd2reader.model import Image, ImageSet +from nd2reader.reader import Nd2FileReader + +chunk = namedtuple('Chunk', ['location', 'length']) +field_of_view = namedtuple('FOV', ['number', 'x', 'y', 'z', 'pfs_offset']) log = logging.getLogger(__name__) log.setLevel(logging.DEBUG) -class Nd2(BaseNd2): +class Nd2(Nd2FileReader): def __init__(self, filename): super(Nd2, self).__init__(filename) def get_image(self, time_index, fov, channel_name, z_level): image_set_number = self._calculate_image_set_number(time_index, fov, z_level) - timestamp, raw_image_data = self._reader.get_raw_image_data(image_set_number, self.channel_offset[channel_name]) + timestamp, raw_image_data = self.get_raw_image_data(image_set_number, self.channel_offset[channel_name]) return Image(timestamp, raw_image_data, fov, channel_name, z_level, self.height, self.width) def __iter__(self): @@ -44,4 +51,73 @@ class Nd2(BaseNd2): image = self.get_image(timepoint, field_of_view, channel_name, z_level) if image.is_valid: image_set.add(image) - yield image_set \ No newline at end of file + yield image_set + + self._channel_offset = None + + @property + def height(self): + """ + :return: height of each image, in pixels + + """ + return self._metadata['ImageAttributes']['SLxImageAttributes']['uiHeight'] + + @property + def width(self): + """ + :return: width of each image, in pixels + + """ + return self._metadata['ImageAttributes']['SLxImageAttributes']['uiWidth'] + + @property + def channels(self): + metadata = self._metadata['ImageMetadataSeq']['SLxPictureMetadata']['sPicturePlanes'] + try: + validity = self._metadata['ImageMetadata']['SLxExperiment']['ppNextLevelEx'][''][0]['ppNextLevelEx'][''][0]['pItemValid'] + except KeyError: + # If none of the channels have been deleted, there is no validity list, so we just make one + validity = [True for i in metadata] + # Channel information is contained in dictionaries with the keys a0, a1...an where the number + # indicates the order in which the channel is stored. So by sorting the dicts alphabetically + # we get the correct order. + for (label, chan), valid in zip(sorted(metadata['sPlaneNew'].items()), validity): + if not valid: + continue + name = chan['sDescription'] + exposure_time = metadata['sSampleSetting'][label]['dExposureTime'] + camera = metadata['sSampleSetting'][label]['pCameraSetting']['CameraUserName'] + yield Channel(name, camera, exposure_time) + + @property + def channel_names(self): + """ + A convenience method for getting an alphabetized list of channel names. + + :return: list[str] + + """ + for channel in sorted(self.channels, key=lambda x: x.name): + yield channel.name + + @property + def _image_count(self): + return self._metadata['ImageAttributes']['SLxImageAttributes']['uiSequenceCount'] + + @property + def _sequence_count(self): + return self._metadata['ImageEvents']['RLxExperimentRecord']['uiCount'] + + @property + def channel_offset(self): + if self._channel_offset is None: + self._channel_offset = {} + for n, channel in enumerate(self.channels): + self._channel_offset[channel.name] = n + return self._channel_offset + + def _calculate_image_set_number(self, time_index, fov, z_level): + return time_index * self.field_of_view_count * self.z_level_count + (fov * self.z_level_count + z_level) + + diff --git a/nd2reader/model/__init__.py b/nd2reader/model/__init__.py index 1ae84e4..39227a1 100644 --- a/nd2reader/model/__init__.py +++ b/nd2reader/model/__init__.py @@ -1,10 +1,6 @@ import numpy as np import skimage.io import logging -from io import BytesIO -import array -import struct - log = logging.getLogger(__name__) @@ -96,172 +92,4 @@ class Image(object): def show(self): skimage.io.imshow(self.data) - skimage.io.show() - - -class MetadataItem(object): - def __init__(self, start, data): - self._datatype = ord(data[start]) - self._label_length = 2 * ord(data[start + 1]) - self._data = data - - @property - def is_valid(self): - return self._datatype > 0 - - @property - def key(self): - return self._data[2:self._label_length].decode("utf16").encode("utf8") - - @property - def length(self): - return self._length - - @property - def data_start(self): - return self._label_length + 2 - - @property - def _body(self): - """ - All data after the header. - - """ - return self._data[self.data_start:] - - def _get_bytes(self, count): - return self._data[self.data_start: self.data_start + count] - - @property - def value(self): - parser = {1: self._parse_unsigned_char, - 2: self._parse_unsigned_int, - 3: self._parse_unsigned_int, - 5: self._parse_unsigned_long, - 6: self._parse_double, - 8: self._parse_string, - 9: self._parse_char_array, - 11: self._parse_metadata_item - } - return parser[self._datatype]() - - def _parse_unsigned_char(self): - self._length = 1 - return self._unpack("B", self._get_bytes(self._length)) - - def _parse_unsigned_int(self): - self._length = 4 - return self._unpack("I", self._get_bytes(self._length)) - - def _parse_unsigned_long(self): - self._length = 8 - return self._unpack("Q", self._get_bytes(self._length)) - - def _parse_double(self): - self._length = 8 - return self._unpack("d", self._get_bytes(self._length)) - - def _parse_string(self): - # the string is of unknown length but ends at the first instance of \x00\x00 - stop = self._body.index("\x00\x00") - self._length = stop - return self._body[:stop - 1].decode("utf16").encode("utf8") - - def _parse_char_array(self): - array_length = self._unpack("Q", self._get_bytes(8)) - self._length = array_length + 8 - return array.array("B", self._body[8:array_length]) - - def _parse_metadata_item(self): - count, length = struct.unpack(" Date: Fri, 8 May 2015 02:57:08 +0000 Subject: [PATCH 2/8] added requirements file --- nd2reader/__init__.py | 3 ++- requirements.txt | 1 + setup.py | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 requirements.txt diff --git a/nd2reader/__init__.py b/nd2reader/__init__.py index 92281c7..a01b3b6 100644 --- a/nd2reader/__init__.py +++ b/nd2reader/__init__.py @@ -9,8 +9,9 @@ from nd2reader.reader import Nd2FileReader chunk = namedtuple('Chunk', ['location', 'length']) field_of_view = namedtuple('FOV', ['number', 'x', 'y', 'z', 'pfs_offset']) +print(__name__) log = logging.getLogger(__name__) -log.setLevel(logging.DEBUG) +log.setLevel(logging.WARN) class Nd2(Nd2FileReader): diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..296d654 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +numpy \ No newline at end of file diff --git a/setup.py b/setup.py index b7acbd1..93db35b 100644 --- a/setup.py +++ b/setup.py @@ -7,4 +7,4 @@ setup( install_requires=[ 'numpy', ], -) +) \ No newline at end of file From 26ff38d039cf16f09c7c86cfae77b816358ff164 Mon Sep 17 00:00:00 2001 From: Jim Rybarski Date: Thu, 7 May 2015 23:08:42 -0500 Subject: [PATCH 3/8] created dockerfile --- Dockerfile | 24 ++++++++++++++++++++++++ nd2reader/reader.py | 4 +++- setup.py | 5 +---- 3 files changed, 28 insertions(+), 5 deletions(-) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..95602c0 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,24 @@ +FROM ubuntu +MAINTAINER Jim Rybarski + +RUN apt-get update && apt-get install -y \ + gcc \ + gfortran \ + libblas-dev \ + liblapack-dev \ + libatlas-dev \ + tk \ + tk-dev \ + libpng12-dev \ + python \ + python-dev \ + python-pip \ + libfreetype6-dev \ + python-skimage + +RUN pip install numpy +RUN pip install --upgrade scikit-image + +COPY . /opt/nd2reader +WORKDIR /opt/nd2reader +RUN python setup.py install diff --git a/nd2reader/reader.py b/nd2reader/reader.py index 3b3732a..d269bf3 100644 --- a/nd2reader/reader.py +++ b/nd2reader/reader.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + import array import numpy as np import struct @@ -280,4 +282,4 @@ class Nd2FileReader(object): res[name].append(value) x = data.read() assert not x, "skip %d %s" % (len(x), repr(x[:30])) - return res \ No newline at end of file + return res diff --git a/setup.py b/setup.py index 93db35b..5107f45 100644 --- a/setup.py +++ b/setup.py @@ -3,8 +3,5 @@ from setuptools import setup, find_packages setup( name="nd2reader", packages=find_packages(), - version="0.9.7", - install_requires=[ - 'numpy', - ], + version="0.9.7" ) \ No newline at end of file From de8915fd6b16475c1773d31156d540567fe12cf6 Mon Sep 17 00:00:00 2001 From: Jim Rybarski Date: Sun, 10 May 2015 18:33:18 +0000 Subject: [PATCH 4/8] simplified even more --- nd2reader/__init__.py | 64 ++++++----------- nd2reader/reader.py | 162 ++++++++++++++++++++---------------------- 2 files changed, 98 insertions(+), 128 deletions(-) diff --git a/nd2reader/__init__.py b/nd2reader/__init__.py index a01b3b6..00648d9 100644 --- a/nd2reader/__init__.py +++ b/nd2reader/__init__.py @@ -1,16 +1,14 @@ # -*- coding: utf-8 -*- -from collections import namedtuple from nd2reader.model import Channel +from datetime import datetime import logging from nd2reader.model import Image, ImageSet from nd2reader.reader import Nd2FileReader -chunk = namedtuple('Chunk', ['location', 'length']) -field_of_view = namedtuple('FOV', ['number', 'x', 'y', 'z', 'pfs_offset']) -print(__name__) log = logging.getLogger(__name__) +log.addHandler(logging.StreamHandler()) log.setLevel(logging.WARN) @@ -18,11 +16,6 @@ class Nd2(Nd2FileReader): def __init__(self, filename): super(Nd2, self).__init__(filename) - def get_image(self, time_index, fov, channel_name, z_level): - image_set_number = self._calculate_image_set_number(time_index, fov, z_level) - timestamp, raw_image_data = self.get_raw_image_data(image_set_number, self.channel_offset[channel_name]) - return Image(timestamp, raw_image_data, fov, channel_name, z_level, self.height, self.width) - def __iter__(self): """ Just return every image in order (might not be exactly the order that the images were physically taken, but it will @@ -57,20 +50,8 @@ class Nd2(Nd2FileReader): self._channel_offset = None @property - def height(self): - """ - :return: height of each image, in pixels - - """ - return self._metadata['ImageAttributes']['SLxImageAttributes']['uiHeight'] - - @property - def width(self): - """ - :return: width of each image, in pixels - - """ - return self._metadata['ImageAttributes']['SLxImageAttributes']['uiWidth'] + def metadata(self): + return self._metadata @property def channels(self): @@ -103,22 +84,21 @@ class Nd2(Nd2FileReader): yield channel.name @property - def _image_count(self): - return self._metadata['ImageAttributes']['SLxImageAttributes']['uiSequenceCount'] - - @property - def _sequence_count(self): - return self._metadata['ImageEvents']['RLxExperimentRecord']['uiCount'] - - @property - def channel_offset(self): - if self._channel_offset is None: - self._channel_offset = {} - for n, channel in enumerate(self.channels): - self._channel_offset[channel.name] = n - return self._channel_offset - - def _calculate_image_set_number(self, time_index, fov, z_level): - return time_index * self.field_of_view_count * self.z_level_count + (fov * self.z_level_count + z_level) - - + def absolute_start(self): + if self._absolute_start is None: + for line in self._metadata['ImageTextInfo']['SLxImageTextInfo'].values(): + absolute_start_12 = None + absolute_start_24 = None + # ND2s seem to randomly switch between 12- and 24-hour representations. + try: + absolute_start_24 = datetime.strptime(line, "%m/%d/%Y %H:%M:%S") + except ValueError: + pass + try: + absolute_start_12 = datetime.strptime(line, "%m/%d/%Y %I:%M:%S %p") + except ValueError: + pass + if not absolute_start_12 and not absolute_start_24: + continue + self._absolute_start = absolute_start_12 if absolute_start_12 else absolute_start_24 + return self._absolute_start \ No newline at end of file diff --git a/nd2reader/reader.py b/nd2reader/reader.py index d269bf3..20ab5d2 100644 --- a/nd2reader/reader.py +++ b/nd2reader/reader.py @@ -1,11 +1,15 @@ # -*- coding: utf-8 -*- +from abc import abstractproperty import array +from collections import namedtuple import numpy as np import struct import re from StringIO import StringIO -from datetime import datetime +from nd2reader.model import Image + +field_of_view = namedtuple('FOV', ['number', 'x', 'y', 'z', 'pfs_offset']) class Nd2FileReader(object): @@ -17,6 +21,7 @@ class Nd2FileReader(object): self._absolute_start = None self._filename = filename self._file_handler = None + self._channel_offset = None self._chunk_map_start_location = None self._label_map = {} self._metadata = {} @@ -24,6 +29,31 @@ class Nd2FileReader(object): self._parse_dict_data() self.__dimensions = None + def get_image(self, time_index, fov, channel_name, z_level): + image_set_number = self._calculate_image_set_number(time_index, fov, z_level) + timestamp, raw_image_data = self.get_raw_image_data(image_set_number, self.channel_offset[channel_name]) + return Image(timestamp, raw_image_data, fov, channel_name, z_level, self.height, self.width) + + @abstractproperty + def channels(self): + raise NotImplemented + + @property + def height(self): + """ + :return: height of each image, in pixels + + """ + return self._metadata['ImageAttributes']['SLxImageAttributes']['uiHeight'] + + @property + def width(self): + """ + :return: width of each image, in pixels + + """ + return self._metadata['ImageAttributes']['SLxImageAttributes']['uiWidth'] + @property def _dimensions(self): if self.__dimensions is None: @@ -40,30 +70,6 @@ class Nd2FileReader(object): break return self.__dimensions - @property - def absolute_start(self): - if self._absolute_start is None: - for line in self._metadata['ImageTextInfo']['SLxImageTextInfo'].values(): - absolute_start_12 = None - absolute_start_24 = None - - # ND2s seem to randomly switch between 12- and 24-hour representations. - try: - absolute_start_24 = datetime.strptime(line, "%m/%d/%Y %H:%M:%S") - except ValueError: - pass - - try: - absolute_start_12 = datetime.strptime(line, "%m/%d/%Y %I:%M:%S %p") - except ValueError: - pass - - if not absolute_start_12 and not absolute_start_24: - continue - - self._absolute_start = absolute_start_12 if absolute_start_12 else absolute_start_24 - - return self._absolute_start @property def fh(self): @@ -74,8 +80,11 @@ class Nd2FileReader(object): @property def time_index_count(self): """ - The number of images for a given field of view, channel, and z_level combination. - Effectively the number of frames. + The number of image sets. If images were acquired using some kind of cycle, all images at each step in the + program will have the same timestamp (even though they may have varied by a few seconds in reality). For example, + if you have four fields of view that you're constantly monitoring, and you take a bright field and GFP image of + each, and you repeat that process 100 times, you'll have 800 individual images. But there will only be 400 + time indexes. :rtype: int @@ -125,35 +134,49 @@ class Nd2FileReader(object): else: return count + @property + def _image_count(self): + return self._metadata['ImageAttributes']['SLxImageAttributes']['uiSequenceCount'] + + @property + def _sequence_count(self): + return self._metadata['ImageEvents']['RLxExperimentRecord']['uiCount'] + + @property + def channel_offset(self): + """ + Image data is interleaved for each image set. That is, if there are four images in a set, the first image + will consist of pixels 1, 5, 9, etc, the second will be pixels 2, 6, 10, and so forth. Why this would be the + case is beyond me, but that's how it works. + + """ + if self._channel_offset is None: + self._channel_offset = {} + for n, channel in enumerate(self.channels): + self._channel_offset[channel.name] = n + return self._channel_offset + + def _calculate_image_set_number(self, time_index, fov, z_level): + return time_index * self.field_of_view_count * self.z_level_count + (fov * self.z_level_count + z_level) + def get_raw_image_data(self, image_set_number, channel_offset): chunk = self._label_map["ImageDataSeq|%d!" % image_set_number] - data = self._read_chunk(chunk.location) + data = self._read_chunk(chunk) timestamp = struct.unpack("d", data[:8])[0] - # The images for the various channels are interleaved within each other. Yes, this is an incredibly unintuitive and nonsensical way - # to store data. + # The images for the various channels are interleaved within each other. image_data = array.array("H", data) image_data_start = 4 + channel_offset return timestamp, image_data[image_data_start::self.channel_count] def _parse_dict_data(self): # TODO: Don't like this name - for label in self._top_level_dict_labels: - chunk_location = self._label_map[label].location - data = self._read_chunk(chunk_location) + for label in self._label_map.keys(): + if not label.endswith("LV!") or "LV|" in label: + continue + data = self._read_chunk(self._label_map[label]) stop = label.index("LV") self._metadata[label[:stop]] = self.read_lv_encoding(data, 1) - @property - def metadata(self): - return self._metadata - - @property - def _top_level_dict_labels(self): - # TODO: I don't like this name either - for label in self._label_map.keys(): - if label.endswith("LV!") or "LV|" in label: - yield label - def _read_map(self): """ Every label ends with an exclamation point, however, we can't directly search for those to find all the labels @@ -171,13 +194,10 @@ class Nd2FileReader(object): data_start = raw_text.index("!", label_start) + 1 key = raw_text[label_start: data_start] location, length = struct.unpack("QQ", raw_text[data_start: data_start + 16]) - label, value = key, chunk(location=location, length=length) - - if label == "ND2 CHUNK MAP SIGNATURE 0000001!": + if key == "ND2 CHUNK MAP SIGNATURE 0000001!": # We've reached the end of the chunk map break - - self._label_map[label] = value + self._label_map[key] = location label_start = data_start + 16 def _read_chunk(self, chunk_location): @@ -186,53 +206,23 @@ class Nd2FileReader(object): """ self.fh.seek(chunk_location) - chunk_data = self._read_chunk_metadata() - header, relative_offset, data_length = self._parse_chunk_metadata(chunk_data) - return self._read_chunk_data(chunk_location, relative_offset, data_length) - - def _read_chunk_metadata(self): - """ - Gets the chunks metadata, which is always 16 bytes - - """ - return self.fh.read(16) - - def _read_chunk_data(self, chunk_location, relative_offset, data_length): - """ - Reads the actual data for a given chunk - - """ + # The chunk metadata is always 16 bytes long + chunk_metadata = self.fh.read(16) + header, relative_offset, data_length = struct.unpack("IIQ", chunk_metadata) + if header != 0xabeceda: + raise ValueError("The ND2 file seems to be corrupted.") # We start at the location of the chunk metadata, skip over the metadata, and then proceed to the # start of the actual data field, which is at some arbitrary place after the metadata. self.fh.seek(chunk_location + 16 + relative_offset) return self.fh.read(data_length) - @staticmethod - def _parse_chunk_metadata(chunk_data): - """ - Finds out everything about a given chunk. Every chunk begins with the same value, so if that's ever - different we can assume the file has suffered some kind of damage. - - """ - header, relative_offset, data_length = struct.unpack("IIQ", chunk_data) - if header != 0xabeceda: - raise ValueError("The ND2 file seems to be corrupted.") - return header, relative_offset, data_length - - def _get_raw_chunk_map_text(self): - """ - Reads the entire chunk map and returns it as a string. - - """ - - @staticmethod def as_numpy_array(arr): return np.frombuffer(arr) def _z_level_count(self): name = "CustomData|Z!" - st = self._read_chunk(self._label_map[name].location) + st = self._read_chunk(self._label_map[name]) res = array.array("d", st) return len(res) @@ -282,4 +272,4 @@ class Nd2FileReader(object): res[name].append(value) x = data.read() assert not x, "skip %d %s" % (len(x), repr(x[:30])) - return res + return res \ No newline at end of file From 5930c63e511e7d96869395a5a6eea8600ae41e6c Mon Sep 17 00:00:00 2001 From: jim Date: Sun, 10 May 2015 15:08:30 -0500 Subject: [PATCH 5/8] #11 made parser section more readable --- nd2reader/reader.py | 61 +++++++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/nd2reader/reader.py b/nd2reader/reader.py index 20ab5d2..fecf17f 100644 --- a/nd2reader/reader.py +++ b/nd2reader/reader.py @@ -228,48 +228,49 @@ class Nd2FileReader(object): def read_lv_encoding(self, data, count): data = StringIO(data) - res = {} + metadata = {} total_count = 0 - for c in range(count): - lastpos = data.tell() + for _ in xrange(count): + cursor_position = data.tell() total_count += 1 - hdr = data.read(2) - if not hdr: + header = data.read(2) + if not header: break - typ = ord(hdr[0]) - bname = data.read(2*ord(hdr[1])) - name = bname.decode("utf16")[:-1].encode("utf8") - if typ == 1: + data_type, name_length = map(ord, header) + name = data.read(name_length * 2).decode("utf16")[:-1].encode("utf8") + if data_type == 1: value, = struct.unpack("B", data.read(1)) - elif typ in [2, 3]: + elif data_type in [2, 3]: value, = struct.unpack("I", data.read(4)) - elif typ == 5: + elif data_type == 5: value, = struct.unpack("Q", data.read(8)) - elif typ == 6: + elif data_type == 6: value, = struct.unpack("d", data.read(8)) - elif typ == 8: + elif data_type == 8: value = data.read(2) while value[-2:] != "\x00\x00": value += data.read(2) value = value.decode("utf16")[:-1].encode("utf8") - elif typ == 9: + elif data_type == 9: cnt, = struct.unpack("Q", data.read(8)) value = array.array("B", data.read(cnt)) - elif typ == 11: - newcount, length = struct.unpack(" Date: Sun, 10 May 2015 16:09:21 -0500 Subject: [PATCH 6/8] #11 eliminated the messy parts of the parser, mostly --- nd2reader/reader.py | 107 ++++++++++++++++++++++++++------------------ 1 file changed, 64 insertions(+), 43 deletions(-) diff --git a/nd2reader/reader.py b/nd2reader/reader.py index fecf17f..1b1401d 100644 --- a/nd2reader/reader.py +++ b/nd2reader/reader.py @@ -13,6 +13,10 @@ field_of_view = namedtuple('FOV', ['number', 'x', 'y', 'z', 'pfs_offset']) class Nd2FileReader(object): + CHUNK_HEADER = 0xabeceda + CHUNK_MAP_START = "ND2 FILEMAP SIGNATURE NAME 0001!" + CHUNK_MAP_END = "ND2 CHUNK MAP SIGNATURE 0000001!" + """ Reads .nd2 files, provides an interface to the metadata, and generates numpy arrays from the image data. @@ -23,12 +27,17 @@ class Nd2FileReader(object): self._file_handler = None self._channel_offset = None self._chunk_map_start_location = None + self._cursor_position = None self._label_map = {} self._metadata = {} self._read_map() - self._parse_dict_data() + self._parse_metadata() self.__dimensions = None + @staticmethod + def as_numpy_array(arr): + return np.frombuffer(arr) + def get_image(self, time_index, fov, channel_name, z_level): image_set_number = self._calculate_image_set_number(time_index, fov, z_level) timestamp, raw_image_data = self.get_raw_image_data(image_set_number, self.channel_offset[channel_name]) @@ -168,14 +177,13 @@ class Nd2FileReader(object): image_data_start = 4 + channel_offset return timestamp, image_data[image_data_start::self.channel_count] - def _parse_dict_data(self): - # TODO: Don't like this name + def _parse_metadata(self): for label in self._label_map.keys(): if not label.endswith("LV!") or "LV|" in label: continue data = self._read_chunk(self._label_map[label]) stop = label.index("LV") - self._metadata[label[:stop]] = self.read_lv_encoding(data, 1) + self._metadata[label[:stop]] = self._read_file(data, 1) def _read_map(self): """ @@ -188,13 +196,13 @@ class Nd2FileReader(object): chunk_map_start_location = struct.unpack("Q", self.fh.read(8))[0] self.fh.seek(chunk_map_start_location) raw_text = self.fh.read(-1) - label_start = raw_text.index("ND2 FILEMAP SIGNATURE NAME 0001!") + 32 + label_start = raw_text.index(Nd2FileReader.CHUNK_MAP_START) + 32 while True: data_start = raw_text.index("!", label_start) + 1 key = raw_text[label_start: data_start] location, length = struct.unpack("QQ", raw_text[data_start: data_start + 16]) - if key == "ND2 CHUNK MAP SIGNATURE 0000001!": + if key == Nd2FileReader.CHUNK_MAP_END: # We've reached the end of the chunk map break self._label_map[key] = location @@ -209,58 +217,73 @@ class Nd2FileReader(object): # The chunk metadata is always 16 bytes long chunk_metadata = self.fh.read(16) header, relative_offset, data_length = struct.unpack("IIQ", chunk_metadata) - if header != 0xabeceda: + if header != Nd2FileReader.CHUNK_HEADER: raise ValueError("The ND2 file seems to be corrupted.") # We start at the location of the chunk metadata, skip over the metadata, and then proceed to the # start of the actual data field, which is at some arbitrary place after the metadata. self.fh.seek(chunk_location + 16 + relative_offset) return self.fh.read(data_length) - @staticmethod - def as_numpy_array(arr): - return np.frombuffer(arr) - def _z_level_count(self): name = "CustomData|Z!" st = self._read_chunk(self._label_map[name]) - res = array.array("d", st) - return len(res) - - def read_lv_encoding(self, data, count): + return len(array.array("d", st)) + + def _parse_unsigned_char(self, data): + return struct.unpack("B", data.read(1))[0] + + def _parse_unsigned_int(self, data): + return struct.unpack("I", data.read(4))[0] + + def _parse_unsigned_long(self, data): + return struct.unpack("Q", data.read(8))[0] + + def _parse_double(self, data): + return struct.unpack("d", data.read(8))[0] + + def _parse_string(self, data): + value = data.read(2) + while not value.endswith("\x00\x00"): + # the string ends at the first instance of \x00\x00 + value += data.read(2) + return value.decode("utf16")[:-1].encode("utf8") + + def _parse_char_array(self, data): + array_length = struct.unpack("Q", data.read(8))[0] + return array.array("B", data.read(array_length)) + + def _parse_metadata_item(self, args): + data, cursor_position = args + new_count, length = struct.unpack(" Date: Mon, 11 May 2015 12:50:03 +0000 Subject: [PATCH 7/8] much refactor. very change --- nd2reader/__init__.py | 167 +++++++++++++++----- nd2reader/model/__init__.py | 19 --- nd2reader/parser.py | 179 ++++++++++++++++++++++ nd2reader/reader.py | 297 ------------------------------------ 4 files changed, 305 insertions(+), 357 deletions(-) create mode 100644 nd2reader/parser.py delete mode 100644 nd2reader/reader.py diff --git a/nd2reader/__init__.py b/nd2reader/__init__.py index 00648d9..e5d4839 100644 --- a/nd2reader/__init__.py +++ b/nd2reader/__init__.py @@ -1,27 +1,31 @@ # -*- coding: utf-8 -*- -from nd2reader.model import Channel +import array from datetime import datetime import logging from nd2reader.model import Image, ImageSet -from nd2reader.reader import Nd2FileReader - +from nd2reader.parser import Nd2Parser +import numpy as np +import re +import struct log = logging.getLogger(__name__) log.addHandler(logging.StreamHandler()) log.setLevel(logging.WARN) -class Nd2(Nd2FileReader): - def __init__(self, filename): +class Nd2(Nd2Parser): + def __init__(self, filename, image_sets=False): super(Nd2, self).__init__(filename) + self._use_image_sets = image_sets def __iter__(self): - """ - Just return every image in order (might not be exactly the order that the images were physically taken, but it will - be within a few seconds). A better explanation is probably needed here. + if self._use_image_sets: + return self.image_sets() + else: + return self.images() - """ + def images(self): for i in range(self._image_count): for fov in range(self.field_of_view_count): for z_level in range(self.z_level_count): @@ -30,34 +34,27 @@ class Nd2(Nd2FileReader): if image.is_valid: yield image - def image_sets(self, field_of_view, time_indices=None, channels=None, z_levels=None): - """ - Gets all the images for a given field of view and - """ - timepoint_set = xrange(self.time_index_count) if time_indices is None else time_indices - channel_set = [channel.name for channel in self.channels] if channels is None else channels - z_level_set = xrange(self.z_level_count) if z_levels is None else z_levels - - for timepoint in timepoint_set: + def image_sets(self): + for time_index in xrange(self.time_index_count): image_set = ImageSet() - for channel_name in channel_set: - for z_level in z_level_set: - image = self.get_image(timepoint, field_of_view, channel_name, z_level) - if image.is_valid: - image_set.add(image) + for fov in range(self.field_of_view_count): + for channel_name in self.channels: + for z_level in xrange(self.z_level_count): + image = self.get_image(time_index, fov, channel_name, z_level) + if image.is_valid: + image_set.add(image) yield image_set - self._channel_offset = None - - @property - def metadata(self): - return self._metadata + def get_image(self, time_index, fov, channel_name, z_level): + image_set_number = self._calculate_image_set_number(time_index, fov, z_level) + timestamp, raw_image_data = self._get_raw_image_data(image_set_number, self._channel_offset[channel_name]) + return Image(timestamp, raw_image_data, fov, channel_name, z_level, self.height, self.width) @property def channels(self): - metadata = self._metadata['ImageMetadataSeq']['SLxPictureMetadata']['sPicturePlanes'] + metadata = self.metadata['ImageMetadataSeq']['SLxPictureMetadata']['sPicturePlanes'] try: - validity = self._metadata['ImageMetadata']['SLxExperiment']['ppNextLevelEx'][''][0]['ppNextLevelEx'][''][0]['pItemValid'] + validity = self.metadata['ImageMetadata']['SLxExperiment']['ppNextLevelEx'][''][0]['ppNextLevelEx'][''][0]['pItemValid'] except KeyError: # If none of the channels have been deleted, there is no validity list, so we just make one validity = [True for i in metadata] @@ -67,26 +64,28 @@ class Nd2(Nd2FileReader): for (label, chan), valid in zip(sorted(metadata['sPlaneNew'].items()), validity): if not valid: continue - name = chan['sDescription'] - exposure_time = metadata['sSampleSetting'][label]['dExposureTime'] - camera = metadata['sSampleSetting'][label]['pCameraSetting']['CameraUserName'] - yield Channel(name, camera, exposure_time) + yield chan['sDescription'] @property - def channel_names(self): + def height(self): """ - A convenience method for getting an alphabetized list of channel names. + :return: height of each image, in pixels - :return: list[str] + """ + return self.metadata['ImageAttributes']['SLxImageAttributes']['uiHeight'] + @property + def width(self): """ - for channel in sorted(self.channels, key=lambda x: x.name): - yield channel.name + :return: width of each image, in pixels + + """ + return self.metadata['ImageAttributes']['SLxImageAttributes']['uiWidth'] @property def absolute_start(self): if self._absolute_start is None: - for line in self._metadata['ImageTextInfo']['SLxImageTextInfo'].values(): + for line in self.metadata['ImageTextInfo']['SLxImageTextInfo'].values(): absolute_start_12 = None absolute_start_24 = None # ND2s seem to randomly switch between 12- and 24-hour representations. @@ -101,4 +100,90 @@ class Nd2(Nd2FileReader): if not absolute_start_12 and not absolute_start_24: continue self._absolute_start = absolute_start_12 if absolute_start_12 else absolute_start_24 - return self._absolute_start \ No newline at end of file + return self._absolute_start + + @property + def channel_count(self): + pattern = r""".*?λ\((\d+)\).*?""" + try: + count = int(re.match(pattern, self._dimensions).group(1)) + except AttributeError: + return 1 + else: + return count + + @property + def field_of_view_count(self): + """ + The metadata contains information about fields of view, but it contains it even if some fields + of view were cropped. We can't find anything that states which fields of view are actually + in the image data, so we have to calculate it. There probably is something somewhere, since + NIS Elements can figure it out, but we haven't found it yet. + + """ + pattern = r""".*?XY\((\d+)\).*?""" + try: + count = int(re.match(pattern, self._dimensions).group(1)) + except AttributeError: + return 1 + else: + return count + + @property + def time_index_count(self): + """ + The number of image sets. If images were acquired using some kind of cycle, all images at each step in the + program will have the same timestamp (even though they may have varied by a few seconds in reality). For example, + if you have four fields of view that you're constantly monitoring, and you take a bright field and GFP image of + each, and you repeat that process 100 times, you'll have 800 individual images. But there will only be 400 + time indexes. + + :rtype: int + + """ + pattern = r""".*?T'\((\d+)\).*?""" + try: + count = int(re.match(pattern, self._dimensions).group(1)) + except AttributeError: + return 1 + else: + return count + + @property + def z_level_count(self): + pattern = r""".*?Z\((\d+)\).*?""" + try: + count = int(re.match(pattern, self._dimensions).group(1)) + except AttributeError: + return 1 + else: + return count + + @staticmethod + def as_numpy_array(arr): + return np.frombuffer(arr) + + @property + def _channel_offset(self): + """ + Image data is interleaved for each image set. That is, if there are four images in a set, the first image + will consist of pixels 1, 5, 9, etc, the second will be pixels 2, 6, 10, and so forth. Why this would be the + case is beyond me, but that's how it works. + + """ + channel_offset = {} + for n, channel in enumerate(self.channels): + self._channel_offset[channel.name] = n + return channel_offset + + def _get_raw_image_data(self, image_set_number, channel_offset): + chunk = self._label_map["ImageDataSeq|%d!" % image_set_number] + data = self._read_chunk(chunk) + timestamp = struct.unpack("d", data[:8])[0] + # The images for the various channels are interleaved within each other. + image_data = array.array("H", data) + image_data_start = 4 + channel_offset + return timestamp, image_data[image_data_start::self.channel_count] + + def _calculate_image_set_number(self, time_index, fov, z_level): + return time_index * self.field_of_view_count * self.z_level_count + (fov * self.z_level_count + z_level) diff --git a/nd2reader/model/__init__.py b/nd2reader/model/__init__.py index 39227a1..800cb7b 100644 --- a/nd2reader/model/__init__.py +++ b/nd2reader/model/__init__.py @@ -5,25 +5,6 @@ import logging log = logging.getLogger(__name__) -class Channel(object): - def __init__(self, name, camera, exposure_time): - self._name = name - self._camera = camera - self._exposure_time = exposure_time - - @property - def name(self): - return self._name - - @property - def camera(self): - return self._camera - - @property - def exposure_time(self): - return self._exposure_time - - class ImageSet(object): """ A group of images that share the same timestamp. NIS Elements doesn't store a unique timestamp for every diff --git a/nd2reader/parser.py b/nd2reader/parser.py new file mode 100644 index 0000000..e8e3d96 --- /dev/null +++ b/nd2reader/parser.py @@ -0,0 +1,179 @@ +# -*- coding: utf-8 -*- + +import array +from collections import namedtuple +import struct +from StringIO import StringIO + +field_of_view = namedtuple('FOV', ['number', 'x', 'y', 'z', 'pfs_offset']) + + +class Nd2Parser(object): + CHUNK_HEADER = 0xabeceda + CHUNK_MAP_START = "ND2 FILEMAP SIGNATURE NAME 0001!" + CHUNK_MAP_END = "ND2 CHUNK MAP SIGNATURE 0000001!" + + """ + Reads .nd2 files, provides an interface to the metadata, and generates numpy arrays from the image data. + + """ + def __init__(self, filename): + self._absolute_start = None + self._filename = filename + self._fh = None + self._chunk_map_start_location = None + self._cursor_position = None + self._dimension_text = None + self._label_map = {} + self.metadata = {} + self._read_map() + self._parse_metadata() + + @property + def _file_handle(self): + if self._fh is None: + self._fh = open(self._filename, "rb") + return self._fh + + @property + def _dimensions(self): + if self._dimension_text is None: + for line in self.metadata['ImageTextInfo']['SLxImageTextInfo'].values(): + if "Dimensions:" in line: + metadata = line + break + else: + raise ValueError("Could not parse metadata dimensions!") + for line in metadata.split("\r\n"): + if line.startswith("Dimensions:"): + self._dimension_text = line + break + else: + raise ValueError("Could not parse metadata dimensions!") + return self._dimension_text + + @property + def _image_count(self): + return self.metadata['ImageAttributes']['SLxImageAttributes']['uiSequenceCount'] + + @property + def _sequence_count(self): + return self.metadata['ImageEvents']['RLxExperimentRecord']['uiCount'] + + def _parse_metadata(self): + for label in self._label_map.keys(): + if not label.endswith("LV!") or "LV|" in label: + continue + data = self._read_chunk(self._label_map[label]) + stop = label.index("LV") + self.metadata[label[:stop]] = self._read_metadata(data, 1) + + def _read_map(self): + """ + Every label ends with an exclamation point, however, we can't directly search for those to find all the labels + as some of the bytes contain the value 33, which is the ASCII code for "!". So we iteratively find each label, + grab the subsequent data (always 16 bytes long), advance to the next label and repeat. + + """ + self._file_handle.seek(-8, 2) + chunk_map_start_location = struct.unpack("Q", self._file_handle.read(8))[0] + self._file_handle.seek(chunk_map_start_location) + raw_text = self._file_handle.read(-1) + label_start = raw_text.index(Nd2Parser.CHUNK_MAP_START) + 32 + + while True: + data_start = raw_text.index("!", label_start) + 1 + key = raw_text[label_start: data_start] + location, length = struct.unpack("QQ", raw_text[data_start: data_start + 16]) + if key == Nd2Parser.CHUNK_MAP_END: + # We've reached the end of the chunk map + break + self._label_map[key] = location + label_start = data_start + 16 + + def _read_chunk(self, chunk_location): + """ + Gets the data for a given chunk pointer + + """ + self._file_handle.seek(chunk_location) + # The chunk metadata is always 16 bytes long + chunk_metadata = self._file_handle.read(16) + header, relative_offset, data_length = struct.unpack("IIQ", chunk_metadata) + if header != Nd2Parser.CHUNK_HEADER: + raise ValueError("The ND2 file seems to be corrupted.") + # We start at the location of the chunk metadata, skip over the metadata, and then proceed to the + # start of the actual data field, which is at some arbitrary place after the metadata. + self._file_handle.seek(chunk_location + 16 + relative_offset) + return self._file_handle.read(data_length) + + def _z_level_count(self): + st = self._read_chunk(self._label_map["CustomData|Z!"]) + return len(array.array("d", st)) + + def _parse_unsigned_char(self, data): + return struct.unpack("B", data.read(1))[0] + + def _parse_unsigned_int(self, data): + return struct.unpack("I", data.read(4))[0] + + def _parse_unsigned_long(self, data): + return struct.unpack("Q", data.read(8))[0] + + def _parse_double(self, data): + return struct.unpack("d", data.read(8))[0] + + def _parse_string(self, data): + value = data.read(2) + while not value.endswith("\x00\x00"): + # the string ends at the first instance of \x00\x00 + value += data.read(2) + return value.decode("utf16")[:-1].encode("utf8") + + def _parse_char_array(self, data): + array_length = struct.unpack("Q", data.read(8))[0] + return array.array("B", data.read(array_length)) + + def _parse_metadata_item(self, args): + data, cursor_position = args + new_count, length = struct.unpack(" Date: Mon, 11 May 2015 12:54:43 +0000 Subject: [PATCH 8/8] deleted unused method --- nd2reader/__init__.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/nd2reader/__init__.py b/nd2reader/__init__.py index e5d4839..4f47559 100644 --- a/nd2reader/__init__.py +++ b/nd2reader/__init__.py @@ -5,7 +5,6 @@ from datetime import datetime import logging from nd2reader.model import Image, ImageSet from nd2reader.parser import Nd2Parser -import numpy as np import re import struct @@ -57,7 +56,7 @@ class Nd2(Nd2Parser): validity = self.metadata['ImageMetadata']['SLxExperiment']['ppNextLevelEx'][''][0]['ppNextLevelEx'][''][0]['pItemValid'] except KeyError: # If none of the channels have been deleted, there is no validity list, so we just make one - validity = [True for i in metadata] + validity = [True for _ in metadata] # Channel information is contained in dictionaries with the keys a0, a1...an where the number # indicates the order in which the channel is stored. So by sorting the dicts alphabetically # we get the correct order. @@ -159,10 +158,6 @@ class Nd2(Nd2Parser): else: return count - @staticmethod - def as_numpy_array(arr): - return np.frombuffer(arr) - @property def _channel_offset(self): """