From 1ed1f0bb24c5d15c93cdc582345e178737abc592 Mon Sep 17 00:00:00 2001 From: Gabriele Girelli Date: Sat, 15 Aug 2020 11:52:03 +0200 Subject: [PATCH 01/16] Removed empty bytes at channel subchunk ends Checks if the chunk data size does not match the expected, based on image axes size. Verifies that all unwanted bytes are zero-bytes and removes them. Two triggered warnings: one for unwanted bytes and one for bytes removed (more of an INFO though). --- nd2reader/parser.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/nd2reader/parser.py b/nd2reader/parser.py index 3c8c8a1..8df38cd 100644 --- a/nd2reader/parser.py +++ b/nd2reader/parser.py @@ -273,6 +273,17 @@ class Parser(object): # of a four image group will be composed of bytes 2, 6, 10, etc. If you understand why someone would design # a data structure that way, please send the author of this library a message. number_of_true_channels = int(len(image_group_data[4:]) / (height * width)) + + # Remove unwanted 0-bytes that can appear in stitched images + unwanted_bytes_len = (len(image_group_data[image_data_start:]))%(height*width) + if unwanted_bytes_len: + warnings.warn('Identified unwanted bytes in the ND2 file, possibly stitched.') + byte_ids = range(image_data_start+height*number_of_true_channels, len(image_group_data)-unwanted_bytes_len+1, height*number_of_true_channels) + if all([0 == image_group_data[byte_ids[i]+i] for i in range(len(byte_ids))]): + warnings.warn('All unwanted bytes are zero-bytes, correctly removed.') + for i in range(len(byte_ids)): + del image_group_data[byte_ids[i]] + try: image_data = np.reshape(image_group_data[image_data_start::number_of_true_channels], (height, width)) except ValueError: From 9d40a12f9640ca183cdbd2e9f136491667fc1742 Mon Sep 17 00:00:00 2001 From: Gabriele Girelli Date: Sat, 15 Aug 2020 11:56:38 +0200 Subject: [PATCH 02/16] Fixed complexity --- nd2reader/reader.py | 378 ++++++++++++++++++++++++++++---------------- 1 file changed, 243 insertions(+), 135 
deletions(-) diff --git a/nd2reader/reader.py b/nd2reader/reader.py index 8246125..f99101d 100644 --- a/nd2reader/reader.py +++ b/nd2reader/reader.py @@ -1,207 +1,315 @@ -from pims import Frame -from pims.base_frames import FramesSequenceND +# -*- coding: utf-8 -*- +import struct -from nd2reader.exceptions import EmptyFileError, InvalidFileType -from nd2reader.parser import Parser +import array +import six +import warnings +from pims.base_frames import Frame import numpy as np +from nd2reader.common import get_version, read_chunk +from nd2reader.exceptions import InvalidVersionError +from nd2reader.label_map import LabelMap +from nd2reader.raw_metadata import RawMetadata -class ND2Reader(FramesSequenceND): - """PIMS wrapper for the ND2 parser. - This is the main class: use this to process your .nd2 files. - """ - class_priority = 12 +class Parser(object): + """Parses ND2 files and creates a Metadata and driver object. - def __init__(self, filename): - super(ND2Reader, self).__init__() + """ + CHUNK_HEADER = 0xabeceda + CHUNK_MAP_START = six.b("ND2 FILEMAP SIGNATURE NAME 0001!") + CHUNK_MAP_END = six.b("ND2 CHUNK MAP SIGNATURE 0000001!") - if not filename.endswith(".nd2"): - raise InvalidFileType("The file %s you want to read with nd2reader does not have extension .nd2." 
% filename) + supported_file_versions = {(3, None): True} - self.filename = filename + def __init__(self, fh): + self._fh = fh + self._label_map = None + self._raw_metadata = None + self.metadata = None - # first use the parser to parse the file - self._fh = open(filename, "rb") - self._parser = Parser(self._fh) + # First check the file version + self.supported = self._check_version_supported() - # Setup metadata - self.metadata = self._parser.metadata + # Parse the metadata + self._parse_metadata() - # Set data type - self._dtype = self._parser.get_dtype_from_metadata() + def calculate_image_properties(self, index): + """Calculate FOV, channels and z_levels - # Setup the axes - self._setup_axes() + Args: + index(int): the index (which is simply the order in which the image was acquired) - # Other properties - self._timesteps = None + Returns: + tuple: tuple of the field of view, the channel and the z level - @classmethod - def class_exts(cls): - """Let PIMS open function use this reader for opening .nd2 files + """ + field_of_view = self._calculate_field_of_view(index) + channel = self._calculate_channel(index) + z_level = self._calculate_z_level(index) + return field_of_view, channel, z_level + def get_image(self, index): """ - return {'nd2'} | super(ND2Reader, cls).class_exts() + Creates an Image object and adds its metadata, based on the index (which is simply the order in which the image + was acquired). May return None if the ND2 contains multiple channels and not all were taken in each cycle (for + example, if you take bright field images every minute, and GFP images every five minutes, there will be some + indexes that do not contain an image. The reason for this is complicated, but suffice it to say that we hope to + eliminate this possibility in future releases. For now, you'll need to check if your image is None if you're + doing anything out of the ordinary. 
- def close(self): - """Correctly close the file handle + Args: + index(int): the index (which is simply the order in which the image was acquired) - """ - if self._fh is not None: - self._fh.close() + Returns: + Frame: the image - def _get_default(self, coord): + """ + field_of_view, channel, z_level = self.calculate_image_properties(index) + channel_offset = index % len(self.metadata["channels"]) + image_group_number = int(index / len(self.metadata["channels"])) + frame_number = self._calculate_frame_number(image_group_number, field_of_view, z_level) try: - return self.default_coords[coord] - except KeyError: - return 0 + timestamp, image = self._get_raw_image_data(image_group_number, channel_offset, self.metadata["height"], + self.metadata["width"]) + except (TypeError): + return Frame([], frame_no=frame_number, metadata=self._get_frame_metadata()) + else: + return Frame(image, frame_no=frame_number, metadata=self._get_frame_metadata()) + + def get_image_by_attributes(self, frame_number, field_of_view, channel, z_level, height, width): + """Gets an image based on its attributes alone - def get_frame_2D(self, c=0, t=0, z=0, x=0, y=0, v=0): - """Gets a given frame using the parser Args: - x: The x-index (pims expects this) - y: The y-index (pims expects this) - c: The color channel number - t: The frame number - z: The z stack number - v: The field of view index + frame_number: the frame number + field_of_view: the field of view + channel_name: the color channel name + z_level: the z level + height: the height of the image + width: the width of the image + Returns: - pims.Frame: The requested frame + Frame: the requested image + """ - # This needs to be set to width/height to return an image - x = self.metadata["width"] - y = self.metadata["height"] + frame_number = 0 if frame_number is None else frame_number + field_of_view = 0 if field_of_view is None else field_of_view + channel = 0 if channel is None else channel + z_level = 0 if z_level is None else z_level - 
return self._parser.get_image_by_attributes(t, v, c, z, y, x) + image_group_number = self._calculate_image_group_number(frame_number, field_of_view, z_level) + try: + timestamp, raw_image_data = self._get_raw_image_data(image_group_number, channel, + height, width) + except (TypeError): + return Frame([], frame_no=frame_number, metadata=self._get_frame_metadata()) + else: + return Frame(raw_image_data, frame_no=frame_number, metadata=self._get_frame_metadata()) + + @staticmethod + def get_dtype_from_metadata(): + """Determine the data type from the metadata. + + For now, always use float64 to prevent unexpected overflow errors when manipulating the data (calculating sums/ + means/etc.) - @property - def parser(self): - """ - Returns the parser object. - Returns: - Parser: the parser object """ - return self._parser + return np.float64 - @property - def pixel_type(self): - """Return the pixel data type + def _check_version_supported(self): + """Checks if the ND2 file version is supported by this reader. Returns: - dtype: the pixel data type - + bool: True on supported """ - return self._dtype + major_version, minor_version = get_version(self._fh) + supported = self.supported_file_versions.get( + (major_version, minor_version)) or self.supported_file_versions.get((major_version, None)) - @property - def timesteps(self): - """Get the timesteps of the experiment + if not supported: + print("Warning: No parser is available for your current ND2 version (%d.%d). " % ( + major_version, minor_version) + "This might lead to unexpected behaviour.") - Returns: - np.ndarray: an array of times in milliseconds. + return supported + + def _parse_metadata(self): + """Reads all metadata and instantiates the Metadata object. 
""" - if self._timesteps is None: - return self.get_timesteps() - return self._timesteps + # Retrieve raw metadata from the label mapping + self._label_map = self._build_label_map() + self._raw_metadata = RawMetadata(self._fh, self._label_map) + self.metadata = self._raw_metadata.__dict__ + self.acquisition_times = self._raw_metadata.acquisition_times - @property - def events(self): - """Get the events of the experiment + def _build_label_map(self): + """ + Every label ends with an exclamation point, however, we can't directly search for those to find all the labels + as some of the bytes contain the value 33, which is the ASCII code for "!". So we iteratively find each label, + grab the subsequent data (always 16 bytes long), advance to the next label and repeat. Returns: - iterator of events as dict + LabelMap: the computed label map + """ + # go 8 bytes back from file end + self._fh.seek(-8, 2) + chunk_map_start_location = struct.unpack("Q", self._fh.read(8))[0] + self._fh.seek(chunk_map_start_location) + raw_text = self._fh.read(-1) + return LabelMap(raw_text) - return self._get_metadata_property("events") + def _calculate_field_of_view(self, index): + """Determines what field of view was being imaged for a given image. + + Args: + index(int): the index (which is simply the order in which the image was acquired) - @property - def frame_rate(self): - """The (average) frame rate - Returns: - float: the (average) frame rate in frames per second + int: the field of view """ - total_duration = 0.0 + images_per_cycle = len(self.metadata["z_levels"]) * len(self.metadata["channels"]) + return int((index - (index % images_per_cycle)) / images_per_cycle) % len(self.metadata["fields_of_view"]) - for loop in self.metadata['experiment']['loops']: - total_duration += loop['duration'] + def _calculate_channel(self, index): + """Determines what channel a particular image is. 
- if total_duration == 0: - total_duration = self.timesteps[-1] + Args: + index(int): the index (which is simply the order in which the image was acquired) - if total_duration == 0: - raise ValueError('Total measurement duration could not be determined from loops') + Returns: + string: the name of the color channel - return self.metadata['num_frames'] / (total_duration/1000.0) + """ + return self.metadata["channels"][index % len(self.metadata["channels"])] - def _get_metadata_property(self, key, default=None): - if self.metadata is None: - return default + def _calculate_z_level(self, index): + """Determines the plane in the z-axis a given image was taken in. - if key not in self.metadata: - return default + In the future, this will be replaced with the actual offset in micrometers. - if self.metadata[key] is None: - return default + Args: + index(int): the index (which is simply the order in which the image was acquired) - return self.metadata[key] + Returns: + The z level - def _setup_axes(self): - """Setup the xyctz axes, iterate over t axis by default + """ + return self.metadata["z_levels"][int( + ((index - (index % len(self.metadata["channels"]))) / len(self.metadata["channels"])) % len( + self.metadata["z_levels"]))] + def _calculate_image_group_number(self, frame_number, fov, z_level): """ - self._init_axis_if_exists('x', self._get_metadata_property("width", default=0)) - self._init_axis_if_exists('y', self._get_metadata_property("height", default=0)) - self._init_axis_if_exists('c', len(self._get_metadata_property("channels", default=[])), min_size=2) - self._init_axis_if_exists('t', len(self._get_metadata_property("frames", default=[]))) - self._init_axis_if_exists('z', len(self._get_metadata_property("z_levels", default=[])), min_size=2) - self._init_axis_if_exists('v', len(self._get_metadata_property("fields_of_view", default=[])), min_size=2) + Images are grouped together if they share the same time index, field of view, and z-level. 
- if len(self.sizes) == 0: - raise EmptyFileError("No axes were found for this .nd2 file.") + Args: + frame_number: the time index + fov: the field of view number + z_level: the z level number - # provide the default - self.iter_axes = self._guess_default_iter_axis() + Returns: + int: the image group number - self._register_get_frame(self.get_frame_2D, 'yx') + """ + z_length = len(self.metadata['z_levels']) + z_length = z_length if z_length > 0 else 1 + fields_of_view = len(self.metadata["fields_of_view"]) + fields_of_view = fields_of_view if fields_of_view > 0 else 1 - def _init_axis_if_exists(self, axis, size, min_size=1): - if size >= min_size: - self._init_axis(axis, size) + return frame_number * fields_of_view * z_length + (fov * z_length + z_level) - def _guess_default_iter_axis(self): + def _calculate_frame_number(self, image_group_number, field_of_view, z_level): """ - Guesses the default axis to iterate over based on axis sizes. + Images are in the same frame if they share the same group number and field of view and are taken sequentially. + + Args: + image_group_number: the image group number (see _calculate_image_group_number) + field_of_view: the field of view number + z_level: the z level number + Returns: - the axis to iterate over + """ - priority = ['t', 'z', 'c', 'v'] - found_axes = [] - for axis in priority: - try: - current_size = self.sizes[axis] - except KeyError: - continue + return (image_group_number - (field_of_view * len(self.metadata["z_levels"]) + z_level)) / ( + len(self.metadata["fields_of_view"]) * len(self.metadata["z_levels"])) - if current_size > 1: - return axis + @property + def _channel_offset(self): + """ + Image data is interleaved for each image set. That is, if there are four images in a set, the first image + will consist of pixels 1, 5, 9, etc, the second will be pixels 2, 6, 10, and so forth. 
- found_axes.append(axis) + Returns: + dict: the channel offset for each channel - return found_axes[0] + """ + return {channel: n for n, channel in enumerate(self.metadata["channels"])} + + def _remove_unwanted_bytes(self, image_group_data, image_data_start, height, width): + # Remove unwanted 0-bytes that can appear in stitched images + number_of_true_channels = int(len(image_group_data[4:]) / (height * width)) + unwanted_bytes_len = (len(image_group_data[image_data_start:]))%(height*width) + if unwanted_bytes_len: + warnings.warn('Identified unwanted bytes in the ND2 file, possibly stitched.') + byte_ids = range(image_data_start+height*number_of_true_channels, len(image_group_data)-unwanted_bytes_len+1, height*number_of_true_channels) + if all([0 == image_group_data[byte_ids[i]+i] for i in range(len(byte_ids))]): + warnings.warn('All unwanted bytes are zero-bytes, correctly removed.') + for i in range(len(byte_ids)): + del image_group_data[byte_ids[i]] + + def _get_raw_image_data(self, image_group_number, channel_offset, height, width): + """Reads the raw bytes and the timestamp of an image. - def get_timesteps(self): - """Get the timesteps of the experiment + Args: + image_group_number: the image group number (see _calculate_image_group_number) + channel_offset: the number of the color channel + height: the height of the image + width: the width of the image Returns: - np.ndarray: an array of times in milliseconds. """ - if self._timesteps is not None and len(self._timesteps) > 0: - return self._timesteps + chunk = self._label_map.get_image_data_location(image_group_number) + data = read_chunk(self._fh, chunk) + + # All images in the same image group share the same timestamp! So if you have complicated image data, + # your timestamps may not be entirely accurate. Practically speaking though, they'll only be off by a few + # seconds unless you're doing something super weird. 
+ timestamp = struct.unpack("d", data[:8])[0] + image_group_data = array.array("H", data) + image_data_start = 4 + channel_offset + + # The images for the various channels are interleaved within the same array. For example, the second image + # of a four image group will be composed of bytes 2, 6, 10, etc. If you understand why someone would design + # a data structure that way, please send the author of this library a message. + number_of_true_channels = int(len(image_group_data[4:]) / (height * width)) + self._remove_unwanted_bytes(image_group_data, image_data_start, height, width) + try: + image_data = np.reshape(image_group_data[image_data_start::number_of_true_channels], (height, width)) + except ValueError: + image_data = np.reshape(image_group_data[image_data_start::number_of_true_channels], (height, int(round(len(image_group_data[image_data_start::number_of_true_channels])/height)))) + + # Skip images that are all zeros! This is important, since NIS Elements creates blank "gap" images if you + # don't have the same number of images each cycle. We discovered this because we only took GFP images every + # other cycle to reduce phototoxicity, but NIS Elements still allocated memory as if we were going to take + # them every cycle. + if np.any(image_data): + return timestamp, image_data + + # If a blank "gap" image is encountered, generate an array of corresponding height and width to avoid + # errors with ND2-files with missing frames. Array is filled with nan to reflect that data is missing. + else: + empty_frame = np.full((height, width), np.nan) + warnings.warn('ND2 file contains gap frames which are represented by np.nan-filled arrays; to convert to zeros use e.g. 
np.nan_to_num(array)') + return timestamp, image_data + + def _get_frame_metadata(self): + """Get the metadata for one frame - self._timesteps = np.array(list(self._parser._raw_metadata.acquisition_times), dtype=np.float) * 1000.0 + Returns: + dict: a dictionary containing the parsed metadata - return self._timesteps + """ + return self.metadata From 20d7faa6dc7b70f2a8516a56da58dc0d8c4c8aa4 Mon Sep 17 00:00:00 2001 From: Gabriele Girelli Date: Sat, 15 Aug 2020 11:58:10 +0200 Subject: [PATCH 03/16] Regenerated Reader --- nd2reader/reader.py | 378 ++++++++++++++++---------------------------- 1 file changed, 135 insertions(+), 243 deletions(-) diff --git a/nd2reader/reader.py b/nd2reader/reader.py index f99101d..8246125 100644 --- a/nd2reader/reader.py +++ b/nd2reader/reader.py @@ -1,315 +1,207 @@ -# -*- coding: utf-8 -*- -import struct +from pims import Frame +from pims.base_frames import FramesSequenceND -import array -import six -import warnings -from pims.base_frames import Frame +from nd2reader.exceptions import EmptyFileError, InvalidFileType +from nd2reader.parser import Parser import numpy as np -from nd2reader.common import get_version, read_chunk -from nd2reader.exceptions import InvalidVersionError -from nd2reader.label_map import LabelMap -from nd2reader.raw_metadata import RawMetadata +class ND2Reader(FramesSequenceND): + """PIMS wrapper for the ND2 parser. + This is the main class: use this to process your .nd2 files. + """ -class Parser(object): - """Parses ND2 files and creates a Metadata and driver object. + class_priority = 12 - """ - CHUNK_HEADER = 0xabeceda - CHUNK_MAP_START = six.b("ND2 FILEMAP SIGNATURE NAME 0001!") - CHUNK_MAP_END = six.b("ND2 CHUNK MAP SIGNATURE 0000001!") + def __init__(self, filename): + super(ND2Reader, self).__init__() - supported_file_versions = {(3, None): True} + if not filename.endswith(".nd2"): + raise InvalidFileType("The file %s you want to read with nd2reader does not have extension .nd2." 
% filename) - def __init__(self, fh): - self._fh = fh - self._label_map = None - self._raw_metadata = None - self.metadata = None + self.filename = filename - # First check the file version - self.supported = self._check_version_supported() + # first use the parser to parse the file + self._fh = open(filename, "rb") + self._parser = Parser(self._fh) - # Parse the metadata - self._parse_metadata() + # Setup metadata + self.metadata = self._parser.metadata - def calculate_image_properties(self, index): - """Calculate FOV, channels and z_levels + # Set data type + self._dtype = self._parser.get_dtype_from_metadata() - Args: - index(int): the index (which is simply the order in which the image was acquired) + # Setup the axes + self._setup_axes() - Returns: - tuple: tuple of the field of view, the channel and the z level + # Other properties + self._timesteps = None - """ - field_of_view = self._calculate_field_of_view(index) - channel = self._calculate_channel(index) - z_level = self._calculate_z_level(index) - return field_of_view, channel, z_level + @classmethod + def class_exts(cls): + """Let PIMS open function use this reader for opening .nd2 files - def get_image(self, index): """ - Creates an Image object and adds its metadata, based on the index (which is simply the order in which the image - was acquired). May return None if the ND2 contains multiple channels and not all were taken in each cycle (for - example, if you take bright field images every minute, and GFP images every five minutes, there will be some - indexes that do not contain an image. The reason for this is complicated, but suffice it to say that we hope to - eliminate this possibility in future releases. For now, you'll need to check if your image is None if you're - doing anything out of the ordinary. 
+ return {'nd2'} | super(ND2Reader, cls).class_exts() - Args: - index(int): the index (which is simply the order in which the image was acquired) - - Returns: - Frame: the image + def close(self): + """Correctly close the file handle """ - field_of_view, channel, z_level = self.calculate_image_properties(index) - channel_offset = index % len(self.metadata["channels"]) - image_group_number = int(index / len(self.metadata["channels"])) - frame_number = self._calculate_frame_number(image_group_number, field_of_view, z_level) - try: - timestamp, image = self._get_raw_image_data(image_group_number, channel_offset, self.metadata["height"], - self.metadata["width"]) - except (TypeError): - return Frame([], frame_no=frame_number, metadata=self._get_frame_metadata()) - else: - return Frame(image, frame_no=frame_number, metadata=self._get_frame_metadata()) + if self._fh is not None: + self._fh.close() - def get_image_by_attributes(self, frame_number, field_of_view, channel, z_level, height, width): - """Gets an image based on its attributes alone + def _get_default(self, coord): + try: + return self.default_coords[coord] + except KeyError: + return 0 + def get_frame_2D(self, c=0, t=0, z=0, x=0, y=0, v=0): + """Gets a given frame using the parser Args: - frame_number: the frame number - field_of_view: the field of view - channel_name: the color channel name - z_level: the z level - height: the height of the image - width: the width of the image - + x: The x-index (pims expects this) + y: The y-index (pims expects this) + c: The color channel number + t: The frame number + z: The z stack number + v: The field of view index Returns: - Frame: the requested image - + pims.Frame: The requested frame """ - frame_number = 0 if frame_number is None else frame_number - field_of_view = 0 if field_of_view is None else field_of_view - channel = 0 if channel is None else channel - z_level = 0 if z_level is None else z_level + # This needs to be set to width/height to return an image + x = 
self.metadata["width"] + y = self.metadata["height"] - image_group_number = self._calculate_image_group_number(frame_number, field_of_view, z_level) - try: - timestamp, raw_image_data = self._get_raw_image_data(image_group_number, channel, - height, width) - except (TypeError): - return Frame([], frame_no=frame_number, metadata=self._get_frame_metadata()) - else: - return Frame(raw_image_data, frame_no=frame_number, metadata=self._get_frame_metadata()) - - @staticmethod - def get_dtype_from_metadata(): - """Determine the data type from the metadata. - - For now, always use float64 to prevent unexpected overflow errors when manipulating the data (calculating sums/ - means/etc.) + return self._parser.get_image_by_attributes(t, v, c, z, y, x) + @property + def parser(self): """ - return np.float64 - - def _check_version_supported(self): - """Checks if the ND2 file version is supported by this reader. - + Returns the parser object. Returns: - bool: True on supported + Parser: the parser object """ - major_version, minor_version = get_version(self._fh) - supported = self.supported_file_versions.get( - (major_version, minor_version)) or self.supported_file_versions.get((major_version, None)) - - if not supported: - print("Warning: No parser is available for your current ND2 version (%d.%d). " % ( - major_version, minor_version) + "This might lead to unexpected behaviour.") + return self._parser - return supported + @property + def pixel_type(self): + """Return the pixel data type - def _parse_metadata(self): - """Reads all metadata and instantiates the Metadata object. 
+ Returns: + dtype: the pixel data type """ - # Retrieve raw metadata from the label mapping - self._label_map = self._build_label_map() - self._raw_metadata = RawMetadata(self._fh, self._label_map) - self.metadata = self._raw_metadata.__dict__ - self.acquisition_times = self._raw_metadata.acquisition_times + return self._dtype - def _build_label_map(self): - """ - Every label ends with an exclamation point, however, we can't directly search for those to find all the labels - as some of the bytes contain the value 33, which is the ASCII code for "!". So we iteratively find each label, - grab the subsequent data (always 16 bytes long), advance to the next label and repeat. + @property + def timesteps(self): + """Get the timesteps of the experiment Returns: - LabelMap: the computed label map + np.ndarray: an array of times in milliseconds. """ - # go 8 bytes back from file end - self._fh.seek(-8, 2) - chunk_map_start_location = struct.unpack("Q", self._fh.read(8))[0] - self._fh.seek(chunk_map_start_location) - raw_text = self._fh.read(-1) - return LabelMap(raw_text) - - def _calculate_field_of_view(self, index): - """Determines what field of view was being imaged for a given image. + if self._timesteps is None: + return self.get_timesteps() + return self._timesteps - Args: - index(int): the index (which is simply the order in which the image was acquired) + @property + def events(self): + """Get the events of the experiment Returns: - int: the field of view + iterator of events as dict """ - images_per_cycle = len(self.metadata["z_levels"]) * len(self.metadata["channels"]) - return int((index - (index % images_per_cycle)) / images_per_cycle) % len(self.metadata["fields_of_view"]) - - def _calculate_channel(self, index): - """Determines what channel a particular image is. 
- Args: - index(int): the index (which is simply the order in which the image was acquired) + return self._get_metadata_property("events") + @property + def frame_rate(self): + """The (average) frame rate + Returns: - string: the name of the color channel - + float: the (average) frame rate in frames per second """ - return self.metadata["channels"][index % len(self.metadata["channels"])] + total_duration = 0.0 - def _calculate_z_level(self, index): - """Determines the plane in the z-axis a given image was taken in. + for loop in self.metadata['experiment']['loops']: + total_duration += loop['duration'] - In the future, this will be replaced with the actual offset in micrometers. + if total_duration == 0: + total_duration = self.timesteps[-1] - Args: - index(int): the index (which is simply the order in which the image was acquired) + if total_duration == 0: + raise ValueError('Total measurement duration could not be determined from loops') - Returns: - The z level + return self.metadata['num_frames'] / (total_duration/1000.0) - """ - return self.metadata["z_levels"][int( - ((index - (index % len(self.metadata["channels"]))) / len(self.metadata["channels"])) % len( - self.metadata["z_levels"]))] + def _get_metadata_property(self, key, default=None): + if self.metadata is None: + return default - def _calculate_image_group_number(self, frame_number, fov, z_level): - """ - Images are grouped together if they share the same time index, field of view, and z-level. 
+ if key not in self.metadata: + return default - Args: - frame_number: the time index - fov: the field of view number - z_level: the z level number + if self.metadata[key] is None: + return default - Returns: - int: the image group number + return self.metadata[key] + + def _setup_axes(self): + """Setup the xyctz axes, iterate over t axis by default """ - z_length = len(self.metadata['z_levels']) - z_length = z_length if z_length > 0 else 1 - fields_of_view = len(self.metadata["fields_of_view"]) - fields_of_view = fields_of_view if fields_of_view > 0 else 1 + self._init_axis_if_exists('x', self._get_metadata_property("width", default=0)) + self._init_axis_if_exists('y', self._get_metadata_property("height", default=0)) + self._init_axis_if_exists('c', len(self._get_metadata_property("channels", default=[])), min_size=2) + self._init_axis_if_exists('t', len(self._get_metadata_property("frames", default=[]))) + self._init_axis_if_exists('z', len(self._get_metadata_property("z_levels", default=[])), min_size=2) + self._init_axis_if_exists('v', len(self._get_metadata_property("fields_of_view", default=[])), min_size=2) - return frame_number * fields_of_view * z_length + (fov * z_length + z_level) + if len(self.sizes) == 0: + raise EmptyFileError("No axes were found for this .nd2 file.") - def _calculate_frame_number(self, image_group_number, field_of_view, z_level): - """ - Images are in the same frame if they share the same group number and field of view and are taken sequentially. 
+ # provide the default + self.iter_axes = self._guess_default_iter_axis() - Args: - image_group_number: the image group number (see _calculate_image_group_number) - field_of_view: the field of view number - z_level: the z level number + self._register_get_frame(self.get_frame_2D, 'yx') - Returns: + def _init_axis_if_exists(self, axis, size, min_size=1): + if size >= min_size: + self._init_axis(axis, size) + def _guess_default_iter_axis(self): """ - return (image_group_number - (field_of_view * len(self.metadata["z_levels"]) + z_level)) / ( - len(self.metadata["fields_of_view"]) * len(self.metadata["z_levels"])) - - @property - def _channel_offset(self): + Guesses the default axis to iterate over based on axis sizes. + Returns: + the axis to iterate over """ - Image data is interleaved for each image set. That is, if there are four images in a set, the first image - will consist of pixels 1, 5, 9, etc, the second will be pixels 2, 6, 10, and so forth. + priority = ['t', 'z', 'c', 'v'] + found_axes = [] + for axis in priority: + try: + current_size = self.sizes[axis] + except KeyError: + continue - Returns: - dict: the channel offset for each channel + if current_size > 1: + return axis - """ - return {channel: n for n, channel in enumerate(self.metadata["channels"])} - - def _remove_unwanted_bytes(self, image_group_data, image_data_start, height, width): - # Remove unwanted 0-bytes that can appear in stitched images - number_of_true_channels = int(len(image_group_data[4:]) / (height * width)) - unwanted_bytes_len = (len(image_group_data[image_data_start:]))%(height*width) - if unwanted_bytes_len: - warnings.warn('Identified unwanted bytes in the ND2 file, possibly stitched.') - byte_ids = range(image_data_start+height*number_of_true_channels, len(image_group_data)-unwanted_bytes_len+1, height*number_of_true_channels) - if all([0 == image_group_data[byte_ids[i]+i] for i in range(len(byte_ids))]): - warnings.warn('All unwanted bytes are zero-bytes, correctly 
removed.') - for i in range(len(byte_ids)): - del image_group_data[byte_ids[i]] - - def _get_raw_image_data(self, image_group_number, channel_offset, height, width): - """Reads the raw bytes and the timestamp of an image. + found_axes.append(axis) - Args: - image_group_number: the image group number (see _calculate_image_group_number) - channel_offset: the number of the color channel - height: the height of the image - width: the width of the image + return found_axes[0] + + def get_timesteps(self): + """Get the timesteps of the experiment Returns: + np.ndarray: an array of times in milliseconds. """ - chunk = self._label_map.get_image_data_location(image_group_number) - data = read_chunk(self._fh, chunk) - - # All images in the same image group share the same timestamp! So if you have complicated image data, - # your timestamps may not be entirely accurate. Practically speaking though, they'll only be off by a few - # seconds unless you're doing something super weird. - timestamp = struct.unpack("d", data[:8])[0] - image_group_data = array.array("H", data) - image_data_start = 4 + channel_offset - - # The images for the various channels are interleaved within the same array. For example, the second image - # of a four image group will be composed of bytes 2, 6, 10, etc. If you understand why someone would design - # a data structure that way, please send the author of this library a message. - number_of_true_channels = int(len(image_group_data[4:]) / (height * width)) - self._remove_unwanted_bytes(image_group_data, image_data_start, height, width) - try: - image_data = np.reshape(image_group_data[image_data_start::number_of_true_channels], (height, width)) - except ValueError: - image_data = np.reshape(image_group_data[image_data_start::number_of_true_channels], (height, int(round(len(image_group_data[image_data_start::number_of_true_channels])/height)))) - - # Skip images that are all zeros! 
This is important, since NIS Elements creates blank "gap" images if you - # don't have the same number of images each cycle. We discovered this because we only took GFP images every - # other cycle to reduce phototoxicity, but NIS Elements still allocated memory as if we were going to take - # them every cycle. - if np.any(image_data): - return timestamp, image_data - - # If a blank "gap" image is encountered, generate an array of corresponding height and width to avoid - # errors with ND2-files with missing frames. Array is filled with nan to reflect that data is missing. - else: - empty_frame = np.full((height, width), np.nan) - warnings.warn('ND2 file contains gap frames which are represented by np.nan-filled arrays; to convert to zeros use e.g. np.nan_to_num(array)') - return timestamp, image_data - - def _get_frame_metadata(self): - """Get the metadata for one frame + if self._timesteps is not None and len(self._timesteps) > 0: + return self._timesteps - Returns: - dict: a dictionary containing the parsed metadata + self._timesteps = np.array(list(self._parser._raw_metadata.acquisition_times), dtype=np.float) * 1000.0 - """ - return self.metadata + return self._timesteps From 031856ac8f2827887abfe2428dc44cf46e0ae509 Mon Sep 17 00:00:00 2001 From: Gabriele Girelli Date: Sat, 15 Aug 2020 11:58:57 +0200 Subject: [PATCH 04/16] Fixed complexity --- nd2reader/parser.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/nd2reader/parser.py b/nd2reader/parser.py index 8df38cd..010de1e 100644 --- a/nd2reader/parser.py +++ b/nd2reader/parser.py @@ -247,6 +247,18 @@ class Parser(object): """ return {channel: n for n, channel in enumerate(self.metadata["channels"])} + def _remove_unwanted_bytes(self, image_group_data, image_data_start, height, width): + # Remove unwanted 0-bytes that can appear in stitched images + number_of_true_channels = int(len(image_group_data[4:]) / (height * width)) + unwanted_bytes_len = 
(len(image_group_data[image_data_start:]))%(height*width) + if unwanted_bytes_len: + warnings.warn('Identified unwanted bytes in the ND2 file, possibly stitched.') + byte_ids = range(image_data_start+height*number_of_true_channels, len(image_group_data)-unwanted_bytes_len+1, height*number_of_true_channels) + if all([0 == image_group_data[byte_ids[i]+i] for i in range(len(byte_ids))]): + warnings.warn('All unwanted bytes are zero-bytes, correctly removed.') + for i in range(len(byte_ids)): + del image_group_data[byte_ids[i]] + def _get_raw_image_data(self, image_group_number, channel_offset, height, width): """Reads the raw bytes and the timestamp of an image. @@ -273,17 +285,7 @@ class Parser(object): # of a four image group will be composed of bytes 2, 6, 10, etc. If you understand why someone would design # a data structure that way, please send the author of this library a message. number_of_true_channels = int(len(image_group_data[4:]) / (height * width)) - - # Remove unwanted 0-bytes that can appear in stitched images - unwanted_bytes_len = (len(image_group_data[image_data_start:]))%(height*width) - if unwanted_bytes_len: - warnings.warn('Identified unwanted bytes in the ND2 file, possibly stitched.') - byte_ids = range(image_data_start+height*number_of_true_channels, len(image_group_data)-unwanted_bytes_len+1, height*number_of_true_channels) - if all([0 == image_group_data[byte_ids[i]+i] for i in range(len(byte_ids))]): - warnings.warn('All unwanted bytes are zero-bytes, correctly removed.') - for i in range(len(byte_ids)): - del image_group_data[byte_ids[i]] - + self._remove_unwanted_bytes(image_group_data, image_data_start, height, width) try: image_data = np.reshape(image_group_data[image_data_start::number_of_true_channels], (height, width)) except ValueError: From 42a6bd45e49316df6afe91093534b9059a281de1 Mon Sep 17 00:00:00 2001 From: Gabriele Girelli Date: Sat, 15 Aug 2020 12:01:48 +0200 Subject: [PATCH 05/16] Complexity --- nd2reader/parser.py | 16 
+++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/nd2reader/parser.py b/nd2reader/parser.py index 010de1e..3614628 100644 --- a/nd2reader/parser.py +++ b/nd2reader/parser.py @@ -251,13 +251,15 @@ class Parser(object): # Remove unwanted 0-bytes that can appear in stitched images number_of_true_channels = int(len(image_group_data[4:]) / (height * width)) unwanted_bytes_len = (len(image_group_data[image_data_start:]))%(height*width) - if unwanted_bytes_len: - warnings.warn('Identified unwanted bytes in the ND2 file, possibly stitched.') - byte_ids = range(image_data_start+height*number_of_true_channels, len(image_group_data)-unwanted_bytes_len+1, height*number_of_true_channels) - if all([0 == image_group_data[byte_ids[i]+i] for i in range(len(byte_ids))]): - warnings.warn('All unwanted bytes are zero-bytes, correctly removed.') - for i in range(len(byte_ids)): - del image_group_data[byte_ids[i]] + if not unwanted_bytes_len: + return + + warnings.warn('Identified unwanted bytes in the ND2 file, possibly stitched.') + byte_ids = range(image_data_start+height*number_of_true_channels, len(image_group_data)-unwanted_bytes_len+1, height*number_of_true_channels) + if all([0 == image_group_data[byte_ids[i]+i] for i in range(len(byte_ids))]): + warnings.warn('All unwanted bytes are zero-bytes, correctly removed.') + for i in range(len(byte_ids)): + del image_group_data[byte_ids[i]] def _get_raw_image_data(self, image_group_number, channel_offset, height, width): """Reads the raw bytes and the timestamp of an image. 
From 16b23c24587acb208803560698b0d4700abf61d4 Mon Sep 17 00:00:00 2001 From: Gabriele Girelli Date: Sat, 15 Aug 2020 12:07:27 +0200 Subject: [PATCH 06/16] Update parser.py --- nd2reader/parser.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/nd2reader/parser.py b/nd2reader/parser.py index 3614628..5633bb1 100644 --- a/nd2reader/parser.py +++ b/nd2reader/parser.py @@ -246,20 +246,22 @@ class Parser(object): """ return {channel: n for n, channel in enumerate(self.metadata["channels"])} - def _remove_unwanted_bytes(self, image_group_data, image_data_start, height, width): # Remove unwanted 0-bytes that can appear in stitched images number_of_true_channels = int(len(image_group_data[4:]) / (height * width)) - unwanted_bytes_len = (len(image_group_data[image_data_start:]))%(height*width) - if not unwanted_bytes_len: + n_unwanted_bytes = (len(image_group_data[image_data_start:]))%(height*width) + if not n_unwanted_bytes: return - warnings.warn('Identified unwanted bytes in the ND2 file, possibly stitched.') - byte_ids = range(image_data_start+height*number_of_true_channels, len(image_group_data)-unwanted_bytes_len+1, height*number_of_true_channels) + assert 0 == n_unwanted_bytes % height, "Unexpected unwanted bytes distribution." 
+ unwanted_byte_per_step = n_unwanted_bytes // height + byte_ids = range(image_data_start+height*number_of_true_channels, len(image_group_data)-n_unwanted_bytes+1, height*number_of_true_channels) if all([0 == image_group_data[byte_ids[i]+i] for i in range(len(byte_ids))]): - warnings.warn('All unwanted bytes are zero-bytes, correctly removed.') + warnings.warn(f'Identified {n_unwanted_bytes} ({unwanted_byte_per_step}*{height}) unwanted zero-bytes in the ND2 file, removed.') for i in range(len(byte_ids)): del image_group_data[byte_ids[i]] + else: + warnings.warn(f'Identified {n_unwanted_bytes} unwanted non-zero bytes in the ND2 file.') def _get_raw_image_data(self, image_group_number, channel_offset, height, width): """Reads the raw bytes and the timestamp of an image. From 5bf56afaeccd58c23161818866b289f6d5322e1d Mon Sep 17 00:00:00 2001 From: Gabriele Girelli Date: Sat, 15 Aug 2020 12:12:47 +0200 Subject: [PATCH 07/16] Complexity --- nd2reader/parser.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/nd2reader/parser.py b/nd2reader/parser.py index 5633bb1..a5239e2 100644 --- a/nd2reader/parser.py +++ b/nd2reader/parser.py @@ -246,14 +246,19 @@ class Parser(object): """ return {channel: n for n, channel in enumerate(self.metadata["channels"])} - def _remove_unwanted_bytes(self, image_group_data, image_data_start, height, width): - # Remove unwanted 0-bytes that can appear in stitched images + + def _find_unwanted_bytes(self, image_group_data, image_data_start, height, width): number_of_true_channels = int(len(image_group_data[4:]) / (height * width)) n_unwanted_bytes = (len(image_group_data[image_data_start:]))%(height*width) if not n_unwanted_bytes: - return - + return n_unwanted_bytes assert 0 == n_unwanted_bytes % height, "Unexpected unwanted bytes distribution." 
+ return n_unwanted_bytes + + def _remove_unwanted_bytes(self, image_group_data, image_data_start, height, width): + # Remove unwanted 0-bytes that can appear in stitched images + n_unwanted_bytes = self._find_unwanted_bytes(image_group_data, image_data_start, height, width) + number_of_true_channels = int(len(image_group_data[4:]) / (height * width)) unwanted_byte_per_step = n_unwanted_bytes // height byte_ids = range(image_data_start+height*number_of_true_channels, len(image_group_data)-n_unwanted_bytes+1, height*number_of_true_channels) if all([0 == image_group_data[byte_ids[i]+i] for i in range(len(byte_ids))]): From 237bea49eb5b3807f611ab865db4ba38ff6f71a5 Mon Sep 17 00:00:00 2001 From: Gabriele Girelli Date: Sat, 15 Aug 2020 12:14:42 +0200 Subject: [PATCH 08/16] Allowing case of multiple unwanted bytes per step --- nd2reader/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nd2reader/parser.py b/nd2reader/parser.py index a5239e2..d9bf593 100644 --- a/nd2reader/parser.py +++ b/nd2reader/parser.py @@ -264,7 +264,7 @@ class Parser(object): if all([0 == image_group_data[byte_ids[i]+i] for i in range(len(byte_ids))]): warnings.warn(f'Identified {n_unwanted_bytes} ({unwanted_byte_per_step}*{height}) unwanted zero-bytes in the ND2 file, removed.') for i in range(len(byte_ids)): - del image_group_data[byte_ids[i]] + del image_group_data[byte_ids[i]:(byte_ids[i]+unwanted_byte_per_step)] else: warnings.warn(f'Identified {n_unwanted_bytes} unwanted non-zero bytes in the ND2 file.') From 2e83d26544de57d4eb6ac587cbf3319a8ebb3825 Mon Sep 17 00:00:00 2001 From: Gabriele Girelli Date: Sat, 15 Aug 2020 12:19:07 +0200 Subject: [PATCH 09/16] Changed structure to check/remove method pair --- nd2reader/parser.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/nd2reader/parser.py b/nd2reader/parser.py index d9bf593..929774c 100644 --- a/nd2reader/parser.py +++ b/nd2reader/parser.py @@ -247,27 +247,28 @@ class 
Parser(object): """ return {channel: n for n, channel in enumerate(self.metadata["channels"])} - def _find_unwanted_bytes(self, image_group_data, image_data_start, height, width): + def _check_unwanted_bytes(self, image_group_data, image_data_start, height, width): number_of_true_channels = int(len(image_group_data[4:]) / (height * width)) n_unwanted_bytes = (len(image_group_data[image_data_start:]))%(height*width) if not n_unwanted_bytes: - return n_unwanted_bytes + return False assert 0 == n_unwanted_bytes % height, "Unexpected unwanted bytes distribution." - return n_unwanted_bytes + byte_ids = range(image_data_start+height*number_of_true_channels, len(image_group_data)-n_unwanted_bytes+1, height*number_of_true_channels) + all_zero_bytes = all([0 == image_group_data[byte_ids[i]+i] for i in range(len(byte_ids))]) + if not all_zero_bytes: + warnings.warn(f'Identified {n_unwanted_bytes} unwanted non-zero bytes in the ND2 file.') + return all_zero_bytes def _remove_unwanted_bytes(self, image_group_data, image_data_start, height, width): # Remove unwanted 0-bytes that can appear in stitched images - n_unwanted_bytes = self._find_unwanted_bytes(image_group_data, image_data_start, height, width) number_of_true_channels = int(len(image_group_data[4:]) / (height * width)) + n_unwanted_bytes = (len(image_group_data[image_data_start:]))%(height*width) unwanted_byte_per_step = n_unwanted_bytes // height byte_ids = range(image_data_start+height*number_of_true_channels, len(image_group_data)-n_unwanted_bytes+1, height*number_of_true_channels) - if all([0 == image_group_data[byte_ids[i]+i] for i in range(len(byte_ids))]): - warnings.warn(f'Identified {n_unwanted_bytes} ({unwanted_byte_per_step}*{height}) unwanted zero-bytes in the ND2 file, removed.') - for i in range(len(byte_ids)): - del image_group_data[byte_ids[i]:(byte_ids[i]+unwanted_byte_per_step)] - else: - warnings.warn(f'Identified {n_unwanted_bytes} unwanted non-zero bytes in the ND2 file.') - + 
warnings.warn(f'Identified {n_unwanted_bytes} ({unwanted_byte_per_step}*{height}) unwanted zero-bytes in the ND2 file, removed.') + for i in range(len(byte_ids)): + del image_group_data[byte_ids[i]:(byte_ids[i]+unwanted_byte_per_step)] + def _get_raw_image_data(self, image_group_number, channel_offset, height, width): """Reads the raw bytes and the timestamp of an image. @@ -294,7 +295,8 @@ class Parser(object): # of a four image group will be composed of bytes 2, 6, 10, etc. If you understand why someone would design # a data structure that way, please send the author of this library a message. number_of_true_channels = int(len(image_group_data[4:]) / (height * width)) - self._remove_unwanted_bytes(image_group_data, image_data_start, height, width) + if self._check_unwanted_bytes(image_group_data, image_data_start, height, width): + self._remove_unwanted_bytes(image_group_data, image_data_start, height, width) try: image_data = np.reshape(image_group_data[image_data_start::number_of_true_channels], (height, width)) except ValueError: From 1e0c029283247b682c455b0fdd7b4f355cb301c0 Mon Sep 17 00:00:00 2001 From: Gabriele Girelli Date: Mon, 31 Aug 2020 10:45:55 +0200 Subject: [PATCH 10/16] Improved assert and warning messages --- nd2reader/parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nd2reader/parser.py b/nd2reader/parser.py index 929774c..0df1729 100644 --- a/nd2reader/parser.py +++ b/nd2reader/parser.py @@ -252,7 +252,7 @@ class Parser(object): n_unwanted_bytes = (len(image_group_data[image_data_start:]))%(height*width) if not n_unwanted_bytes: return False - assert 0 == n_unwanted_bytes % height, "Unexpected unwanted bytes distribution." + assert 0 == n_unwanted_bytes % height, "An unexpected number of extra bytes was encountered based on the expected frame size, therefore the file could not be parsed." 
byte_ids = range(image_data_start+height*number_of_true_channels, len(image_group_data)-n_unwanted_bytes+1, height*number_of_true_channels) all_zero_bytes = all([0 == image_group_data[byte_ids[i]+i] for i in range(len(byte_ids))]) if not all_zero_bytes: @@ -265,7 +265,7 @@ class Parser(object): n_unwanted_bytes = (len(image_group_data[image_data_start:]))%(height*width) unwanted_byte_per_step = n_unwanted_bytes // height byte_ids = range(image_data_start+height*number_of_true_channels, len(image_group_data)-n_unwanted_bytes+1, height*number_of_true_channels) - warnings.warn(f'Identified {n_unwanted_bytes} ({unwanted_byte_per_step}*{height}) unwanted zero-bytes in the ND2 file, removed.') + warnings.warn(f"{n_unwanted_bytes} ({unwanted_byte_per_step}*{height}) unexpected zero bytes were found in the ND2 file and removed to allow further parsing.") for i in range(len(byte_ids)): del image_group_data[byte_ids[i]:(byte_ids[i]+unwanted_byte_per_step)] From fbf2fc72712f482fa0c77d82e33c02c69d99b8c1 Mon Sep 17 00:00:00 2001 From: Gabriele Girelli Date: Mon, 31 Aug 2020 10:55:51 +0200 Subject: [PATCH 11/16] Switched from warning to raising exception and improved message --- nd2reader/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nd2reader/parser.py b/nd2reader/parser.py index 0df1729..62243a3 100644 --- a/nd2reader/parser.py +++ b/nd2reader/parser.py @@ -256,7 +256,7 @@ class Parser(object): byte_ids = range(image_data_start+height*number_of_true_channels, len(image_group_data)-n_unwanted_bytes+1, height*number_of_true_channels) all_zero_bytes = all([0 == image_group_data[byte_ids[i]+i] for i in range(len(byte_ids))]) if not all_zero_bytes: - warnings.warn(f'Identified {n_unwanted_bytes} unwanted non-zero bytes in the ND2 file.') + raise Exception(f"{n_unwanted_bytes} unexpected non-zero bytes were found in the ND2 file, the file could not be parsed.") return all_zero_bytes def _remove_unwanted_bytes(self, image_group_data, image_data_start, 
height, width): From 99e5d507fbf9a85284cc866beead5a4a85f86999 Mon Sep 17 00:00:00 2001 From: Gabriele Girelli Date: Mon, 31 Aug 2020 11:41:29 +0200 Subject: [PATCH 12/16] Using numpy asserts when possible, and formatted with black --- nd2reader/parser.py | 88 ++++++++++++++++++++++++++++++++------------- 1 file changed, 63 insertions(+), 25 deletions(-) diff --git a/nd2reader/parser.py b/nd2reader/parser.py index 62243a3..3fceb2e 100644 --- a/nd2reader/parser.py +++ b/nd2reader/parser.py @@ -247,27 +247,37 @@ class Parser(object): """ return {channel: n for n, channel in enumerate(self.metadata["channels"])} - def _check_unwanted_bytes(self, image_group_data, image_data_start, height, width): + def _get_unwanted_bytes_ids( + self, image_group_data, image_data_start, height, width + ): + # Check if the byte array size conforms to the image axes size. If not, check + # that the number of unexpected (unwanted) bytes is a multiple of the number of + # rows (height), as the same unmber of unwanted bytes is expected to be + # appended at the end of each row. Then, returns the indexes of the unwanted + # bytes. number_of_true_channels = int(len(image_group_data[4:]) / (height * width)) - n_unwanted_bytes = (len(image_group_data[image_data_start:]))%(height*width) + n_unwanted_bytes = (len(image_group_data[image_data_start:])) % (height * width) if not n_unwanted_bytes: - return False - assert 0 == n_unwanted_bytes % height, "An unexpected number of extra bytes was encountered based on the expected frame size, therefore the file could not be parsed." 
- byte_ids = range(image_data_start+height*number_of_true_channels, len(image_group_data)-n_unwanted_bytes+1, height*number_of_true_channels) - all_zero_bytes = all([0 == image_group_data[byte_ids[i]+i] for i in range(len(byte_ids))]) - if not all_zero_bytes: - raise Exception(f"{n_unwanted_bytes} unexpected non-zero bytes were found in the ND2 file, the file could not be parsed.") - return all_zero_bytes - - def _remove_unwanted_bytes(self, image_group_data, image_data_start, height, width): - # Remove unwanted 0-bytes that can appear in stitched images - number_of_true_channels = int(len(image_group_data[4:]) / (height * width)) - n_unwanted_bytes = (len(image_group_data[image_data_start:]))%(height*width) - unwanted_byte_per_step = n_unwanted_bytes // height - byte_ids = range(image_data_start+height*number_of_true_channels, len(image_group_data)-n_unwanted_bytes+1, height*number_of_true_channels) - warnings.warn(f"{n_unwanted_bytes} ({unwanted_byte_per_step}*{height}) unexpected zero bytes were found in the ND2 file and removed to allow further parsing.") + return np.arange(0) + assert 0 == n_unwanted_bytes % height, ( + "An unexpected number of extra bytes was encountered based on the expected" + + " frame size, therefore the file could not be parsed." + ) + return np.arange( + image_data_start + height * number_of_true_channels, + len(image_group_data) - n_unwanted_bytes + 1, + height * number_of_true_channels, + ) + + def _remove_bytes_by_id(self, byte_ids, image_group_data, height): + # Remove bytes by ID. + bytes_per_row = len(byte_ids) // height + warnings.warn( + f"{len(byte_ids)} ({bytes_per_row}*{height}) unexpected zero " + + "bytes were found in the ND2 file and removed to allow further parsing." 
+ ) for i in range(len(byte_ids)): - del image_group_data[byte_ids[i]:(byte_ids[i]+unwanted_byte_per_step)] + del image_group_data[byte_ids[i] : (byte_ids[i] + bytes_per_row)] def _get_raw_image_data(self, image_group_number, channel_offset, height, width): """Reads the raw bytes and the timestamp of an image. @@ -291,16 +301,41 @@ class Parser(object): image_group_data = array.array("H", data) image_data_start = 4 + channel_offset + # Stitched ND2 files have been reported to contain unexpected (according to + # image shape) zero bytes at the end of each image data row. This hinders + # proper reshaping of the data. Hence, here the unwanted zero bytes are + # identified and removed. + unwanted_byte_ids = self._get_unwanted_bytes_ids( + image_group_data, image_data_start, height, width + ) + if 0 != len(unwanted_byte_ids): + assert np.all( + image_group_data[unwanted_byte_ids + np.arange(len(unwanted_byte_ids))] + == 0 + ), ( + f"{len(unwanted_byte_ids)} unexpected non-zero bytes were found" + + " in the ND2 file, the file could not be parsed." + ) + self._remove_bytes_by_id(unwanted_byte_ids, image_group_data, height) + # The images for the various channels are interleaved within the same array. For example, the second image # of a four image group will be composed of bytes 2, 6, 10, etc. If you understand why someone would design # a data structure that way, please send the author of this library a message. 
number_of_true_channels = int(len(image_group_data[4:]) / (height * width)) - if self._check_unwanted_bytes(image_group_data, image_data_start, height, width): - self._remove_unwanted_bytes(image_group_data, image_data_start, height, width) try: - image_data = np.reshape(image_group_data[image_data_start::number_of_true_channels], (height, width)) + image_data = np.reshape( + image_group_data[image_data_start::number_of_true_channels], + (height, width), + ) except ValueError: - image_data = np.reshape(image_group_data[image_data_start::number_of_true_channels], (height, int(round(len(image_group_data[image_data_start::number_of_true_channels])/height)))) + image_data = np.reshape( + image_group_data[image_data_start::number_of_true_channels], + ( + height, + len(image_group_data[image_data_start::number_of_true_channels]) + // height, + ), + ) # Skip images that are all zeros! This is important, since NIS Elements creates blank "gap" images if you # don't have the same number of images each cycle. We discovered this because we only took GFP images every @@ -309,11 +344,14 @@ class Parser(object): if np.any(image_data): return timestamp, image_data - # If a blank "gap" image is encountered, generate an array of corresponding height and width to avoid - # errors with ND2-files with missing frames. Array is filled with nan to reflect that data is missing. + # If a blank "gap" image is encountered, generate an array of corresponding height and width to avoid + # errors with ND2-files with missing frames. Array is filled with nan to reflect that data is missing. else: empty_frame = np.full((height, width), np.nan) - warnings.warn('ND2 file contains gap frames which are represented by np.nan-filled arrays; to convert to zeros use e.g. np.nan_to_num(array)') + warnings.warn( + "ND2 file contains gap frames which are represented by np.nan-filled" + + " arrays; to convert to zeros use e.g. 
np.nan_to_num(array)" + ) return timestamp, image_data def _get_frame_metadata(self): From 7fd91d6a321620041f4ba675eb05e3ff2db9acc3 Mon Sep 17 00:00:00 2001 From: Gabriele Girelli Date: Mon, 31 Aug 2020 11:41:41 +0200 Subject: [PATCH 13/16] Added get_image test of stitched sample --- tests/test_parser.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tests/test_parser.py b/tests/test_parser.py index 11dba38..1fd8e37 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -4,6 +4,7 @@ from nd2reader.artificial import ArtificialND2 from nd2reader.common import check_or_make_dir from nd2reader.exceptions import InvalidVersionError from nd2reader.parser import Parser +import urllib.request class TestParser(unittest.TestCase): @@ -13,15 +14,24 @@ class TestParser(unittest.TestCase): def setUp(self): dir_path = path.dirname(path.realpath(__file__)) - check_or_make_dir(path.join(dir_path, 'test_data/')) - self.test_file = path.join(dir_path, 'test_data/test.nd2') + check_or_make_dir(path.join(dir_path, "test_data/")) + self.test_file = path.join(dir_path, "test_data/test.nd2") self.create_test_nd2() def test_can_open_test_file(self): self.create_test_nd2() - with open(self.test_file, 'rb') as fh: + with open(self.test_file, "rb") as fh: parser = Parser(fh) self.assertTrue(parser.supported) - + def test_get_image(self): + stitched_path = "test_data/test_stitched.nd2" + if not path.isfile(stitched_path): + file_name, header = urllib.request.urlretrieve( + "https://downloads.openmicroscopy.org/images/ND2/karl/sample_image.nd2", + stitched_path, + ) + with open(stitched_path, "rb") as fh: + parser = Parser(fh) + parser.get_image(0) From a27ce7745c8035504168b2bb4fa86f3ec968b70a Mon Sep 17 00:00:00 2001 From: Gabriele Girelli Date: Mon, 31 Aug 2020 11:55:15 +0200 Subject: [PATCH 14/16] Refactored. 
--- nd2reader/parser.py | 74 ++++--------------------------------------- nd2reader/stitched.py | 54 +++++++++++++++++++++++++++++++ tests/test_parser.py | 1 - 3 files changed, 61 insertions(+), 68 deletions(-) create mode 100644 nd2reader/stitched.py diff --git a/nd2reader/parser.py b/nd2reader/parser.py index 3fceb2e..8534e79 100644 --- a/nd2reader/parser.py +++ b/nd2reader/parser.py @@ -8,9 +8,9 @@ from pims.base_frames import Frame import numpy as np from nd2reader.common import get_version, read_chunk -from nd2reader.exceptions import InvalidVersionError from nd2reader.label_map import LabelMap from nd2reader.raw_metadata import RawMetadata +from nd2reader import stitched class Parser(object): @@ -232,8 +232,7 @@ class Parser(object): Returns: """ - return (image_group_number - (field_of_view * len(self.metadata["z_levels"]) + z_level)) / ( - len(self.metadata["fields_of_view"]) * len(self.metadata["z_levels"])) + return (image_group_number - (field_of_view * len(self.metadata["z_levels"]) + z_level)) / (len(self.metadata["fields_of_view"]) * len(self.metadata["z_levels"])) @property def _channel_offset(self): @@ -247,38 +246,6 @@ class Parser(object): """ return {channel: n for n, channel in enumerate(self.metadata["channels"])} - def _get_unwanted_bytes_ids( - self, image_group_data, image_data_start, height, width - ): - # Check if the byte array size conforms to the image axes size. If not, check - # that the number of unexpected (unwanted) bytes is a multiple of the number of - # rows (height), as the same unmber of unwanted bytes is expected to be - # appended at the end of each row. Then, returns the indexes of the unwanted - # bytes. 
- number_of_true_channels = int(len(image_group_data[4:]) / (height * width)) - n_unwanted_bytes = (len(image_group_data[image_data_start:])) % (height * width) - if not n_unwanted_bytes: - return np.arange(0) - assert 0 == n_unwanted_bytes % height, ( - "An unexpected number of extra bytes was encountered based on the expected" - + " frame size, therefore the file could not be parsed." - ) - return np.arange( - image_data_start + height * number_of_true_channels, - len(image_group_data) - n_unwanted_bytes + 1, - height * number_of_true_channels, - ) - - def _remove_bytes_by_id(self, byte_ids, image_group_data, height): - # Remove bytes by ID. - bytes_per_row = len(byte_ids) // height - warnings.warn( - f"{len(byte_ids)} ({bytes_per_row}*{height}) unexpected zero " - + "bytes were found in the ND2 file and removed to allow further parsing." - ) - for i in range(len(byte_ids)): - del image_group_data[byte_ids[i] : (byte_ids[i] + bytes_per_row)] - def _get_raw_image_data(self, image_group_number, channel_offset, height, width): """Reads the raw bytes and the timestamp of an image. @@ -300,42 +267,17 @@ class Parser(object): timestamp = struct.unpack("d", data[:8])[0] image_group_data = array.array("H", data) image_data_start = 4 + channel_offset - - # Stitched ND2 files have been reported to contain unexpected (according to - # image shape) zero bytes at the end of each image data row. This hinders - # proper reshaping of the data. Hence, here the unwanted zero bytes are - # identified and removed. - unwanted_byte_ids = self._get_unwanted_bytes_ids( - image_group_data, image_data_start, height, width - ) - if 0 != len(unwanted_byte_ids): - assert np.all( - image_group_data[unwanted_byte_ids + np.arange(len(unwanted_byte_ids))] - == 0 - ), ( - f"{len(unwanted_byte_ids)} unexpected non-zero bytes were found" - + " in the ND2 file, the file could not be parsed." 
- ) - self._remove_bytes_by_id(unwanted_byte_ids, image_group_data, height) + image_group_data = stitched.remove_parsed_unwanted_bytes(image_group_data, image_data_start, height, width) # The images for the various channels are interleaved within the same array. For example, the second image # of a four image group will be composed of bytes 2, 6, 10, etc. If you understand why someone would design # a data structure that way, please send the author of this library a message. number_of_true_channels = int(len(image_group_data[4:]) / (height * width)) try: - image_data = np.reshape( - image_group_data[image_data_start::number_of_true_channels], - (height, width), - ) + image_data = np.reshape(image_group_data[image_data_start::number_of_true_channels], (height, width)) except ValueError: - image_data = np.reshape( - image_group_data[image_data_start::number_of_true_channels], - ( - height, - len(image_group_data[image_data_start::number_of_true_channels]) - // height, - ), - ) + new_width = len(image_group_data[image_data_start::number_of_true_channels]) // height + image_data = np.reshape(image_group_data[image_data_start::number_of_true_channels], (height, new_width)) # Skip images that are all zeros! This is important, since NIS Elements creates blank "gap" images if you # don't have the same number of images each cycle. We discovered this because we only took GFP images every @@ -349,9 +291,7 @@ class Parser(object): else: empty_frame = np.full((height, width), np.nan) warnings.warn( - "ND2 file contains gap frames which are represented by np.nan-filled" - + " arrays; to convert to zeros use e.g. np.nan_to_num(array)" - ) + "ND2 file contains gap frames which are represented by np.nan-filled arrays; to convert to zeros use e.g. 
np.nan_to_num(array)") return timestamp, image_data def _get_frame_metadata(self): diff --git a/nd2reader/stitched.py b/nd2reader/stitched.py new file mode 100644 index 0000000..b65cf38 --- /dev/null +++ b/nd2reader/stitched.py @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- +import numpy as np # type: ignore +import warnings + + +def get_unwanted_bytes_ids(self, image_group_data, image_data_start, height, width): + # Check if the byte array size conforms to the image axes size. If not, check + # that the number of unexpected (unwanted) bytes is a multiple of the number of + # rows (height), as the same number of unwanted bytes is expected to be + # appended at the end of each row. Then, returns the indexes of the unwanted + # bytes. + number_of_true_channels = int(len(image_group_data[4:]) / (height * width)) + n_unwanted_bytes = (len(image_group_data[image_data_start:])) % (height * width) + if not n_unwanted_bytes: + return np.arange(0) + assert 0 == n_unwanted_bytes % height, ( + "An unexpected number of extra bytes was encountered based on the expected" + + " frame size, therefore the file could not be parsed." + ) + return np.arange( + image_data_start + height * number_of_true_channels, + len(image_group_data) - n_unwanted_bytes + 1, + height * number_of_true_channels, + ) + + +def remove_bytes_by_id(self, byte_ids, image_group_data, height): + # Remove bytes by ID. + bytes_per_row = len(byte_ids) // height + warnings.warn( + f"{len(byte_ids)} ({bytes_per_row}*{height}) unexpected zero " + + "bytes were found in the ND2 file and removed to allow further parsing." + ) + for i in range(len(byte_ids)): + del image_group_data[byte_ids[i] : (byte_ids[i] + bytes_per_row)] + + +def remove_parsed_unwanted_bytes(image_group_data, image_data_start, height, width): + # Stitched ND2 files have been reported to contain unexpected (according to + # image shape) zero bytes at the end of each image data row. This hinders + # proper reshaping of the data. 
Hence, here the unwanted zero bytes are + # identified and removed. + unwanted_byte_ids = get_unwanted_bytes_ids( + image_group_data, image_data_start, height, width + ) + if 0 != len(unwanted_byte_ids): + assert np.all( + image_group_data[unwanted_byte_ids + np.arange(len(unwanted_byte_ids))] == 0 + ), ( + f"{len(unwanted_byte_ids)} unexpected non-zero bytes were found" + + " in the ND2 file, the file could not be parsed." + ) + remove_bytes_by_id(unwanted_byte_ids, image_group_data, height) + return image_group_data diff --git a/tests/test_parser.py b/tests/test_parser.py index 1fd8e37..c0b4ddf 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -2,7 +2,6 @@ import unittest from os import path from nd2reader.artificial import ArtificialND2 from nd2reader.common import check_or_make_dir -from nd2reader.exceptions import InvalidVersionError from nd2reader.parser import Parser import urllib.request From 71ddf599bbdb636cc8f31b06d0a76ed08dff108d Mon Sep 17 00:00:00 2001 From: Gabriele Girelli Date: Mon, 31 Aug 2020 11:58:49 +0200 Subject: [PATCH 15/16] Fixed func def --- nd2reader/stitched.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/nd2reader/stitched.py b/nd2reader/stitched.py index b65cf38..41bd43e 100644 --- a/nd2reader/stitched.py +++ b/nd2reader/stitched.py @@ -3,7 +3,7 @@ import numpy as np # type: ignore import warnings -def get_unwanted_bytes_ids(self, image_group_data, image_data_start, height, width): +def get_unwanted_bytes_ids(image_group_data, image_data_start, height, width): # Check if the byte array size conforms to the image axes size. 
If not, check # that the number of unexpected (unwanted) bytes is a multiple of the number of # rows (height), as the same number of unwanted bytes is expected to be @@ -13,6 +13,7 @@ def get_unwanted_bytes_ids(image_group_data, image_data_start, height, wid n_unwanted_bytes = (len(image_group_data[image_data_start:])) % (height * width) if not n_unwanted_bytes: return np.arange(0) + print(n_unwanted_bytes) assert 0 == n_unwanted_bytes % height, ( "An unexpected number of extra bytes was encountered based on the expected" + " frame size, therefore the file could not be parsed." @@ -24,7 +25,7 @@ def get_unwanted_bytes_ids(image_group_data, image_data_start, height, wid ) -def remove_bytes_by_id(self, byte_ids, image_group_data, height): +def remove_bytes_by_id(byte_ids, image_group_data, height): # Remove bytes by ID. bytes_per_row = len(byte_ids) // height warnings.warn( From 65731c9bebd5d1b76ef99802df44bd86cb09dacb Mon Sep 17 00:00:00 2001 From: Gabriele Girelli Date: Wed, 2 Sep 2020 21:14:24 +0200 Subject: [PATCH 16/16] Update stitched.py --- nd2reader/stitched.py | 1 - 1 file changed, 1 deletion(-) diff --git a/nd2reader/stitched.py b/nd2reader/stitched.py index 41bd43e..9721e21 100644 --- a/nd2reader/stitched.py +++ b/nd2reader/stitched.py @@ -13,7 +13,6 @@ def get_unwanted_bytes_ids(image_group_data, image_data_start, height, width): n_unwanted_bytes = (len(image_group_data[image_data_start:])) % (height * width) if not n_unwanted_bytes: return np.arange(0) - print(n_unwanted_bytes) assert 0 == n_unwanted_bytes % height, ( "An unexpected number of extra bytes was encountered based on the expected" + " frame size, therefore the file could not be parsed."