From 55627552d47c2a797f8672b1e802f295bc6e28d2 Mon Sep 17 00:00:00 2001
From: Gabriele Girelli <gigi.ga90@gmail.com>
Date: Mon, 31 Aug 2020 11:41:29 +0200
Subject: [PATCH] Using numpy assets when possible, and formatted with black

---
 nd2reader/parser.py | 88 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 63 insertions(+), 25 deletions(-)

diff --git a/nd2reader/parser.py b/nd2reader/parser.py
index 62243a3..3fceb2e 100644
--- a/nd2reader/parser.py
+++ b/nd2reader/parser.py
@@ -247,27 +247,37 @@ class Parser(object):
         """
         return {channel: n for n, channel in enumerate(self.metadata["channels"])}
 
-    def _check_unwanted_bytes(self, image_group_data, image_data_start, height, width):
+    def _get_unwanted_bytes_ids(
+        self, image_group_data, image_data_start, height, width
+    ):
+        # Check if the byte array size conforms to the image axes size. If not, check
+        # that the number of unexpected (unwanted) bytes is a multiple of the number of
+        # rows (height), as the same number of unwanted bytes is expected to be
+        # appended at the end of each row. Then, returns the indexes of the unwanted
+        # bytes.
         number_of_true_channels = int(len(image_group_data[4:]) / (height * width))
-        n_unwanted_bytes = (len(image_group_data[image_data_start:]))%(height*width)
+        n_unwanted_bytes = (len(image_group_data[image_data_start:])) % (height * width)
         if not n_unwanted_bytes:
-            return False
-        assert 0 == n_unwanted_bytes % height, "An unexpected number of extra bytes was encountered based on the expected frame size, therefore the file could not be parsed."
-        byte_ids = range(image_data_start+height*number_of_true_channels, len(image_group_data)-n_unwanted_bytes+1, height*number_of_true_channels)
-        all_zero_bytes = all([0 == image_group_data[byte_ids[i]+i] for i in range(len(byte_ids))])
-        if not all_zero_bytes:
-            raise Exception(f"{n_unwanted_bytes} unexpected non-zero bytes were found in the ND2 file, the file could not be parsed.")
-        return all_zero_bytes
-
-    def _remove_unwanted_bytes(self, image_group_data, image_data_start, height, width):
-        # Remove unwanted 0-bytes that can appear in stitched images
-        number_of_true_channels = int(len(image_group_data[4:]) / (height * width))
-        n_unwanted_bytes = (len(image_group_data[image_data_start:]))%(height*width)
-        unwanted_byte_per_step = n_unwanted_bytes // height
-        byte_ids = range(image_data_start+height*number_of_true_channels, len(image_group_data)-n_unwanted_bytes+1, height*number_of_true_channels)
-        warnings.warn(f"{n_unwanted_bytes} ({unwanted_byte_per_step}*{height}) unexpected zero bytes were found in the ND2 file and removed to allow further parsing.")
+            return np.arange(0)
+        assert 0 == n_unwanted_bytes % height, (
+            "An unexpected number of extra bytes was encountered based on the expected"
+            + " frame size, therefore the file could not be parsed."
+        )
+        return np.arange(
+            image_data_start + height * number_of_true_channels,
+            len(image_group_data) - n_unwanted_bytes + 1,
+            height * number_of_true_channels,
+        )
+
+    def _remove_bytes_by_id(self, byte_ids, image_group_data, height):
+        # Remove bytes by ID.
+        bytes_per_row = len(byte_ids) // height
+        warnings.warn(
+            f"{len(byte_ids)} ({bytes_per_row}*{height}) unexpected zero "
+            + "bytes were found in the ND2 file and removed to allow further parsing."
+        )
         for i in range(len(byte_ids)):
-            del image_group_data[byte_ids[i]:(byte_ids[i]+unwanted_byte_per_step)]
+            del image_group_data[byte_ids[i] : (byte_ids[i] + bytes_per_row)]
 
     def _get_raw_image_data(self, image_group_number, channel_offset, height, width):
         """Reads the raw bytes and the timestamp of an image.
@@ -291,16 +301,41 @@ class Parser(object):
         image_group_data = array.array("H", data)
         image_data_start = 4 + channel_offset
 
+        # Stitched ND2 files have been reported to contain unexpected (according to
+        # image shape) zero bytes at the end of each image data row. This hinders
+        # proper reshaping of the data. Hence, here the unwanted zero bytes are
+        # identified and removed.
+        unwanted_byte_ids = self._get_unwanted_bytes_ids(
+            image_group_data, image_data_start, height, width
+        )
+        if 0 != len(unwanted_byte_ids):
+            assert np.all(
+                image_group_data[unwanted_byte_ids + np.arange(len(unwanted_byte_ids))]
+                == 0
+            ), (
+                f"{len(unwanted_byte_ids)} unexpected non-zero bytes were found"
+                + " in the ND2 file, the file could not be parsed."
+            )
+            self._remove_bytes_by_id(unwanted_byte_ids, image_group_data, height)
+
         # The images for the various channels are interleaved within the same array. For example, the second image
         # of a four image group will be composed of bytes 2, 6, 10, etc. If you understand why someone would design
         # a data structure that way, please send the author of this library a message.
         number_of_true_channels = int(len(image_group_data[4:]) / (height * width))
-        if self._check_unwanted_bytes(image_group_data, image_data_start, height, width):
-            self._remove_unwanted_bytes(image_group_data, image_data_start, height, width)
         try:
-            image_data = np.reshape(image_group_data[image_data_start::number_of_true_channels], (height, width))
+            image_data = np.reshape(
+                image_group_data[image_data_start::number_of_true_channels],
+                (height, width),
+            )
         except ValueError:
-            image_data = np.reshape(image_group_data[image_data_start::number_of_true_channels], (height, int(round(len(image_group_data[image_data_start::number_of_true_channels])/height))))
+            image_data = np.reshape(
+                image_group_data[image_data_start::number_of_true_channels],
+                (
+                    height,
+                    len(image_group_data[image_data_start::number_of_true_channels])
+                    // height,
+                ),
+            )
 
         # Skip images that are all zeros! This is important, since NIS Elements creates blank "gap" images if you
         # don't have the same number of images each cycle. We discovered this because we only took GFP images every
@@ -309,11 +344,14 @@ class Parser(object):
         if np.any(image_data):
             return timestamp, image_data
 
-        # If a blank "gap" image is encountered, generate an array of corresponding height and width to avoid 
-        # errors with ND2-files with missing frames. Array is filled with nan to reflect that data is missing. 
+        # If a blank "gap" image is encountered, generate an array of corresponding height and width to avoid
+        # errors with ND2-files with missing frames. Array is filled with nan to reflect that data is missing.
         else:
             empty_frame = np.full((height, width), np.nan)
-            warnings.warn('ND2 file contains gap frames which are represented by np.nan-filled arrays; to convert to zeros use e.g. np.nan_to_num(array)')
+            warnings.warn(
+                "ND2 file contains gap frames which are represented by np.nan-filled"
+                + " arrays; to convert to zeros use e.g. np.nan_to_num(array)"
+            )
             return timestamp, image_data
 
     def _get_frame_metadata(self):