import struct import array from datetime import datetime import six import re from nd2reader.exceptions import InvalidVersionError def get_version(fh): """Determines what version the ND2 is. Args: fh: File handle of the .nd2 file Returns: tuple: Major and minor version """ # the first 16 bytes seem to have no meaning, so we skip them fh.seek(16) # the next 38 bytes contain the string that we want to parse. Unlike most of the ND2, this is in UTF-8 data = fh.read(38).decode("utf8") return parse_version(data) def parse_version(data): """Parses a string with the version data in it. Args: data (unicode): the 19th through 54th byte of the ND2, representing the version Returns: tuple: Major and minor version """ match = re.search(r"""^ND2 FILE SIGNATURE CHUNK NAME01!Ver(?P\d)\.(?P\d)$""", data) if match: # We haven't seen a lot of ND2s but the ones we have seen conform to this return int(match.group('major')), int(match.group('minor')) raise InvalidVersionError("The version of the ND2 you specified is not supported.") def read_chunk(fh, chunk_location): """Reads a piece of data given the location of its pointer. Args: fh: an open file handle to the ND2 chunk_location (int): location to read Returns: bytes: the data at the chunk location """ if chunk_location is None: return None fh.seek(chunk_location) # The chunk metadata is always 16 bytes long chunk_metadata = fh.read(16) header, relative_offset, data_length = struct.unpack("IIQ", chunk_metadata) if header != 0xabeceda: raise ValueError("The ND2 file seems to be corrupted.") # We start at the location of the chunk metadata, skip over the metadata, and then proceed to the # start of the actual data field, which is at some arbitrary place after the metadata. fh.seek(chunk_location + 16 + relative_offset) return fh.read(data_length) def read_array(fh, kind, chunk_location): kinds = {'double': 'd', 'int': 'i', 'float': 'f'} if kind not in kinds: raise ValueError('You attempted to read an array of an unknown type.') raw_data = read_chunk(fh, chunk_location) if raw_data is None: return None return array.array(kinds[kind], raw_data) def _parse_unsigned_char(data): return struct.unpack("B", data.read(1))[0] def _parse_unsigned_int(data): return struct.unpack("I", data.read(4))[0] def _parse_unsigned_long(data): return struct.unpack("Q", data.read(8))[0] def _parse_double(data): return struct.unpack("d", data.read(8))[0] def _parse_string(data): value = data.read(2) while not value.endswith(six.b("\x00\x00")): # the string ends at the first instance of \x00\x00 value += data.read(2) return value.decode("utf16")[:-1].encode("utf8") def _parse_char_array(data): array_length = struct.unpack("Q", data.read(8))[0] return array.array("B", data.read(array_length)) def parse_date(text_info): """The date and time when acquisition began. Returns: datetime: The date and time when acquisition began. """ for line in text_info.values(): line = line.decode("utf8") # ND2s seem to randomly switch between 12- and 24-hour representations. try: absolute_start = datetime.strptime(line, "%m/%d/%Y %H:%M:%S") except (TypeError, ValueError): try: absolute_start = datetime.strptime(line, "%m/%d/%Y %I:%M:%S %p") except (TypeError, ValueError): absolute_start = None return absolute_start def _parse_metadata_item(data, cursor_position): """Reads hierarchical data, analogous to a Python dict. Returns: dict: the metadata item """ new_count, length = struct.unpack("