You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

285 lines
11 KiB

9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
9 years ago
  1. # -*- coding: utf-8 -*-
  2. import array
  3. from datetime import datetime
  4. from nd2reader.model.metadata import Metadata
  5. from nd2reader.parser.base import BaseParser
  6. from nd2reader.driver.v3 import V3Driver
  7. from nd2reader.common.v3 import read_chunk
  8. import re
  9. import six
  10. import struct
  11. class V3Parser(BaseParser):
  12. """ Parses ND2 files and creates a Metadata and ImageReader object. """
  13. CHUNK_HEADER = 0xabeceda
  14. CHUNK_MAP_START = six.b("ND2 FILEMAP SIGNATURE NAME 0001!")
  15. CHUNK_MAP_END = six.b("ND2 CHUNK MAP SIGNATURE 0000001!")
  16. def __init__(self, fh):
  17. self._fh = fh
  18. self._metadata = None
  19. self._label_map = None
  20. @property
  21. def metadata(self):
  22. if not self._metadata:
  23. self._parse_metadata()
  24. return self._metadata
  25. @property
  26. def driver(self):
  27. return V3Driver(self.metadata, self._label_map, self._fh)
  28. def _parse_metadata(self):
  29. """
  30. Reads all metadata.
  31. """
  32. metadata_dict = {}
  33. self._label_map = self._build_label_map()
  34. for label in self._label_map.keys():
  35. if label.endswith(six.b("LV!")) or six.b("LV|") in label:
  36. data = read_chunk(self._fh, self._label_map[label])
  37. stop = label.index(six.b("LV"))
  38. metadata_dict[label[:stop]] = self._read_metadata(data, 1)
  39. height = metadata_dict[six.b('ImageAttributes')][six.b('SLxImageAttributes')][six.b('uiHeight')]
  40. width = metadata_dict[six.b('ImageAttributes')][six.b('SLxImageAttributes')][six.b('uiWidth')]
  41. channels = self._parse_channels(metadata_dict)
  42. date = self._parse_date(metadata_dict)
  43. fields_of_view = self._parse_fields_of_view(metadata_dict)
  44. frames = self._parse_frames(metadata_dict)
  45. z_levels = self._parse_z_levels(metadata_dict)
  46. total_images_per_channel = self._parse_total_images_per_channel(metadata_dict)
  47. self._metadata = Metadata(height, width, channels, date, fields_of_view, frames, z_levels, total_images_per_channel)
  48. def _parse_date(self, metadata_dict):
  49. """
  50. The date and time when acquisition began.
  51. :rtype: datetime.datetime()
  52. """
  53. for line in metadata_dict[six.b('ImageTextInfo')][six.b('SLxImageTextInfo')].values():
  54. line = line.decode("utf8")
  55. absolute_start_12 = None
  56. absolute_start_24 = None
  57. # ND2s seem to randomly switch between 12- and 24-hour representations.
  58. try:
  59. absolute_start_24 = datetime.strptime(line, "%m/%d/%Y %H:%M:%S")
  60. except (TypeError, ValueError):
  61. pass
  62. try:
  63. absolute_start_12 = datetime.strptime(line, "%m/%d/%Y %I:%M:%S %p")
  64. except (TypeError, ValueError):
  65. pass
  66. if not absolute_start_12 and not absolute_start_24:
  67. continue
  68. return absolute_start_12 if absolute_start_12 else absolute_start_24
  69. raise ValueError("This ND2 has no recorded start time. This is probably a bug.")
  70. def _parse_channels(self, metadata_dict):
  71. """
  72. These are labels created by the NIS Elements user. Typically they may a short description of the filter cube
  73. used (e.g. "bright field", "GFP", etc.)
  74. :rtype: list
  75. """
  76. channels = []
  77. metadata = metadata_dict[six.b('ImageMetadataSeq')][six.b('SLxPictureMetadata')][six.b('sPicturePlanes')]
  78. try:
  79. validity = metadata_dict[six.b('ImageMetadata')][six.b('SLxExperiment')][six.b('ppNextLevelEx')][six.b('')][0][six.b('ppNextLevelEx')][six.b('')][0][six.b('pItemValid')]
  80. except KeyError:
  81. # If none of the channels have been deleted, there is no validity list, so we just make one
  82. validity = [True for _ in metadata]
  83. # Channel information is contained in dictionaries with the keys a0, a1...an where the number
  84. # indicates the order in which the channel is stored. So by sorting the dicts alphabetically
  85. # we get the correct order.
  86. for (label, chan), valid in zip(sorted(metadata[six.b('sPlaneNew')].items()), validity):
  87. if not valid:
  88. continue
  89. channels.append(chan[six.b('sDescription')].decode("utf8"))
  90. return channels
  91. def _parse_fields_of_view(self, metadata_dict):
  92. """
  93. The metadata contains information about fields of view, but it contains it even if some fields
  94. of view were cropped. We can't find anything that states which fields of view are actually
  95. in the image data, so we have to calculate it. There probably is something somewhere, since
  96. NIS Elements can figure it out, but we haven't found it yet.
  97. :rtype: list
  98. """
  99. return self._parse_dimension(r""".*?XY\((\d+)\).*?""", metadata_dict)
  100. def _parse_frames(self, metadata_dict):
  101. """
  102. The number of cycles.
  103. :rtype: list
  104. """
  105. return self._parse_dimension(r""".*?T'?\((\d+)\).*?""", metadata_dict)
  106. def _parse_z_levels(self, metadata_dict):
  107. """
  108. The different levels in the Z-plane. Just a sequence from 0 to n.
  109. :rtype: list
  110. """
  111. return self._parse_dimension(r""".*?Z\((\d+)\).*?""", metadata_dict)
  112. def _parse_dimension_text(self, metadata_dict):
  113. """
  114. While there are metadata values that represent a lot of what we want to capture, they seem to be unreliable.
  115. Sometimes certain elements don't exist, or change their data type randomly. However, the human-readable text
  116. is always there and in the same exact format, so we just parse that instead.
  117. :rtype: str
  118. """
  119. for line in metadata_dict[six.b('ImageTextInfo')][six.b('SLxImageTextInfo')].values():
  120. if six.b("Dimensions:") in line:
  121. metadata = line
  122. break
  123. else:
  124. raise ValueError("Could not parse metadata dimensions!")
  125. for line in metadata.split(six.b("\r\n")):
  126. if line.startswith(six.b("Dimensions:")):
  127. dimension_text = line
  128. break
  129. else:
  130. raise ValueError("Could not parse metadata dimensions!")
  131. return dimension_text
  132. def _parse_dimension(self, pattern, metadata_dict):
  133. dimension_text = self._parse_dimension_text(metadata_dict)
  134. if six.PY3:
  135. dimension_text = dimension_text.decode("utf8")
  136. match = re.match(pattern, dimension_text)
  137. if not match:
  138. return [0]
  139. count = int(match.group(1))
  140. return list(range(count))
  141. def _parse_total_images_per_channel(self, metadata_dict):
  142. """
  143. The total number of images per channel. Warning: this may be inaccurate as it includes "gap" images.
  144. :rtype: int
  145. """
  146. return metadata_dict[six.b('ImageAttributes')][six.b('SLxImageAttributes')][six.b('uiSequenceCount')]
  147. def _build_label_map(self):
  148. """
  149. Every label ends with an exclamation point, however, we can't directly search for those to find all the labels
  150. as some of the bytes contain the value 33, which is the ASCII code for "!". So we iteratively find each label,
  151. grab the subsequent data (always 16 bytes long), advance to the next label and repeat.
  152. :rtype: dict
  153. """
  154. label_map = {}
  155. self._fh.seek(-8, 2)
  156. chunk_map_start_location = struct.unpack("Q", self._fh.read(8))[0]
  157. self._fh.seek(chunk_map_start_location)
  158. raw_text = self._fh.read(-1)
  159. label_start = raw_text.index(V3Parser.CHUNK_MAP_START) + 32
  160. while True:
  161. data_start = raw_text.index(six.b("!"), label_start) + 1
  162. key = raw_text[label_start: data_start]
  163. location, length = struct.unpack("QQ", raw_text[data_start: data_start + 16])
  164. if key == V3Parser.CHUNK_MAP_END:
  165. # We've reached the end of the chunk map
  166. break
  167. label_map[key] = location
  168. label_start = data_start + 16
  169. return label_map
  170. def _parse_unsigned_char(self, data):
  171. return struct.unpack("B", data.read(1))[0]
  172. def _parse_unsigned_int(self, data):
  173. return struct.unpack("I", data.read(4))[0]
  174. def _parse_unsigned_long(self, data):
  175. return struct.unpack("Q", data.read(8))[0]
  176. def _parse_double(self, data):
  177. return struct.unpack("d", data.read(8))[0]
  178. def _parse_string(self, data):
  179. value = data.read(2)
  180. while not value.endswith(six.b("\x00\x00")):
  181. # the string ends at the first instance of \x00\x00
  182. value += data.read(2)
  183. return value.decode("utf16")[:-1].encode("utf8")
  184. def _parse_char_array(self, data):
  185. array_length = struct.unpack("Q", data.read(8))[0]
  186. return array.array("B", data.read(array_length))
  187. def _parse_metadata_item(self, data):
  188. """
  189. Reads hierarchical data, analogous to a Python dict.
  190. """
  191. new_count, length = struct.unpack("<IQ", data.read(12))
  192. length -= data.tell() - self._cursor_position
  193. next_data_length = data.read(length)
  194. value = self._read_metadata(next_data_length, new_count)
  195. # Skip some offsets
  196. data.read(new_count * 8)
  197. return value
  198. def _get_value(self, data, data_type):
  199. """
  200. ND2s use various codes to indicate different data types, which we translate here.
  201. """
  202. parser = {1: self._parse_unsigned_char,
  203. 2: self._parse_unsigned_int,
  204. 3: self._parse_unsigned_int,
  205. 5: self._parse_unsigned_long,
  206. 6: self._parse_double,
  207. 8: self._parse_string,
  208. 9: self._parse_char_array,
  209. 11: self._parse_metadata_item}
  210. return parser[data_type](data)
  211. def _read_metadata(self, data, count):
  212. """
  213. Iterates over each element some section of the metadata and parses it.
  214. """
  215. data = six.BytesIO(data)
  216. metadata = {}
  217. for _ in range(count):
  218. self._cursor_position = data.tell()
  219. header = data.read(2)
  220. if not header:
  221. # We've reached the end of some hierarchy of data
  222. break
  223. if six.PY3:
  224. header = header.decode("utf8")
  225. data_type, name_length = map(ord, header)
  226. name = data.read(name_length * 2).decode("utf16")[:-1].encode("utf8")
  227. value = self._get_value(data, data_type)
  228. if name not in metadata.keys():
  229. metadata[name] = value
  230. else:
  231. if not isinstance(metadata[name], list):
  232. # We have encountered this key exactly once before. Since we're seeing it again, we know we
  233. # need to convert it to a list before proceeding.
  234. metadata[name] = [metadata[name]]
  235. # We've encountered this key before so we're guaranteed to be dealing with a list. Thus we append
  236. # the value to the already-existing list.
  237. metadata[name].append(value)
  238. return metadata