You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

117 lines
4.0 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import itertools
  4. import json
  5. import re
  6. from .common import InfoExtractor
  7. from ..compat import (
  8. compat_urllib_request,
  9. compat_urlparse,
  10. )
  11. from ..utils import (
  12. int_or_none,
  13. js_to_json,
  14. mimetype2ext,
  15. unified_strdate,
  16. )
  17. class SandiaIE(InfoExtractor):
  18. IE_DESC = 'Sandia National Laboratories'
  19. _VALID_URL = r'https?://digitalops\.sandia\.gov/Mediasite/Play/(?P<id>[0-9a-f]+)'
  20. _TEST = {
  21. 'url': 'http://digitalops.sandia.gov/Mediasite/Play/24aace4429fc450fb5b38cdbf424a66e1d',
  22. 'md5': '9422edc9b9a60151727e4b6d8bef393d',
  23. 'info_dict': {
  24. 'id': '24aace4429fc450fb5b38cdbf424a66e1d',
  25. 'ext': 'mp4',
  26. 'title': 'Xyce Software Training - Section 1',
  27. 'description': 're:(?s)SAND Number: SAND 2013-7800.{200,}',
  28. 'upload_date': '20120904',
  29. 'duration': 7794,
  30. }
  31. }
  32. def _real_extract(self, url):
  33. video_id = self._match_id(url)
  34. req = compat_urllib_request.Request(url)
  35. req.add_header('Cookie', 'MediasitePlayerCaps=ClientPlugins=4')
  36. webpage = self._download_webpage(req, video_id)
  37. js_path = self._search_regex(
  38. r'<script type="text/javascript" src="(/Mediasite/FileServer/Presentation/[^"]+)"',
  39. webpage, 'JS code URL')
  40. js_url = compat_urlparse.urljoin(url, js_path)
  41. js_code = self._download_webpage(
  42. js_url, video_id, note='Downloading player')
  43. def extract_str(key, **args):
  44. return self._search_regex(
  45. r'Mediasite\.PlaybackManifest\.%s\s*=\s*(.+);\s*?\n' % re.escape(key),
  46. js_code, key, **args)
  47. def extract_data(key, **args):
  48. data_json = extract_str(key, **args)
  49. if data_json is None:
  50. return data_json
  51. return self._parse_json(
  52. data_json, video_id, transform_source=js_to_json)
  53. formats = []
  54. for i in itertools.count():
  55. fd = extract_data('VideoUrls[%d]' % i, default=None)
  56. if fd is None:
  57. break
  58. formats.append({
  59. 'format_id': '%s' % i,
  60. 'format_note': fd['MimeType'].partition('/')[2],
  61. 'ext': mimetype2ext(fd['MimeType']),
  62. 'url': fd['Location'],
  63. 'protocol': 'f4m' if fd['MimeType'] == 'video/x-mp4-fragmented' else None,
  64. })
  65. self._sort_formats(formats)
  66. slide_baseurl = compat_urlparse.urljoin(
  67. url, extract_data('SlideBaseUrl'))
  68. slide_template = slide_baseurl + re.sub(
  69. r'\{0:D?([0-9+])\}', r'%0\1d', extract_data('SlideImageFileNameTemplate'))
  70. slides = []
  71. last_slide_time = 0
  72. for i in itertools.count(1):
  73. sd = extract_str('Slides[%d]' % i, default=None)
  74. if sd is None:
  75. break
  76. timestamp = int_or_none(self._search_regex(
  77. r'^Mediasite\.PlaybackManifest\.CreateSlide\("[^"]*"\s*,\s*([0-9]+),',
  78. sd, 'slide %s timestamp' % i, fatal=False))
  79. slides.append({
  80. 'url': slide_template % i,
  81. 'duration': timestamp - last_slide_time,
  82. })
  83. last_slide_time = timestamp
  84. formats.append({
  85. 'format_id': 'slides',
  86. 'protocol': 'slideshow',
  87. 'url': json.dumps(slides),
  88. 'preference': -10000, # Downloader not yet written
  89. })
  90. self._sort_formats(formats)
  91. title = extract_data('Title')
  92. description = extract_data('Description', fatal=False)
  93. duration = int_or_none(extract_data(
  94. 'Duration', fatal=False), scale=1000)
  95. upload_date = unified_strdate(extract_data('AirDate', fatal=False))
  96. return {
  97. 'id': video_id,
  98. 'title': title,
  99. 'description': description,
  100. 'formats': formats,
  101. 'upload_date': upload_date,
  102. 'duration': duration,
  103. }