# coding: utf-8
from __future__ import unicode_literals

import json
import re
import calendar
import datetime

from .common import InfoExtractor
from ..utils import (
    HEADRequest,
    unified_strdate,
    ExtractorError,
)


class ORFTVthekIE(InfoExtractor):
    IE_NAME = 'orf:tvthek'
    IE_DESC = 'ORF TVthek'
    _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)'

    _TEST = {
        'url': 'http://tvthek.orf.at/program/matinee-Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7317210/Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7319746/Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7319747',
        'file': '7319747.mp4',
        'md5': 'bd803c5d8c32d3c64a0ea4b4eeddf375',
        'info_dict': {
            'title': 'Was Sie schon immer über Klassik wissen wollten',
            'description': 'md5:0ddf0d5f0060bd53f744edaa5c2e04a4',
            'duration': 3508,
            'upload_date': '20140105',
        },
        'skip': 'Blocked outside of Austria',
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')
        webpage = self._download_webpage(url, playlist_id)

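        # The page initializes its Adworx ad/tracking code with a JSON payload
        # that also carries the segment metadata we need, so scrape that blob.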
        data_json = self._search_regex(
            r'initializeAdworx\((.+?)\);\n', webpage, 'video info')
        all_data = json.loads(data_json)

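        # The Adworx payload is a list of tracker entries; the episode detail
        # tracker holds the list of video segments for this program.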
        def get_segments(all_data):
            for data in all_data:
                if data['name'] == 'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM':
                    return data['values']['segments']

        sdata = get_segments(all_data)
        if not sdata:
            raise ExtractorError('Unable to extract segments')

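        # Rank formats by the number embedded in the quality label; the exact
        # labels are defined by the API, so fall back to -1 when no digit is found.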
        def quality_to_int(s):
            m = re.search('([0-9]+)', s)
            if m is None:
                return -1
            return int(m.group(1))

        entries = []
        for sd in sdata:
            video_id = sd['id']
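            # Build one format per source; HLS delivery is given a lower
            # preference so progressive downloads win when both are offered.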
            formats = [{
                'preference': -10 if fd['delivery'] == 'hls' else None,
                'format_id': '%s-%s-%s' % (
                    fd['delivery'], fd['quality'], fd['quality_string']),
                'url': fd['src'],
                'protocol': fd['protocol'],
                'quality': quality_to_int(fd['quality']),
            } for fd in sd['playlist_item_array']['sources']]

            # Check for geoblocking.
            # There is a property is_geoprotection, but that's always false
            geo_str = sd.get('geoprotection_string')
            if geo_str:
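                # Probe one progressive MP4 URL with a HEAD request so that a
                # geoblocked video produces an early, non-fatal warning.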
                try:
                    http_url = next(
                        f['url']
                        for f in formats
                        if re.match(r'^https?://.*\.mp4$', f['url']))
                except StopIteration:
                    pass
                else:
                    req = HEADRequest(http_url)
                    self._request_webpage(
                        req, video_id,
                        note='Testing for geoblocking',
                        errnote=((
                            'This video seems to be blocked outside of %s. '
                            'You may want to try the streaming-* formats.')
                            % geo_str),
                        fatal=False)

            self._sort_formats(formats)

            upload_date = unified_strdate(sd['created_date'])
            entries.append({
                '_type': 'video',
                'id': video_id,
                'title': sd['header'],
                'formats': formats,
                'description': sd.get('description'),
                'duration': int(sd['duration_in_seconds']),
                'upload_date': upload_date,
                'thumbnail': sd.get('image_full_url'),
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'id': playlist_id,
        }


# Audios on ORF radio are only available for 7 days, so we can't add tests.
class ORFOE1IE(InfoExtractor):
    IE_NAME = 'orf:oe1'
    IE_DESC = 'Radio Österreich 1'
    _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        show_id = mobj.group('id')
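        # The 'konsole' endpoint returns the show's metadata, including the
        # stream URL, as JSON.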
        data = self._download_json(
            'http://oe1.orf.at/programm/%s/konsole' % show_id,
            show_id
        )

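        # The API only exposes a day label (DD.MM.YYYY) and a time (HH:MM);
        # combine them into a Unix timestamp (the naive datetime is treated
        # as UTC here).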
        timestamp = datetime.datetime.strptime('%s %s' % (
            data['item']['day_label'],
            data['item']['time']
        ), '%d.%m.%Y %H:%M')
        unix_timestamp = calendar.timegm(timestamp.utctimetuple())

        return {
            'id': show_id,
            'title': data['item']['title'],
            'url': data['item']['url_stream'],
            'ext': 'mp3',
            'description': data['item'].get('info'),
            'timestamp': unix_timestamp
        }


class ORFFM4IE(InfoExtractor):
    IE_NAME = 'orf:fm4'
    IE_DESC = 'radio FM4'
    _VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P<date>[0-9]+)/(?P<show>\w+)'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        show_date = mobj.group('date')
        show_id = mobj.group('show')

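        # The 7tage API is addressed by broadcast date plus a show code; note
        # the literal '4' prepended to the show id taken from the URL fragment
        # (presumably FM4's channel prefix in the API).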
        data = self._download_json(
            'http://audioapi.orf.at/fm4/json/2.0/broadcasts/%s/4%s' % (show_date, show_id),
            show_id
        )

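        # A broadcast may be split into several loopstream parts; emit one
        # entry per part, reusing the broadcast's title and subtitle.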
        def extract_entry_dict(info, title, subtitle):
            return {
                'id': info['loopStreamId'].replace('.mp3', ''),
                'url': 'http://loopstream01.apa.at/?channel=fm4&id=%s' % info['loopStreamId'],
                'title': title,
                'description': subtitle,
                'duration': (info['end'] - info['start']) / 1000,
                'timestamp': info['start'] / 1000,
                'ext': 'mp3'
            }

        entries = [extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']]

        return {
            '_type': 'playlist',
            'id': show_id,
            'title': data['title'],
            'description': data['subtitle'],
            'entries': entries
        }