You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

181 lines
5.9 KiB

10 years ago
10 years ago
10 years ago
10 years ago
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import json
  4. import re
  5. import calendar
  6. import datetime
  7. from .common import InfoExtractor
  8. from ..utils import (
  9. HEADRequest,
  10. unified_strdate,
  11. ExtractorError,
  12. )
  class ORFTVthekIE(InfoExtractor):
      """Playlist extractor for tvthek.orf.at pages.

      The page embeds a JSON payload inside an ``initializeAdworx(...)`` call;
      each segment listed there becomes one video entry of the playlist.
      """

      IE_NAME = 'orf:tvthek'
      IE_DESC = 'ORF TVthek'
      _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)'

      _TEST = {
          'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389',
          'playlist': [{
              'md5': '2942210346ed779588f428a92db88712',
              'info_dict': {
                  'id': '8896777',
                  'ext': 'mp4',
                  'title': 'Aufgetischt: Mit der Steirischen Tafelrunde',
                  'description': 'md5:c1272f0245537812d4e36419c207b67d',
                  'duration': 2668,
                  'upload_date': '20141208',
              },
          }],
          'skip': 'Blocked outside of Austria',
      }

      def _real_extract(self, url):
          """Build a playlist dict with one video entry per page segment.

          Raises ExtractorError when the embedded JSON carries no segments.
          """
          playlist_id = self._match_id(url)
          page = self._download_webpage(url, playlist_id)

          trackers = json.loads(self._search_regex(
              r'initializeAdworx\((.+?)\);\n', page, 'video info'))

          # The segments hang off the first tracker record with this name;
          # None is used when no such record exists.
          segments = next(
              (tracker['values']['segments']
               for tracker in trackers
               if tracker['name'] == 'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM'),
              None)
          if not segments:
              raise ExtractorError('Unable to extract segments')

          def quality_to_int(quality):
              # First run of digits in the quality label, or -1 without any
              digits = re.search('([0-9]+)', quality)
              return -1 if digits is None else int(digits.group(1))

          entries = []
          for segment in segments:
              video_id = segment['id']

              formats = []
              for source in segment['playlist_item_array']['sources']:
                  formats.append({
                      'preference': -10 if source['delivery'] == 'hls' else None,
                      'format_id': '%s-%s-%s' % (
                          source['delivery'],
                          source['quality'],
                          source['quality_string']),
                      'url': source['src'],
                      'protocol': source['protocol'],
                      'quality': quality_to_int(source['quality']),
                  })

              # Check for geoblocking.
              # There is a property is_geoprotection, but that's always false
              geo_str = segment.get('geoprotection_string')
              if geo_str:
                  # Probe the first plain HTTP(S) .mp4 format, if there is one
                  mp4_url = next(
                      (fmt['url']
                       for fmt in formats
                       if re.match(r'^https?://.*\.mp4$', fmt['url'])),
                      None)
                  if mp4_url is not None:
                      # Non-fatal HEAD request: a failure only reports errnote
                      self._request_webpage(
                          HEADRequest(mp4_url), video_id,
                          note='Testing for geoblocking',
                          errnote=((
                              'This video seems to be blocked outside of %s. '
                              'You may want to try the streaming-* formats.')
                              % geo_str),
                          fatal=False)

              self._sort_formats(formats)

              upload_date = unified_strdate(segment['created_date'])
              entries.append({
                  '_type': 'video',
                  'id': video_id,
                  'title': segment['header'],
                  'formats': formats,
                  'description': segment.get('description'),
                  'duration': int(segment['duration_in_seconds']),
                  'upload_date': upload_date,
                  'thumbnail': segment.get('image_full_url'),
              })

          return {
              '_type': 'playlist',
              'entries': entries,
              'id': playlist_id,
          }
  99. # Audios on ORF radio are only available for 7 days, so we can't add tests.
  class ORFOE1IE(InfoExtractor):
      """Extractor for a single programme page on oe1.orf.at."""

      IE_NAME = 'orf:oe1'
      IE_DESC = 'Radio Österreich 1'
      _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'

      def _real_extract(self, url):
          """Return the info dict for the programme identified by the URL.

          Metadata comes from the programme's ``konsole`` JSON document.
          """
          show_id = self._match_id(url)
          console_url = 'http://oe1.orf.at/programm/%s/konsole' % show_id
          item = self._download_json(console_url, show_id)['item']

          # day_label and time together form e.g. "31.12.2014 18:30"
          broadcast_label = '%s %s' % (item['day_label'], item['time'])
          broadcast_time = datetime.datetime.strptime(
              broadcast_label, '%d.%m.%Y %H:%M')
          # The parsed naive datetime is converted as if it were UTC

          return {
              'id': show_id,
              'title': item['title'],
              'url': item['url_stream'],
              'ext': 'mp3',
              'description': item.get('info'),
              'timestamp': calendar.timegm(broadcast_time.utctimetuple()),
          }
  class ORFFM4IE(InfoExtractor):
      """Playlist extractor for FM4 "7 Tage" broadcast URLs.

      Each stream listed for the broadcast becomes one mp3 playlist entry.
      """

      # This was a second IE_DESC assignment that the next line overwrote;
      # the sibling extractors pair IE_NAME = 'orf:...' with IE_DESC.
      IE_NAME = 'orf:fm4'
      IE_DESC = 'radio FM4'
      _VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P<date>[0-9]+)/(?P<show>\w+)'

      def _real_extract(self, url):
          """Return a playlist dict for the broadcast named in the URL.

          The ``date`` and ``show`` groups of ``_VALID_URL`` select the
          broadcast to fetch from the audioapi.orf.at JSON endpoint.
          """
          mobj = re.match(self._VALID_URL, url)
          show_date = mobj.group('date')
          show_id = mobj.group('show')

          # The show id is prefixed with a literal '4' in the API path
          data = self._download_json(
              'http://audioapi.orf.at/fm4/json/2.0/broadcasts/%s/4%s' % (show_date, show_id),
              show_id
          )

          def extract_entry_dict(info, title, subtitle):
              """Build one playlist entry from a stream record."""
              return {
                  'id': info['loopStreamId'].replace('.mp3', ''),
                  'url': 'http://loopstream01.apa.at/?channel=fm4&id=%s' % info['loopStreamId'],
                  'title': title,
                  'description': subtitle,
                  # NOTE(review): start/end look like millisecond values,
                  # hence the division by 1000 - confirm against the API
                  'duration': (info['end'] - info['start']) / 1000,
                  'timestamp': info['start'] / 1000,
                  'ext': 'mp3'
              }

          # Every entry shares the broadcast's title and subtitle
          entries = [extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']]

          return {
              '_type': 'playlist',
              'id': show_id,
              'title': data['title'],
              'description': data['subtitle'],
              'entries': entries
          }