You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

199 lines
6.8 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import json
  4. import re
  5. import calendar
  6. import datetime
  7. from .common import InfoExtractor
  8. from ..utils import (
  9. HEADRequest,
  10. unified_strdate,
  11. ExtractorError,
  12. )
  13. class ORFTVthekIE(InfoExtractor):
  14. IE_NAME = 'orf:tvthek'
  15. IE_DESC = 'ORF TVthek'
  16. _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics?/.+?|program/[^/]+)/(?P<id>\d+)'
  17. _TESTS = [{
  18. 'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389',
  19. 'playlist': [{
  20. 'md5': '2942210346ed779588f428a92db88712',
  21. 'info_dict': {
  22. 'id': '8896777',
  23. 'ext': 'mp4',
  24. 'title': 'Aufgetischt: Mit der Steirischen Tafelrunde',
  25. 'description': 'md5:c1272f0245537812d4e36419c207b67d',
  26. 'duration': 2668,
  27. 'upload_date': '20141208',
  28. },
  29. }],
  30. 'skip': 'Blocked outside of Austria / Germany',
  31. }, {
  32. 'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256',
  33. 'playlist': [{
  34. 'md5': '68f543909aea49d621dfc7703a11cfaf',
  35. 'info_dict': {
  36. 'id': '7982259',
  37. 'ext': 'mp4',
  38. 'title': 'Best of Ingrid Thurnher',
  39. 'upload_date': '20140527',
  40. 'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".',
  41. }
  42. }],
  43. '_skip': 'Blocked outside of Austria / Germany',
  44. }]
  45. def _real_extract(self, url):
  46. playlist_id = self._match_id(url)
  47. webpage = self._download_webpage(url, playlist_id)
  48. data_json = self._search_regex(
  49. r'initializeAdworx\((.+?)\);\n', webpage, 'video info')
  50. all_data = json.loads(data_json)
  51. def get_segments(all_data):
  52. for data in all_data:
  53. if data['name'] in (
  54. 'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM',
  55. 'Tracker::EPISODE_DETAIL_PAGE_OVER_TOPIC'):
  56. return data['values']['segments']
  57. sdata = get_segments(all_data)
  58. if not sdata:
  59. raise ExtractorError('Unable to extract segments')
  60. def quality_to_int(s):
  61. m = re.search('([0-9]+)', s)
  62. if m is None:
  63. return -1
  64. return int(m.group(1))
  65. entries = []
  66. for sd in sdata:
  67. video_id = sd['id']
  68. formats = [{
  69. 'preference': -10 if fd['delivery'] == 'hls' else None,
  70. 'format_id': '%s-%s-%s' % (
  71. fd['delivery'], fd['quality'], fd['quality_string']),
  72. 'url': fd['src'],
  73. 'protocol': fd['protocol'],
  74. 'quality': quality_to_int(fd['quality']),
  75. } for fd in sd['playlist_item_array']['sources']]
  76. # Check for geoblocking.
  77. # There is a property is_geoprotection, but that's always false
  78. geo_str = sd.get('geoprotection_string')
  79. if geo_str:
  80. try:
  81. http_url = next(
  82. f['url']
  83. for f in formats
  84. if re.match(r'^https?://.*\.mp4$', f['url']))
  85. except StopIteration:
  86. pass
  87. else:
  88. req = HEADRequest(http_url)
  89. self._request_webpage(
  90. req, video_id,
  91. note='Testing for geoblocking',
  92. errnote=((
  93. 'This video seems to be blocked outside of %s. '
  94. 'You may want to try the streaming-* formats.')
  95. % geo_str),
  96. fatal=False)
  97. self._sort_formats(formats)
  98. upload_date = unified_strdate(sd['created_date'])
  99. entries.append({
  100. '_type': 'video',
  101. 'id': video_id,
  102. 'title': sd['header'],
  103. 'formats': formats,
  104. 'description': sd.get('description'),
  105. 'duration': int(sd['duration_in_seconds']),
  106. 'upload_date': upload_date,
  107. 'thumbnail': sd.get('image_full_url'),
  108. })
  109. return {
  110. '_type': 'playlist',
  111. 'entries': entries,
  112. 'id': playlist_id,
  113. }
  114. class ORFOE1IE(InfoExtractor):
  115. IE_NAME = 'orf:oe1'
  116. IE_DESC = 'Radio Österreich 1'
  117. _VALID_URL = r'http://oe1\.orf\.at/(?:programm/|konsole.*?#\?track_id=)(?P<id>[0-9]+)'
  118. # Audios on ORF radio are only available for 7 days, so we can't add tests.
  119. _TEST = {
  120. 'url': 'http://oe1.orf.at/konsole?show=on_demand#?track_id=394211',
  121. 'only_matching': True,
  122. }
  123. def _real_extract(self, url):
  124. show_id = self._match_id(url)
  125. data = self._download_json(
  126. 'http://oe1.orf.at/programm/%s/konsole' % show_id,
  127. show_id
  128. )
  129. timestamp = datetime.datetime.strptime('%s %s' % (
  130. data['item']['day_label'],
  131. data['item']['time']
  132. ), '%d.%m.%Y %H:%M')
  133. unix_timestamp = calendar.timegm(timestamp.utctimetuple())
  134. return {
  135. 'id': show_id,
  136. 'title': data['item']['title'],
  137. 'url': data['item']['url_stream'],
  138. 'ext': 'mp3',
  139. 'description': data['item'].get('info'),
  140. 'timestamp': unix_timestamp
  141. }
  142. class ORFFM4IE(InfoExtractor):
  143. IE_NAME = 'orf:fm4'
  144. IE_DESC = 'radio FM4'
  145. _VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P<date>[0-9]+)/(?P<show>\w+)'
  146. def _real_extract(self, url):
  147. mobj = re.match(self._VALID_URL, url)
  148. show_date = mobj.group('date')
  149. show_id = mobj.group('show')
  150. data = self._download_json(
  151. 'http://audioapi.orf.at/fm4/json/2.0/broadcasts/%s/4%s' % (show_date, show_id),
  152. show_id
  153. )
  154. def extract_entry_dict(info, title, subtitle):
  155. return {
  156. 'id': info['loopStreamId'].replace('.mp3', ''),
  157. 'url': 'http://loopstream01.apa.at/?channel=fm4&id=%s' % info['loopStreamId'],
  158. 'title': title,
  159. 'description': subtitle,
  160. 'duration': (info['end'] - info['start']) / 1000,
  161. 'timestamp': info['start'] / 1000,
  162. 'ext': 'mp3'
  163. }
  164. entries = [extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']]
  165. return {
  166. '_type': 'playlist',
  167. 'id': show_id,
  168. 'title': data['title'],
  169. 'description': data['subtitle'],
  170. 'entries': entries
  171. }