# coding: utf-8
from __future__ import unicode_literals

import json
import re

from .common import InfoExtractor
from ..utils import (
    HEADRequest,
    unified_strdate,
    ExtractorError,
)


class ORFIE(InfoExtractor):
    _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)'

    _TEST = {
        'url': 'http://tvthek.orf.at/program/matinee-Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7317210/Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7319746/Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7319747',
        'file': '7319747.mp4',
        'md5': 'bd803c5d8c32d3c64a0ea4b4eeddf375',
        'info_dict': {
            'title': 'Was Sie schon immer über Klassik wissen wollten',
            'description': 'md5:0ddf0d5f0060bd53f744edaa5c2e04a4',
            'duration': 3508,
            'upload_date': '20140105',
        },
        'skip': 'Blocked outside of Austria',
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')
        webpage = self._download_webpage(url, playlist_id)
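
        # The video metadata is serialized into the page as the JSON argument
        # of an initializeAdworx(...) call; grab that blob and parse it.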
        data_json = self._search_regex(
            r'initializeAdworx\((.+?)\);\n', webpage, 'video info')
        all_data = json.loads(data_json)
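
        # The parsed data is a list of tracker entries; the segment list lives
        # under the EPISODE_DETAIL_PAGE_OVER_PROGRAM entry.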
        def get_segments(all_data):
            for data in all_data:
                if data['name'] == 'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM':
                    return data['values']['segments']

        sdata = get_segments(all_data)
        if not sdata:
            raise ExtractorError('Unable to extract segments')
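
        # Reduce a quality string to its first run of digits so formats can be
        # ranked numerically; -1 if it contains no digits.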
        def quality_to_int(s):
            m = re.search('([0-9]+)', s)
            if m is None:
                return -1
            return int(m.group(1))

        entries = []
        for sd in sdata:
            video_id = sd['id']
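            # Build one format per source of this segment; HLS delivery is
            # deprioritized relative to the other delivery methods.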
            formats = [{
                'preference': -10 if fd['delivery'] == 'hls' else None,
                'format_id': '%s-%s-%s' % (
                    fd['delivery'], fd['quality'], fd['quality_string']),
                'url': fd['src'],
                'protocol': fd['protocol'],
                'quality': quality_to_int(fd['quality']),
            } for fd in sd['playlist_item_array']['sources']]

            # Check for geoblocking.
            # There is a property is_geoprotection, but that's always false
            geo_str = sd.get('geoprotection_string')
            if geo_str:
                try:
                    http_url = next(
                        f['url']
                        for f in formats
                        if re.match(r'^https?://.*\.mp4$', f['url']))
                except StopIteration:
                    pass
                else:
                    req = HEADRequest(http_url)
                    self._request_webpage(
                        req, video_id,
                        note='Testing for geoblocking',
                        errnote=((
                            'This video seems to be blocked outside of %s. '
                            'You may want to try the streaming-* formats.')
                            % geo_str),
                        fatal=False)

            self._sort_formats(formats)
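
            # created_date is a textual timestamp; unified_strdate() normalizes
            # it to the YYYYMMDD form used for upload_date.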
            upload_date = unified_strdate(sd['created_date'])
            entries.append({
                '_type': 'video',
                'id': video_id,
                'title': sd['header'],
                'formats': formats,
                'description': sd.get('description'),
                'duration': int(sd['duration_in_seconds']),
                'upload_date': upload_date,
                'thumbnail': sd.get('image_full_url'),
            })
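
        # A page bundles several segments, so expose them as a playlist of
        # individual video entries.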
        return {
            '_type': 'playlist',
            'entries': entries,
            'id': playlist_id,
        }