You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

98 lines
3.4 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import json
  4. import re
  5. from .common import InfoExtractor
  6. from ..utils import (
  7. HEADRequest,
  8. unified_strdate,
  9. )
  10. class ORFIE(InfoExtractor):
  11. _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)'
  12. _TEST = {
  13. 'url': 'http://tvthek.orf.at/program/matinee-Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7317210/Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7319746/Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7319747',
  14. 'file': '7319747.mp4',
  15. 'md5': 'bd803c5d8c32d3c64a0ea4b4eeddf375',
  16. 'info_dict': {
  17. 'title': 'Was Sie schon immer über Klassik wissen wollten',
  18. 'description': 'md5:0ddf0d5f0060bd53f744edaa5c2e04a4',
  19. 'duration': 3508,
  20. 'upload_date': '20140105',
  21. },
  22. 'skip': 'Blocked outside of Austria',
  23. }
  24. def _real_extract(self, url):
  25. mobj = re.match(self._VALID_URL, url)
  26. playlist_id = mobj.group('id')
  27. webpage = self._download_webpage(url, playlist_id)
  28. data_json = self._search_regex(
  29. r'initializeAdworx\((.+?)\);\n', webpage, 'video info')
  30. all_data = json.loads(data_json)
  31. sdata = all_data[0]['values']['segments']
  32. def quality_to_int(s):
  33. m = re.search('([0-9]+)', s)
  34. if m is None:
  35. return -1
  36. return int(m.group(1))
  37. entries = []
  38. for sd in sdata:
  39. video_id = sd['id']
  40. formats = [{
  41. 'preference': -10 if fd['delivery'] == 'hls' else None,
  42. 'format_id': '%s-%s-%s' % (
  43. fd['delivery'], fd['quality'], fd['quality_string']),
  44. 'url': fd['src'],
  45. 'protocol': fd['protocol'],
  46. 'quality': quality_to_int(fd['quality']),
  47. } for fd in sd['playlist_item_array']['sources']]
  48. # Check for geoblocking.
  49. # There is a property is_geoprotection, but that's always false
  50. geo_str = sd.get('geoprotection_string')
  51. if geo_str:
  52. try:
  53. http_url = next(
  54. f['url']
  55. for f in formats
  56. if re.match(r'^https?://.*\.mp4$', f['url']))
  57. except StopIteration:
  58. pass
  59. else:
  60. req = HEADRequest(http_url)
  61. self._request_webpage(
  62. req, video_id,
  63. note='Testing for geoblocking',
  64. errnote=((
  65. 'This video seems to be blocked outside of %s. '
  66. 'You may want to try the streaming-* formats.')
  67. % geo_str),
  68. fatal=False)
  69. self._sort_formats(formats)
  70. upload_date = unified_strdate(sd['created_date'])
  71. entries.append({
  72. '_type': 'video',
  73. 'id': video_id,
  74. 'title': sd['header'],
  75. 'formats': formats,
  76. 'description': sd.get('description'),
  77. 'duration': int(sd['duration_in_seconds']),
  78. 'upload_date': upload_date,
  79. 'thumbnail': sd.get('image_full_url'),
  80. })
  81. return {
  82. '_type': 'playlist',
  83. 'entries': entries,
  84. 'id': playlist_id,
  85. }