You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

125 lines
4.4 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import compat_str
  6. from ..utils import (
  7. orderedSet,
  8. parse_duration,
  9. try_get,
  10. )
  11. class MarkizaIE(InfoExtractor):
  12. _VALID_URL = r'https?://(?:www\.)?videoarchiv\.markiza\.sk/(?:video/(?:[^/]+/)*|embed/)(?P<id>\d+)(?:[_/]|$)'
  13. _TESTS = [{
  14. 'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723_oteckovia-109',
  15. 'md5': 'ada4e9fad038abeed971843aa028c7b0',
  16. 'info_dict': {
  17. 'id': '139078',
  18. 'ext': 'mp4',
  19. 'title': 'Oteckovia 109',
  20. 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
  21. 'thumbnail': r're:^https?://.*\.jpg$',
  22. 'duration': 2760,
  23. },
  24. }, {
  25. 'url': 'http://videoarchiv.markiza.sk/video/televizne-noviny/televizne-noviny/85430_televizne-noviny',
  26. 'info_dict': {
  27. 'id': '85430',
  28. 'title': 'Televízne noviny',
  29. },
  30. 'playlist_count': 23,
  31. }, {
  32. 'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723',
  33. 'only_matching': True,
  34. }, {
  35. 'url': 'http://videoarchiv.markiza.sk/video/84723',
  36. 'only_matching': True,
  37. }, {
  38. 'url': 'http://videoarchiv.markiza.sk/video/filmy/85190_kamenak',
  39. 'only_matching': True,
  40. }, {
  41. 'url': 'http://videoarchiv.markiza.sk/video/reflex/zo-zakulisia/84651_pribeh-alzbetky',
  42. 'only_matching': True,
  43. }, {
  44. 'url': 'http://videoarchiv.markiza.sk/embed/85295',
  45. 'only_matching': True,
  46. }]
  47. def _real_extract(self, url):
  48. video_id = self._match_id(url)
  49. data = self._download_json(
  50. 'http://videoarchiv.markiza.sk/json/video_jwplayer7.json',
  51. video_id, query={'id': video_id})
  52. info = self._parse_jwplayer_data(data, m3u8_id='hls', mpd_id='dash')
  53. if info.get('_type') == 'playlist':
  54. info.update({
  55. 'id': video_id,
  56. 'title': try_get(
  57. data, lambda x: x['details']['name'], compat_str),
  58. })
  59. else:
  60. info['duration'] = parse_duration(
  61. try_get(data, lambda x: x['details']['duration'], compat_str))
  62. return info
  63. class MarkizaPageIE(InfoExtractor):
  64. _VALID_URL = r'https?://(?:www\.)?(?:(?:[^/]+\.)?markiza|tvnoviny)\.sk/(?:[^/]+/)*(?P<id>\d+)_'
  65. _TESTS = [{
  66. 'url': 'http://www.markiza.sk/soubiz/zahranicny/1923705_oteckovia-maju-svoj-den-ti-slavni-nie-su-o-nic-menej-rozkosni',
  67. 'md5': 'ada4e9fad038abeed971843aa028c7b0',
  68. 'info_dict': {
  69. 'id': '139355',
  70. 'ext': 'mp4',
  71. 'title': 'Oteckovia 110',
  72. 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
  73. 'thumbnail': r're:^https?://.*\.jpg$',
  74. 'duration': 2604,
  75. },
  76. 'params': {
  77. 'skip_download': True,
  78. },
  79. }, {
  80. 'url': 'http://dajto.markiza.sk/filmy-a-serialy/1774695_frajeri-vo-vegas',
  81. 'only_matching': True,
  82. }, {
  83. 'url': 'http://superstar.markiza.sk/aktualne/1923870_to-je-ale-telo-spevacka-ukazala-sexy-postavicku-v-bikinach',
  84. 'only_matching': True,
  85. }, {
  86. 'url': 'http://hybsa.markiza.sk/aktualne/1923790_uzasna-atmosfera-na-hybsa-v-poprade-superstaristi-si-prve-koncerty-pred-davom-ludi-poriadne-uzili',
  87. 'only_matching': True,
  88. }, {
  89. 'url': 'http://doma.markiza.sk/filmy/1885250_moja-vysnivana-svadba',
  90. 'only_matching': True,
  91. }, {
  92. 'url': 'http://www.tvnoviny.sk/domace/1923887_po-smrti-manzela-ju-cakalo-poriadne-prekvapenie',
  93. 'only_matching': True,
  94. }]
  95. @classmethod
  96. def suitable(cls, url):
  97. return False if MarkizaIE.suitable(url) else super(MarkizaPageIE, cls).suitable(url)
  98. def _real_extract(self, url):
  99. playlist_id = self._match_id(url)
  100. webpage = self._download_webpage(
  101. # Downloading for some hosts (e.g. dajto, doma) fails with 500
  102. # although everything seems to be OK, so considering 500
  103. # status code to be expected.
  104. url, playlist_id, expected_status=500)
  105. entries = [
  106. self.url_result('http://videoarchiv.markiza.sk/video/%s' % video_id)
  107. for video_id in orderedSet(re.findall(
  108. r'(?:initPlayer_|data-entity=["\']|id=["\']player_)(\d+)',
  109. webpage))]
  110. return self.playlist_result(entries, playlist_id)