You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

88 lines
3.0 KiB

  1. # -*- coding: utf-8 -*-
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. ExtractorError,
  6. unified_strdate,
  7. int_or_none,
  8. )
  9. class Puls4IE(InfoExtractor):
  10. _VALID_URL = r'https?://(?:www\.)?puls4\.com/video/[^/]+/play/(?P<id>[0-9]+)'
  11. _TESTS = [{
  12. 'url': 'http://www.puls4.com/video/pro-und-contra/play/2716816',
  13. 'md5': '49f6a6629747eeec43cef6a46b5df81d',
  14. 'info_dict': {
  15. 'id': '2716816',
  16. 'ext': 'mp4',
  17. 'title': 'Pro und Contra vom 23.02.2015',
  18. 'description': 'md5:293e44634d9477a67122489994675db6',
  19. 'duration': 2989,
  20. 'upload_date': '20150224',
  21. 'uploader': 'PULS_4',
  22. },
  23. 'skip': 'Only works from Germany',
  24. }, {
  25. 'url': 'http://www.puls4.com/video/kult-spielfilme/play/1298106',
  26. 'md5': '6a48316c8903ece8dab9b9a7bf7a59ec',
  27. 'info_dict': {
  28. 'id': '1298106',
  29. 'ext': 'mp4',
  30. 'title': 'Lucky Fritz',
  31. },
  32. 'skip': 'Only works from Germany',
  33. }]
  34. def _real_extract(self, url):
  35. video_id = self._match_id(url)
  36. webpage = self._download_webpage(url, video_id)
  37. error_message = self._html_search_regex(
  38. r'<div[^>]+class="message-error"[^>]*>(.+?)</div>',
  39. webpage, 'error message', default=None)
  40. if error_message:
  41. raise ExtractorError(
  42. '%s returned error: %s' % (self.IE_NAME, error_message), expected=True)
  43. real_url = self._html_search_regex(
  44. r'\"fsk-button\".+?href=\"([^"]+)',
  45. webpage, 'fsk_button', default=None)
  46. if real_url:
  47. webpage = self._download_webpage(real_url, video_id)
  48. player = self._search_regex(
  49. r'p4_video_player(?:_iframe)?\("video_\d+_container"\s*,(.+?)\);\s*\}',
  50. webpage, 'player')
  51. player_json = self._parse_json(
  52. '[%s]' % player, video_id,
  53. transform_source=lambda s: s.replace('undefined,', ''))
  54. formats = None
  55. result = None
  56. for v in player_json:
  57. if isinstance(v, list) and not formats:
  58. formats = [{
  59. 'url': f['url'],
  60. 'format': 'hd' if f.get('hd') else 'sd',
  61. 'width': int_or_none(f.get('size_x')),
  62. 'height': int_or_none(f.get('size_y')),
  63. 'tbr': int_or_none(f.get('bitrate')),
  64. } for f in v]
  65. self._sort_formats(formats)
  66. elif isinstance(v, dict) and not result:
  67. result = {
  68. 'id': video_id,
  69. 'title': v['videopartname'].strip(),
  70. 'description': v.get('videotitle'),
  71. 'duration': int_or_none(v.get('videoduration') or v.get('episodeduration')),
  72. 'upload_date': unified_strdate(v.get('clipreleasetime')),
  73. 'uploader': v.get('channel'),
  74. }
  75. result['formats'] = formats
  76. return result