You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

98 lines
3.2 KiB

  1. # encoding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. import json
  5. from .common import InfoExtractor
  6. from ..utils import (
  7. ExtractorError,
  8. clean_html,
  9. unified_strdate,
  10. int_or_none,
  11. )
  12. class RTL2IE(InfoExtractor):
  13. """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW"""
  14. _VALID_URL = r'http?://(?P<url>(?P<domain>(www\.)?rtl2\.de)/.*/(?P<video_id>.*))'
  15. _TEST = {
  16. 'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0',
  17. 'md5': 'dsadasdada',
  18. 'info_dict': {
  19. 'id': 'folge-203-0',
  20. 'ext': 'f4v',
  21. 'title': 'GRIP sucht den Sommerk\xf6nig',
  22. 'description' : 'Matthias, Det und Helge treten gegeneinander an.'
  23. # TODO more properties, either as:
  24. # * A value
  25. # * MD5 checksum; start the string with md5:
  26. # * A regular expression; start the string with re:
  27. # * Any Python type (for example int or float)
  28. },
  29. #'params': {
  30. # rtmp download
  31. # 'skip_download': True,
  32. #},
  33. }
  34. def _real_extract(self, url):
  35. mobj = re.match(self._VALID_URL, url)
  36. video_page_url = 'http://%s/' % mobj.group('domain')
  37. video_id = mobj.group('video_id')
  38. webpage = self._download_webpage('http://' + mobj.group('url'), video_id)
  39. vico_id = self._html_search_regex(r'vico_id: ([0-9]+)', webpage, '%s');
  40. vivi_id = self._html_search_regex(r'vivi_id: ([0-9]+)', webpage, '%s');
  41. info_url = 'http://www.rtl2.de/video/php/get_video.php?vico_id=' + vico_id + '&vivi_id=' + vivi_id
  42. webpage = self._download_webpage(info_url, '')
  43. video_info = json.loads(webpage.decode("latin1"))
  44. print video_info
  45. #self._download_webpage('http://cp108781.edgefcs.net/crossdomain.xml', '')
  46. download_url = video_info["video"]["streamurl"] # self._html_search_regex(r'streamurl\":\"(.*?)\"', webpage, '%s');
  47. title = video_info["video"]["titel"] # self._html_search_regex(r'titel\":\"(.*?)\"', webpage, '%s');
  48. description = video_info["video"]["beschreibung"] # self._html_search_regex(r'beschreibung\":\"(.*?)\"', webpage, '%s');
  49. #ext = self._html_search_regex(r'streamurl\":\".*?(\..{2,4})\"', webpage, '%s');
  50. thumbnail = video_info["video"]["image"]
  51. download_url = download_url.replace("\\", "")
  52. stream_url = 'mp4:' + self._html_search_regex(r'ondemand/(.*)', download_url, '%s');
  53. #upload_date = self._html_search_regex(r'property=\"dc:date\".*?datatype=\"xsd:dateTime\".*?content=\"(.*?)\"', webpage, 'title')
  54. #download_url += " -y " + stream_url
  55. #print stream_url
  56. #print download_url
  57. #print description
  58. #print title
  59. #print ext
  60. formats = []
  61. fmt = {
  62. 'url' : download_url,
  63. #'app': 'ondemand?_fcs_vhost=cp108781.edgefcs.net',
  64. 'play_path': stream_url,
  65. #'player_url': 'http://www.cbsnews.com/[[IMPORT]]/vidtech.cbsinteractive.com/player/3_3_0/CBSI_PLAYER_HD.swf',
  66. #'page_url': 'http://www.cbsnews.com',
  67. #'ext': ext,
  68. }
  69. formats.append(fmt)
  70. return {
  71. 'id': video_id,
  72. 'title': title,
  73. 'thumbnail' : thumbnail,
  74. 'description' : description,
  75. 'formats': formats,
  76. }