You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

93 lines
3.5 KiB

10 years ago
  1. # encoding: utf-8
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from ..compat import compat_urlparse
  5. from ..utils import (
  6. int_or_none,
  7. qualities,
  8. unified_strdate,
  9. )
  10. class FirstTVIE(InfoExtractor):
  11. IE_NAME = '1tv'
  12. IE_DESC = 'Первый канал'
  13. _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>[^/?#]+)'
  14. _TESTS = [{
  15. # single format
  16. 'url': 'http://www.1tv.ru/shows/naedine-so-vsemi/vypuski/gost-lyudmila-senchina-naedine-so-vsemi-vypusk-ot-12-02-2015',
  17. 'md5': 'a1b6b60d530ebcf8daacf4565762bbaf',
  18. 'info_dict': {
  19. 'id': '40049',
  20. 'ext': 'mp4',
  21. 'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015',
  22. 'description': 'md5:36a39c1d19618fec57d12efe212a8370',
  23. 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
  24. 'upload_date': '20150212',
  25. 'duration': 2694,
  26. },
  27. }, {
  28. # multiple formats
  29. 'url': 'http://www.1tv.ru/shows/dobroe-utro/pro-zdorove/vesennyaya-allergiya-dobroe-utro-fragment-vypuska-ot-07042016',
  30. 'info_dict': {
  31. 'id': '364746',
  32. 'ext': 'mp4',
  33. 'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016',
  34. 'description': 'md5:a242eea0031fd180a4497d52640a9572',
  35. 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
  36. 'upload_date': '20160407',
  37. 'duration': 179,
  38. 'formats': 'mincount:3',
  39. },
  40. 'params': {
  41. 'skip_download': True,
  42. },
  43. }]
  44. def _real_extract(self, url):
  45. display_id = self._match_id(url)
  46. webpage = self._download_webpage(url, display_id)
  47. playlist_url = compat_urlparse.urljoin(url, self._search_regex(
  48. r'data-playlist-url="([^"]+)', webpage, 'playlist url'))
  49. item = self._download_json(playlist_url, display_id)[0]
  50. video_id = item['id']
  51. quality = qualities(('ld', 'sd', 'hd', ))
  52. formats = []
  53. for f in item.get('mbr', []):
  54. src = f.get('src')
  55. if not src:
  56. continue
  57. fname = f.get('name')
  58. formats.append({
  59. 'url': src,
  60. 'format_id': fname,
  61. 'quality': quality(fname),
  62. })
  63. self._sort_formats(formats)
  64. title = self._html_search_regex(
  65. (r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
  66. r"'title'\s*:\s*'([^']+)'"),
  67. webpage, 'title', default=None) or item['title']
  68. description = self._html_search_regex(
  69. r'<div class="descr">\s*<div>&nbsp;</div>\s*<p>([^<]*)</p></div>',
  70. webpage, 'description', default=None) or self._html_search_meta(
  71. 'description', webpage, 'description')
  72. duration = int_or_none(self._html_search_meta(
  73. 'video:duration', webpage, 'video duration', fatal=False))
  74. upload_date = unified_strdate(self._html_search_meta(
  75. 'ya:ovs:upload_date', webpage, 'upload date', fatal=False))
  76. return {
  77. 'id': video_id,
  78. 'thumbnail': item.get('poster') or self._og_search_thumbnail(webpage),
  79. 'title': title,
  80. 'description': description,
  81. 'upload_date': upload_date,
  82. 'duration': int_or_none(duration),
  83. 'formats': formats
  84. }