You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

118 lines
3.6 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from ..compat import compat_str
  5. from ..utils import (
  6. determine_ext,
  7. float_or_none,
  8. int_or_none,
  9. try_get,
  10. urlencode_postdata,
  11. )
  12. class YandexDiskIE(InfoExtractor):
  13. _VALID_URL = r'https?://yadi\.sk/[di]/(?P<id>[^/?#&]+)'
  14. _TESTS = [{
  15. 'url': 'https://yadi.sk/i/VdOeDou8eZs6Y',
  16. 'md5': '33955d7ae052f15853dc41f35f17581c',
  17. 'info_dict': {
  18. 'id': 'VdOeDou8eZs6Y',
  19. 'ext': 'mp4',
  20. 'title': '4.mp4',
  21. 'duration': 168.6,
  22. 'uploader': 'y.botova',
  23. 'uploader_id': '300043621',
  24. 'view_count': int,
  25. },
  26. }, {
  27. 'url': 'https://yadi.sk/d/h3WAXvDS3Li3Ce',
  28. 'only_matching': True,
  29. }]
  30. def _real_extract(self, url):
  31. video_id = self._match_id(url)
  32. status = self._download_webpage(
  33. 'https://disk.yandex.com/auth/status', video_id, query={
  34. 'urlOrigin': url,
  35. 'source': 'public',
  36. 'md5': 'false',
  37. })
  38. sk = self._search_regex(
  39. r'(["\'])sk(?:External)?\1\s*:\s*(["\'])(?P<value>(?:(?!\2).)+)\2',
  40. status, 'sk', group='value')
  41. webpage = self._download_webpage(url, video_id)
  42. models = self._parse_json(
  43. self._search_regex(
  44. r'<script[^>]+id=["\']models-client[^>]+>\s*(\[.+?\])\s*</script',
  45. webpage, 'video JSON'),
  46. video_id)
  47. data = next(
  48. model['data'] for model in models
  49. if model.get('model') == 'resource')
  50. video_hash = data['id']
  51. title = data['name']
  52. models = self._download_json(
  53. 'https://disk.yandex.com/models/', video_id,
  54. data=urlencode_postdata({
  55. '_model.0': 'videoInfo',
  56. 'id.0': video_hash,
  57. '_model.1': 'do-get-resource-url',
  58. 'id.1': video_hash,
  59. 'version': '13.6',
  60. 'sk': sk,
  61. }), query={'_m': 'videoInfo'})['models']
  62. videos = try_get(models, lambda x: x[0]['data']['videos'], list) or []
  63. source_url = try_get(
  64. models, lambda x: x[1]['data']['file'], compat_str)
  65. formats = []
  66. if source_url:
  67. formats.append({
  68. 'url': source_url,
  69. 'format_id': 'source',
  70. 'ext': determine_ext(title, 'mp4'),
  71. 'quality': 1,
  72. })
  73. for video in videos:
  74. format_url = video.get('url')
  75. if not format_url:
  76. continue
  77. if determine_ext(format_url) == 'm3u8':
  78. formats.extend(self._extract_m3u8_formats(
  79. format_url, video_id, 'mp4', entry_protocol='m3u8_native',
  80. m3u8_id='hls', fatal=False))
  81. else:
  82. formats.append({
  83. 'url': format_url,
  84. })
  85. self._sort_formats(formats)
  86. duration = float_or_none(try_get(
  87. models, lambda x: x[0]['data']['duration']), 1000)
  88. uploader = try_get(
  89. data, lambda x: x['user']['display_name'], compat_str)
  90. uploader_id = try_get(
  91. data, lambda x: x['user']['uid'], compat_str)
  92. view_count = int_or_none(try_get(
  93. data, lambda x: x['meta']['views_counter']))
  94. return {
  95. 'id': video_id,
  96. 'title': title,
  97. 'duration': duration,
  98. 'uploader': uploader,
  99. 'uploader_id': uploader_id,
  100. 'view_count': view_count,
  101. 'formats': formats,
  102. }