You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

159 lines
5.7 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. int_or_none,
  7. remove_end,
  8. )
  9. class MailRuIE(InfoExtractor):
  10. IE_NAME = 'mailru'
  11. IE_DESC = 'Видео@Mail.Ru'
  12. _VALID_URL = r'''(?x)
  13. https?://
  14. (?:(?:www|m)\.)?my\.mail\.ru/
  15. (?:
  16. video/.*\#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|
  17. (?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html|
  18. (?:video/embed|\+/video/meta)/(?P<metaid>\d+)
  19. )
  20. '''
  21. _TESTS = [
  22. {
  23. 'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76',
  24. 'md5': 'dea205f03120046894db4ebb6159879a',
  25. 'info_dict': {
  26. 'id': '46301138_76',
  27. 'ext': 'mp4',
  28. 'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро',
  29. 'timestamp': 1393235077,
  30. 'upload_date': '20140224',
  31. 'uploader': 'sonypicturesrus',
  32. 'uploader_id': 'sonypicturesrus@mail.ru',
  33. 'duration': 184,
  34. },
  35. 'skip': 'Not accessible from Travis CI server',
  36. },
  37. {
  38. 'url': 'http://my.mail.ru/corp/hitech/video/news_hi-tech_mail_ru/1263.html',
  39. 'md5': '00a91a58c3402204dcced523777b475f',
  40. 'info_dict': {
  41. 'id': '46843144_1263',
  42. 'ext': 'mp4',
  43. 'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion',
  44. 'timestamp': 1397039888,
  45. 'upload_date': '20140409',
  46. 'uploader': 'hitech',
  47. 'uploader_id': 'hitech@corp.mail.ru',
  48. 'duration': 245,
  49. },
  50. 'skip': 'Not accessible from Travis CI server',
  51. },
  52. {
  53. # only available via metaUrl API
  54. 'url': 'http://my.mail.ru/mail/720pizle/video/_myvideo/502.html',
  55. 'md5': '3b26d2491c6949d031a32b96bd97c096',
  56. 'info_dict': {
  57. 'id': '56664382_502',
  58. 'ext': 'mp4',
  59. 'title': ':8336',
  60. 'timestamp': 1449094163,
  61. 'upload_date': '20151202',
  62. 'uploader': '720pizle@mail.ru',
  63. 'uploader_id': '720pizle@mail.ru',
  64. 'duration': 6001,
  65. },
  66. 'skip': 'Not accessible from Travis CI server',
  67. },
  68. {
  69. 'url': 'http://m.my.mail.ru/mail/3sktvtr/video/_myvideo/138.html',
  70. 'only_matching': True,
  71. },
  72. {
  73. 'url': 'https://my.mail.ru/video/embed/7949340477499637815',
  74. 'only_matching': True,
  75. },
  76. {
  77. 'url': 'http://my.mail.ru/+/video/meta/7949340477499637815',
  78. 'only_matching': True,
  79. }
  80. ]
  81. def _real_extract(self, url):
  82. mobj = re.match(self._VALID_URL, url)
  83. meta_id = mobj.group('metaid')
  84. video_id = None
  85. if meta_id:
  86. meta_url = 'https://my.mail.ru/+/video/meta/%s' % meta_id
  87. else:
  88. video_id = mobj.group('idv1')
  89. if not video_id:
  90. video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix')
  91. webpage = self._download_webpage(url, video_id)
  92. page_config = self._parse_json(self._search_regex(
  93. r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>',
  94. webpage, 'page config', default='{}'), video_id, fatal=False)
  95. if page_config:
  96. meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl')
  97. else:
  98. meta_url = None
  99. video_data = None
  100. if meta_url:
  101. video_data = self._download_json(
  102. meta_url, video_id or meta_id, 'Downloading video meta JSON',
  103. fatal=not video_id)
  104. # Fallback old approach
  105. if not video_data:
  106. video_data = self._download_json(
  107. 'http://api.video.mail.ru/videos/%s.json?new=1' % video_id,
  108. video_id, 'Downloading video JSON')
  109. formats = []
  110. for f in video_data['videos']:
  111. video_url = f.get('url')
  112. if not video_url:
  113. continue
  114. format_id = f.get('key')
  115. height = int_or_none(self._search_regex(
  116. r'^(\d+)[pP]$', format_id, 'height', default=None)) if format_id else None
  117. formats.append({
  118. 'url': video_url,
  119. 'format_id': format_id,
  120. 'height': height,
  121. })
  122. self._sort_formats(formats)
  123. meta_data = video_data['meta']
  124. title = remove_end(meta_data['title'], '.mp4')
  125. author = video_data.get('author')
  126. uploader = author.get('name')
  127. uploader_id = author.get('id') or author.get('email')
  128. view_count = int_or_none(video_data.get('viewsCount') or video_data.get('views_count'))
  129. acc_id = meta_data.get('accId')
  130. item_id = meta_data.get('itemId')
  131. content_id = '%s_%s' % (acc_id, item_id) if acc_id and item_id else video_id
  132. thumbnail = meta_data.get('poster')
  133. duration = int_or_none(meta_data.get('duration'))
  134. timestamp = int_or_none(meta_data.get('timestamp'))
  135. return {
  136. 'id': content_id,
  137. 'title': title,
  138. 'thumbnail': thumbnail,
  139. 'timestamp': timestamp,
  140. 'uploader': uploader,
  141. 'uploader_id': uploader_id,
  142. 'duration': duration,
  143. 'view_count': view_count,
  144. 'formats': formats,
  145. }