You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

101 lines
3.2 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import compat_str
  6. from ..utils import (
  7. ExtractorError,
  8. int_or_none,
  9. qualities,
  10. unescapeHTML,
  11. )
  12. class YapFilesIE(InfoExtractor):
  13. _YAPFILES_URL = r'//(?:(?:www|api)\.)?yapfiles\.ru/get_player/*\?.*?\bv=(?P<id>\w+)'
  14. _VALID_URL = r'https?:%s' % _YAPFILES_URL
  15. _TESTS = [{
  16. # with hd
  17. 'url': 'http://www.yapfiles.ru/get_player/?v=vMDE1NjcyNDUt0413',
  18. 'md5': '2db19e2bfa2450568868548a1aa1956c',
  19. 'info_dict': {
  20. 'id': 'vMDE1NjcyNDUt0413',
  21. 'ext': 'mp4',
  22. 'title': 'Самый худший пароль WIFI',
  23. 'thumbnail': r're:^https?://.*\.jpg$',
  24. 'duration': 72,
  25. },
  26. }, {
  27. # without hd
  28. 'url': 'https://api.yapfiles.ru/get_player/?uid=video_player_1872528&plroll=1&adv=1&v=vMDE4NzI1Mjgt690b',
  29. 'only_matching': True,
  30. }]
  31. @staticmethod
  32. def _extract_urls(webpage):
  33. return [unescapeHTML(mobj.group('url')) for mobj in re.finditer(
  34. r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?%s.*?)\1'
  35. % YapFilesIE._YAPFILES_URL, webpage)]
  36. def _real_extract(self, url):
  37. video_id = self._match_id(url)
  38. webpage = self._download_webpage(url, video_id, fatal=False)
  39. player_url = None
  40. query = {}
  41. if webpage:
  42. player_url = self._search_regex(
  43. r'player\.init\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
  44. 'player url', default=None, group='url')
  45. if not player_url:
  46. player_url = 'http://api.yapfiles.ru/load/%s/' % video_id
  47. query = {
  48. 'md5': 'ded5f369be61b8ae5f88e2eeb2f3caff',
  49. 'type': 'json',
  50. 'ref': url,
  51. }
  52. player = self._download_json(
  53. player_url, video_id, query=query)['player']
  54. playlist_url = player['playlist']
  55. title = player['title']
  56. thumbnail = player.get('poster')
  57. if title == 'Ролик удален' or 'deleted.jpg' in (thumbnail or ''):
  58. raise ExtractorError(
  59. 'Video %s has been removed' % video_id, expected=True)
  60. playlist = self._download_json(
  61. playlist_url, video_id)['player']['main']
  62. hd_height = int_or_none(player.get('hd'))
  63. QUALITIES = ('sd', 'hd')
  64. quality_key = qualities(QUALITIES)
  65. formats = []
  66. for format_id in QUALITIES:
  67. is_hd = format_id == 'hd'
  68. format_url = playlist.get(
  69. 'file%s' % ('_hd' if is_hd else ''))
  70. if not format_url or not isinstance(format_url, compat_str):
  71. continue
  72. formats.append({
  73. 'url': format_url,
  74. 'format_id': format_id,
  75. 'quality': quality_key(format_id),
  76. 'height': hd_height if is_hd else None,
  77. })
  78. self._sort_formats(formats)
  79. return {
  80. 'id': video_id,
  81. 'title': title,
  82. 'thumbnail': thumbnail,
  83. 'duration': int_or_none(player.get('length')),
  84. 'formats': formats,
  85. }