You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

79 lines
2.4 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. determine_ext,
  7. int_or_none,
  8. js_to_json,
  9. )
  10. class SportBoxEmbedIE(InfoExtractor):
  11. _VALID_URL = r'https?://news\.sportbox\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)'
  12. _TESTS = [{
  13. 'url': 'http://news.sportbox.ru/vdl/player/ci/211355',
  14. 'info_dict': {
  15. 'id': '211355',
  16. 'ext': 'mp4',
  17. 'title': '211355',
  18. 'thumbnail': r're:^https?://.*\.jpg$',
  19. 'duration': 292,
  20. 'view_count': int,
  21. },
  22. 'params': {
  23. # m3u8 download
  24. 'skip_download': True,
  25. },
  26. }, {
  27. 'url': 'http://news.sportbox.ru/vdl/player?nid=370908&only_player=1&autostart=false&playeri=2&height=340&width=580',
  28. 'only_matching': True,
  29. }, {
  30. 'url': 'https://news.sportbox.ru/vdl/player/media/193095',
  31. 'only_matching': True,
  32. }]
  33. @staticmethod
  34. def _extract_urls(webpage):
  35. return re.findall(
  36. r'<iframe[^>]+src="(https?://news\.sportbox\.ru/vdl/player[^"]+)"',
  37. webpage)
  38. def _real_extract(self, url):
  39. video_id = self._match_id(url)
  40. webpage = self._download_webpage(url, video_id)
  41. wjplayer_data = self._parse_json(
  42. self._search_regex(
  43. r'(?s)wjplayer\(({.+?})\);', webpage, 'wjplayer settings'),
  44. video_id, transform_source=js_to_json)
  45. formats = []
  46. for source in wjplayer_data['sources']:
  47. src = source.get('src')
  48. if not src:
  49. continue
  50. if determine_ext(src) == 'm3u8':
  51. formats.extend(self._extract_m3u8_formats(
  52. src, video_id, 'mp4', entry_protocol='m3u8_native',
  53. m3u8_id='hls', fatal=False))
  54. else:
  55. formats.append({
  56. 'url': src,
  57. })
  58. self._sort_formats(formats)
  59. view_count = int_or_none(self._search_regex(
  60. r'Просмотров\s*:\s*(\d+)', webpage, 'view count', default=None))
  61. return {
  62. 'id': video_id,
  63. 'title': video_id,
  64. 'thumbnail': wjplayer_data.get('poster'),
  65. 'duration': int_or_none(wjplayer_data.get('duration')),
  66. 'view_count': view_count,
  67. 'formats': formats,
  68. }