You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

72 lines
2.2 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import js_to_json
  6. class SportBoxEmbedIE(InfoExtractor):
  7. _VALID_URL = r'https?://news\.sportbox\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)'
  8. _TESTS = [{
  9. 'url': 'http://news.sportbox.ru/vdl/player/ci/211355',
  10. 'info_dict': {
  11. 'id': '211355',
  12. 'ext': 'mp4',
  13. 'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»',
  14. 'thumbnail': r're:^https?://.*\.jpg$',
  15. },
  16. 'params': {
  17. # m3u8 download
  18. 'skip_download': True,
  19. },
  20. }, {
  21. 'url': 'http://news.sportbox.ru/vdl/player?nid=370908&only_player=1&autostart=false&playeri=2&height=340&width=580',
  22. 'only_matching': True,
  23. }]
  24. @staticmethod
  25. def _extract_urls(webpage):
  26. return re.findall(
  27. r'<iframe[^>]+src="(https?://news\.sportbox\.ru/vdl/player[^"]+)"',
  28. webpage)
  29. def _real_extract(self, url):
  30. video_id = self._match_id(url)
  31. webpage = self._download_webpage(url, video_id)
  32. formats = []
  33. def cleanup_js(code):
  34. # desktop_advert_config contains complex Javascripts and we don't need it
  35. return js_to_json(re.sub(r'desktop_advert_config.*', '', code))
  36. jwplayer_data = self._parse_json(self._search_regex(
  37. r'(?s)player\.setup\(({.+?})\);', webpage, 'jwplayer settings'), video_id,
  38. transform_source=cleanup_js)
  39. hls_url = jwplayer_data.get('hls_url')
  40. if hls_url:
  41. formats.extend(self._extract_m3u8_formats(
  42. hls_url, video_id, ext='mp4', m3u8_id='hls'))
  43. rtsp_url = jwplayer_data.get('rtsp_url')
  44. if rtsp_url:
  45. formats.append({
  46. 'url': rtsp_url,
  47. 'format_id': 'rtsp',
  48. })
  49. self._sort_formats(formats)
  50. title = jwplayer_data['node_title']
  51. thumbnail = jwplayer_data.get('image_url')
  52. return {
  53. 'id': video_id,
  54. 'title': title,
  55. 'thumbnail': thumbnail,
  56. 'formats': formats,
  57. }