You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

76 lines
2.3 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..compat import compat_urlparse
  6. from ..utils import (
  7. js_to_json,
  8. unified_strdate,
  9. )
  10. class SportBoxEmbedIE(InfoExtractor):
  11. _VALID_URL = r'https?://news\.sportbox\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)'
  12. _TESTS = [{
  13. 'url': 'http://news.sportbox.ru/vdl/player/ci/211355',
  14. 'info_dict': {
  15. 'id': '211355',
  16. 'ext': 'mp4',
  17. 'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»',
  18. 'thumbnail': r're:^https?://.*\.jpg$',
  19. },
  20. 'params': {
  21. # m3u8 download
  22. 'skip_download': True,
  23. },
  24. }, {
  25. 'url': 'http://news.sportbox.ru/vdl/player?nid=370908&only_player=1&autostart=false&playeri=2&height=340&width=580',
  26. 'only_matching': True,
  27. }]
  28. @staticmethod
  29. def _extract_urls(webpage):
  30. return re.findall(
  31. r'<iframe[^>]+src="(https?://news\.sportbox\.ru/vdl/player[^"]+)"',
  32. webpage)
  33. def _real_extract(self, url):
  34. video_id = self._match_id(url)
  35. webpage = self._download_webpage(url, video_id)
  36. formats = []
  37. def cleanup_js(code):
  38. # desktop_advert_config contains complex Javascripts and we don't need it
  39. return js_to_json(re.sub(r'desktop_advert_config.*', '', code))
  40. jwplayer_data = self._parse_json(self._search_regex(
  41. r'(?s)player\.setup\(({.+?})\);', webpage, 'jwplayer settings'), video_id,
  42. transform_source=cleanup_js)
  43. hls_url = jwplayer_data.get('hls_url')
  44. if hls_url:
  45. formats.extend(self._extract_m3u8_formats(
  46. hls_url, video_id, ext='mp4', m3u8_id='hls'))
  47. rtsp_url = jwplayer_data.get('rtsp_url')
  48. if rtsp_url:
  49. formats.append({
  50. 'url': rtsp_url,
  51. 'format_id': 'rtsp',
  52. })
  53. self._sort_formats(formats)
  54. title = jwplayer_data['node_title']
  55. thumbnail = jwplayer_data.get('image_url')
  56. return {
  57. 'id': video_id,
  58. 'title': title,
  59. 'thumbnail': thumbnail,
  60. 'formats': formats,
  61. }