You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

108 lines
3.7 KiB

  1. from __future__ import unicode_literals
  2. from .common import InfoExtractor
  3. from ..compat import compat_urllib_parse_unquote
  4. from ..utils import (
  5. xpath_text,
  6. xpath_with_ns,
  7. int_or_none,
  8. parse_iso8601,
  9. )
  10. class BetIE(InfoExtractor):
  11. _VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html'
  12. _TESTS = [
  13. {
  14. 'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',
  15. 'info_dict': {
  16. 'id': 'news/national/2014/a-conversation-with-president-obama',
  17. 'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',
  18. 'ext': 'flv',
  19. 'title': 'A Conversation With President Obama',
  20. 'description': 'md5:699d0652a350cf3e491cd15cc745b5da',
  21. 'duration': 1534,
  22. 'timestamp': 1418075340,
  23. 'upload_date': '20141208',
  24. 'uploader': 'admin',
  25. 'thumbnail': 're:(?i)^https?://.*\.jpg$',
  26. },
  27. 'params': {
  28. # rtmp download
  29. 'skip_download': True,
  30. },
  31. },
  32. {
  33. 'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html',
  34. 'info_dict': {
  35. 'id': 'news/national/2014/justice-for-ferguson-a-community-reacts',
  36. 'display_id': 'justice-for-ferguson-a-community-reacts',
  37. 'ext': 'flv',
  38. 'title': 'Justice for Ferguson: A Community Reacts',
  39. 'description': 'A BET News special.',
  40. 'duration': 1696,
  41. 'timestamp': 1416942360,
  42. 'upload_date': '20141125',
  43. 'uploader': 'admin',
  44. 'thumbnail': 're:(?i)^https?://.*\.jpg$',
  45. },
  46. 'params': {
  47. # rtmp download
  48. 'skip_download': True,
  49. },
  50. }
  51. ]
  52. def _real_extract(self, url):
  53. display_id = self._match_id(url)
  54. webpage = self._download_webpage(url, display_id)
  55. media_url = compat_urllib_parse_unquote(self._search_regex(
  56. [r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"],
  57. webpage, 'media URL'))
  58. video_id = self._search_regex(
  59. r'/video/(.*)/_jcr_content/', media_url, 'video id')
  60. mrss = self._download_xml(media_url, display_id)
  61. item = mrss.find('./channel/item')
  62. NS_MAP = {
  63. 'dc': 'http://purl.org/dc/elements/1.1/',
  64. 'media': 'http://search.yahoo.com/mrss/',
  65. 'ka': 'http://kickapps.com/karss',
  66. }
  67. title = xpath_text(item, './title', 'title')
  68. description = xpath_text(
  69. item, './description', 'description', fatal=False)
  70. timestamp = parse_iso8601(xpath_text(
  71. item, xpath_with_ns('./dc:date', NS_MAP),
  72. 'upload date', fatal=False))
  73. uploader = xpath_text(
  74. item, xpath_with_ns('./dc:creator', NS_MAP),
  75. 'uploader', fatal=False)
  76. media_content = item.find(
  77. xpath_with_ns('./media:content', NS_MAP))
  78. duration = int_or_none(media_content.get('duration'))
  79. smil_url = media_content.get('url')
  80. thumbnail = media_content.find(
  81. xpath_with_ns('./media:thumbnail', NS_MAP)).get('url')
  82. formats = self._extract_smil_formats(smil_url, display_id)
  83. return {
  84. 'id': video_id,
  85. 'display_id': display_id,
  86. 'title': title,
  87. 'description': description,
  88. 'thumbnail': thumbnail,
  89. 'timestamp': timestamp,
  90. 'uploader': uploader,
  91. 'duration': duration,
  92. 'formats': formats,
  93. }