You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

108 lines
3.7 KiB

10 years ago
  1. from __future__ import unicode_literals
  2. from .common import InfoExtractor
  3. from ..compat import compat_urllib_parse
  4. from ..utils import (
  5. xpath_text,
  6. xpath_with_ns,
  7. int_or_none,
  8. parse_iso8601,
  9. )
  class BetIE(InfoExtractor):
      """Extractor for video pages on bet.com.

      The page embeds the URL of an MRSS feed; the first ``channel/item`` of
      that feed supplies the metadata, and its ``media:content`` element
      points at the SMIL document from which the formats are extracted.
      """

      _VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html'
      _TESTS = [
          {
              'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',
              'info_dict': {
                  'id': '406429c6-1b8a-463e-83fc-814adb81a9db',
                  'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',
                  'ext': 'flv',
                  'title': 'BET News Presents: A Conversation With President Obama',
                  'description': 'md5:5a88d8ae912c1b33e090290af7ec33c6',
                  'duration': 1534,
                  'timestamp': 1418075340,
                  'upload_date': '20141208',
                  'uploader': 'admin',
                  # Raw string: '\.' is not a valid escape in a plain literal.
                  'thumbnail': r're:(?i)^https?://.*\.jpg$',
              },
              'params': {
                  # rtmp download
                  'skip_download': True,
              },
          },
          {
              'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html',
              'info_dict': {
                  'id': '4160e53b-ad41-43b1-980f-8d85f63121f4',
                  'display_id': 'justice-for-ferguson-a-community-reacts',
                  'ext': 'flv',
                  'title': 'Justice for Ferguson: A Community Reacts',
                  'description': 'A BET News special.',
                  'duration': 1696,
                  'timestamp': 1416942360,
                  'upload_date': '20141125',
                  'uploader': 'admin',
                  # Raw string: '\.' is not a valid escape in a plain literal.
                  'thumbnail': r're:(?i)^https?://.*\.jpg$',
              },
              'params': {
                  # rtmp download
                  'skip_download': True,
              },
          }
      ]

      def _real_extract(self, url):
          """Build the info dict for the bet.com page at *url*.

          Downloads the page, locates the MRSS feed URL in it, downloads
          that feed and reads the metadata from its first ``channel/item``.
          Formats come from the SMIL URL given by the item's
          ``media:content`` element.

          Returns a dict with the keys ``id``, ``display_id``, ``title``,
          ``description``, ``thumbnail``, ``timestamp``, ``uploader``,
          ``duration`` and ``formats``. ``thumbnail`` is ``None`` when the
          feed carries no ``media:thumbnail`` element.
          """
          display_id = self._match_id(url)
          webpage = self._download_webpage(url, display_id)

          # The feed URL is stored percent-encoded in the page; two page
          # variants are recognised.
          media_url = compat_urllib_parse.unquote(self._search_regex(
              [r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"],
              webpage, 'media URL'))

          mrss = self._download_xml(media_url, display_id)

          item = mrss.find('./channel/item')

          NS_MAP = {
              'dc': 'http://purl.org/dc/elements/1.1/',
              'media': 'http://search.yahoo.com/mrss/',
              'ka': 'http://kickapps.com/karss',
          }

          title = xpath_text(item, './title', 'title')
          description = xpath_text(
              item, './description', 'description', fatal=False)

          video_id = xpath_text(item, './guid', 'video id', fatal=False)

          timestamp = parse_iso8601(xpath_text(
              item, xpath_with_ns('./dc:date', NS_MAP),
              'upload date', fatal=False))
          uploader = xpath_text(
              item, xpath_with_ns('./dc:creator', NS_MAP),
              'uploader', fatal=False)

          media_content = item.find(
              xpath_with_ns('./media:content', NS_MAP))
          duration = int_or_none(media_content.get('duration'))
          smil_url = media_content.get('url')

          # The thumbnail is optional metadata: a feed without a
          # media:thumbnail element must not abort the extraction.
          # Compare against None explicitly, because an Element without
          # children is falsy.
          thumbnail_el = media_content.find(
              xpath_with_ns('./media:thumbnail', NS_MAP))
          thumbnail = (
              thumbnail_el.get('url') if thumbnail_el is not None else None)

          formats = self._extract_smil_formats(smil_url, display_id)

          return {
              'id': video_id,
              'display_id': display_id,
              'title': title,
              'description': description,
              'thumbnail': thumbnail,
              'timestamp': timestamp,
              'uploader': uploader,
              'duration': duration,
              'formats': formats,
          }