You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

97 lines
3.5 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import json
  4. import re
  5. from .common import InfoExtractor
  6. from .facebook import FacebookIE
  7. class BuzzFeedIE(InfoExtractor):
  8. _VALID_URL = r'https?://(?:www\.)?buzzfeed\.com/[^?#]*?/(?P<id>[^?#]+)'
  9. _TESTS = [{
  10. 'url': 'http://www.buzzfeed.com/abagg/this-angry-ram-destroys-a-punching-bag-like-a-boss?utm_term=4ldqpia',
  11. 'info_dict': {
  12. 'id': 'this-angry-ram-destroys-a-punching-bag-like-a-boss',
  13. 'title': 'This Angry Ram Destroys A Punching Bag Like A Boss',
  14. 'description': 'Rambro!',
  15. },
  16. 'playlist': [{
  17. 'info_dict': {
  18. 'id': 'aVCR29aE_OQ',
  19. 'ext': 'mp4',
  20. 'title': 'Angry Ram destroys a punching bag..',
  21. 'description': 'md5:c59533190ef23fd4458a5e8c8c872345',
  22. 'upload_date': '20141024',
  23. 'uploader_id': 'Buddhanz1',
  24. 'uploader': 'Angry Ram',
  25. }
  26. }]
  27. }, {
  28. 'url': 'http://www.buzzfeed.com/sheridanwatson/look-at-this-cute-dog-omg?utm_term=4ldqpia',
  29. 'params': {
  30. 'skip_download': True, # Got enough YouTube download tests
  31. },
  32. 'info_dict': {
  33. 'id': 'look-at-this-cute-dog-omg',
  34. 'description': 're:Munchkin the Teddy Bear is back ?!',
  35. 'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill',
  36. },
  37. 'playlist': [{
  38. 'info_dict': {
  39. 'id': 'mVmBL8B-In0',
  40. 'ext': 'mp4',
  41. 'title': 're:Munchkin the Teddy Bear gets her exercise',
  42. 'description': 'md5:28faab95cda6e361bcff06ec12fc21d8',
  43. 'upload_date': '20141124',
  44. 'uploader_id': 'CindysMunchkin',
  45. 'uploader': 're:^Munchkin the',
  46. },
  47. }]
  48. }, {
  49. 'url': 'http://www.buzzfeed.com/craigsilverman/the-most-adorable-crash-landing-ever#.eq7pX0BAmK',
  50. 'info_dict': {
  51. 'id': 'the-most-adorable-crash-landing-ever',
  52. 'title': 'Watch This Baby Goose Make The Most Adorable Crash Landing',
  53. 'description': 'This gosling knows how to stick a landing.',
  54. },
  55. 'playlist': [{
  56. 'md5': '763ca415512f91ca62e4621086900a23',
  57. 'info_dict': {
  58. 'id': '971793786185728',
  59. 'ext': 'mp4',
  60. 'title': 'We set up crash pads so that the goslings on our roof would have a safe landi...',
  61. 'uploader': 'Calgary Outdoor Centre-University of Calgary',
  62. },
  63. }],
  64. 'add_ie': ['Facebook'],
  65. }]
  66. def _real_extract(self, url):
  67. playlist_id = self._match_id(url)
  68. webpage = self._download_webpage(url, playlist_id)
  69. all_buckets = re.findall(
  70. r'(?s)<div class="video-embed[^"]*"..*?rel:bf_bucket_data=\'([^\']+)\'',
  71. webpage)
  72. entries = []
  73. for bd_json in all_buckets:
  74. bd = json.loads(bd_json)
  75. video = bd.get('video') or bd.get('progload_video')
  76. if not video:
  77. continue
  78. entries.append(self.url_result(video['url']))
  79. facebook_url = FacebookIE._extract_url(webpage)
  80. if facebook_url:
  81. entries.append(self.url_result(facebook_url))
  82. return {
  83. '_type': 'playlist',
  84. 'id': playlist_id,
  85. 'title': self._og_search_title(webpage),
  86. 'description': self._og_search_description(webpage),
  87. 'entries': entries,
  88. }