from __future__ import unicode_literals

import re
import json
import itertools

from .common import InfoExtractor
from ..compat import (
    compat_urllib_request,
)


class BambuserIE(InfoExtractor):
    IE_NAME = 'bambuser'
    _VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)'
    _API_KEY = '005f64509e19a868399060af746a00aa'

    _TEST = {
        'url': 'http://bambuser.com/v/4050584',
        # MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388
        # 'md5': 'fba8f7693e48fd4e8641b3fd5539a641',
        'info_dict': {
            'id': '4050584',
            'ext': 'flv',
            'title': 'Education engineering days - lightning talks',
            'duration': 3741,
            'uploader': 'pixelversity',
            'uploader_id': '344706',
        },
        'params': {
            # It doesn't respect the 'Range' header, it would download the whole video
            # caused the travis builds to fail: https://travis-ci.org/rg3/youtube-dl/jobs/14493845#L59
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        info_url = ('http://player-c.api.bambuser.com/getVideo.json?'
                    '&api_key=%s&vid=%s' % (self._API_KEY, video_id))
        info_json = self._download_webpage(info_url, video_id)
        info = json.loads(info_json)['result']

        return {
            'id': video_id,
            'title': info['title'],
            'url': info['url'],
            'thumbnail': info.get('preview'),
            'duration': int(info['length']),
            'view_count': int(info['views_total']),
            'uploader': info['username'],
            'uploader_id': info['owner']['uid'],
        }


class BambuserChannelIE(InfoExtractor):
    IE_NAME = 'bambuser:channel'
    _VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
    # The maximum number we can get with each request
    _STEP = 50
    _TEST = {
        'url': 'http://bambuser.com/channel/pixelversity',
        'info_dict': {
            'title': 'pixelversity',
        },
        'playlist_mincount': 60,
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        user = mobj.group('user')

        urls = []
        last_id = ''
        for i in itertools.count(1):
            req_url = (
                'http://bambuser.com/xhr-api/index.php?username={user}'
                '&sort=created&access_mode=0%2C1%2C2&limit={count}'
                '&method=broadcast&format=json&vid_older_than={last}'
            ).format(user=user, count=self._STEP, last=last_id)
            req = compat_urllib_request.Request(req_url)
            # Without setting this header, we wouldn't get any result
            req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
            data = self._download_json(
                req, user, 'Downloading page %d' % i)
            results = data['result']
            if not results:
                break
            last_id = results[-1]['vid']
            urls.extend(self.url_result(v['page'], 'Bambuser') for v in results)

        return {
            '_type': 'playlist',
            'title': user,
            'entries': urls,
        }