You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

40 lines
1.3 KiB

  1. from __future__ import unicode_literals
  2. import re
  3. import json
  4. from .common import InfoExtractor
  5. class BreakIE(InfoExtractor):
  6. _VALID_URL = r'http://(?:www\.)?break\.com/video/([^/]+)'
  7. _TEST = {
  8. 'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056',
  9. 'md5': 'a3513fb1547fba4fb6cfac1bffc6c46b',
  10. 'info_dict': {
  11. 'id': '2468056',
  12. 'ext': 'mp4',
  13. 'title': 'When Girls Act Like D-Bags',
  14. }
  15. }
  16. def _real_extract(self, url):
  17. mobj = re.match(self._VALID_URL, url)
  18. video_id = mobj.group(1).split("-")[-1]
  19. embed_url = 'http://www.break.com/embed/%s' % video_id
  20. webpage = self._download_webpage(embed_url, video_id)
  21. info_json = self._search_regex(r'var embedVars = ({.*})\s*?</script>',
  22. webpage, 'info json', flags=re.DOTALL)
  23. info = json.loads(info_json)
  24. video_url = info['videoUri']
  25. youtube_id = info.get('youtubeId')
  26. if youtube_id:
  27. return self.url_result(youtube_id, 'Youtube')
  28. final_url = video_url + '?' + info['AuthToken']
  29. return {
  30. 'id': video_id,
  31. 'url': final_url,
  32. 'title': info['contentName'],
  33. 'thumbnail': info['thumbUri'],
  34. }