You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

64 lines
2.1 KiB

  1. import json
  2. import re
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. determine_ext,
  6. )
  7. class ViddlerIE(InfoExtractor):
  8. _VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler.com)/(?:v|embed|player)/(?P<id>[0-9]+)'
  9. _TEST = {
  10. u"url": u"http://www.viddler.com/v/43903784",
  11. u'file': u'43903784.mp4',
  12. u'md5': u'fbbaedf7813e514eb7ca30410f439ac9',
  13. u'info_dict': {
  14. u"title": u"Video Made Easy",
  15. u"uploader": u"viddler",
  16. u"duration": 100.89,
  17. }
  18. }
  19. def _real_extract(self, url):
  20. mobj = re.match(self._VALID_URL, url)
  21. video_id = mobj.group('id')
  22. embed_url = mobj.group('domain') + u'/embed/' + video_id
  23. webpage = self._download_webpage(embed_url, video_id)
  24. video_sources_code = self._search_regex(
  25. r"(?ms)sources\s*:\s*(\{.*?\})", webpage, u'video URLs')
  26. video_sources = json.loads(video_sources_code.replace("'", '"'))
  27. formats = [{
  28. 'url': video_url,
  29. 'format': format_id,
  30. } for video_url, format_id in video_sources.items()]
  31. title = self._html_search_regex(
  32. r"title\s*:\s*'([^']*)'", webpage, u'title')
  33. uploader = self._html_search_regex(
  34. r"authorName\s*:\s*'([^']*)'", webpage, u'uploader', fatal=False)
  35. duration_s = self._html_search_regex(
  36. r"duration\s*:\s*([0-9.]*)", webpage, u'duration', fatal=False)
  37. duration = float(duration_s) if duration_s else None
  38. thumbnail = self._html_search_regex(
  39. r"thumbnail\s*:\s*'([^']*)'",
  40. webpage, u'thumbnail', fatal=False)
  41. info = {
  42. '_type': 'video',
  43. 'id': video_id,
  44. 'title': title,
  45. 'thumbnail': thumbnail,
  46. 'uploader': uploader,
  47. 'duration': duration,
  48. 'formats': formats,
  49. }
  50. # TODO: Remove when #980 has been merged
  51. info['formats'][-1]['ext'] = determine_ext(info['formats'][-1]['url'])
  52. info.update(info['formats'][-1])
  53. return info