You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

58 lines
1.9 KiB

  1. import re
  2. import time
  3. import xml.etree.ElementTree
  4. from .common import InfoExtractor
  5. from ..utils import ExtractorError
  6. class ClipfishIE(InfoExtractor):
  7. IE_NAME = u'clipfish'
  8. _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
  9. _TEST = {
  10. u'url': u'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
  11. u'file': u'3966754.mp4',
  12. u'md5': u'2521cd644e862936cf2e698206e47385',
  13. u'info_dict': {
  14. u'title': u'FIFA 14 - E3 2013 Trailer',
  15. u'duration': 82,
  16. },
  17. u'skip': 'Blocked in the US'
  18. }
  19. def _real_extract(self, url):
  20. mobj = re.match(self._VALID_URL, url)
  21. video_id = mobj.group(1)
  22. info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
  23. (video_id, int(time.time())))
  24. doc = self._download_xml(
  25. info_url, video_id, note=u'Downloading info page')
  26. title = doc.find('title').text
  27. video_url = doc.find('filename').text
  28. if video_url is None:
  29. xml_bytes = xml.etree.ElementTree.tostring(doc)
  30. raise ExtractorError(u'Cannot find video URL in document %r' %
  31. xml_bytes)
  32. thumbnail = doc.find('imageurl').text
  33. duration_str = doc.find('duration').text
  34. m = re.match(
  35. r'^(?P<hours>[0-9]+):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9]{2}):(?P<ms>[0-9]*)$',
  36. duration_str)
  37. if m:
  38. duration = (
  39. (int(m.group('hours')) * 60 * 60) +
  40. (int(m.group('minutes')) * 60) +
  41. (int(m.group('seconds')))
  42. )
  43. else:
  44. duration = None
  45. return {
  46. 'id': video_id,
  47. 'title': title,
  48. 'url': video_url,
  49. 'thumbnail': thumbnail,
  50. 'duration': duration,
  51. }