You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

56 lines
1.9 KiB

  1. import re
  2. import time
  3. import xml.etree.ElementTree
  4. from .common import InfoExtractor
  5. class ClipfishIE(InfoExtractor):
  6. IE_NAME = u'clipfish'
  7. _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
  8. _TEST = {
  9. u'url': u'http://www.clipfish.de/special/supertalent/video/4028320/supertalent-2013-ivana-opacak-singt-nobodys-perfect/',
  10. u'file': u'4028320.f4v',
  11. u'md5': u'5e38bda8c329fbfb42be0386a3f5a382',
  12. u'info_dict': {
  13. u'title': u'Supertalent 2013: Ivana Opacak singt Nobody\'s Perfect',
  14. u'duration': 399,
  15. }
  16. }
  17. def _real_extract(self, url):
  18. mobj = re.match(self._VALID_URL, url)
  19. video_id = mobj.group(1)
  20. info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
  21. (video_id, int(time.time())))
  22. doc = self._download_xml(
  23. info_url, video_id, note=u'Downloading info page')
  24. title = doc.find('title').text
  25. video_url = doc.find('filename').text
  26. if video_url is None:
  27. xml_bytes = xml.etree.ElementTree.tostring(doc)
  28. raise ExtractorError(u'Cannot find video URL in document %r' %
  29. xml_bytes)
  30. thumbnail = doc.find('imageurl').text
  31. duration_str = doc.find('duration').text
  32. m = re.match(
  33. r'^(?P<hours>[0-9]+):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9]{2}):(?P<ms>[0-9]*)$',
  34. duration_str)
  35. if m:
  36. duration = (
  37. (int(m.group('hours')) * 60 * 60) +
  38. (int(m.group('minutes')) * 60) +
  39. (int(m.group('seconds')))
  40. )
  41. else:
  42. duration = None
  43. return {
  44. 'id': video_id,
  45. 'title': title,
  46. 'url': video_url,
  47. 'thumbnail': thumbnail,
  48. 'duration': duration,
  49. }