You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

67 lines
2.1 KiB

  1. from __future__ import unicode_literals
  2. import re
  3. from .common import InfoExtractor
  4. from ..utils import qualities
  class UnistraIE(InfoExtractor):
      """Extractor for video pages on utv.unistra.fr.

      Handles both ``index.php`` and ``video.php`` URLs that carry a numeric
      ``id_video`` query parameter.
      """

      _VALID_URL = r'https?://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(?P<id>\d+)'

      _TESTS = [
          {
              'url': 'http://utv.unistra.fr/video.php?id_video=154',
              'md5': '736f605cfdc96724d55bb543ab3ced24',
              'info_dict': {
                  'id': '154',
                  'ext': 'mp4',
                  'title': 'M!ss Yella',
                  'description': 'md5:104892c71bd48e55d70b902736b81bbf',
              },
          },
          {
              'url': 'http://utv.unistra.fr/index.php?id_video=437',
              'md5': '1ddddd6cccaae76f622ce29b8779636d',
              'info_dict': {
                  'id': '437',
                  'ext': 'mp4',
                  'title': 'Prix Louise Weiss 2014',
                  'description': 'md5:cc3a8735f079f4fb6b0b570fc10c135a',
              },
          },
      ]

      def _real_extract(self, url):
          """Build the info dict for the video page at *url*.

          Downloads the page, collects every ``file: "/..."`` path found in
          it, and turns each distinct path into a format entry served from
          ``vod-flash.u-strasbg.fr:8080``.

          Returns a dict with the keys ``id``, ``title``, ``description``,
          ``thumbnail`` and ``formats``.
          """
          mobj = re.match(self._VALID_URL, url)
          video_id = mobj.group('id')

          webpage = self._download_webpage(url, video_id)

          # A set drops paths that appear more than once in the page.
          files = set(re.findall(r'file\s*:\s*"(/[^"]+)"', webpage))

          # Ranking helper built from the known format ids (listed SD, then HD).
          quality = qualities(['SD', 'HD'])
          formats = []
          for file_path in files:
              # Only paths ending in "-HD.mp4" are labelled HD; all others are SD.
              format_id = 'HD' if file_path.endswith('-HD.mp4') else 'SD'
              formats.append({
                  'url': 'http://vod-flash.u-strasbg.fr:8080%s' % file_path,
                  'format_id': format_id,
                  'quality': quality(format_id),
              })
          self._sort_formats(formats)

          # The title is mandatory, so this lookup keeps the default (fatal)
          # behaviour.
          title = self._html_search_regex(
              r'<title>UTV - (.*?)</', webpage, 'title')

          # Description and thumbnail are optional metadata: a page that lacks
          # either one should still yield the video, so these lookups are
          # non-fatal.
          description = self._html_search_regex(
              r'<meta name="Description" content="(.*?)"', webpage,
              'description', fatal=False, flags=re.DOTALL)
          thumbnail = self._search_regex(
              r'image: "(.*?)"', webpage, 'thumbnail', fatal=False)

          return {
              'id': video_id,
              'title': title,
              'description': description,
              'thumbnail': thumbnail,
              'formats': formats,
          }