You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

93 lines
3.3 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from ..compat import compat_urlparse
  5. from ..utils import (
  6. int_or_none,
  7. orderedSet,
  8. parse_duration,
  9. qualities,
  10. unified_strdate,
  11. xpath_text
  12. )
  13. class EuropaIE(InfoExtractor):
  14. _VALID_URL = r'https?://ec\.europa\.eu/avservices/(?:video/player|audio/audioDetails)\.cfm\?.*?\bref=(?P<id>[A-Za-z0-9-]+)'
  15. _TESTS = [{
  16. 'url': 'http://ec.europa.eu/avservices/video/player.cfm?ref=I107758',
  17. 'md5': '574f080699ddd1e19a675b0ddf010371',
  18. 'info_dict': {
  19. 'id': 'I107758',
  20. 'ext': 'mp4',
  21. 'title': 'TRADE - Wikileaks on TTIP',
  22. 'description': 'NEW LIVE EC Midday press briefing of 11/08/2015',
  23. 'thumbnail': r're:^https?://.*\.jpg$',
  24. 'upload_date': '20150811',
  25. 'duration': 34,
  26. 'view_count': int,
  27. 'formats': 'mincount:3',
  28. }
  29. }, {
  30. 'url': 'http://ec.europa.eu/avservices/video/player.cfm?sitelang=en&ref=I107786',
  31. 'only_matching': True,
  32. }, {
  33. 'url': 'http://ec.europa.eu/avservices/audio/audioDetails.cfm?ref=I-109295&sitelang=en',
  34. 'only_matching': True,
  35. }]
  36. def _real_extract(self, url):
  37. video_id = self._match_id(url)
  38. playlist = self._download_xml(
  39. 'http://ec.europa.eu/avservices/video/player/playlist.cfm?ID=%s' % video_id, video_id)
  40. def get_item(type_, preference):
  41. items = {}
  42. for item in playlist.findall('./info/%s/item' % type_):
  43. lang, label = xpath_text(item, 'lg', default=None), xpath_text(item, 'label', default=None)
  44. if lang and label:
  45. items[lang] = label.strip()
  46. for p in preference:
  47. if items.get(p):
  48. return items[p]
  49. query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
  50. preferred_lang = query.get('sitelang', ('en', ))[0]
  51. preferred_langs = orderedSet((preferred_lang, 'en', 'int'))
  52. title = get_item('title', preferred_langs) or video_id
  53. description = get_item('description', preferred_langs)
  54. thumbnmail = xpath_text(playlist, './info/thumburl', 'thumbnail')
  55. upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date'))
  56. duration = parse_duration(xpath_text(playlist, './info/duration', 'duration'))
  57. view_count = int_or_none(xpath_text(playlist, './info/views', 'views'))
  58. language_preference = qualities(preferred_langs[::-1])
  59. formats = []
  60. for file_ in playlist.findall('./files/file'):
  61. video_url = xpath_text(file_, './url')
  62. if not video_url:
  63. continue
  64. lang = xpath_text(file_, './lg')
  65. formats.append({
  66. 'url': video_url,
  67. 'format_id': lang,
  68. 'format_note': xpath_text(file_, './lglabel'),
  69. 'language_preference': language_preference(lang)
  70. })
  71. self._sort_formats(formats)
  72. return {
  73. 'id': video_id,
  74. 'title': title,
  75. 'description': description,
  76. 'thumbnail': thumbnmail,
  77. 'upload_date': upload_date,
  78. 'duration': duration,
  79. 'view_count': view_count,
  80. 'formats': formats
  81. }