You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

77 lines
2.9 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import json
  4. import re
  5. from .common import InfoExtractor
  6. from ..utils import (
  7. compat_parse_qs,
  8. compat_urlparse,
  9. )
  10. class FranceCultureIE(InfoExtractor):
  11. _VALID_URL = r'(?P<baseurl>http://(?:www\.)?franceculture\.fr/)player/reecouter\?play=(?P<id>[0-9]+)'
  12. _TEST = {
  13. 'url': 'http://www.franceculture.fr/player/reecouter?play=4795174',
  14. 'info_dict': {
  15. 'id': '4795174',
  16. 'ext': 'mp3',
  17. 'title': 'Rendez-vous au pays des geeks',
  18. 'vcodec': 'none',
  19. 'uploader': 'Colette Fellous',
  20. 'upload_date': '20140301',
  21. 'duration': 3601,
  22. 'thumbnail': r're:^http://www\.franceculture\.fr/.*/images/player/Carnet-nomade\.jpg$',
  23. 'description': 'Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche des « geeks », une enquête menée aux Etats-Unis dans la S ...',
  24. }
  25. }
  26. def _real_extract(self, url):
  27. mobj = re.match(self._VALID_URL, url)
  28. video_id = mobj.group('id')
  29. baseurl = mobj.group('baseurl')
  30. webpage = self._download_webpage(url, video_id)
  31. params_code = self._search_regex(
  32. r"<param name='movie' value='/sites/all/modules/rf/rf_player/swf/loader.swf\?([^']+)' />",
  33. webpage, 'parameter code')
  34. params = compat_parse_qs(params_code)
  35. video_url = compat_urlparse.urljoin(baseurl, params['urlAOD'][0])
  36. title = self._html_search_regex(
  37. r'<h1 class="title[^"]+">(.+?)</h1>', webpage, 'title')
  38. uploader = self._html_search_regex(
  39. r'(?s)<div id="emission".*?<span class="author">(.*?)</span>',
  40. webpage, 'uploader', fatal=False)
  41. thumbnail_part = self._html_search_regex(
  42. r'(?s)<div id="emission".*?<img src="([^"]+)"', webpage,
  43. 'thumbnail', fatal=False)
  44. if thumbnail_part is None:
  45. thumbnail = None
  46. else:
  47. thumbnail = compat_urlparse.urljoin(baseurl, thumbnail_part)
  48. description = self._html_search_regex(
  49. r'(?s)<p class="desc">(.*?)</p>', webpage, 'description')
  50. info = json.loads(params['infoData'][0])[0]
  51. duration = info.get('media_length')
  52. upload_date_candidate = info.get('media_section5')
  53. upload_date = (
  54. upload_date_candidate
  55. if (upload_date_candidate is not None and
  56. re.match(r'[0-9]{8}$', upload_date_candidate))
  57. else None)
  58. return {
  59. 'id': video_id,
  60. 'url': video_url,
  61. 'vcodec': 'none' if video_url.lower().endswith('.mp3') else None,
  62. 'duration': duration,
  63. 'uploader': uploader,
  64. 'upload_date': upload_date,
  65. 'title': title,
  66. 'thumbnail': thumbnail,
  67. 'description': description,
  68. }