You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

140 lines
5.5 KiB

  1. import re
  2. import json
  3. import itertools
  4. import socket
  5. from .common import InfoExtractor
  6. from .subtitles import SubtitlesInfoExtractor
  7. from ..utils import (
  8. compat_http_client,
  9. compat_urllib_error,
  10. compat_urllib_request,
  11. compat_str,
  12. get_element_by_attribute,
  13. get_element_by_id,
  14. ExtractorError,
  15. )
  16. class DailymotionIE(SubtitlesInfoExtractor):
  17. """Information Extractor for Dailymotion"""
  18. _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
  19. IE_NAME = u'dailymotion'
  20. _TEST = {
  21. u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
  22. u'file': u'x33vw9.mp4',
  23. u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
  24. u'info_dict': {
  25. u"uploader": u"Amphora Alex and Van .",
  26. u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
  27. }
  28. }
  29. def _real_extract(self, url):
  30. # Extract id and simplified title from URL
  31. mobj = re.match(self._VALID_URL, url)
  32. video_id = mobj.group(1).split('_')[0].split('?')[0]
  33. video_extension = 'mp4'
  34. url = 'http://www.dailymotion.com/video/%s' % video_id
  35. # Retrieve video webpage to extract further information
  36. request = compat_urllib_request.Request(url)
  37. request.add_header('Cookie', 'family_filter=off')
  38. webpage = self._download_webpage(request, video_id)
  39. # Extract URL, uploader and title from webpage
  40. self.report_extraction(video_id)
  41. video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
  42. # Looking for official user
  43. r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
  44. webpage, 'video uploader')
  45. video_upload_date = None
  46. mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
  47. if mobj is not None:
  48. video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
  49. embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
  50. embed_page = self._download_webpage(embed_url, video_id,
  51. u'Downloading embed page')
  52. info = self._search_regex(r'var info = ({.*?}),$', embed_page,
  53. 'video info', flags=re.MULTILINE)
  54. info = json.loads(info)
  55. # TODO: support choosing qualities
  56. for key in ['stream_h264_hd1080_url','stream_h264_hd_url',
  57. 'stream_h264_hq_url','stream_h264_url',
  58. 'stream_h264_ld_url']:
  59. if info.get(key):#key in info and info[key]:
  60. max_quality = key
  61. self.to_screen(u'Using %s' % key)
  62. break
  63. else:
  64. raise ExtractorError(u'Unable to extract video URL')
  65. video_url = info[max_quality]
  66. # subtitles
  67. video_subtitles = self.extract_subtitles(video_id)
  68. if self._downloader.params.get('listsubtitles', False):
  69. self._list_available_subtitles(video_id)
  70. return
  71. return [{
  72. 'id': video_id,
  73. 'url': video_url,
  74. 'uploader': video_uploader,
  75. 'upload_date': video_upload_date,
  76. 'title': self._og_search_title(webpage),
  77. 'ext': video_extension,
  78. 'subtitles': video_subtitles,
  79. 'thumbnail': info['thumbnail_url']
  80. }]
  81. def _get_available_subtitles(self, video_id):
  82. request = compat_urllib_request.Request('https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id)
  83. try:
  84. sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
  85. except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
  86. self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
  87. return {}
  88. info = json.loads(sub_list)
  89. if (info['total'] > 0):
  90. sub_lang_list = dict((l['language'], l['url']) for l in info['list'])
  91. return sub_lang_list
  92. self._downloader.report_warning(u'video doesn\'t have subtitles')
  93. return {}
  94. class DailymotionPlaylistIE(InfoExtractor):
  95. _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
  96. _MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/playlist/.+?".*?>.*?</a>.*?</div>'
  97. def _real_extract(self, url):
  98. mobj = re.match(self._VALID_URL, url)
  99. playlist_id = mobj.group('id')
  100. video_ids = []
  101. for pagenum in itertools.count(1):
  102. webpage = self._download_webpage('https://www.dailymotion.com/playlist/%s/%s' % (playlist_id, pagenum),
  103. playlist_id, u'Downloading page %s' % pagenum)
  104. playlist_el = get_element_by_attribute(u'class', u'video_list', webpage)
  105. video_ids.extend(re.findall(r'data-id="(.+?)" data-ext-id', playlist_el))
  106. if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
  107. break
  108. entries = [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
  109. for video_id in video_ids]
  110. return {'_type': 'playlist',
  111. 'id': playlist_id,
  112. 'title': get_element_by_id(u'playlist_name', webpage),
  113. 'entries': entries,
  114. }