You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

118 lines
4.6 KiB

  1. import re
  2. import json
  3. import socket
  4. from .common import InfoExtractor
  5. from .subtitles import NoAutoSubtitlesIE
  6. from ..utils import (
  7. compat_http_client,
  8. compat_urllib_error,
  9. compat_urllib_request,
  10. compat_str,
  11. get_element_by_attribute,
  12. get_element_by_id,
  13. ExtractorError,
  14. )
  15. class DailyMotionSubtitlesIE(NoAutoSubtitlesIE):
  16. def _get_available_subtitles(self, video_id):
  17. request = compat_urllib_request.Request('https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id)
  18. try:
  19. sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
  20. except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
  21. self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
  22. return {}
  23. info = json.loads(sub_list)
  24. if (info['total'] > 0):
  25. sub_lang_list = dict((l['language'], l['url']) for l in info['list'])
  26. return sub_lang_list
  27. self._downloader.report_warning(u'video doesn\'t have subtitles')
  28. return {}
  29. class DailymotionIE(DailyMotionSubtitlesIE):
  30. """Information Extractor for Dailymotion"""
  31. _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
  32. IE_NAME = u'dailymotion'
  33. _TEST = {
  34. u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
  35. u'file': u'x33vw9.mp4',
  36. u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
  37. u'info_dict': {
  38. u"uploader": u"Alex and Van .",
  39. u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
  40. }
  41. }
  42. def _real_extract(self, url):
  43. # Extract id and simplified title from URL
  44. mobj = re.match(self._VALID_URL, url)
  45. video_id = mobj.group(1).split('_')[0].split('?')[0]
  46. video_extension = 'mp4'
  47. # Retrieve video webpage to extract further information
  48. request = compat_urllib_request.Request(url)
  49. request.add_header('Cookie', 'family_filter=off')
  50. webpage = self._download_webpage(request, video_id)
  51. # Extract URL, uploader and title from webpage
  52. self.report_extraction(video_id)
  53. video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
  54. # Looking for official user
  55. r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
  56. webpage, 'video uploader')
  57. video_upload_date = None
  58. mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
  59. if mobj is not None:
  60. video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
  61. embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
  62. embed_page = self._download_webpage(embed_url, video_id,
  63. u'Downloading embed page')
  64. info = self._search_regex(r'var info = ({.*?}),', embed_page, 'video info')
  65. info = json.loads(info)
  66. # TODO: support choosing qualities
  67. for key in ['stream_h264_hd1080_url', 'stream_h264_hd_url',
  68. 'stream_h264_hq_url', 'stream_h264_url',
  69. 'stream_h264_ld_url']:
  70. if info.get(key): # key in info and info[key]:
  71. max_quality = key
  72. self.to_screen(u'%s: Using %s' % (video_id, key))
  73. break
  74. else:
  75. raise ExtractorError(u'Unable to extract video URL')
  76. video_url = info[max_quality]
  77. # subtitles
  78. video_subtitles = None
  79. video_webpage = None
  80. if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
  81. video_subtitles = self._extract_subtitles(video_id)
  82. elif self._downloader.params.get('writeautomaticsub', False):
  83. video_subtitles = self._request_automatic_caption(video_id, video_webpage)
  84. if self._downloader.params.get('listsubtitles', False):
  85. self._list_available_subtitles(video_id)
  86. return
  87. return [{
  88. 'id': video_id,
  89. 'url': video_url,
  90. 'uploader': video_uploader,
  91. 'upload_date': video_upload_date,
  92. 'title': self._og_search_title(webpage),
  93. 'ext': video_extension,
  94. 'subtitles': video_subtitles,
  95. 'thumbnail': info['thumbnail_url']
  96. }]