You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

393 lines
15 KiB

10 years ago
10 years ago
10 years ago
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import functools
  4. import json
  5. import re
  6. from .common import InfoExtractor
  7. from ..compat import compat_HTTPError
  8. from ..utils import (
  9. age_restricted,
  10. clean_html,
  11. ExtractorError,
  12. int_or_none,
  13. OnDemandPagedList,
  14. try_get,
  15. unescapeHTML,
  16. urlencode_postdata,
  17. )
  18. class DailymotionBaseInfoExtractor(InfoExtractor):
  19. _FAMILY_FILTER = None
  20. _HEADERS = {
  21. 'Content-Type': 'application/json',
  22. 'Origin': 'https://www.dailymotion.com',
  23. }
  24. _NETRC_MACHINE = 'dailymotion'
  25. def _get_dailymotion_cookies(self):
  26. return self._get_cookies('https://www.dailymotion.com/')
  27. @staticmethod
  28. def _get_cookie_value(cookies, name):
  29. cookie = cookies.get('name')
  30. if cookie:
  31. return cookie.value
  32. def _set_dailymotion_cookie(self, name, value):
  33. self._set_cookie('www.dailymotion.com', name, value)
  34. def _real_initialize(self):
  35. cookies = self._get_dailymotion_cookies()
  36. ff = self._get_cookie_value(cookies, 'ff')
  37. self._FAMILY_FILTER = ff == 'on' if ff else age_restricted(18, self._downloader.params.get('age_limit'))
  38. self._set_dailymotion_cookie('ff', 'on' if self._FAMILY_FILTER else 'off')
  39. def _call_api(self, object_type, xid, object_fields, note, filter_extra=None):
  40. if not self._HEADERS.get('Authorization'):
  41. cookies = self._get_dailymotion_cookies()
  42. token = self._get_cookie_value(cookies, 'access_token') or self._get_cookie_value(cookies, 'client_token')
  43. if not token:
  44. data = {
  45. 'client_id': 'f1a362d288c1b98099c7',
  46. 'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
  47. }
  48. username, password = self._get_login_info()
  49. if username:
  50. data.update({
  51. 'grant_type': 'password',
  52. 'password': password,
  53. 'username': username,
  54. })
  55. else:
  56. data['grant_type'] = 'client_credentials'
  57. try:
  58. token = self._download_json(
  59. 'https://graphql.api.dailymotion.com/oauth/token',
  60. None, 'Downloading Access Token',
  61. data=urlencode_postdata(data))['access_token']
  62. except ExtractorError as e:
  63. if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
  64. raise ExtractorError(self._parse_json(
  65. e.cause.read().decode(), xid)['error_description'], expected=True)
  66. raise
  67. self._set_dailymotion_cookie('access_token' if username else 'client_token', token)
  68. self._HEADERS['Authorization'] = 'Bearer ' + token
  69. resp = self._download_json(
  70. 'https://graphql.api.dailymotion.com/', xid, note, data=json.dumps({
  71. 'query': '''{
  72. %s(xid: "%s"%s) {
  73. %s
  74. }
  75. }''' % (object_type, xid, ', ' + filter_extra if filter_extra else '', object_fields),
  76. }).encode(), headers=self._HEADERS)
  77. obj = resp['data'][object_type]
  78. if not obj:
  79. raise ExtractorError(resp['errors'][0]['message'], expected=True)
  80. return obj
  81. class DailymotionIE(DailymotionBaseInfoExtractor):
  82. _VALID_URL = r'''(?ix)
  83. https?://
  84. (?:
  85. (?:(?:www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|\#)/)?video|swf)|
  86. (?:www\.)?lequipe\.fr/video
  87. )
  88. /(?P<id>[^/?_]+)(?:.+?\bplaylist=(?P<playlist_id>x[0-9a-z]+))?
  89. '''
  90. IE_NAME = 'dailymotion'
  91. _TESTS = [{
  92. 'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news',
  93. 'md5': '074b95bdee76b9e3654137aee9c79dfe',
  94. 'info_dict': {
  95. 'id': 'x5kesuj',
  96. 'ext': 'mp4',
  97. 'title': 'Office Christmas Party Review – Jason Bateman, Olivia Munn, T.J. Miller',
  98. 'description': 'Office Christmas Party Review - Jason Bateman, Olivia Munn, T.J. Miller',
  99. 'duration': 187,
  100. 'timestamp': 1493651285,
  101. 'upload_date': '20170501',
  102. 'uploader': 'Deadline',
  103. 'uploader_id': 'x1xm8ri',
  104. 'age_limit': 0,
  105. },
  106. }, {
  107. 'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
  108. 'md5': '2137c41a8e78554bb09225b8eb322406',
  109. 'info_dict': {
  110. 'id': 'x2iuewm',
  111. 'ext': 'mp4',
  112. 'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
  113. 'description': 'Several come bundled with the Steam Controller.',
  114. 'thumbnail': r're:^https?:.*\.(?:jpg|png)$',
  115. 'duration': 74,
  116. 'timestamp': 1425657362,
  117. 'upload_date': '20150306',
  118. 'uploader': 'IGN',
  119. 'uploader_id': 'xijv66',
  120. 'age_limit': 0,
  121. 'view_count': int,
  122. },
  123. 'skip': 'video gone',
  124. }, {
  125. # Vevo video
  126. 'url': 'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi',
  127. 'info_dict': {
  128. 'title': 'Roar (Official)',
  129. 'id': 'USUV71301934',
  130. 'ext': 'mp4',
  131. 'uploader': 'Katy Perry',
  132. 'upload_date': '20130905',
  133. },
  134. 'params': {
  135. 'skip_download': True,
  136. },
  137. 'skip': 'VEVO is only available in some countries',
  138. }, {
  139. # age-restricted video
  140. 'url': 'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
  141. 'md5': '0d667a7b9cebecc3c89ee93099c4159d',
  142. 'info_dict': {
  143. 'id': 'xyh2zz',
  144. 'ext': 'mp4',
  145. 'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
  146. 'uploader': 'HotWaves1012',
  147. 'age_limit': 18,
  148. },
  149. 'skip': 'video gone',
  150. }, {
  151. # geo-restricted, player v5
  152. 'url': 'http://www.dailymotion.com/video/xhza0o',
  153. 'only_matching': True,
  154. }, {
  155. # with subtitles
  156. 'url': 'http://www.dailymotion.com/video/x20su5f_the-power-of-nightmares-1-the-rise-of-the-politics-of-fear-bbc-2004_news',
  157. 'only_matching': True,
  158. }, {
  159. 'url': 'http://www.dailymotion.com/swf/video/x3n92nf',
  160. 'only_matching': True,
  161. }, {
  162. 'url': 'http://www.dailymotion.com/swf/x3ss1m_funny-magic-trick-barry-and-stuart_fun',
  163. 'only_matching': True,
  164. }, {
  165. 'url': 'https://www.lequipe.fr/video/x791mem',
  166. 'only_matching': True,
  167. }, {
  168. 'url': 'https://www.lequipe.fr/video/k7MtHciueyTcrFtFKA2',
  169. 'only_matching': True,
  170. }, {
  171. 'url': 'https://www.dailymotion.com/video/x3z49k?playlist=xv4bw',
  172. 'only_matching': True,
  173. }]
  174. _GEO_BYPASS = False
  175. _COMMON_MEDIA_FIELDS = '''description
  176. geoblockedCountries {
  177. allowed
  178. }
  179. xid'''
  180. @staticmethod
  181. def _extract_urls(webpage):
  182. urls = []
  183. # Look for embedded Dailymotion player
  184. # https://developer.dailymotion.com/player#player-parameters
  185. for mobj in re.finditer(
  186. r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage):
  187. urls.append(unescapeHTML(mobj.group('url')))
  188. for mobj in re.finditer(
  189. r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage):
  190. urls.append('https://www.dailymotion.com/embed/video/' + mobj.group('id'))
  191. return urls
  192. def _real_extract(self, url):
  193. video_id, playlist_id = re.match(self._VALID_URL, url).groups()
  194. if playlist_id:
  195. if not self._downloader.params.get('noplaylist'):
  196. self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id)
  197. return self.url_result(
  198. 'http://www.dailymotion.com/playlist/' + playlist_id,
  199. 'DailymotionPlaylist', playlist_id)
  200. self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
  201. password = self._downloader.params.get('videopassword')
  202. media = self._call_api(
  203. 'media', video_id, '''... on Video {
  204. %s
  205. stats {
  206. likes {
  207. total
  208. }
  209. views {
  210. total
  211. }
  212. }
  213. }
  214. ... on Live {
  215. %s
  216. audienceCount
  217. isOnAir
  218. }''' % (self._COMMON_MEDIA_FIELDS, self._COMMON_MEDIA_FIELDS), 'Downloading media JSON metadata',
  219. 'password: "%s"' % self._downloader.params.get('videopassword') if password else None)
  220. xid = media['xid']
  221. metadata = self._download_json(
  222. 'https://www.dailymotion.com/player/metadata/video/' + xid,
  223. xid, 'Downloading metadata JSON',
  224. query={'app': 'com.dailymotion.neon'})
  225. error = metadata.get('error')
  226. if error:
  227. title = error.get('title') or error['raw_message']
  228. # See https://developer.dailymotion.com/api#access-error
  229. if error.get('code') == 'DM007':
  230. allowed_countries = try_get(media, lambda x: x['geoblockedCountries']['allowed'], list)
  231. self.raise_geo_restricted(msg=title, countries=allowed_countries)
  232. raise ExtractorError(
  233. '%s said: %s' % (self.IE_NAME, title), expected=True)
  234. title = metadata['title']
  235. is_live = media.get('isOnAir')
  236. formats = []
  237. for quality, media_list in metadata['qualities'].items():
  238. for m in media_list:
  239. media_url = m.get('url')
  240. media_type = m.get('type')
  241. if not media_url or media_type == 'application/vnd.lumberjack.manifest':
  242. continue
  243. if media_type == 'application/x-mpegURL':
  244. formats.extend(self._extract_m3u8_formats(
  245. media_url, video_id, 'mp4',
  246. 'm3u8' if is_live else 'm3u8_native',
  247. m3u8_id='hls', fatal=False))
  248. else:
  249. f = {
  250. 'url': media_url,
  251. 'format_id': 'http-' + quality,
  252. }
  253. m = re.search(r'/H264-(\d+)x(\d+)(?:-(60)/)?', media_url)
  254. if m:
  255. width, height, fps = map(int_or_none, m.groups())
  256. f.update({
  257. 'fps': fps,
  258. 'height': height,
  259. 'width': width,
  260. })
  261. formats.append(f)
  262. for f in formats:
  263. f['url'] = f['url'].split('#')[0]
  264. if not f.get('fps') and f['format_id'].endswith('@60'):
  265. f['fps'] = 60
  266. self._sort_formats(formats)
  267. subtitles = {}
  268. subtitles_data = try_get(metadata, lambda x: x['subtitles']['data'], dict) or {}
  269. for subtitle_lang, subtitle in subtitles_data.items():
  270. subtitles[subtitle_lang] = [{
  271. 'url': subtitle_url,
  272. } for subtitle_url in subtitle.get('urls', [])]
  273. thumbnails = []
  274. for height, poster_url in metadata.get('posters', {}).items():
  275. thumbnails.append({
  276. 'height': int_or_none(height),
  277. 'id': height,
  278. 'url': poster_url,
  279. })
  280. owner = metadata.get('owner') or {}
  281. stats = media.get('stats') or {}
  282. get_count = lambda x: int_or_none(try_get(stats, lambda y: y[x + 's']['total']))
  283. return {
  284. 'id': video_id,
  285. 'title': self._live_title(title) if is_live else title,
  286. 'description': clean_html(media.get('description')),
  287. 'thumbnails': thumbnails,
  288. 'duration': int_or_none(metadata.get('duration')) or None,
  289. 'timestamp': int_or_none(metadata.get('created_time')),
  290. 'uploader': owner.get('screenname'),
  291. 'uploader_id': owner.get('id') or metadata.get('screenname'),
  292. 'age_limit': 18 if metadata.get('explicit') else 0,
  293. 'tags': metadata.get('tags'),
  294. 'view_count': get_count('view') or int_or_none(media.get('audienceCount')),
  295. 'like_count': get_count('like'),
  296. 'formats': formats,
  297. 'subtitles': subtitles,
  298. 'is_live': is_live,
  299. }
  300. class DailymotionPlaylistBaseIE(DailymotionBaseInfoExtractor):
  301. _PAGE_SIZE = 100
  302. def _fetch_page(self, playlist_id, page):
  303. page += 1
  304. videos = self._call_api(
  305. self._OBJECT_TYPE, playlist_id,
  306. '''videos(allowExplicit: %s, first: %d, page: %d) {
  307. edges {
  308. node {
  309. xid
  310. url
  311. }
  312. }
  313. }''' % ('false' if self._FAMILY_FILTER else 'true', self._PAGE_SIZE, page),
  314. 'Downloading page %d' % page)['videos']
  315. for edge in videos['edges']:
  316. node = edge['node']
  317. yield self.url_result(
  318. node['url'], DailymotionIE.ie_key(), node['xid'])
  319. def _real_extract(self, url):
  320. playlist_id = self._match_id(url)
  321. entries = OnDemandPagedList(functools.partial(
  322. self._fetch_page, playlist_id), self._PAGE_SIZE)
  323. return self.playlist_result(
  324. entries, playlist_id)
  325. class DailymotionPlaylistIE(DailymotionPlaylistBaseIE):
  326. IE_NAME = 'dailymotion:playlist'
  327. _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>x[0-9a-z]+)'
  328. _TESTS = [{
  329. 'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
  330. 'info_dict': {
  331. 'id': 'xv4bw',
  332. },
  333. 'playlist_mincount': 20,
  334. }]
  335. _OBJECT_TYPE = 'collection'
  336. class DailymotionUserIE(DailymotionPlaylistBaseIE):
  337. IE_NAME = 'dailymotion:user'
  338. _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<id>[^/]+)'
  339. _TESTS = [{
  340. 'url': 'https://www.dailymotion.com/user/nqtv',
  341. 'info_dict': {
  342. 'id': 'nqtv',
  343. },
  344. 'playlist_mincount': 152,
  345. }, {
  346. 'url': 'http://www.dailymotion.com/user/UnderProject',
  347. 'info_dict': {
  348. 'id': 'UnderProject',
  349. },
  350. 'playlist_mincount': 1000,
  351. 'skip': 'Takes too long time',
  352. }, {
  353. 'url': 'https://www.dailymotion.com/user/nqtv',
  354. 'info_dict': {
  355. 'id': 'nqtv',
  356. },
  357. 'playlist_mincount': 148,
  358. 'params': {
  359. 'age_limit': 0,
  360. },
  361. }]
  362. _OBJECT_TYPE = 'channel'