You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

273 lines
10 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. import time
  5. import hashlib
  6. import json
  7. from .adobepass import AdobePassIE
  8. from .youtube import YoutubeIE
  9. from .common import InfoExtractor
  10. from ..compat import compat_HTTPError
  11. from ..utils import (
  12. int_or_none,
  13. parse_age_limit,
  14. str_or_none,
  15. parse_duration,
  16. ExtractorError,
  17. extract_attributes,
  18. )
  19. class ViceBaseIE(AdobePassIE):
  20. def _extract_preplay_video(self, url, locale, webpage):
  21. watch_hub_data = extract_attributes(self._search_regex(
  22. r'(?s)(<watch-hub\s*.+?</watch-hub>)', webpage, 'watch hub'))
  23. video_id = watch_hub_data['vms-id']
  24. title = watch_hub_data['video-title']
  25. query = {}
  26. is_locked = watch_hub_data.get('video-locked') == '1'
  27. if is_locked:
  28. resource = self._get_mvpd_resource(
  29. 'VICELAND', title, video_id,
  30. watch_hub_data.get('video-rating'))
  31. query['tvetoken'] = self._extract_mvpd_auth(
  32. url, video_id, 'VICELAND', resource)
  33. # signature generation algorithm is reverse engineered from signatureGenerator in
  34. # webpack:///../shared/~/vice-player/dist/js/vice-player.js in
  35. # https://www.viceland.com/assets/common/js/web.vendor.bundle.js
  36. exp = int(time.time()) + 14400
  37. query.update({
  38. 'exp': exp,
  39. 'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
  40. })
  41. try:
  42. host = 'www.viceland' if is_locked else self._PREPLAY_HOST
  43. preplay = self._download_json(
  44. 'https://%s.com/%s/preplay/%s' % (host, locale, video_id),
  45. video_id, query=query)
  46. except ExtractorError as e:
  47. if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
  48. error = json.loads(e.cause.read().decode())
  49. raise ExtractorError('%s said: %s' % (
  50. self.IE_NAME, error['details']), expected=True)
  51. raise
  52. video_data = preplay['video']
  53. base = video_data['base']
  54. uplynk_preplay_url = preplay['preplayURL']
  55. episode = video_data.get('episode', {})
  56. channel = video_data.get('channel', {})
  57. subtitles = {}
  58. cc_url = preplay.get('ccURL')
  59. if cc_url:
  60. subtitles['en'] = [{
  61. 'url': cc_url,
  62. }]
  63. return {
  64. '_type': 'url_transparent',
  65. 'url': uplynk_preplay_url,
  66. 'id': video_id,
  67. 'title': title,
  68. 'description': base.get('body') or base.get('display_body'),
  69. 'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'),
  70. 'duration': int_or_none(video_data.get('video_duration')) or parse_duration(watch_hub_data.get('video-duration')),
  71. 'timestamp': int_or_none(video_data.get('created_at'), 1000),
  72. 'age_limit': parse_age_limit(video_data.get('video_rating')),
  73. 'series': video_data.get('show_title') or watch_hub_data.get('show-title'),
  74. 'episode_number': int_or_none(episode.get('episode_number') or watch_hub_data.get('episode')),
  75. 'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
  76. 'season_number': int_or_none(watch_hub_data.get('season')),
  77. 'season_id': str_or_none(episode.get('season_id')),
  78. 'uploader': channel.get('base', {}).get('title') or watch_hub_data.get('channel-title'),
  79. 'uploader_id': str_or_none(channel.get('id')),
  80. 'subtitles': subtitles,
  81. 'ie_key': 'UplynkPreplay',
  82. }
  83. class ViceIE(ViceBaseIE):
  84. IE_NAME = 'vice'
  85. _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:(?P<locale>[^/]+)/)?videos?/(?P<id>[^/?#&]+)'
  86. _TESTS = [{
  87. 'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
  88. 'md5': '7d3ae2f9ba5f196cdd9f9efd43657ac2',
  89. 'info_dict': {
  90. 'id': 'N2bzkydjraWDGwnt8jAttCF6Y0PDv4Zj',
  91. 'ext': 'flv',
  92. 'title': 'Monkey Labs of Holland',
  93. 'description': 'md5:92b3c7dcbfe477f772dd4afa496c9149',
  94. },
  95. 'add_ie': ['Ooyala'],
  96. }, {
  97. 'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56',
  98. 'info_dict': {
  99. 'id': '5816510690b70e6c5fd39a56',
  100. 'ext': 'mp4',
  101. 'uploader': 'Waypoint',
  102. 'title': 'The Signal From Tölva',
  103. 'description': 'md5:3927e3c79f9e8094606a2b3c5b5e55d5',
  104. 'uploader_id': '57f7d621e05ca860fa9ccaf9',
  105. 'timestamp': 1477941983,
  106. 'upload_date': '20161031',
  107. },
  108. 'params': {
  109. # m3u8 download
  110. 'skip_download': True,
  111. },
  112. 'add_ie': ['UplynkPreplay'],
  113. }, {
  114. 'url': 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f',
  115. 'info_dict': {
  116. 'id': '581b12b60a0e1f4c0fb6ea2f',
  117. 'ext': 'mp4',
  118. 'title': 'ULFs - Wien berüchtigste Grafitti Crew - Part 1',
  119. 'description': '<p>Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.</p>',
  120. 'uploader': 'VICE',
  121. 'uploader_id': '57a204088cb727dec794c67b',
  122. 'timestamp': 1485368119,
  123. 'upload_date': '20170125',
  124. 'age_limit': 14,
  125. },
  126. 'params': {
  127. # AES-encrypted m3u8
  128. 'skip_download': True,
  129. },
  130. 'add_ie': ['UplynkPreplay'],
  131. }, {
  132. 'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4',
  133. 'only_matching': True,
  134. }]
  135. _PREPLAY_HOST = 'video.vice'
  136. def _real_extract(self, url):
  137. locale, video_id = re.match(self._VALID_URL, url).groups()
  138. webpage, urlh = self._download_webpage_handle(url, video_id)
  139. embed_code = self._search_regex(
  140. r'embedCode=([^&\'"]+)', webpage,
  141. 'ooyala embed code', default=None)
  142. if embed_code:
  143. return self.url_result('ooyala:%s' % embed_code, 'Ooyala')
  144. youtube_id = self._search_regex(
  145. r'data-youtube-id="([^"]+)"', webpage, 'youtube id', default=None)
  146. if youtube_id:
  147. return self.url_result(youtube_id, 'Youtube')
  148. return self._extract_preplay_video(urlh.geturl(), locale, webpage)
  149. class ViceShowIE(InfoExtractor):
  150. IE_NAME = 'vice:show'
  151. _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?show/(?P<id>[^/?#&]+)'
  152. _TEST = {
  153. 'url': 'https://munchies.vice.com/en/show/fuck-thats-delicious-2',
  154. 'info_dict': {
  155. 'id': 'fuck-thats-delicious-2',
  156. 'title': "Fuck, That's Delicious",
  157. 'description': 'Follow the culinary adventures of rapper Action Bronson during his ongoing world tour.',
  158. },
  159. 'playlist_count': 17,
  160. }
  161. def _real_extract(self, url):
  162. show_id = self._match_id(url)
  163. webpage = self._download_webpage(url, show_id)
  164. entries = [
  165. self.url_result(video_url, ViceIE.ie_key())
  166. for video_url, _ in re.findall(
  167. r'<h2[^>]+class="article-title"[^>]+data-id="\d+"[^>]*>\s*<a[^>]+href="(%s.*?)"'
  168. % ViceIE._VALID_URL, webpage)]
  169. title = self._search_regex(
  170. r'<title>(.+?)</title>', webpage, 'title', default=None)
  171. if title:
  172. title = re.sub(r'(.+)\s*\|\s*.+$', r'\1', title).strip()
  173. description = self._html_search_meta(
  174. 'description', webpage, 'description')
  175. return self.playlist_result(entries, show_id, title, description)
  176. class ViceArticleIE(InfoExtractor):
  177. IE_NAME = 'vice:article'
  178. _VALID_URL = r'https://www.vice.com/[^/]+/article/(?P<id>[^?#]+)'
  179. _TESTS = [{
  180. 'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah',
  181. 'info_dict': {
  182. 'id': '58dc0a3dee202d2a0ccfcbd8',
  183. 'ext': 'mp4',
  184. 'title': 'Mormon War on Porn ',
  185. 'description': 'md5:ad396a2481e7f8afb5ed486878421090',
  186. 'uploader': 'VICE',
  187. 'uploader_id': '57a204088cb727dec794c693',
  188. 'timestamp': 1489160690,
  189. 'upload_date': '20170310',
  190. },
  191. 'params': {
  192. # AES-encrypted m3u8
  193. 'skip_download': True,
  194. },
  195. 'add_ie': ['UplynkPreplay'],
  196. }, {
  197. 'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car',
  198. 'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
  199. 'info_dict': {
  200. 'id': '3jstaBeXgAs',
  201. 'ext': 'mp4',
  202. 'title': 'How to Hack a Car: Phreaked Out (Episode 2)',
  203. 'description': 'md5:ee95453f7ff495db8efe14ae8bf56f30',
  204. 'uploader_id': 'MotherboardTV',
  205. 'uploader': 'Motherboard',
  206. 'upload_date': '20140529',
  207. },
  208. 'add_ie': ['Youtube'],
  209. }, {
  210. 'url': 'https://www.vice.com/en_us/article/cowboy-capitalists-part-1',
  211. 'only_matching': True,
  212. }, {
  213. 'url': 'https://www.vice.com/ru/article/big-night-out-ibiza-clive-martin-229',
  214. 'only_matching': True,
  215. }]
  216. def _real_extract(self, url):
  217. display_id = self._match_id(url)
  218. webpage = self._download_webpage(url, display_id)
  219. prefetch_data = self._parse_json(self._search_regex(
  220. r'window\.__PREFETCH_DATA\s*=\s*({.*});',
  221. webpage, 'prefetch data'), display_id)
  222. body = prefetch_data['body']
  223. def _url_res(video_url, ie_key):
  224. return {
  225. '_type': 'url_transparent',
  226. 'url': video_url,
  227. 'display_id': display_id,
  228. 'ie_key': ie_key,
  229. }
  230. embed_code = self._search_regex(
  231. r'embedCode=([^&\'"]+)', body,
  232. 'ooyala embed code', default=None)
  233. if embed_code:
  234. return _url_res('ooyala:%s' % embed_code, 'Ooyala')
  235. youtube_url = YoutubeIE._extract_url(body)
  236. if youtube_url:
  237. return _url_res(youtube_url, YoutubeIE.ie_key())
  238. video_url = self._html_search_regex(
  239. r'data-video-url="([^"]+)"',
  240. prefetch_data['embed_code'], 'video URL')
  241. return _url_res(video_url, ViceIE.ie_key())