You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

339 lines
12 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. import time
  5. import hashlib
  6. import json
  7. import random
  8. from .adobepass import AdobePassIE
  9. from .youtube import YoutubeIE
  10. from .common import InfoExtractor
  11. from ..compat import (
  12. compat_HTTPError,
  13. compat_str,
  14. )
  15. from ..utils import (
  16. ExtractorError,
  17. int_or_none,
  18. parse_age_limit,
  19. str_or_none,
  20. try_get,
  21. )
  22. class ViceIE(AdobePassIE):
  23. IE_NAME = 'vice'
  24. _VALID_URL = r'https?://(?:(?:video|vms)\.vice|(?:www\.)?viceland)\.com/(?P<locale>[^/]+)/(?:video/[^/]+|embed)/(?P<id>[\da-f]+)'
  25. _TESTS = [{
  26. 'url': 'https://video.vice.com/en_us/video/pet-cremator/58c69e38a55424f1227dc3f7',
  27. 'info_dict': {
  28. 'id': '5e647f0125e145c9aef2069412c0cbde',
  29. 'ext': 'mp4',
  30. 'title': '10 Questions You Always Wanted To Ask: Pet Cremator',
  31. 'description': 'md5:fe856caacf61fe0e74fab15ce2b07ca5',
  32. 'uploader': 'vice',
  33. 'uploader_id': '57a204088cb727dec794c67b',
  34. 'timestamp': 1489664942,
  35. 'upload_date': '20170316',
  36. 'age_limit': 14,
  37. },
  38. 'params': {
  39. # m3u8 download
  40. 'skip_download': True,
  41. },
  42. 'add_ie': ['UplynkPreplay'],
  43. }, {
  44. # geo restricted to US
  45. 'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56',
  46. 'info_dict': {
  47. 'id': '930c0ad1f47141cc955087eecaddb0e2',
  48. 'ext': 'mp4',
  49. 'uploader': 'waypoint',
  50. 'title': 'The Signal From Tölva',
  51. 'description': 'md5:3927e3c79f9e8094606a2b3c5b5e55d5',
  52. 'uploader_id': '57f7d621e05ca860fa9ccaf9',
  53. 'timestamp': 1477941983,
  54. 'upload_date': '20161031',
  55. },
  56. 'params': {
  57. # m3u8 download
  58. 'skip_download': True,
  59. },
  60. 'add_ie': ['UplynkPreplay'],
  61. }, {
  62. 'url': 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f',
  63. 'info_dict': {
  64. 'id': '581b12b60a0e1f4c0fb6ea2f',
  65. 'ext': 'mp4',
  66. 'title': 'ULFs - Wien berüchtigste Grafitti Crew - Part 1',
  67. 'description': '<p>Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.</p>',
  68. 'uploader': 'VICE',
  69. 'uploader_id': '57a204088cb727dec794c67b',
  70. 'timestamp': 1485368119,
  71. 'upload_date': '20170125',
  72. 'age_limit': 14,
  73. },
  74. 'params': {
  75. # AES-encrypted m3u8
  76. 'skip_download': True,
  77. 'proxy': '127.0.0.1:8118',
  78. },
  79. 'add_ie': ['UplynkPreplay'],
  80. }, {
  81. 'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4',
  82. 'only_matching': True,
  83. }, {
  84. 'url': 'https://video.vice.com/en_us/embed/57f41d3556a0a80f54726060',
  85. 'only_matching': True,
  86. }, {
  87. 'url': 'https://vms.vice.com/en_us/video/preplay/58c69e38a55424f1227dc3f7',
  88. 'only_matching': True,
  89. }, {
  90. 'url': 'https://www.viceland.com/en_us/video/thursday-march-1-2018/5a8f2d7ff1cdb332dd446ec1',
  91. 'only_matching': True,
  92. }]
  93. _PREPLAY_HOST = 'vms.vice'
  94. @staticmethod
  95. def _extract_urls(webpage):
  96. return re.findall(
  97. r'<iframe\b[^>]+\bsrc=["\']((?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]+)',
  98. webpage)
  99. @staticmethod
  100. def _extract_url(webpage):
  101. urls = ViceIE._extract_urls(webpage)
  102. return urls[0] if urls else None
  103. def _real_extract(self, url):
  104. locale, video_id = re.match(self._VALID_URL, url).groups()
  105. webpage = self._download_webpage(
  106. 'https://video.vice.com/%s/embed/%s' % (locale, video_id),
  107. video_id)
  108. video = self._parse_json(
  109. self._search_regex(
  110. r'PREFETCH_DATA\s*=\s*({.+?})\s*;\s*\n', webpage,
  111. 'app state'), video_id)['video']
  112. video_id = video.get('vms_id') or video.get('id') or video_id
  113. title = video['title']
  114. is_locked = video.get('locked')
  115. rating = video.get('rating')
  116. thumbnail = video.get('thumbnail_url')
  117. duration = int_or_none(video.get('duration'))
  118. series = try_get(
  119. video, lambda x: x['episode']['season']['show']['title'],
  120. compat_str)
  121. episode_number = try_get(
  122. video, lambda x: x['episode']['episode_number'])
  123. season_number = try_get(
  124. video, lambda x: x['episode']['season']['season_number'])
  125. uploader = None
  126. query = {}
  127. if is_locked:
  128. resource = self._get_mvpd_resource(
  129. 'VICELAND', title, video_id, rating)
  130. query['tvetoken'] = self._extract_mvpd_auth(
  131. url, video_id, 'VICELAND', resource)
  132. # signature generation algorithm is reverse engineered from signatureGenerator in
  133. # webpack:///../shared/~/vice-player/dist/js/vice-player.js in
  134. # https://www.viceland.com/assets/common/js/web.vendor.bundle.js
  135. # new JS is located here https://vice-web-statics-cdn.vice.com/vice-player/player-embed.js
  136. exp = int(time.time()) + 1440
  137. query.update({
  138. 'exp': exp,
  139. 'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
  140. '_ad_blocked': None,
  141. '_ad_unit': '',
  142. '_debug': '',
  143. 'platform': 'desktop',
  144. 'rn': random.randint(10000, 100000),
  145. 'fbprebidtoken': '',
  146. })
  147. try:
  148. host = 'www.viceland' if is_locked else self._PREPLAY_HOST
  149. preplay = self._download_json(
  150. 'https://%s.com/%s/video/preplay/%s' % (host, locale, video_id),
  151. video_id, query=query)
  152. except ExtractorError as e:
  153. if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401):
  154. error = json.loads(e.cause.read().decode())
  155. error_message = error.get('error_description') or error['details']
  156. raise ExtractorError('%s said: %s' % (
  157. self.IE_NAME, error_message), expected=True)
  158. raise
  159. video_data = preplay['video']
  160. base = video_data['base']
  161. uplynk_preplay_url = preplay['preplayURL']
  162. episode = video_data.get('episode', {})
  163. channel = video_data.get('channel', {})
  164. subtitles = {}
  165. cc_url = preplay.get('ccURL')
  166. if cc_url:
  167. subtitles['en'] = [{
  168. 'url': cc_url,
  169. }]
  170. return {
  171. '_type': 'url_transparent',
  172. 'url': uplynk_preplay_url,
  173. 'id': video_id,
  174. 'title': title,
  175. 'description': base.get('body') or base.get('display_body'),
  176. 'thumbnail': thumbnail,
  177. 'duration': int_or_none(video_data.get('video_duration')) or duration,
  178. 'timestamp': int_or_none(video_data.get('created_at'), 1000),
  179. 'age_limit': parse_age_limit(video_data.get('video_rating')),
  180. 'series': video_data.get('show_title') or series,
  181. 'episode_number': int_or_none(episode.get('episode_number') or episode_number),
  182. 'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
  183. 'season_number': int_or_none(season_number),
  184. 'season_id': str_or_none(episode.get('season_id')),
  185. 'uploader': channel.get('base', {}).get('title') or channel.get('name') or uploader,
  186. 'uploader_id': str_or_none(channel.get('id')),
  187. 'subtitles': subtitles,
  188. 'ie_key': 'UplynkPreplay',
  189. }
  190. class ViceShowIE(InfoExtractor):
  191. IE_NAME = 'vice:show'
  192. _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?show/(?P<id>[^/?#&]+)'
  193. _TEST = {
  194. 'url': 'https://munchies.vice.com/en/show/fuck-thats-delicious-2',
  195. 'info_dict': {
  196. 'id': 'fuck-thats-delicious-2',
  197. 'title': "Fuck, That's Delicious",
  198. 'description': 'Follow the culinary adventures of rapper Action Bronson during his ongoing world tour.',
  199. },
  200. 'playlist_count': 17,
  201. }
  202. def _real_extract(self, url):
  203. show_id = self._match_id(url)
  204. webpage = self._download_webpage(url, show_id)
  205. entries = [
  206. self.url_result(video_url, ViceIE.ie_key())
  207. for video_url, _ in re.findall(
  208. r'<h2[^>]+class="article-title"[^>]+data-id="\d+"[^>]*>\s*<a[^>]+href="(%s.*?)"'
  209. % ViceIE._VALID_URL, webpage)]
  210. title = self._search_regex(
  211. r'<title>(.+?)</title>', webpage, 'title', default=None)
  212. if title:
  213. title = re.sub(r'(.+)\s*\|\s*.+$', r'\1', title).strip()
  214. description = self._html_search_meta(
  215. 'description', webpage, 'description')
  216. return self.playlist_result(entries, show_id, title, description)
  217. class ViceArticleIE(InfoExtractor):
  218. IE_NAME = 'vice:article'
  219. _VALID_URL = r'https://www\.vice\.com/[^/]+/article/(?P<id>[^?#]+)'
  220. _TESTS = [{
  221. 'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah',
  222. 'info_dict': {
  223. 'id': '41eae2a47b174a1398357cec55f1f6fc',
  224. 'ext': 'mp4',
  225. 'title': 'Mormon War on Porn ',
  226. 'description': 'md5:6394a8398506581d0346b9ab89093fef',
  227. 'uploader': 'vice',
  228. 'uploader_id': '57a204088cb727dec794c67b',
  229. 'timestamp': 1491883129,
  230. 'upload_date': '20170411',
  231. 'age_limit': 17,
  232. },
  233. 'params': {
  234. # AES-encrypted m3u8
  235. 'skip_download': True,
  236. },
  237. 'add_ie': ['UplynkPreplay'],
  238. }, {
  239. 'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car',
  240. 'md5': '7fe8ebc4fa3323efafc127b82bd821d9',
  241. 'info_dict': {
  242. 'id': '3jstaBeXgAs',
  243. 'ext': 'mp4',
  244. 'title': 'How to Hack a Car: Phreaked Out (Episode 2)',
  245. 'description': 'md5:ee95453f7ff495db8efe14ae8bf56f30',
  246. 'uploader': 'Motherboard',
  247. 'uploader_id': 'MotherboardTV',
  248. 'upload_date': '20140529',
  249. },
  250. 'add_ie': ['Youtube'],
  251. }, {
  252. 'url': 'https://www.vice.com/en_us/article/znm9dx/karley-sciortino-slutever-reloaded',
  253. 'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
  254. 'info_dict': {
  255. 'id': 'e2ed435eb67e43efb66e6ef9a6930a88',
  256. 'ext': 'mp4',
  257. 'title': "Making The World's First Male Sex Doll",
  258. 'description': 'md5:916078ef0e032d76343116208b6cc2c4',
  259. 'uploader': 'vice',
  260. 'uploader_id': '57a204088cb727dec794c67b',
  261. 'timestamp': 1476919911,
  262. 'upload_date': '20161019',
  263. 'age_limit': 17,
  264. },
  265. 'params': {
  266. 'skip_download': True,
  267. },
  268. 'add_ie': [ViceIE.ie_key()],
  269. }, {
  270. 'url': 'https://www.vice.com/en_us/article/cowboy-capitalists-part-1',
  271. 'only_matching': True,
  272. }, {
  273. 'url': 'https://www.vice.com/ru/article/big-night-out-ibiza-clive-martin-229',
  274. 'only_matching': True,
  275. }]
  276. def _real_extract(self, url):
  277. display_id = self._match_id(url)
  278. webpage = self._download_webpage(url, display_id)
  279. prefetch_data = self._parse_json(self._search_regex(
  280. r'__APP_STATE\s*=\s*({.+?})(?:\s*\|\|\s*{}\s*)?;\s*\n',
  281. webpage, 'app state'), display_id)['pageData']
  282. body = prefetch_data['body']
  283. def _url_res(video_url, ie_key):
  284. return {
  285. '_type': 'url_transparent',
  286. 'url': video_url,
  287. 'display_id': display_id,
  288. 'ie_key': ie_key,
  289. }
  290. vice_url = ViceIE._extract_url(webpage)
  291. if vice_url:
  292. return _url_res(vice_url, ViceIE.ie_key())
  293. embed_code = self._search_regex(
  294. r'embedCode=([^&\'"]+)', body,
  295. 'ooyala embed code', default=None)
  296. if embed_code:
  297. return _url_res('ooyala:%s' % embed_code, 'Ooyala')
  298. youtube_url = YoutubeIE._extract_url(body)
  299. if youtube_url:
  300. return _url_res(youtube_url, YoutubeIE.ie_key())
  301. video_url = self._html_search_regex(
  302. r'data-video-url="([^"]+)"',
  303. prefetch_data['embed_code'], 'video URL')
  304. return _url_res(video_url, ViceIE.ie_key())