You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

337 lines
12 KiB

  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. import time
  5. import hashlib
  6. import json
  7. import random
  8. from .adobepass import AdobePassIE
  9. from .youtube import YoutubeIE
  10. from .common import InfoExtractor
  11. from ..compat import (
  12. compat_HTTPError,
  13. compat_str,
  14. )
  15. from ..utils import (
  16. ExtractorError,
  17. int_or_none,
  18. parse_age_limit,
  19. str_or_none,
  20. try_get,
  21. )
  class ViceIE(AdobePassIE):
      """Extractor for single videos on video.vice.com, vms.vice.com and viceland.com.

      Basic metadata is read from the embed page; a signed request to the
      ``preplay`` endpoint then yields the Uplynk preplay URL, to which the
      media extraction is delegated through a ``url_transparent`` result.
      """

      IE_NAME = 'vice'
      _VALID_URL = r'https?://(?:(?:video|vms)\.vice|(?:www\.)?viceland)\.com/(?P<locale>[^/]+)/(?:video/[^/]+|embed)/(?P<id>[\da-f]+)'
      _TESTS = [{
          'url': 'https://video.vice.com/en_us/video/pet-cremator/58c69e38a55424f1227dc3f7',
          'info_dict': {
              'id': '5e647f0125e145c9aef2069412c0cbde',
              'ext': 'mp4',
              'title': '10 Questions You Always Wanted To Ask: Pet Cremator',
              'description': 'md5:fe856caacf61fe0e74fab15ce2b07ca5',
              'uploader': 'vice',
              'uploader_id': '57a204088cb727dec794c67b',
              'timestamp': 1489664942,
              'upload_date': '20170316',
              'age_limit': 14,
          },
          'params': {
              # m3u8 download
              'skip_download': True,
          },
          'add_ie': ['UplynkPreplay'],
      }, {
          # geo restricted to US
          'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56',
          'info_dict': {
              'id': '930c0ad1f47141cc955087eecaddb0e2',
              'ext': 'mp4',
              'uploader': 'waypoint',
              'title': 'The Signal From Tölva',
              'description': 'md5:3927e3c79f9e8094606a2b3c5b5e55d5',
              'uploader_id': '57f7d621e05ca860fa9ccaf9',
              'timestamp': 1477941983,
              'upload_date': '20161031',
          },
          'params': {
              # m3u8 download
              'skip_download': True,
          },
          'add_ie': ['UplynkPreplay'],
      }, {
          'url': 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f',
          'info_dict': {
              'id': '581b12b60a0e1f4c0fb6ea2f',
              'ext': 'mp4',
              'title': 'ULFs - Wien berüchtigste Grafitti Crew - Part 1',
              'description': '<p>Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.</p>',
              'uploader': 'VICE',
              'uploader_id': '57a204088cb727dec794c67b',
              'timestamp': 1485368119,
              'upload_date': '20170125',
              'age_limit': 14,
          },
          'params': {
              # AES-encrypted m3u8
              'skip_download': True,
              'proxy': '127.0.0.1:8118',
          },
          'add_ie': ['UplynkPreplay'],
      }, {
          'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4',
          'only_matching': True,
      }, {
          'url': 'https://video.vice.com/en_us/embed/57f41d3556a0a80f54726060',
          'only_matching': True,
      }, {
          'url': 'https://vms.vice.com/en_us/video/preplay/58c69e38a55424f1227dc3f7',
          'only_matching': True,
      }, {
          'url': 'https://www.viceland.com/en_us/video/thursday-march-1-2018/5a8f2d7ff1cdb332dd446ec1',
          'only_matching': True,
      }]

      @staticmethod
      def _extract_urls(webpage):
          """Return all video.vice.com embed iframe URLs found in *webpage*."""
          return re.findall(
              r'<iframe\b[^>]+\bsrc=["\']((?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]+)',
              webpage)

      @staticmethod
      def _extract_url(webpage):
          """Return the first embed iframe URL found in *webpage*, or None."""
          urls = ViceIE._extract_urls(webpage)
          return urls[0] if urls else None

      @staticmethod
      def _extract_error_message(raw_body):
          """Return the error text from a preplay error response body, or None.

          None is returned when the body cannot be decoded, is not a JSON
          object, or carries neither 'error_description' nor 'details'.
          """
          try:
              error = json.loads(raw_body.decode())
          except ValueError:
              # Covers both undecodable bytes and malformed JSON
              return None
          if not isinstance(error, dict):
              return None
          return error.get('error_description') or error.get('details')

      def _real_extract(self, url):
          locale, video_id = re.match(self._VALID_URL, url).groups()

          # Metadata is always read from the embed page, whatever URL form
          # was given
          webpage = self._download_webpage(
              'https://video.vice.com/%s/embed/%s' % (locale, video_id),
              video_id)

          video = self._parse_json(
              self._search_regex(
                  r'PREFETCH_DATA\s*=\s*({.+?})\s*;\s*\n', webpage,
                  'app state'), video_id)['video']
          # Prefer the ids reported by the page over the one from the URL
          video_id = video.get('vms_id') or video.get('id') or video_id
          title = video['title']
          is_locked = video.get('locked')
          rating = video.get('rating')
          thumbnail = video.get('thumbnail_url')
          duration = int_or_none(video.get('duration'))
          series = try_get(
              video, lambda x: x['episode']['season']['show']['title'],
              compat_str)
          episode_number = try_get(
              video, lambda x: x['episode']['episode_number'])
          season_number = try_get(
              video, lambda x: x['episode']['season']['season_number'])

          query = {}

          if is_locked:
              # Locked videos need a TV provider token
              resource = self._get_mvpd_resource(
                  'VICELAND', title, video_id, rating)
              query['tvetoken'] = self._extract_mvpd_auth(
                  url, video_id, 'VICELAND', resource)

          # signature generation algorithm is reverse engineered from signatureGenerator in
          # webpack:///../shared/~/vice-player/dist/js/vice-player.js in
          # https://www.viceland.com/assets/common/js/web.vendor.bundle.js
          # new JS is located here https://vice-web-statics-cdn.vice.com/vice-player/player-embed.js
          exp = int(time.time()) + 1440

          query.update({
              'exp': exp,
              'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
              '_ad_blocked': None,
              '_ad_unit': '',
              '_debug': '',
              'platform': 'desktop',
              'rn': random.randint(10000, 100000),
              'fbprebidtoken': '',
          })

          try:
              preplay = self._download_json(
                  'https://vms.vice.com/%s/video/preplay/%s' % (locale, video_id),
                  video_id, query=query)
          except ExtractorError as e:
              if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401):
                  error_message = self._extract_error_message(e.cause.read())
                  if error_message:
                      raise ExtractorError('%s said: %s' % (
                          self.IE_NAME, error_message), expected=True)
              # No usable message in the response: surface the original error
              raise

          video_data = preplay['video']
          base = video_data['base']
          uplynk_preplay_url = preplay['preplayURL']
          # `or {}` rather than a .get() default: the default only applies
          # when the key is missing, not when its value is null
          episode = video_data.get('episode') or {}
          channel = video_data.get('channel') or {}
          channel_base = channel.get('base') or {}

          subtitles = {}
          cc_url = preplay.get('ccURL')
          if cc_url:
              subtitles['en'] = [{
                  'url': cc_url,
              }]

          return {
              '_type': 'url_transparent',
              'url': uplynk_preplay_url,
              'id': video_id,
              'title': title,
              'description': base.get('body') or base.get('display_body'),
              'thumbnail': thumbnail,
              'duration': int_or_none(video_data.get('video_duration')) or duration,
              'timestamp': int_or_none(video_data.get('created_at'), 1000),
              'age_limit': parse_age_limit(video_data.get('video_rating')),
              'series': video_data.get('show_title') or series,
              'episode_number': int_or_none(episode.get('episode_number') or episode_number),
              'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
              'season_number': int_or_none(season_number),
              'season_id': str_or_none(episode.get('season_id')),
              'uploader': channel_base.get('title') or channel.get('name') or None,
              'uploader_id': str_or_none(channel.get('id')),
              'subtitles': subtitles,
              'ie_key': 'UplynkPreplay',
          }
  class ViceShowIE(InfoExtractor):
      """Playlist extractor for VICE show pages.

      Collects the video links found in the show page's article titles and
      returns them as a playlist of ViceIE entries, with the page title and
      meta description as playlist metadata.
      """

      IE_NAME = 'vice:show'
      _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?show/(?P<id>[^/?#&]+)'

      _TEST = {
          'url': 'https://munchies.vice.com/en/show/fuck-thats-delicious-2',
          'info_dict': {
              'id': 'fuck-thats-delicious-2',
              'title': "Fuck, That's Delicious",
              'description': 'Follow the culinary adventures of rapper Action Bronson during his ongoing world tour.',
          },
          'playlist_count': 17,
      }

      def _real_extract(self, url):
          show_id = self._match_id(url)
          webpage = self._download_webpage(url, show_id)

          # ViceIE._VALID_URL brings its own capture groups, so the number of
          # groups in the combined pattern is not fixed here. Address the
          # link through a named group instead of unpacking findall() tuples.
          entries = [
              self.url_result(mobj.group('url'), ViceIE.ie_key())
              for mobj in re.finditer(
                  r'<h2[^>]+class="article-title"[^>]+data-id="\d+"[^>]*>\s*<a[^>]+href="(?P<url>%s.*?)"'
                  % ViceIE._VALID_URL, webpage)]

          title = self._search_regex(
              r'<title>(.+?)</title>', webpage, 'title', default=None)
          if title:
              # Drop the trailing " | <site name>" part of the page title
              title = re.sub(r'(.+)\s*\|\s*.+$', r'\1', title).strip()
          description = self._html_search_meta(
              'description', webpage, 'description')

          return self.playlist_result(entries, show_id, title, description)
  class ViceArticleIE(InfoExtractor):
      """Extractor for the video embedded in a www.vice.com article.

      The article is probed, in order, for a VICE embed iframe, an Ooyala
      embed code, a YouTube embed and finally a ``data-video-url`` attribute
      in the page data's embed code. The first hit is handed to the matching
      extractor as a ``url_transparent`` result.
      """

      IE_NAME = 'vice:article'
      _VALID_URL = r'https://www\.vice\.com/[^/]+/article/(?P<id>[^?#]+)'

      _TESTS = [{
          'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah',
          'info_dict': {
              'id': '41eae2a47b174a1398357cec55f1f6fc',
              'ext': 'mp4',
              'title': 'Mormon War on Porn ',
              'description': 'md5:6394a8398506581d0346b9ab89093fef',
              'uploader': 'vice',
              'uploader_id': '57a204088cb727dec794c67b',
              'timestamp': 1491883129,
              'upload_date': '20170411',
              'age_limit': 17,
          },
          'params': {
              # AES-encrypted m3u8
              'skip_download': True,
          },
          'add_ie': ['UplynkPreplay'],
      }, {
          'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car',
          'md5': '7fe8ebc4fa3323efafc127b82bd821d9',
          'info_dict': {
              'id': '3jstaBeXgAs',
              'ext': 'mp4',
              'title': 'How to Hack a Car: Phreaked Out (Episode 2)',
              'description': 'md5:ee95453f7ff495db8efe14ae8bf56f30',
              'uploader': 'Motherboard',
              'uploader_id': 'MotherboardTV',
              'upload_date': '20140529',
          },
          'add_ie': ['Youtube'],
      }, {
          'url': 'https://www.vice.com/en_us/article/znm9dx/karley-sciortino-slutever-reloaded',
          'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
          'info_dict': {
              'id': 'e2ed435eb67e43efb66e6ef9a6930a88',
              'ext': 'mp4',
              'title': "Making The World's First Male Sex Doll",
              'description': 'md5:916078ef0e032d76343116208b6cc2c4',
              'uploader': 'vice',
              'uploader_id': '57a204088cb727dec794c67b',
              'timestamp': 1476919911,
              'upload_date': '20161019',
              'age_limit': 17,
          },
          'params': {
              'skip_download': True,
          },
          'add_ie': [ViceIE.ie_key()],
      }, {
          'url': 'https://www.vice.com/en_us/article/cowboy-capitalists-part-1',
          'only_matching': True,
      }, {
          'url': 'https://www.vice.com/ru/article/big-night-out-ibiza-clive-martin-229',
          'only_matching': True,
      }]

      def _real_extract(self, url):
          display_id = self._match_id(url)
          webpage = self._download_webpage(url, display_id)

          app_state_json = self._search_regex(
              r'__APP_STATE\s*=\s*({.+?})(?:\s*\|\|\s*{}\s*)?;\s*\n',
              webpage, 'app state')
          page_data = self._parse_json(app_state_json, display_id)['pageData']
          article_body = page_data['body']

          def delegate(target_url, extractor_key):
              # Hand the embedded video over to another extractor while
              # keeping the article slug as display id
              return {
                  '_type': 'url_transparent',
                  'ie_key': extractor_key,
                  'url': target_url,
                  'display_id': display_id,
              }

          # 1. VICE player iframe anywhere in the page
          embedded_vice_url = ViceIE._extract_url(webpage)
          if embedded_vice_url:
              return delegate(embedded_vice_url, ViceIE.ie_key())

          # 2. Ooyala embed code inside the article body
          ooyala_code = self._search_regex(
              r'embedCode=([^&\'"]+)', article_body,
              'ooyala embed code', default=None)
          if ooyala_code:
              return delegate('ooyala:%s' % ooyala_code, 'Ooyala')

          # 3. YouTube embed inside the article body
          embedded_youtube_url = YoutubeIE._extract_url(article_body)
          if embedded_youtube_url:
              return delegate(embedded_youtube_url, YoutubeIE.ie_key())

          # 4. Last resort: the video URL attribute of the page's embed code
          fallback_url = self._html_search_regex(
              r'data-video-url="([^"]+)"',
              page_data['embed_code'], 'video URL')
          return delegate(fallback_url, ViceIE.ie_key())