You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

392 lines
15 KiB

11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
10 years ago
11 years ago
11 years ago
10 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
  1. # encoding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. import json
  5. import hashlib
  6. import uuid
  7. from .common import InfoExtractor
  8. from ..compat import (
  9. compat_urllib_parse,
  10. compat_urllib_request,
  11. )
  12. from ..utils import (
  13. ExtractorError,
  14. int_or_none,
  15. unified_strdate,
  16. )
  17. class SmotriIE(InfoExtractor):
  18. IE_DESC = 'Smotri.com'
  19. IE_NAME = 'smotri'
  20. _VALID_URL = r'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<id>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
  21. _NETRC_MACHINE = 'smotri'
  22. _TESTS = [
  23. # real video id 2610366
  24. {
  25. 'url': 'http://smotri.com/video/view/?id=v261036632ab',
  26. 'md5': '2a7b08249e6f5636557579c368040eb9',
  27. 'info_dict': {
  28. 'id': 'v261036632ab',
  29. 'ext': 'mp4',
  30. 'title': 'катастрофа с камер видеонаблюдения',
  31. 'uploader': 'rbc2008',
  32. 'uploader_id': 'rbc08',
  33. 'upload_date': '20131118',
  34. 'thumbnail': 'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg',
  35. },
  36. },
  37. # real video id 57591
  38. {
  39. 'url': 'http://smotri.com/video/view/?id=v57591cb20',
  40. 'md5': '830266dfc21f077eac5afd1883091bcd',
  41. 'info_dict': {
  42. 'id': 'v57591cb20',
  43. 'ext': 'flv',
  44. 'title': 'test',
  45. 'uploader': 'Support Photofile@photofile',
  46. 'uploader_id': 'support-photofile',
  47. 'upload_date': '20070704',
  48. 'thumbnail': 'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
  49. },
  50. },
  51. # video-password
  52. {
  53. 'url': 'http://smotri.com/video/view/?id=v1390466a13c',
  54. 'md5': 'f6331cef33cad65a0815ee482a54440b',
  55. 'info_dict': {
  56. 'id': 'v1390466a13c',
  57. 'ext': 'mp4',
  58. 'title': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
  59. 'uploader': 'timoxa40',
  60. 'uploader_id': 'timoxa40',
  61. 'upload_date': '20100404',
  62. 'thumbnail': 'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg',
  63. },
  64. 'params': {
  65. 'videopassword': 'qwerty',
  66. },
  67. 'skip': 'Video is not approved by moderator',
  68. },
  69. # age limit + video-password
  70. {
  71. 'url': 'http://smotri.com/video/view/?id=v15408898bcf',
  72. 'md5': '91e909c9f0521adf5ee86fbe073aad70',
  73. 'info_dict': {
  74. 'id': 'v15408898bcf',
  75. 'ext': 'flv',
  76. 'title': 'этот ролик не покажут по ТВ',
  77. 'uploader': 'zzxxx',
  78. 'uploader_id': 'ueggb',
  79. 'upload_date': '20101001',
  80. 'thumbnail': 'http://frame3.loadup.ru/75/75/1540889.1.3.jpg',
  81. 'age_limit': 18,
  82. },
  83. 'params': {
  84. 'videopassword': '333'
  85. },
  86. 'skip': 'Video is not approved by moderator',
  87. },
  88. # not approved by moderator, but available
  89. {
  90. 'url': 'http://smotri.com/video/view/?id=v28888533b73',
  91. 'md5': 'f44bc7adac90af518ef1ecf04893bb34',
  92. 'info_dict': {
  93. 'id': 'v28888533b73',
  94. 'ext': 'mp4',
  95. 'title': 'Russian Spies Killed By ISIL Child Soldier',
  96. 'uploader': 'Mopeder',
  97. 'uploader_id': 'mopeder',
  98. 'duration': 71,
  99. 'thumbnail': 'http://frame9.loadup.ru/d7/32/2888853.2.3.jpg',
  100. 'upload_date': '20150114',
  101. },
  102. },
  103. # swf player
  104. {
  105. 'url': 'http://pics.smotri.com/scrubber_custom8.swf?file=v9188090500',
  106. 'md5': '4d47034979d9390d14acdf59c4935bc2',
  107. 'info_dict': {
  108. 'id': 'v9188090500',
  109. 'ext': 'mp4',
  110. 'title': 'Shakira - Don\'t Bother',
  111. 'uploader': 'HannahL',
  112. 'uploader_id': 'lisaha95',
  113. 'upload_date': '20090331',
  114. 'thumbnail': 'http://frame8.loadup.ru/44/0b/918809.7.3.jpg',
  115. },
  116. },
  117. ]
  118. @classmethod
  119. def _extract_url(cls, webpage):
  120. mobj = re.search(
  121. r'<embed[^>]src=(["\'])(?P<url>http://pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=v.+?\1)',
  122. webpage)
  123. if mobj is not None:
  124. return mobj.group('url')
  125. mobj = re.search(
  126. r'''(?x)<div\s+class="video_file">http://smotri\.com/video/download/file/[^<]+</div>\s*
  127. <div\s+class="video_image">[^<]+</div>\s*
  128. <div\s+class="video_id">(?P<id>[^<]+)</div>''', webpage)
  129. if mobj is not None:
  130. return 'http://smotri.com/video/view/?id=%s' % mobj.group('id')
  131. def _search_meta(self, name, html, display_name=None):
  132. if display_name is None:
  133. display_name = name
  134. return self._html_search_regex(
  135. r'<meta itemprop="%s" content="([^"]+)" />' % re.escape(name),
  136. html, display_name, fatal=False)
  137. return self._html_search_meta(name, html, display_name)
  138. def _real_extract(self, url):
  139. video_id = self._match_id(url)
  140. video_form = {
  141. 'ticket': video_id,
  142. 'video_url': '1',
  143. 'frame_url': '1',
  144. 'devid': 'LoadupFlashPlayer',
  145. 'getvideoinfo': '1',
  146. }
  147. request = compat_urllib_request.Request(
  148. 'http://smotri.com/video/view/url/bot/', compat_urllib_parse.urlencode(video_form))
  149. request.add_header('Content-Type', 'application/x-www-form-urlencoded')
  150. video = self._download_json(request, video_id, 'Downloading video JSON')
  151. video_url = video.get('_vidURL') or video.get('_vidURL_mp4')
  152. if not video_url:
  153. if video.get('_moderate_no') or not video.get('moderated'):
  154. raise ExtractorError(
  155. 'Video %s has not been approved by moderator' % video_id, expected=True)
  156. if video.get('error'):
  157. raise ExtractorError('Video %s does not exist' % video_id, expected=True)
  158. title = video['title']
  159. thumbnail = video['_imgURL']
  160. upload_date = unified_strdate(video['added'])
  161. uploader = video['userNick']
  162. uploader_id = video['userLogin']
  163. duration = int_or_none(video['duration'])
  164. # Video JSON does not provide enough meta data
  165. # We will extract some from the video web page instead
  166. webpage_url = 'http://smotri.com/video/view/?id=%s' % video_id
  167. webpage = self._download_webpage(webpage_url, video_id, 'Downloading video page')
  168. # Warning if video is unavailable
  169. warning = self._html_search_regex(
  170. r'<div class="videoUnModer">(.*?)</div>', webpage,
  171. 'warning message', default=None)
  172. if warning is not None:
  173. self._downloader.report_warning(
  174. 'Video %s may not be available; smotri said: %s ' %
  175. (video_id, warning))
  176. # Adult content
  177. if re.search('EroConfirmText">', webpage) is not None:
  178. self.report_age_confirmation()
  179. confirm_string = self._html_search_regex(
  180. r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id,
  181. webpage, 'confirm string')
  182. confirm_url = webpage_url + '&confirm=%s' % confirm_string
  183. webpage = self._download_webpage(confirm_url, video_id, 'Downloading video page (age confirmed)')
  184. adult_content = True
  185. else:
  186. adult_content = False
  187. view_count = self._html_search_regex(
  188. 'Общее количество просмотров.*?<span class="Number">(\\d+)</span>',
  189. webpage, 'view count', fatal=False, flags=re.MULTILINE | re.DOTALL)
  190. return {
  191. 'id': video_id,
  192. 'url': video_url,
  193. 'title': title,
  194. 'thumbnail': thumbnail,
  195. 'uploader': uploader,
  196. 'upload_date': upload_date,
  197. 'uploader_id': uploader_id,
  198. 'duration': duration,
  199. 'view_count': int_or_none(view_count),
  200. 'age_limit': 18 if adult_content else 0,
  201. }
  202. class SmotriCommunityIE(InfoExtractor):
  203. IE_DESC = 'Smotri.com community videos'
  204. IE_NAME = 'smotri:community'
  205. _VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)'
  206. _TEST = {
  207. 'url': 'http://smotri.com/community/video/kommuna',
  208. 'info_dict': {
  209. 'id': 'kommuna',
  210. 'title': 'КПРФ',
  211. },
  212. 'playlist_mincount': 4,
  213. }
  214. def _real_extract(self, url):
  215. mobj = re.match(self._VALID_URL, url)
  216. community_id = mobj.group('communityid')
  217. url = 'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id
  218. rss = self._download_xml(url, community_id, 'Downloading community RSS')
  219. entries = [self.url_result(video_url.text, 'Smotri')
  220. for video_url in rss.findall('./channel/item/link')]
  221. description_text = rss.find('./channel/description').text
  222. community_title = self._html_search_regex(
  223. '^Видео сообщества "([^"]+)"$', description_text, 'community title')
  224. return self.playlist_result(entries, community_id, community_title)
  225. class SmotriUserIE(InfoExtractor):
  226. IE_DESC = 'Smotri.com user videos'
  227. IE_NAME = 'smotri:user'
  228. _VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)'
  229. _TESTS = [{
  230. 'url': 'http://smotri.com/user/inspector',
  231. 'info_dict': {
  232. 'id': 'inspector',
  233. 'title': 'Inspector',
  234. },
  235. 'playlist_mincount': 9,
  236. }]
  237. def _real_extract(self, url):
  238. mobj = re.match(self._VALID_URL, url)
  239. user_id = mobj.group('userid')
  240. url = 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id
  241. rss = self._download_xml(url, user_id, 'Downloading user RSS')
  242. entries = [self.url_result(video_url.text, 'Smotri')
  243. for video_url in rss.findall('./channel/item/link')]
  244. description_text = rss.find('./channel/description').text
  245. user_nickname = self._html_search_regex(
  246. '^Видео режиссера (.*)$', description_text,
  247. 'user nickname')
  248. return self.playlist_result(entries, user_id, user_nickname)
  249. class SmotriBroadcastIE(InfoExtractor):
  250. IE_DESC = 'Smotri.com broadcasts'
  251. IE_NAME = 'smotri:broadcast'
  252. _VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<broadcastid>[^/]+))/?.*'
  253. def _real_extract(self, url):
  254. mobj = re.match(self._VALID_URL, url)
  255. broadcast_id = mobj.group('broadcastid')
  256. broadcast_url = 'http://' + mobj.group('url')
  257. broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page')
  258. if re.search('>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None:
  259. raise ExtractorError(
  260. 'Broadcast %s does not exist' % broadcast_id, expected=True)
  261. # Adult content
  262. if re.search('EroConfirmText">', broadcast_page) is not None:
  263. (username, password) = self._get_login_info()
  264. if username is None:
  265. raise ExtractorError(
  266. 'Erotic broadcasts allowed only for registered users, '
  267. 'use --username and --password options to provide account credentials.',
  268. expected=True)
  269. login_form = {
  270. 'login-hint53': '1',
  271. 'confirm_erotic': '1',
  272. 'login': username,
  273. 'password': password,
  274. }
  275. request = compat_urllib_request.Request(
  276. broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form))
  277. request.add_header('Content-Type', 'application/x-www-form-urlencoded')
  278. broadcast_page = self._download_webpage(
  279. request, broadcast_id, 'Logging in and confirming age')
  280. if re.search('>Неверный логин или пароль<', broadcast_page) is not None:
  281. raise ExtractorError('Unable to log in: bad username or password', expected=True)
  282. adult_content = True
  283. else:
  284. adult_content = False
  285. ticket = self._html_search_regex(
  286. r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'([^']+)'\)",
  287. broadcast_page, 'broadcast ticket')
  288. url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket
  289. broadcast_password = self._downloader.params.get('videopassword', None)
  290. if broadcast_password:
  291. url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest()
  292. broadcast_json_page = self._download_webpage(
  293. url, broadcast_id, 'Downloading broadcast JSON')
  294. try:
  295. broadcast_json = json.loads(broadcast_json_page)
  296. protected_broadcast = broadcast_json['_pass_protected'] == 1
  297. if protected_broadcast and not broadcast_password:
  298. raise ExtractorError(
  299. 'This broadcast is protected by a password, use the --video-password option',
  300. expected=True)
  301. broadcast_offline = broadcast_json['is_play'] == 0
  302. if broadcast_offline:
  303. raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True)
  304. rtmp_url = broadcast_json['_server']
  305. mobj = re.search(r'^rtmp://[^/]+/(?P<app>.+)/?$', rtmp_url)
  306. if not mobj:
  307. raise ExtractorError('Unexpected broadcast rtmp URL')
  308. broadcast_playpath = broadcast_json['_streamName']
  309. broadcast_app = '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL'])
  310. broadcast_thumbnail = broadcast_json['_imgURL']
  311. broadcast_title = self._live_title(broadcast_json['title'])
  312. broadcast_description = broadcast_json['description']
  313. broadcaster_nick = broadcast_json['nick']
  314. broadcaster_login = broadcast_json['login']
  315. rtmp_conn = 'S:%s' % uuid.uuid4().hex
  316. except KeyError:
  317. if protected_broadcast:
  318. raise ExtractorError('Bad broadcast password', expected=True)
  319. raise ExtractorError('Unexpected broadcast JSON')
  320. return {
  321. 'id': broadcast_id,
  322. 'url': rtmp_url,
  323. 'title': broadcast_title,
  324. 'thumbnail': broadcast_thumbnail,
  325. 'description': broadcast_description,
  326. 'uploader': broadcaster_nick,
  327. 'uploader_id': broadcaster_login,
  328. 'age_limit': 18 if adult_content else 0,
  329. 'ext': 'flv',
  330. 'play_path': broadcast_playpath,
  331. 'player_url': 'http://pics.smotri.com/broadcast_play.swf',
  332. 'app': broadcast_app,
  333. 'rtmp_live': True,
  334. 'rtmp_conn': rtmp_conn,
  335. 'is_live': True,
  336. }