You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

420 lines
18 KiB

11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
  1. # encoding: utf-8
  2. from __future__ import unicode_literals
  3. import os.path
  4. import re
  5. import json
  6. import hashlib
  7. import uuid
  8. from .common import InfoExtractor
  9. from ..utils import (
  10. compat_urllib_parse,
  11. compat_urllib_request,
  12. ExtractorError,
  13. url_basename,
  14. int_or_none,
  15. )
  16. class SmotriIE(InfoExtractor):
  17. IE_DESC = 'Smotri.com'
  18. IE_NAME = 'smotri'
  19. _VALID_URL = r'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
  20. _NETRC_MACHINE = 'smotri'
  21. _TESTS = [
  22. # real video id 2610366
  23. {
  24. 'url': 'http://smotri.com/video/view/?id=v261036632ab',
  25. 'md5': '2a7b08249e6f5636557579c368040eb9',
  26. 'info_dict': {
  27. 'id': 'v261036632ab',
  28. 'ext': 'mp4',
  29. 'title': 'катастрофа с камер видеонаблюдения',
  30. 'uploader': 'rbc2008',
  31. 'uploader_id': 'rbc08',
  32. 'upload_date': '20131118',
  33. 'description': 'катастрофа с камер видеонаблюдения, видео катастрофа с камер видеонаблюдения',
  34. 'thumbnail': 'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg',
  35. },
  36. },
  37. # real video id 57591
  38. {
  39. 'url': 'http://smotri.com/video/view/?id=v57591cb20',
  40. 'md5': '830266dfc21f077eac5afd1883091bcd',
  41. 'info_dict': {
  42. 'id': 'v57591cb20',
  43. 'ext': 'flv',
  44. 'title': 'test',
  45. 'uploader': 'Support Photofile@photofile',
  46. 'uploader_id': 'support-photofile',
  47. 'upload_date': '20070704',
  48. 'description': 'test, видео test',
  49. 'thumbnail': 'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
  50. },
  51. },
  52. # video-password
  53. {
  54. 'url': 'http://smotri.com/video/view/?id=v1390466a13c',
  55. 'md5': 'f6331cef33cad65a0815ee482a54440b',
  56. 'info_dict': {
  57. 'id': 'v1390466a13c',
  58. 'ext': 'mp4',
  59. 'title': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
  60. 'uploader': 'timoxa40',
  61. 'uploader_id': 'timoxa40',
  62. 'upload_date': '20100404',
  63. 'thumbnail': 'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg',
  64. 'description': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1, видео TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
  65. },
  66. 'params': {
  67. 'videopassword': 'qwerty',
  68. },
  69. },
  70. # age limit + video-password
  71. {
  72. 'url': 'http://smotri.com/video/view/?id=v15408898bcf',
  73. 'md5': '91e909c9f0521adf5ee86fbe073aad70',
  74. 'info_dict': {
  75. 'id': 'v15408898bcf',
  76. 'ext': 'flv',
  77. 'title': 'этот ролик не покажут по ТВ',
  78. 'uploader': 'zzxxx',
  79. 'uploader_id': 'ueggb',
  80. 'upload_date': '20101001',
  81. 'thumbnail': 'http://frame3.loadup.ru/75/75/1540889.1.3.jpg',
  82. 'age_limit': 18,
  83. 'description': 'этот ролик не покажут по ТВ, видео этот ролик не покажут по ТВ',
  84. },
  85. 'params': {
  86. 'videopassword': '333'
  87. }
  88. },
  89. # swf player
  90. {
  91. 'url': 'http://pics.smotri.com/scrubber_custom8.swf?file=v9188090500',
  92. 'md5': '4d47034979d9390d14acdf59c4935bc2',
  93. 'info_dict': {
  94. 'id': 'v9188090500',
  95. 'ext': 'mp4',
  96. 'title': 'Shakira - Don\'t Bother',
  97. 'uploader': 'HannahL',
  98. 'uploader_id': 'lisaha95',
  99. 'upload_date': '20090331',
  100. 'description': 'Shakira - Don\'t Bother, видео Shakira - Don\'t Bother',
  101. 'thumbnail': 'http://frame8.loadup.ru/44/0b/918809.7.3.jpg',
  102. },
  103. },
  104. ]
  105. _SUCCESS = 0
  106. _PASSWORD_NOT_VERIFIED = 1
  107. _PASSWORD_DETECTED = 2
  108. _VIDEO_NOT_FOUND = 3
  109. @classmethod
  110. def _extract_url(cls, webpage):
  111. mobj = re.search(
  112. r'<embed[^>]src=(["\'])(?P<url>http://pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=v.+?\1)',
  113. webpage)
  114. if mobj is not None:
  115. return mobj.group('url')
  116. mobj = re.search(
  117. r'''(?x)<div\s+class="video_file">http://smotri\.com/video/download/file/[^<]+</div>\s*
  118. <div\s+class="video_image">[^<]+</div>\s*
  119. <div\s+class="video_id">(?P<id>[^<]+)</div>''', webpage)
  120. if mobj is not None:
  121. return 'http://smotri.com/video/view/?id=%s' % mobj.group('id')
  122. def _search_meta(self, name, html, display_name=None):
  123. if display_name is None:
  124. display_name = name
  125. return self._html_search_regex(
  126. r'<meta itemprop="%s" content="([^"]+)" />' % re.escape(name),
  127. html, display_name, fatal=False)
  128. return self._html_search_meta(name, html, display_name)
  129. def _real_extract(self, url):
  130. mobj = re.match(self._VALID_URL, url)
  131. video_id = mobj.group('videoid')
  132. real_video_id = mobj.group('realvideoid')
  133. # Download video JSON data
  134. video_json_url = 'http://smotri.com/vt.php?id=%s' % real_video_id
  135. video_json_page = self._download_webpage(video_json_url, video_id, 'Downloading video JSON')
  136. video_json = json.loads(video_json_page)
  137. status = video_json['status']
  138. if status == self._VIDEO_NOT_FOUND:
  139. raise ExtractorError('Video %s does not exist' % video_id, expected=True)
  140. elif status == self._PASSWORD_DETECTED: # The video is protected by a password, retry with
  141. # video-password set
  142. video_password = self._downloader.params.get('videopassword', None)
  143. if not video_password:
  144. raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
  145. video_json_url += '&md5pass=%s' % hashlib.md5(video_password.encode('utf-8')).hexdigest()
  146. video_json_page = self._download_webpage(video_json_url, video_id, 'Downloading video JSON (video-password set)')
  147. video_json = json.loads(video_json_page)
  148. status = video_json['status']
  149. if status == self._PASSWORD_NOT_VERIFIED:
  150. raise ExtractorError('Video password is invalid', expected=True)
  151. if status != self._SUCCESS:
  152. raise ExtractorError('Unexpected status value %s' % status)
  153. # Extract the URL of the video
  154. video_url = video_json['file_data']
  155. # Video JSON does not provide enough meta data
  156. # We will extract some from the video web page instead
  157. video_page_url = 'http://smotri.com/video/view/?id=%s' % video_id
  158. video_page = self._download_webpage(video_page_url, video_id, 'Downloading video page')
  159. # Warning if video is unavailable
  160. warning = self._html_search_regex(
  161. r'<div class="videoUnModer">(.*?)</div>', video_page,
  162. 'warning message', default=None)
  163. if warning is not None:
  164. self._downloader.report_warning(
  165. 'Video %s may not be available; smotri said: %s ' %
  166. (video_id, warning))
  167. # Adult content
  168. if re.search('EroConfirmText">', video_page) is not None:
  169. self.report_age_confirmation()
  170. confirm_string = self._html_search_regex(
  171. r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id,
  172. video_page, 'confirm string')
  173. confirm_url = video_page_url + '&confirm=%s' % confirm_string
  174. video_page = self._download_webpage(confirm_url, video_id, 'Downloading video page (age confirmed)')
  175. adult_content = True
  176. else:
  177. adult_content = False
  178. # Extract the rest of meta data
  179. video_title = self._search_meta('name', video_page, 'title')
  180. if not video_title:
  181. video_title = os.path.splitext(url_basename(video_url))[0]
  182. video_description = self._search_meta('description', video_page)
  183. END_TEXT = ' на сайте Smotri.com'
  184. if video_description and video_description.endswith(END_TEXT):
  185. video_description = video_description[:-len(END_TEXT)]
  186. START_TEXT = 'Смотреть онлайн ролик '
  187. if video_description and video_description.startswith(START_TEXT):
  188. video_description = video_description[len(START_TEXT):]
  189. video_thumbnail = self._search_meta('thumbnail', video_page)
  190. upload_date_str = self._search_meta('uploadDate', video_page, 'upload date')
  191. if upload_date_str:
  192. upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str)
  193. video_upload_date = (
  194. (
  195. upload_date_m.group('year') +
  196. upload_date_m.group('month') +
  197. upload_date_m.group('day')
  198. )
  199. if upload_date_m else None
  200. )
  201. else:
  202. video_upload_date = None
  203. duration_str = self._search_meta('duration', video_page)
  204. if duration_str:
  205. duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str)
  206. video_duration = (
  207. (
  208. (int(duration_m.group('hours')) * 60 * 60) +
  209. (int(duration_m.group('minutes')) * 60) +
  210. int(duration_m.group('seconds'))
  211. )
  212. if duration_m else None
  213. )
  214. else:
  215. video_duration = None
  216. video_uploader = self._html_search_regex(
  217. '<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>',
  218. video_page, 'uploader', fatal=False, flags=re.MULTILINE|re.DOTALL)
  219. video_uploader_id = self._html_search_regex(
  220. '<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info\\(.*?\'([^\']+)\'\\);">',
  221. video_page, 'uploader id', fatal=False, flags=re.MULTILINE|re.DOTALL)
  222. video_view_count = self._html_search_regex(
  223. 'Общее количество просмотров.*?<span class="Number">(\\d+)</span>',
  224. video_page, 'view count', fatal=False, flags=re.MULTILINE|re.DOTALL)
  225. return {
  226. 'id': video_id,
  227. 'url': video_url,
  228. 'title': video_title,
  229. 'thumbnail': video_thumbnail,
  230. 'description': video_description,
  231. 'uploader': video_uploader,
  232. 'upload_date': video_upload_date,
  233. 'uploader_id': video_uploader_id,
  234. 'duration': video_duration,
  235. 'view_count': int_or_none(video_view_count),
  236. 'age_limit': 18 if adult_content else 0,
  237. 'video_page_url': video_page_url
  238. }
  239. class SmotriCommunityIE(InfoExtractor):
  240. IE_DESC = 'Smotri.com community videos'
  241. IE_NAME = 'smotri:community'
  242. _VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)'
  243. _TEST = {
  244. 'url': 'http://smotri.com/community/video/kommuna',
  245. 'info_dict': {
  246. 'id': 'kommuna',
  247. 'title': 'КПРФ',
  248. },
  249. 'playlist_mincount': 4,
  250. }
  251. def _real_extract(self, url):
  252. mobj = re.match(self._VALID_URL, url)
  253. community_id = mobj.group('communityid')
  254. url = 'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id
  255. rss = self._download_xml(url, community_id, 'Downloading community RSS')
  256. entries = [self.url_result(video_url.text, 'Smotri')
  257. for video_url in rss.findall('./channel/item/link')]
  258. description_text = rss.find('./channel/description').text
  259. community_title = self._html_search_regex(
  260. '^Видео сообщества "([^"]+)"$', description_text, 'community title')
  261. return self.playlist_result(entries, community_id, community_title)
  262. class SmotriUserIE(InfoExtractor):
  263. IE_DESC = 'Smotri.com user videos'
  264. IE_NAME = 'smotri:user'
  265. _VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)'
  266. _TESTS = [{
  267. 'url': 'http://smotri.com/user/inspector',
  268. 'info_dict': {
  269. 'id': 'inspector',
  270. 'title': 'Inspector',
  271. },
  272. 'playlist_mincount': 9,
  273. }]
  274. def _real_extract(self, url):
  275. mobj = re.match(self._VALID_URL, url)
  276. user_id = mobj.group('userid')
  277. url = 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id
  278. rss = self._download_xml(url, user_id, 'Downloading user RSS')
  279. entries = [self.url_result(video_url.text, 'Smotri')
  280. for video_url in rss.findall('./channel/item/link')]
  281. description_text = rss.find('./channel/description').text
  282. user_nickname = self._html_search_regex(
  283. '^Видео режиссера (.*)$', description_text,
  284. 'user nickname')
  285. return self.playlist_result(entries, user_id, user_nickname)
  286. class SmotriBroadcastIE(InfoExtractor):
  287. IE_DESC = 'Smotri.com broadcasts'
  288. IE_NAME = 'smotri:broadcast'
  289. _VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<broadcastid>[^/]+))/?.*'
  290. def _real_extract(self, url):
  291. mobj = re.match(self._VALID_URL, url)
  292. broadcast_id = mobj.group('broadcastid')
  293. broadcast_url = 'http://' + mobj.group('url')
  294. broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page')
  295. if re.search('>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None:
  296. raise ExtractorError('Broadcast %s does not exist' % broadcast_id, expected=True)
  297. # Adult content
  298. if re.search('EroConfirmText">', broadcast_page) is not None:
  299. (username, password) = self._get_login_info()
  300. if username is None:
  301. raise ExtractorError('Erotic broadcasts allowed only for registered users, '
  302. 'use --username and --password options to provide account credentials.', expected=True)
  303. login_form = {
  304. 'login-hint53': '1',
  305. 'confirm_erotic': '1',
  306. 'login': username,
  307. 'password': password,
  308. }
  309. request = compat_urllib_request.Request(broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form))
  310. request.add_header('Content-Type', 'application/x-www-form-urlencoded')
  311. broadcast_page = self._download_webpage(request, broadcast_id, 'Logging in and confirming age')
  312. if re.search('>Неверный логин или пароль<', broadcast_page) is not None:
  313. raise ExtractorError('Unable to log in: bad username or password', expected=True)
  314. adult_content = True
  315. else:
  316. adult_content = False
  317. ticket = self._html_search_regex(
  318. 'window\.broadcast_control\.addFlashVar\\(\'file\', \'([^\']+)\'\\);',
  319. broadcast_page, 'broadcast ticket')
  320. url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket
  321. broadcast_password = self._downloader.params.get('videopassword', None)
  322. if broadcast_password:
  323. url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest()
  324. broadcast_json_page = self._download_webpage(url, broadcast_id, 'Downloading broadcast JSON')
  325. try:
  326. broadcast_json = json.loads(broadcast_json_page)
  327. protected_broadcast = broadcast_json['_pass_protected'] == 1
  328. if protected_broadcast and not broadcast_password:
  329. raise ExtractorError('This broadcast is protected by a password, use the --video-password option', expected=True)
  330. broadcast_offline = broadcast_json['is_play'] == 0
  331. if broadcast_offline:
  332. raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True)
  333. rtmp_url = broadcast_json['_server']
  334. if not rtmp_url.startswith('rtmp://'):
  335. raise ExtractorError('Unexpected broadcast rtmp URL')
  336. broadcast_playpath = broadcast_json['_streamName']
  337. broadcast_thumbnail = broadcast_json['_imgURL']
  338. broadcast_title = broadcast_json['title']
  339. broadcast_description = broadcast_json['description']
  340. broadcaster_nick = broadcast_json['nick']
  341. broadcaster_login = broadcast_json['login']
  342. rtmp_conn = 'S:%s' % uuid.uuid4().hex
  343. except KeyError:
  344. if protected_broadcast:
  345. raise ExtractorError('Bad broadcast password', expected=True)
  346. raise ExtractorError('Unexpected broadcast JSON')
  347. return {
  348. 'id': broadcast_id,
  349. 'url': rtmp_url,
  350. 'title': broadcast_title,
  351. 'thumbnail': broadcast_thumbnail,
  352. 'description': broadcast_description,
  353. 'uploader': broadcaster_nick,
  354. 'uploader_id': broadcaster_login,
  355. 'age_limit': 18 if adult_content else 0,
  356. 'ext': 'flv',
  357. 'play_path': broadcast_playpath,
  358. 'rtmp_live': True,
  359. 'rtmp_conn': rtmp_conn
  360. }