You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

252 lines
11 KiB

11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
  1. # encoding: utf-8
  2. import re
  3. import json
  4. import hashlib
  5. from .common import InfoExtractor
  6. from ..utils import (
  7. determine_ext,
  8. ExtractorError
  9. )
  10. class SmotriIE(InfoExtractor):
  11. IE_DESC = u'Smotri.com'
  12. IE_NAME = u'smotri'
  13. _VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/video/view/\?id=(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4}))'
  14. _TESTS = [
  15. # real video id 2610366
  16. {
  17. u'url': u'http://smotri.com/video/view/?id=v261036632ab',
  18. u'file': u'v261036632ab.mp4',
  19. u'md5': u'2a7b08249e6f5636557579c368040eb9',
  20. u'info_dict': {
  21. u'title': u'катастрофа с камер видеонаблюдения',
  22. u'uploader': u'rbc2008',
  23. u'uploader_id': u'rbc08',
  24. u'upload_date': u'20131118',
  25. u'description': u'катастрофа с камер видеонаблюдения, видео катастрофа с камер видеонаблюдения',
  26. u'thumbnail': u'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg',
  27. },
  28. },
  29. # real video id 57591
  30. {
  31. u'url': u'http://smotri.com/video/view/?id=v57591cb20',
  32. u'file': u'v57591cb20.flv',
  33. u'md5': u'830266dfc21f077eac5afd1883091bcd',
  34. u'info_dict': {
  35. u'title': u'test',
  36. u'uploader': u'Support Photofile@photofile',
  37. u'uploader_id': u'support-photofile',
  38. u'upload_date': u'20070704',
  39. u'description': u'test, видео test',
  40. u'thumbnail': u'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
  41. },
  42. },
  43. # video-password
  44. {
  45. u'url': u'http://smotri.com/video/view/?id=v1390466a13c',
  46. u'file': u'v1390466a13c.mp4',
  47. u'md5': u'f6331cef33cad65a0815ee482a54440b',
  48. u'info_dict': {
  49. u'title': u'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
  50. u'uploader': u'timoxa40',
  51. u'uploader_id': u'timoxa40',
  52. u'upload_date': u'20100404',
  53. u'thumbnail': u'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg',
  54. u'description': u'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1, видео TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
  55. },
  56. u'params': {
  57. u'videopassword': u'qwerty',
  58. },
  59. },
  60. # age limit + video-password
  61. {
  62. u'url': u'http://smotri.com/video/view/?id=v15408898bcf',
  63. u'file': u'v15408898bcf.flv',
  64. u'md5': u'91e909c9f0521adf5ee86fbe073aad70',
  65. u'info_dict': {
  66. u'title': u'этот ролик не покажут по ТВ',
  67. u'uploader': u'zzxxx',
  68. u'uploader_id': u'ueggb',
  69. u'upload_date': u'20101001',
  70. u'thumbnail': u'http://frame3.loadup.ru/75/75/1540889.1.3.jpg',
  71. u'age_limit': 18,
  72. u'description': u'этот ролик не покажут по ТВ, видео этот ролик не покажут по ТВ',
  73. },
  74. u'params': {
  75. u'videopassword': u'333'
  76. }
  77. }
  78. ]
  79. _SUCCESS = 0
  80. _PASSWORD_NOT_VERIFIED = 1
  81. _PASSWORD_DETECTED = 2
  82. _VIDEO_NOT_FOUND = 3
  83. def _search_meta(self, name, html, display_name=None):
  84. if display_name is None:
  85. display_name = name
  86. return self._html_search_regex(
  87. r'<meta itemprop="%s" content="([^"]+)" />' % re.escape(name),
  88. html, display_name, fatal=False)
  89. return self._html_search_meta(name, html, display_name)
  90. def _real_extract(self, url):
  91. mobj = re.match(self._VALID_URL, url)
  92. video_id = mobj.group('videoid')
  93. real_video_id = mobj.group('realvideoid')
  94. # Download video JSON data
  95. video_json_url = 'http://smotri.com/vt.php?id=%s' % real_video_id
  96. video_json_page = self._download_webpage(video_json_url, video_id, u'Downloading video JSON')
  97. video_json = json.loads(video_json_page)
  98. status = video_json['status']
  99. if status == self._VIDEO_NOT_FOUND:
  100. raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
  101. elif status == self._PASSWORD_DETECTED: # The video is protected by a password, retry with
  102. # video-password set
  103. video_password = self._downloader.params.get('videopassword', None)
  104. if not video_password:
  105. raise ExtractorError(u'This video is protected by a password, use the --video-password option', expected=True)
  106. video_json_url += '&md5pass=%s' % hashlib.md5(video_password.encode('utf-8')).hexdigest()
  107. video_json_page = self._download_webpage(video_json_url, video_id, u'Downloading video JSON (video-password set)')
  108. video_json = json.loads(video_json_page)
  109. status = video_json['status']
  110. if status == self._PASSWORD_NOT_VERIFIED:
  111. raise ExtractorError(u'Video password is invalid', expected=True)
  112. if status != self._SUCCESS:
  113. raise ExtractorError(u'Unexpected status value %s' % status)
  114. # Extract the URL of the video
  115. video_url = video_json['file_data']
  116. # Video JSON does not provide enough meta data
  117. # We will extract some from the video web page instead
  118. video_page_url = 'http://' + mobj.group('url')
  119. video_page = self._download_webpage(video_page_url, video_id, u'Downloading video page')
  120. # Adult content
  121. if re.search(u'EroConfirmText">', video_page) is not None:
  122. self.report_age_confirmation()
  123. confirm_string = self._html_search_regex(
  124. r'<a href="/video/view/\?id=%s&confirm=([^"]+)" title="[^"]+">' % video_id,
  125. video_page, u'confirm string')
  126. confirm_url = video_page_url + '&confirm=%s' % confirm_string
  127. video_page = self._download_webpage(confirm_url, video_id, u'Downloading video page (age confirmed)')
  128. adult_content = True
  129. else:
  130. adult_content = False
  131. # Extract the rest of meta data
  132. video_title = self._search_meta(u'name', video_page, u'title')
  133. if not video_title:
  134. video_title = video_url.rsplit('/', 1)[-1]
  135. video_description = self._search_meta(u'description', video_page)
  136. END_TEXT = u' на сайте Smotri.com'
  137. if video_description.endswith(END_TEXT):
  138. video_description = video_description[:-len(END_TEXT)]
  139. START_TEXT = u'Смотреть онлайн ролик '
  140. if video_description.startswith(START_TEXT):
  141. video_description = video_description[len(START_TEXT):]
  142. video_thumbnail = self._search_meta(u'thumbnail', video_page)
  143. upload_date_str = self._search_meta(u'uploadDate', video_page, u'upload date')
  144. upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str)
  145. video_upload_date = (
  146. (
  147. upload_date_m.group('year') +
  148. upload_date_m.group('month') +
  149. upload_date_m.group('day')
  150. )
  151. if upload_date_m else None
  152. )
  153. duration_str = self._search_meta(u'duration', video_page)
  154. duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str)
  155. video_duration = (
  156. (
  157. (int(duration_m.group('hours')) * 60 * 60) +
  158. (int(duration_m.group('minutes')) * 60) +
  159. int(duration_m.group('seconds'))
  160. )
  161. if duration_m else None
  162. )
  163. video_uploader = self._html_search_regex(
  164. u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>',
  165. video_page, u'uploader', fatal=False, flags=re.MULTILINE|re.DOTALL)
  166. video_uploader_id = self._html_search_regex(
  167. u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info\\(.*?\'([^\']+)\'\\);">',
  168. video_page, u'uploader id', fatal=False, flags=re.MULTILINE|re.DOTALL)
  169. video_view_count = self._html_search_regex(
  170. u'Общее количество просмотров.*?<span class="Number">(\\d+)</span>',
  171. video_page, u'view count', fatal=False, flags=re.MULTILINE|re.DOTALL)
  172. return {
  173. 'id': video_id,
  174. 'url': video_url,
  175. 'title': video_title,
  176. 'thumbnail': video_thumbnail,
  177. 'description': video_description,
  178. 'uploader': video_uploader,
  179. 'upload_date': video_upload_date,
  180. 'uploader_id': video_uploader_id,
  181. 'video_duration': video_duration,
  182. 'view_count': video_view_count,
  183. 'age_limit': 18 if adult_content else 0,
  184. 'video_page_url': video_page_url
  185. }
  186. class SmotriCommunityIE(InfoExtractor):
  187. IE_DESC = u'Smotri.com community videos'
  188. IE_NAME = u'smotri:community'
  189. _VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)'
  190. def _real_extract(self, url):
  191. mobj = re.match(self._VALID_URL, url)
  192. community_id = mobj.group('communityid')
  193. url = 'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id
  194. rss = self._download_xml(url, community_id, u'Downloading community RSS')
  195. entries = [self.url_result(video_url.text, 'Smotri')
  196. for video_url in rss.findall('./channel/item/link')]
  197. description_text = rss.find('./channel/description').text
  198. community_title = self._html_search_regex(
  199. u'^Видео сообщества "([^"]+)"$', description_text, u'community title')
  200. return self.playlist_result(entries, community_id, community_title)
  201. class SmotriUserIE(InfoExtractor):
  202. IE_DESC = u'Smotri.com user videos'
  203. IE_NAME = u'smotri:user'
  204. _VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)'
  205. def _real_extract(self, url):
  206. mobj = re.match(self._VALID_URL, url)
  207. user_id = mobj.group('userid')
  208. url = 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id
  209. rss = self._download_xml(url, user_id, u'Downloading user RSS')
  210. entries = [self.url_result(video_url.text, 'Smotri')
  211. for video_url in rss.findall('./channel/item/link')]
  212. description_text = rss.find('./channel/description').text
  213. user_nickname = self._html_search_regex(
  214. u'^Видео режиссера (.*)$', description_text,
  215. u'user nickname')
  216. return self.playlist_result(entries, user_id, user_nickname)