You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

206 lines
6.9 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import datetime
  4. import re
  5. import time
  6. from .common import InfoExtractor
  7. from ..compat import (
  8. compat_urllib_parse,
  9. compat_urllib_request,
  10. compat_urlparse,
  11. )
  12. from ..utils import (
  13. determine_ext,
  14. ExtractorError,
  15. parse_iso8601,
  16. )
  17. class LetvIE(InfoExtractor):
  18. IE_DESC = '乐视网'
  19. _VALID_URL = r'http://www\.letv\.com/ptv/vplay/(?P<id>\d+).html'
  20. _TESTS = [{
  21. 'url': 'http://www.letv.com/ptv/vplay/22005890.html',
  22. 'md5': 'cab23bd68d5a8db9be31c9a222c1e8df',
  23. 'info_dict': {
  24. 'id': '22005890',
  25. 'ext': 'mp4',
  26. 'title': '第87届奥斯卡颁奖礼完美落幕 《鸟人》成最大赢家',
  27. 'timestamp': 1424747397,
  28. 'upload_date': '20150224',
  29. 'description': 'md5:a9cb175fd753e2962176b7beca21a47c',
  30. }
  31. }, {
  32. 'url': 'http://www.letv.com/ptv/vplay/1415246.html',
  33. 'info_dict': {
  34. 'id': '1415246',
  35. 'ext': 'mp4',
  36. 'title': '美人天下01',
  37. 'description': 'md5:f88573d9d7225ada1359eaf0dbf8bcda',
  38. },
  39. }, {
  40. 'note': 'This video is available only in Mainland China, thus a proxy is needed',
  41. 'url': 'http://www.letv.com/ptv/vplay/1118082.html',
  42. 'md5': 'f80936fbe20fb2f58648e81386ff7927',
  43. 'info_dict': {
  44. 'id': '1118082',
  45. 'ext': 'mp4',
  46. 'title': '与龙共舞 完整版',
  47. 'description': 'md5:7506a5eeb1722bb9d4068f85024e3986',
  48. },
  49. 'skip': 'Only available in China',
  50. }]
  51. @staticmethod
  52. def urshift(val, n):
  53. return val >> n if val >= 0 else (val + 0x100000000) >> n
  54. # ror() and calc_time_key() are reversed from a embedded swf file in KLetvPlayer.swf
  55. def ror(self, param1, param2):
  56. _loc3_ = 0
  57. while _loc3_ < param2:
  58. param1 = self.urshift(param1, 1) + ((param1 & 1) << 31)
  59. _loc3_ += 1
  60. return param1
  61. def calc_time_key(self, param1):
  62. _loc2_ = 773625421
  63. _loc3_ = self.ror(param1, _loc2_ % 13)
  64. _loc3_ = _loc3_ ^ _loc2_
  65. _loc3_ = self.ror(_loc3_, _loc2_ % 17)
  66. return _loc3_
  67. def _real_extract(self, url):
  68. media_id = self._match_id(url)
  69. page = self._download_webpage(url, media_id)
  70. params = {
  71. 'id': media_id,
  72. 'platid': 1,
  73. 'splatid': 101,
  74. 'format': 1,
  75. 'tkey': self.calc_time_key(int(time.time())),
  76. 'domain': 'www.letv.com'
  77. }
  78. play_json_req = compat_urllib_request.Request(
  79. 'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params)
  80. )
  81. cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
  82. if cn_verification_proxy:
  83. play_json_req.add_header('Ytdl-request-proxy', cn_verification_proxy)
  84. play_json = self._download_json(
  85. play_json_req,
  86. media_id, 'Downloading playJson data')
  87. # Check for errors
  88. playstatus = play_json['playstatus']
  89. if playstatus['status'] == 0:
  90. flag = playstatus['flag']
  91. if flag == 1:
  92. msg = 'Country %s auth error' % playstatus['country']
  93. else:
  94. msg = 'Generic error. flag = %d' % flag
  95. raise ExtractorError(msg, expected=True)
  96. playurl = play_json['playurl']
  97. formats = ['350', '1000', '1300', '720p', '1080p']
  98. dispatch = playurl['dispatch']
  99. urls = []
  100. for format_id in formats:
  101. if format_id in dispatch:
  102. media_url = playurl['domain'][0] + dispatch[format_id][0]
  103. # Mimic what flvxz.com do
  104. url_parts = list(compat_urlparse.urlparse(media_url))
  105. qs = dict(compat_urlparse.parse_qs(url_parts[4]))
  106. qs.update({
  107. 'platid': '14',
  108. 'splatid': '1401',
  109. 'tss': 'no',
  110. 'retry': 1
  111. })
  112. url_parts[4] = compat_urllib_parse.urlencode(qs)
  113. media_url = compat_urlparse.urlunparse(url_parts)
  114. url_info_dict = {
  115. 'url': media_url,
  116. 'ext': determine_ext(dispatch[format_id][1]),
  117. 'format_id': format_id,
  118. }
  119. if format_id[-1:] == 'p':
  120. url_info_dict['height'] = format_id[:-1]
  121. urls.append(url_info_dict)
  122. publish_time = parse_iso8601(self._html_search_regex(
  123. r'发布时间&nbsp;([^<>]+) ', page, 'publish time', default=None),
  124. delimiter=' ', timezone=datetime.timedelta(hours=8))
  125. description = self._html_search_meta('description', page, fatal=False)
  126. return {
  127. 'id': media_id,
  128. 'formats': urls,
  129. 'title': playurl['title'],
  130. 'thumbnail': playurl['pic'],
  131. 'description': description,
  132. 'timestamp': publish_time,
  133. }
  134. class LetvTvIE(InfoExtractor):
  135. _VALID_URL = r'http://www.letv.com/tv/(?P<id>\d+).html'
  136. _TESTS = [{
  137. 'url': 'http://www.letv.com/tv/46177.html',
  138. 'info_dict': {
  139. 'id': '46177',
  140. 'title': '美人天下',
  141. 'description': 'md5:395666ff41b44080396e59570dbac01c'
  142. },
  143. 'playlist_count': 35
  144. }]
  145. def _real_extract(self, url):
  146. playlist_id = self._match_id(url)
  147. page = self._download_webpage(url, playlist_id)
  148. media_urls = list(set(re.findall(
  149. r'http://www.letv.com/ptv/vplay/\d+.html', page)))
  150. entries = [self.url_result(media_url, ie='Letv')
  151. for media_url in media_urls]
  152. title = self._html_search_meta('keywords', page,
  153. fatal=False).split('')[0]
  154. description = self._html_search_meta('description', page, fatal=False)
  155. return self.playlist_result(entries, playlist_id, playlist_title=title,
  156. playlist_description=description)
  157. class LetvPlaylistIE(LetvTvIE):
  158. _VALID_URL = r'http://tv.letv.com/[a-z]+/(?P<id>[a-z]+)/index.s?html'
  159. _TESTS = [{
  160. 'url': 'http://tv.letv.com/izt/wuzetian/index.html',
  161. 'info_dict': {
  162. 'id': 'wuzetian',
  163. 'title': '武媚娘传奇',
  164. 'description': 'md5:e12499475ab3d50219e5bba00b3cb248'
  165. },
  166. # This playlist contains some extra videos other than the drama itself
  167. 'playlist_mincount': 96
  168. }, {
  169. 'url': 'http://tv.letv.com/pzt/lswjzzjc/index.shtml',
  170. 'info_dict': {
  171. 'id': 'lswjzzjc',
  172. # The title should be "劲舞青春", but I can't find a simple way to
  173. # determine the playlist title
  174. 'title': '乐视午间自制剧场',
  175. 'description': 'md5:b1eef244f45589a7b5b1af9ff25a4489'
  176. },
  177. 'playlist_mincount': 7
  178. }]