You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

207 lines
6.9 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import datetime
  4. import re
  5. import time
  6. from .common import InfoExtractor
  7. from ..compat import (
  8. compat_urllib_parse,
  9. compat_urllib_request,
  10. compat_urlparse,
  11. )
  12. from ..utils import (
  13. determine_ext,
  14. ExtractorError,
  15. parse_iso8601,
  16. )
  17. class LetvIE(InfoExtractor):
  18. _VALID_URL = r'http://www\.letv\.com/ptv/vplay/(?P<id>\d+).html'
  19. _TESTS = [{
  20. 'url': 'http://www.letv.com/ptv/vplay/22005890.html',
  21. 'md5': 'cab23bd68d5a8db9be31c9a222c1e8df',
  22. 'info_dict': {
  23. 'id': '22005890',
  24. 'ext': 'mp4',
  25. 'title': '第87届奥斯卡颁奖礼完美落幕 《鸟人》成最大赢家',
  26. 'timestamp': 1424747397,
  27. 'upload_date': '20150224',
  28. 'description': 'md5:a9cb175fd753e2962176b7beca21a47c',
  29. }
  30. }, {
  31. 'url': 'http://www.letv.com/ptv/vplay/1415246.html',
  32. 'info_dict': {
  33. 'id': '1415246',
  34. 'ext': 'mp4',
  35. 'title': '美人天下01',
  36. 'description': 'md5:f88573d9d7225ada1359eaf0dbf8bcda',
  37. },
  38. }, {
  39. 'note': 'This video is available only in Mainland China, thus a proxy is needed',
  40. 'url': 'http://www.letv.com/ptv/vplay/1118082.html',
  41. 'md5': 'f80936fbe20fb2f58648e81386ff7927',
  42. 'info_dict': {
  43. 'id': '1118082',
  44. 'ext': 'mp4',
  45. 'title': '与龙共舞 完整版',
  46. 'description': 'md5:7506a5eeb1722bb9d4068f85024e3986',
  47. },
  48. 'params': {
  49. 'cn_verification_proxy': 'http://proxy.uku.im:8888'
  50. },
  51. }]
  52. @staticmethod
  53. def urshift(val, n):
  54. return val >> n if val >= 0 else (val + 0x100000000) >> n
  55. # ror() and calc_time_key() are reversed from a embedded swf file in KLetvPlayer.swf
  56. def ror(self, param1, param2):
  57. _loc3_ = 0
  58. while _loc3_ < param2:
  59. param1 = self.urshift(param1, 1) + ((param1 & 1) << 31)
  60. _loc3_ += 1
  61. return param1
  62. def calc_time_key(self, param1):
  63. _loc2_ = 773625421
  64. _loc3_ = self.ror(param1, _loc2_ % 13)
  65. _loc3_ = _loc3_ ^ _loc2_
  66. _loc3_ = self.ror(_loc3_, _loc2_ % 17)
  67. return _loc3_
  68. def _real_extract(self, url):
  69. media_id = self._match_id(url)
  70. page = self._download_webpage(url, media_id)
  71. params = {
  72. 'id': media_id,
  73. 'platid': 1,
  74. 'splatid': 101,
  75. 'format': 1,
  76. 'tkey': self.calc_time_key(int(time.time())),
  77. 'domain': 'www.letv.com'
  78. }
  79. play_json_req = compat_urllib_request.Request(
  80. 'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params)
  81. )
  82. cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
  83. if cn_verification_proxy:
  84. play_json_req.add_header('Ytdl-request-proxy', cn_verification_proxy)
  85. play_json = self._download_json(
  86. play_json_req,
  87. media_id, 'Downloading playJson data')
  88. # Check for errors
  89. playstatus = play_json['playstatus']
  90. if playstatus['status'] == 0:
  91. flag = playstatus['flag']
  92. if flag == 1:
  93. msg = 'Country %s auth error' % playstatus['country']
  94. else:
  95. msg = 'Generic error. flag = %d' % flag
  96. raise ExtractorError(msg, expected=True)
  97. playurl = play_json['playurl']
  98. formats = ['350', '1000', '1300', '720p', '1080p']
  99. dispatch = playurl['dispatch']
  100. urls = []
  101. for format_id in formats:
  102. if format_id in dispatch:
  103. media_url = playurl['domain'][0] + dispatch[format_id][0]
  104. # Mimic what flvxz.com do
  105. url_parts = list(compat_urlparse.urlparse(media_url))
  106. qs = dict(compat_urlparse.parse_qs(url_parts[4]))
  107. qs.update({
  108. 'platid': '14',
  109. 'splatid': '1401',
  110. 'tss': 'no',
  111. 'retry': 1
  112. })
  113. url_parts[4] = compat_urllib_parse.urlencode(qs)
  114. media_url = compat_urlparse.urlunparse(url_parts)
  115. url_info_dict = {
  116. 'url': media_url,
  117. 'ext': determine_ext(dispatch[format_id][1]),
  118. 'format_id': format_id,
  119. }
  120. if format_id[-1:] == 'p':
  121. url_info_dict['height'] = format_id[:-1]
  122. urls.append(url_info_dict)
  123. publish_time = parse_iso8601(self._html_search_regex(
  124. r'发布时间&nbsp;([^<>]+) ', page, 'publish time', default=None),
  125. delimiter=' ', timezone=datetime.timedelta(hours=8))
  126. description = self._html_search_meta('description', page, fatal=False)
  127. return {
  128. 'id': media_id,
  129. 'formats': urls,
  130. 'title': playurl['title'],
  131. 'thumbnail': playurl['pic'],
  132. 'description': description,
  133. 'timestamp': publish_time,
  134. }
  135. class LetvTvIE(InfoExtractor):
  136. _VALID_URL = r'http://www.letv.com/tv/(?P<id>\d+).html'
  137. _TESTS = [{
  138. 'url': 'http://www.letv.com/tv/46177.html',
  139. 'info_dict': {
  140. 'id': '46177',
  141. 'title': '美人天下',
  142. 'description': 'md5:395666ff41b44080396e59570dbac01c'
  143. },
  144. 'playlist_count': 35
  145. }]
  146. def _real_extract(self, url):
  147. playlist_id = self._match_id(url)
  148. page = self._download_webpage(url, playlist_id)
  149. media_urls = list(set(re.findall(
  150. r'http://www.letv.com/ptv/vplay/\d+.html', page)))
  151. entries = [self.url_result(media_url, ie='Letv')
  152. for media_url in media_urls]
  153. title = self._html_search_meta('keywords', page,
  154. fatal=False).split('')[0]
  155. description = self._html_search_meta('description', page, fatal=False)
  156. return self.playlist_result(entries, playlist_id, playlist_title=title,
  157. playlist_description=description)
  158. class LetvPlaylistIE(LetvTvIE):
  159. _VALID_URL = r'http://tv.letv.com/[a-z]+/(?P<id>[a-z]+)/index.s?html'
  160. _TESTS = [{
  161. 'url': 'http://tv.letv.com/izt/wuzetian/index.html',
  162. 'info_dict': {
  163. 'id': 'wuzetian',
  164. 'title': '武媚娘传奇',
  165. 'description': 'md5:e12499475ab3d50219e5bba00b3cb248'
  166. },
  167. # This playlist contains some extra videos other than the drama itself
  168. 'playlist_mincount': 96
  169. }, {
  170. 'url': 'http://tv.letv.com/pzt/lswjzzjc/index.shtml',
  171. 'info_dict': {
  172. 'id': 'lswjzzjc',
  173. # The title should be "劲舞青春", but I can't find a simple way to
  174. # determine the playlist title
  175. 'title': '乐视午间自制剧场',
  176. 'description': 'md5:b1eef244f45589a7b5b1af9ff25a4489'
  177. },
  178. 'playlist_mincount': 7
  179. }]