You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

215 lines
7.7 KiB

11 years ago
11 years ago
11 years ago
11 years ago
10 years ago
11 years ago
10 years ago
11 years ago
10 years ago
10 years ago
  1. from __future__ import unicode_literals
  2. import re
  3. import json
  4. from .common import InfoExtractor
  5. from ..compat import (
  6. compat_str,
  7. compat_urllib_parse,
  8. compat_urllib_request,
  9. )
  10. from ..utils import (
  11. ExtractorError,
  12. int_or_none,
  13. )
  class LyndaIE(InfoExtractor):
      """Extractor for a single lynda.com video page.

      Video metadata and the transcript are both fetched from the
      ``/ajax/player`` endpoint. When credentials are configured, a login is
      performed once during initialization.
      """

      IE_NAME = 'lynda'
      IE_DESC = 'lynda.com videos'
      _VALID_URL = r'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html'
      _LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
      _NETRC_MACHINE = 'lynda'

      # Marker searched for in the login response page to confirm the session.
      _SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true'
      # Transcript timecodes are wrapped in square brackets, e.g. [00:00:01.23].
      _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'

      # Also referenced by LyndaCourseIE, hence no leading underscore.
      ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'

      _TEST = {
          'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
          'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',
          'info_dict': {
              'id': '114408',
              'ext': 'mp4',
              'title': 'Using the exercise files',
              'duration': 68
          }
      }

      def _real_initialize(self):
          """Log in (if credentials are available) before any extraction."""
          self._login()

      def _real_extract(self, url):
          """Return the info dict for the video addressed by *url*.

          Raises:
              ExtractorError: if the player endpoint reports an error status or
                  the video is flagged as not accessible.
          """
          mobj = re.match(self._VALID_URL, url)
          video_id = mobj.group(1)

          page = self._download_webpage(
              'http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id,
              video_id, 'Downloading video JSON')
          video_json = json.loads(page)

          # A 'Status' key is only present on error responses.
          if 'Status' in video_json:
              raise ExtractorError(
                  'lynda returned error: %s' % video_json['Message'], expected=True)

          # Only an explicit False means "no access"; a missing flag is not an error.
          if video_json.get('HasAccess') is False:
              raise ExtractorError(
                  'Video %s is only available for members. ' % video_id
                  + self.ACCOUNT_CREDENTIALS_HINT, expected=True)

          video_id = compat_str(video_json['ID'])
          duration = int_or_none(video_json.get('DurationInSeconds'))
          title = video_json['Title']

          formats = []

          # 'Formats' entries carry full metadata; the numeric fields are
          # optional, so read them defensively instead of indexing.
          for fmt in video_json.get('Formats') or []:
              resolution = fmt.get('Resolution')
              formats.append({
                  'url': fmt['Url'],
                  'ext': fmt.get('Extension'),
                  'width': int_or_none(fmt.get('Width')),
                  'height': int_or_none(fmt.get('Height')),
                  'filesize': int_or_none(fmt.get('FileSize')),
                  # compat_str (not str) keeps the id a text string on Python 2,
                  # consistent with the rest of this unicode_literals module.
                  'format_id': compat_str(resolution) if resolution is not None else None,
              })

          # 'PrioritizedStreams' maps a stream index to {format_id: url};
          # only stream '0' is used, and it may be absent.
          prioritized_streams = video_json.get('PrioritizedStreams') or {}
          for format_id, video_url in (prioritized_streams.get('0') or {}).items():
              formats.append({
                  'url': video_url,
                  'width': int_or_none(format_id),
                  'format_id': format_id,
              })

          self._check_formats(formats, video_id)
          self._sort_formats(formats)

          subtitles = self.extract_subtitles(video_id, page)

          return {
              'id': video_id,
              'title': title,
              'duration': duration,
              'subtitles': subtitles,
              'formats': formats
          }

      def _login(self):
          """Authenticate against lynda.com using the configured credentials.

          Does nothing when no username is configured.

          Raises:
              ExtractorError: on rejected credentials, or when the response page
                  does not contain the successful-login marker.
          """
          (username, password) = self._get_login_info()
          if username is None:
              return

          login_form = {
              'username': username,
              'password': password,
              'remember': 'false',
              'stayPut': 'false'
          }
          # POST bodies must be bytes on Python 3; urlencode() returns text.
          request = compat_urllib_request.Request(
              self._LOGIN_URL,
              compat_urllib_parse.urlencode(login_form).encode('utf-8'))
          login_page = self._download_webpage(request, None, 'Logging in as %s' % username)

          # Not (yet) logged in
          m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page)
          if m is not None:
              response = m.group('json')
              response_json = json.loads(response)
              state = response_json['state']

              if state == 'notlogged':
                  raise ExtractorError(
                      'Unable to login, incorrect username and/or password', expected=True)

              # This is when we get popup:
              # > You're already logged in to lynda.com on two devices.
              # > If you log in here, we'll log you out of another device.
              # So, we need to confirm this.
              if state == 'conflicted':
                  confirm_form = {
                      'username': '',
                      'password': '',
                      'resolve': 'true',
                      'remember': 'false',
                      'stayPut': 'false',
                  }
                  request = compat_urllib_request.Request(
                      self._LOGIN_URL,
                      compat_urllib_parse.urlencode(confirm_form).encode('utf-8'))
                  login_page = self._download_webpage(
                      request, None, 'Confirming log in and log out from another device')

          if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
              raise ExtractorError('Unable to log in')

      def _fix_subtitles(self, subs):
          """Convert transcript entries into SRT text.

          *subs* is a sequence of dicts with 'Timecode' and 'Caption' keys. Each
          cue is shown from its own timecode until the timecode of the following
          entry, so the final entry only provides an end time and produces no
          cue of its own.

          Returns the SRT document as a string, or None when no cue could be
          built.
          """
          cues = []
          for pos in range(0, len(subs) - 1):
              seq_current = subs[pos]
              m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
              if m_current is None:
                  continue
              seq_next = subs[pos + 1]
              m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode'])
              if m_next is None:
                  continue
              text = (seq_current['Caption'] or '').strip()
              if not text:
                  # An empty cue would only add noise to the subtitle file.
                  continue
              # Number cues 1..n without gaps and end each with a blank line,
              # which is what separates consecutive cues in an SRT file.
              cues.append('%d\r\n%s --> %s\r\n%s\r\n\r\n' % (
                  len(cues) + 1,
                  m_current.group('timecode'),
                  m_next.group('timecode'),
                  text))
          if cues:
              return ''.join(cues)
          return None

      def _get_subtitles(self, video_id, webpage):
          """Download the transcript for *video_id* and return it as subtitles.

          *webpage* is accepted for interface compatibility and is not used.
          Returns ``{'en': [{'ext': 'srt', 'data': ...}]}``, or an empty dict
          when there is no transcript or no cue could be built from it.
          """
          url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
          subs = self._download_json(url, None, False)
          if not subs:
              return {}
          fixed_subs = self._fix_subtitles(subs)
          if not fixed_subs:
              # Never hand out a subtitle entry whose data is None.
              return {}
          return {'en': [{'ext': 'srt', 'data': fixed_subs}]}
  class LyndaCourseIE(InfoExtractor):
      """Playlist extractor that expands a lynda.com course into its videos."""

      IE_NAME = 'lynda:course'
      IE_DESC = 'lynda.com online courses'

      # Course link equals to welcome/introduction video link of same course
      # We will recognize it as course link
      _VALID_URL = r'https?://(?:www|m)\.lynda\.com/(?P<coursepath>[^/]+/[^/]+/(?P<courseid>\d+))-\d\.html'

      def _real_extract(self, url):
          """Build a playlist result with one URL entry per accessible video.

          Raises ExtractorError when the course endpoint answers with a
          'NotFound' status.
          """
          match = re.match(self._VALID_URL, url)
          course_path = match.group('coursepath')
          course_id = match.group('courseid')

          raw_course = self._download_webpage(
              'http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
              course_id, 'Downloading course JSON')
          course_json = json.loads(raw_course)

          if 'Status' in course_json and course_json['Status'] == 'NotFound':
              raise ExtractorError('Course %s does not exist' % course_id, expected=True)

          (username, _) = self._get_login_info()
          anonymous = username is None

          # Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided
          # by single video API anymore

          skipped_count = 0
          video_ids = []
          for chapter in course_json['Chapters']:
              for video in chapter['Videos']:
                  # Members-only videos are skipped only when no credentials are set.
                  if anonymous and video['HasAccess'] is False:
                      skipped_count += 1
                  else:
                      video_ids.append(video['ID'])

          if skipped_count > 0:
              self._downloader.report_warning(
                  '%s videos are only available for members and will not be downloaded. '
                  % skipped_count + LyndaIE.ACCOUNT_CREDENTIALS_HINT)

          # Each video is delegated to the 'Lynda' extractor through its page URL.
          entries = []
          for video_id in video_ids:
              video_url = 'http://www.lynda.com/%s/%s-4.html' % (course_path, video_id)
              entries.append(self.url_result(video_url, 'Lynda'))

          return self.playlist_result(entries, course_id, course_json['Title'])