You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

218 lines
7.9 KiB

11 years ago
11 years ago
11 years ago
11 years ago
10 years ago
11 years ago
10 years ago
11 years ago
10 years ago
10 years ago
  1. from __future__ import unicode_literals
  2. import re
  3. import json
  4. from .common import InfoExtractor
  5. from ..compat import (
  6. compat_str,
  7. compat_urllib_parse,
  8. compat_urllib_request,
  9. )
  10. from ..utils import (
  11. ExtractorError,
  12. int_or_none,
  13. )
  14. class LyndaIE(InfoExtractor):
  15. IE_NAME = 'lynda'
  16. IE_DESC = 'lynda.com videos'
  17. _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(\d+)'
  18. _LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
  19. _NETRC_MACHINE = 'lynda'
  20. _SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true'
  21. _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
  22. ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
  23. _TESTS = [{
  24. 'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
  25. 'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',
  26. 'info_dict': {
  27. 'id': '114408',
  28. 'ext': 'mp4',
  29. 'title': 'Using the exercise files',
  30. 'duration': 68
  31. }
  32. }, {
  33. 'url': 'https://www.lynda.com/player/embed/133770?tr=foo=1;bar=g;fizz=rt&fs=0',
  34. 'only_matching': True,
  35. }]
  36. def _real_initialize(self):
  37. self._login()
  38. def _real_extract(self, url):
  39. mobj = re.match(self._VALID_URL, url)
  40. video_id = mobj.group(1)
  41. page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, video_id,
  42. 'Downloading video JSON')
  43. video_json = json.loads(page)
  44. if 'Status' in video_json:
  45. raise ExtractorError('lynda returned error: %s' % video_json['Message'], expected=True)
  46. if video_json['HasAccess'] is False:
  47. raise ExtractorError(
  48. 'Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True)
  49. video_id = compat_str(video_json['ID'])
  50. duration = video_json['DurationInSeconds']
  51. title = video_json['Title']
  52. formats = []
  53. fmts = video_json.get('Formats')
  54. if fmts:
  55. formats.extend([
  56. {
  57. 'url': fmt['Url'],
  58. 'ext': fmt['Extension'],
  59. 'width': fmt['Width'],
  60. 'height': fmt['Height'],
  61. 'filesize': fmt['FileSize'],
  62. 'format_id': str(fmt['Resolution'])
  63. } for fmt in fmts])
  64. prioritized_streams = video_json.get('PrioritizedStreams')
  65. if prioritized_streams:
  66. formats.extend([
  67. {
  68. 'url': video_url,
  69. 'width': int_or_none(format_id),
  70. 'format_id': format_id,
  71. } for format_id, video_url in prioritized_streams['0'].items()
  72. ])
  73. self._check_formats(formats, video_id)
  74. self._sort_formats(formats)
  75. subtitles = self.extract_subtitles(video_id, page)
  76. return {
  77. 'id': video_id,
  78. 'title': title,
  79. 'duration': duration,
  80. 'subtitles': subtitles,
  81. 'formats': formats
  82. }
  83. def _login(self):
  84. (username, password) = self._get_login_info()
  85. if username is None:
  86. return
  87. login_form = {
  88. 'username': username,
  89. 'password': password,
  90. 'remember': 'false',
  91. 'stayPut': 'false'
  92. }
  93. request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
  94. login_page = self._download_webpage(request, None, 'Logging in as %s' % username)
  95. # Not (yet) logged in
  96. m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page)
  97. if m is not None:
  98. response = m.group('json')
  99. response_json = json.loads(response)
  100. state = response_json['state']
  101. if state == 'notlogged':
  102. raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)
  103. # This is when we get popup:
  104. # > You're already logged in to lynda.com on two devices.
  105. # > If you log in here, we'll log you out of another device.
  106. # So, we need to confirm this.
  107. if state == 'conflicted':
  108. confirm_form = {
  109. 'username': '',
  110. 'password': '',
  111. 'resolve': 'true',
  112. 'remember': 'false',
  113. 'stayPut': 'false',
  114. }
  115. request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form))
  116. login_page = self._download_webpage(request, None, 'Confirming log in and log out from another device')
  117. if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
  118. raise ExtractorError('Unable to log in')
  119. def _fix_subtitles(self, subs):
  120. srt = ''
  121. for pos in range(0, len(subs) - 1):
  122. seq_current = subs[pos]
  123. m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
  124. if m_current is None:
  125. continue
  126. seq_next = subs[pos + 1]
  127. m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode'])
  128. if m_next is None:
  129. continue
  130. appear_time = m_current.group('timecode')
  131. disappear_time = m_next.group('timecode')
  132. text = seq_current['Caption'].lstrip()
  133. srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text)
  134. if srt:
  135. return srt
  136. def _get_subtitles(self, video_id, webpage):
  137. url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
  138. subs = self._download_json(url, None, False)
  139. if subs:
  140. return {'en': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]}
  141. else:
  142. return {}
  143. class LyndaCourseIE(InfoExtractor):
  144. IE_NAME = 'lynda:course'
  145. IE_DESC = 'lynda.com online courses'
  146. # Course link equals to welcome/introduction video link of same course
  147. # We will recognize it as course link
  148. _VALID_URL = r'https?://(?:www|m)\.lynda\.com/(?P<coursepath>[^/]+/[^/]+/(?P<courseid>\d+))-\d\.html'
  149. def _real_extract(self, url):
  150. mobj = re.match(self._VALID_URL, url)
  151. course_path = mobj.group('coursepath')
  152. course_id = mobj.group('courseid')
  153. page = self._download_webpage('http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
  154. course_id, 'Downloading course JSON')
  155. course_json = json.loads(page)
  156. if 'Status' in course_json and course_json['Status'] == 'NotFound':
  157. raise ExtractorError('Course %s does not exist' % course_id, expected=True)
  158. unaccessible_videos = 0
  159. videos = []
  160. (username, _) = self._get_login_info()
  161. # Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided
  162. # by single video API anymore
  163. for chapter in course_json['Chapters']:
  164. for video in chapter['Videos']:
  165. if username is None and video['HasAccess'] is False:
  166. unaccessible_videos += 1
  167. continue
  168. videos.append(video['ID'])
  169. if unaccessible_videos > 0:
  170. self._downloader.report_warning('%s videos are only available for members and will not be downloaded. '
  171. % unaccessible_videos + LyndaIE.ACCOUNT_CREDENTIALS_HINT)
  172. entries = [
  173. self.url_result('http://www.lynda.com/%s/%s-4.html' %
  174. (course_path, video_id),
  175. 'Lynda')
  176. for video_id in videos]
  177. course_title = course_json['Title']
  178. return self.playlist_result(entries, course_id, course_title)