You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

179 lines
6.0 KiB

  1. from __future__ import unicode_literals
  2. import re
  3. from .common import InfoExtractor
  4. from .vimeo import VimeoIE
  5. from ..compat import compat_str
  6. from ..utils import (
  7. ExtractorError,
  8. int_or_none,
  9. merge_dicts,
  10. try_get,
  11. unescapeHTML,
  12. unified_timestamp,
  13. urljoin,
  14. )
  15. class RayWenderlichIE(InfoExtractor):
  16. _VALID_URL = r'''(?x)
  17. https?://
  18. (?:
  19. videos\.raywenderlich\.com/courses|
  20. (?:www\.)?raywenderlich\.com
  21. )/
  22. (?P<course_id>[^/]+)/lessons/(?P<id>\d+)
  23. '''
  24. _TESTS = [{
  25. 'url': 'https://www.raywenderlich.com/3530-testing-in-ios/lessons/1',
  26. 'info_dict': {
  27. 'id': '248377018',
  28. 'ext': 'mp4',
  29. 'title': 'Introduction',
  30. 'description': 'md5:804d031b3efa9fcb49777d512d74f722',
  31. 'timestamp': 1513906277,
  32. 'upload_date': '20171222',
  33. 'duration': 133,
  34. 'uploader': 'Ray Wenderlich',
  35. 'uploader_id': 'user3304672',
  36. },
  37. 'params': {
  38. 'noplaylist': True,
  39. 'skip_download': True,
  40. },
  41. 'add_ie': [VimeoIE.ie_key()],
  42. 'expected_warnings': ['HTTP Error 403: Forbidden'],
  43. }, {
  44. 'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
  45. 'only_matching': True,
  46. }]
  47. @staticmethod
  48. def _extract_video_id(data, lesson_id):
  49. if not data:
  50. return
  51. groups = try_get(data, lambda x: x['groups'], list) or []
  52. if not groups:
  53. return
  54. for group in groups:
  55. if not isinstance(group, dict):
  56. continue
  57. contents = try_get(data, lambda x: x['contents'], list) or []
  58. for content in contents:
  59. if not isinstance(content, dict):
  60. continue
  61. ordinal = int_or_none(content.get('ordinal'))
  62. if ordinal != lesson_id:
  63. continue
  64. video_id = content.get('identifier')
  65. if video_id:
  66. return compat_str(video_id)
  67. def _real_extract(self, url):
  68. mobj = re.match(self._VALID_URL, url)
  69. course_id, lesson_id = mobj.group('course_id', 'id')
  70. display_id = '%s/%s' % (course_id, lesson_id)
  71. webpage = self._download_webpage(url, display_id)
  72. thumbnail = self._og_search_thumbnail(
  73. webpage, default=None) or self._html_search_meta(
  74. 'twitter:image', webpage, 'thumbnail')
  75. if '>Subscribe to unlock' in webpage:
  76. raise ExtractorError(
  77. 'This content is only available for subscribers',
  78. expected=True)
  79. info = {
  80. 'thumbnail': thumbnail,
  81. }
  82. vimeo_id = self._search_regex(
  83. r'data-vimeo-id=["\'](\d+)', webpage, 'vimeo id', default=None)
  84. if not vimeo_id:
  85. data = self._parse_json(
  86. self._search_regex(
  87. r'data-collection=(["\'])(?P<data>{.+?})\1', webpage,
  88. 'data collection', default='{}', group='data'),
  89. display_id, transform_source=unescapeHTML, fatal=False)
  90. video_id = self._extract_video_id(
  91. data, lesson_id) or self._search_regex(
  92. r'/videos/(\d+)/', thumbnail, 'video id')
  93. headers = {
  94. 'Referer': url,
  95. 'X-Requested-With': 'XMLHttpRequest',
  96. }
  97. csrf_token = self._html_search_meta(
  98. 'csrf-token', webpage, 'csrf token', default=None)
  99. if csrf_token:
  100. headers['X-CSRF-Token'] = csrf_token
  101. video = self._download_json(
  102. 'https://videos.raywenderlich.com/api/v1/videos/%s.json'
  103. % video_id, display_id, headers=headers)['video']
  104. vimeo_id = video['clips'][0]['provider_id']
  105. info.update({
  106. '_type': 'url_transparent',
  107. 'title': video.get('name'),
  108. 'description': video.get('description') or video.get(
  109. 'meta_description'),
  110. 'duration': int_or_none(video.get('duration')),
  111. 'timestamp': unified_timestamp(video.get('created_at')),
  112. })
  113. return merge_dicts(info, self.url_result(
  114. VimeoIE._smuggle_referrer(
  115. 'https://player.vimeo.com/video/%s' % vimeo_id, url),
  116. ie=VimeoIE.ie_key(), video_id=vimeo_id))
  117. class RayWenderlichCourseIE(InfoExtractor):
  118. _VALID_URL = r'''(?x)
  119. https?://
  120. (?:
  121. videos\.raywenderlich\.com/courses|
  122. (?:www\.)?raywenderlich\.com
  123. )/
  124. (?P<id>[^/]+)
  125. '''
  126. _TEST = {
  127. 'url': 'https://www.raywenderlich.com/3530-testing-in-ios',
  128. 'info_dict': {
  129. 'title': 'Testing in iOS',
  130. 'id': '3530-testing-in-ios',
  131. },
  132. 'params': {
  133. 'noplaylist': False,
  134. },
  135. 'playlist_count': 29,
  136. }
  137. @classmethod
  138. def suitable(cls, url):
  139. return False if RayWenderlichIE.suitable(url) else super(
  140. RayWenderlichCourseIE, cls).suitable(url)
  141. def _real_extract(self, url):
  142. course_id = self._match_id(url)
  143. webpage = self._download_webpage(url, course_id)
  144. entries = []
  145. lesson_urls = set()
  146. for lesson_url in re.findall(
  147. r'<a[^>]+\bhref=["\'](/%s/lessons/\d+)' % course_id, webpage):
  148. if lesson_url in lesson_urls:
  149. continue
  150. lesson_urls.add(lesson_url)
  151. entries.append(self.url_result(
  152. urljoin(url, lesson_url), ie=RayWenderlichIE.ie_key()))
  153. title = self._og_search_title(
  154. webpage, default=None) or self._html_search_meta(
  155. 'twitter:title', webpage, 'title', default=None)
  156. return self.playlist_result(entries, course_id, title)