You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

80 lines
2.6 KiB

  1. from __future__ import unicode_literals
  2. import re
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. unified_strdate,
  6. )
  7. class KhanAcademyIE(InfoExtractor):
  8. _VALID_URL = r'^https?://(?:www\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
  9. IE_NAME = 'KhanAcademy'
  10. _TESTS = [{
  11. 'url': 'http://www.khanacademy.org/video/one-time-pad',
  12. 'md5': '7021db7f2d47d4fff89b13177cb1e8f4',
  13. 'info_dict': {
  14. 'id': 'one-time-pad',
  15. 'ext': 'mp4',
  16. 'title': 'The one-time pad',
  17. 'description': 'The perfect cipher',
  18. 'duration': 176,
  19. 'uploader': 'Brit Cruise',
  20. 'upload_date': '20120411',
  21. }
  22. }, {
  23. 'url': 'https://www.khanacademy.org/math/applied-math/cryptography',
  24. 'info_dict': {
  25. 'id': 'cryptography',
  26. 'title': 'Journey into cryptography',
  27. 'description': 'How have humans protected their secret messages through history? What has changed today?',
  28. },
  29. 'playlist_mincount': 3,
  30. }]
  31. def _real_extract(self, url):
  32. m = re.match(self._VALID_URL, url)
  33. video_id = m.group('id')
  34. if m.group('key') == 'video':
  35. data = self._download_json(
  36. 'http://api.khanacademy.org/api/v1/videos/' + video_id,
  37. video_id, 'Downloading video info')
  38. upload_date = unified_strdate(data['date_added'])
  39. uploader = ', '.join(data['author_names'])
  40. return {
  41. '_type': 'url_transparent',
  42. 'url': data['url'],
  43. 'id': video_id,
  44. 'title': data['title'],
  45. 'thumbnail': data['image_url'],
  46. 'duration': data['duration'],
  47. 'description': data['description'],
  48. 'uploader': uploader,
  49. 'upload_date': upload_date,
  50. }
  51. else:
  52. # topic
  53. data = self._download_json(
  54. 'http://api.khanacademy.org/api/v1/topic/' + video_id,
  55. video_id, 'Downloading topic info')
  56. entries = [
  57. {
  58. '_type': 'url',
  59. 'url': c['url'],
  60. 'id': c['id'],
  61. 'title': c['title'],
  62. }
  63. for c in data['children'] if c['kind'] in ('Video', 'Topic')]
  64. return {
  65. '_type': 'playlist',
  66. 'id': video_id,
  67. 'title': data['title'],
  68. 'description': data['description'],
  69. 'entries': entries,
  70. }