You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

36 lines
1.1 KiB

  1. import datetime
  2. import json
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. remove_start,
  7. )
  8. class AcademicEarthCourseIE(InfoExtractor):
  9. _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/courses/(?P<id>[^?#/]+)'
  10. IE_NAME = u'AcademicEarth:Course'
  11. def _real_extract(self, url):
  12. m = re.match(self._VALID_URL, url)
  13. playlist_id = m.group('id')
  14. webpage = self._download_webpage(url, playlist_id)
  15. title = self._html_search_regex(
  16. r'<h1 class="playlist-name">(.*?)</h1>', webpage, u'title')
  17. description = self._html_search_regex(
  18. r'<p class="excerpt">(.*?)</p>',
  19. webpage, u'description', fatal=False)
  20. urls = re.findall(
  21. r'<h3 class="lecture-title"><a target="_blank" href="([^"]+)">',
  22. webpage)
  23. entries = [self.url_result(u) for u in urls]
  24. return {
  25. '_type': 'playlist',
  26. 'id': playlist_id,
  27. 'title': title,
  28. 'description': description,
  29. 'entries': entries,
  30. }