You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

31 lines
1.0 KiB

  1. import re
  2. from .common import InfoExtractor
  class AcademicEarthCourseIE(InfoExtractor):
      """Playlist extractor for Academic Earth course and playlist pages.

      Matches ``academicearth.org/courses/<id>`` and
      ``academicearth.org/playlists/<id>`` URLs and returns the lecture links
      found on the page as the entries of a playlist result.
      """

      _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/(?:courses|playlists)/(?P<id>[^?#/]+)'
      IE_NAME = u'AcademicEarth:Course'

      def _real_extract(self, url):
          """Build the playlist dictionary for the page at *url*.

          The ``id`` group of ``_VALID_URL`` is used as the playlist id. The
          page is downloaded once; its title, description and lecture links
          are then pulled out of the HTML with regular expressions.

          Returns a dict with the keys ``_type`` (always ``'playlist'``),
          ``id``, ``title``, ``description`` and ``entries``.
          """
          # The URL is expected to match _VALID_URL; a non-matching URL fails
          # here with AttributeError because re.match returns None.
          course_id = re.match(self._VALID_URL, url).group('id')
          page = self._download_webpage(url, course_id)

          heading = self._html_search_regex(
              r'<h1 class="playlist-name">(.*?)</h1>', page, u'title')
          # fatal=False is forwarded to the helper; presumably a page without
          # an excerpt is tolerated rather than aborting (confirm in the
          # InfoExtractor base class).
          blurb = self._html_search_regex(
              r'<p class="excerpt">(.*?)</p>',
              page, u'description', fatal=False)

          # Every lecture heading links to the page of a single lecture.
          lecture_links = re.findall(
              r'<h3 class="lecture-title"><a target="_blank" href="([^"]+)">',
              page)

          playlist = {
              '_type': 'playlist',
              'id': course_id,
              'title': heading,
              'description': blurb,
          }
          # Each link is wrapped with url_result, in page order.
          playlist['entries'] = list(map(self.url_result, lecture_links))
          return playlist