You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

70 lines
2.3 KiB

  1. from __future__ import unicode_literals
  2. import re
  3. from .common import InfoExtractor
  4. class AolIE(InfoExtractor):
  5. IE_NAME = 'on.aol.com'
  6. _VALID_URL = r'''(?x)
  7. (?:
  8. aol-video:|
  9. http://on\.aol\.com/
  10. (?:
  11. video/.*-|
  12. playlist/(?P<playlist_display_id>[^/?#]+?)-(?P<playlist_id>[0-9]+)[?#].*_videoid=
  13. )
  14. )
  15. (?P<id>[0-9]+)
  16. (?:$|\?)
  17. '''
  18. _TESTS = [{
  19. 'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
  20. 'md5': '18ef68f48740e86ae94b98da815eec42',
  21. 'info_dict': {
  22. 'id': '518167793',
  23. 'ext': 'mp4',
  24. 'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
  25. },
  26. 'add_ie': ['FiveMin'],
  27. }, {
  28. 'url': 'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316',
  29. 'info_dict': {
  30. 'id': '152147',
  31. 'title': 'Brace Yourself - Today\'s Weirdest News',
  32. },
  33. 'playlist_mincount': 10,
  34. }]
  35. def _real_extract(self, url):
  36. mobj = re.match(self._VALID_URL, url)
  37. video_id = mobj.group('id')
  38. playlist_id = mobj.group('playlist_id')
  39. if not playlist_id or self._downloader.params.get('noplaylist'):
  40. return self.url_result('5min:%s' % video_id)
  41. self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
  42. webpage = self._download_webpage(url, playlist_id)
  43. title = self._html_search_regex(
  44. r'<h1 class="video-title[^"]*">(.+?)</h1>', webpage, 'title')
  45. playlist_html = self._search_regex(
  46. r"(?s)<ul\s+class='video-related[^']*'>(.*?)</ul>", webpage,
  47. 'playlist HTML')
  48. entries = [{
  49. '_type': 'url',
  50. 'url': 'aol-video:%s' % m.group('id'),
  51. 'ie_key': 'Aol',
  52. } for m in re.finditer(
  53. r"<a\s+href='.*videoid=(?P<id>[0-9]+)'\s+class='video-thumb'>",
  54. playlist_html)]
  55. return {
  56. '_type': 'playlist',
  57. 'id': playlist_id,
  58. 'display_id': mobj.group('playlist_display_id'),
  59. 'title': title,
  60. 'entries': entries,
  61. }