You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

72 lines
2.4 KiB

  1. from __future__ import unicode_literals
  2. import re
  3. from .common import InfoExtractor
  4. from .fivemin import FiveMinIE
  5. class AolIE(InfoExtractor):
  6. IE_NAME = 'on.aol.com'
  7. _VALID_URL = r'''(?x)
  8. (?:
  9. aol-video:|
  10. http://on\.aol\.com/
  11. (?:
  12. video/.*-|
  13. playlist/(?P<playlist_display_id>[^/?#]+?)-(?P<playlist_id>[0-9]+)[?#].*_videoid=
  14. )
  15. )
  16. (?P<id>[0-9]+)
  17. (?:$|\?)
  18. '''
  19. _TESTS = [{
  20. 'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
  21. 'md5': '18ef68f48740e86ae94b98da815eec42',
  22. 'info_dict': {
  23. 'id': '518167793',
  24. 'ext': 'mp4',
  25. 'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
  26. },
  27. 'add_ie': ['FiveMin'],
  28. }, {
  29. 'url': 'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316',
  30. 'info_dict': {
  31. 'id': '152147',
  32. 'title': 'Brace Yourself - Today\'s Weirdest News',
  33. },
  34. 'playlist_mincount': 10,
  35. }]
  36. def _real_extract(self, url):
  37. mobj = re.match(self._VALID_URL, url)
  38. video_id = mobj.group('id')
  39. playlist_id = mobj.group('playlist_id')
  40. if playlist_id and not self._downloader.params.get('noplaylist'):
  41. self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
  42. webpage = self._download_webpage(url, playlist_id)
  43. title = self._html_search_regex(
  44. r'<h1 class="video-title[^"]*">(.+?)</h1>', webpage, 'title')
  45. playlist_html = self._search_regex(
  46. r"(?s)<ul\s+class='video-related[^']*'>(.*?)</ul>", webpage,
  47. 'playlist HTML')
  48. entries = [{
  49. '_type': 'url',
  50. 'url': 'aol-video:%s' % m.group('id'),
  51. 'ie_key': 'Aol',
  52. } for m in re.finditer(
  53. r"<a\s+href='.*videoid=(?P<id>[0-9]+)'\s+class='video-thumb'>",
  54. playlist_html)]
  55. return {
  56. '_type': 'playlist',
  57. 'id': playlist_id,
  58. 'display_id': mobj.group('playlist_display_id'),
  59. 'title': title,
  60. 'entries': entries,
  61. }
  62. return FiveMinIE._build_result(video_id)