|
|
@ -4,6 +4,7 @@ from __future__ import unicode_literals |
|
|
|
|
|
|
|
import os |
|
|
|
import re |
|
|
|
import xml.etree.ElementTree |
|
|
|
|
|
|
|
from .common import InfoExtractor |
|
|
|
from .youtube import YoutubeIE |
|
|
@ -159,6 +160,25 @@ class GenericIE(InfoExtractor): |
|
|
|
raise ExtractorError('Invalid URL protocol') |
|
|
|
return response |
|
|
|
|
|
|
|
def _extract_rss(self, url, video_id, doc): |
|
|
|
playlist_title = doc.find('./channel/title').text |
|
|
|
playlist_desc_el = doc.find('./channel/description') |
|
|
|
playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text |
|
|
|
|
|
|
|
entries = [{ |
|
|
|
'_type': 'url', |
|
|
|
'url': e.find('link').text, |
|
|
|
'title': e.find('title').text, |
|
|
|
} for e in doc.findall('./channel/item')] |
|
|
|
|
|
|
|
return { |
|
|
|
'_type': 'playlist', |
|
|
|
'id': url, |
|
|
|
'title': playlist_title, |
|
|
|
'description': playlist_desc, |
|
|
|
'entries': entries, |
|
|
|
} |
|
|
|
|
|
|
|
def _real_extract(self, url): |
|
|
|
parsed_url = compat_urlparse.urlparse(url) |
|
|
|
if not parsed_url.scheme: |
|
|
@ -219,6 +239,14 @@ class GenericIE(InfoExtractor): |
|
|
|
|
|
|
|
self.report_extraction(video_id) |
|
|
|
|
|
|
|
# Is it an RSS feed? |
|
|
|
try: |
|
|
|
doc = xml.etree.ElementTree.fromstring(webpage) |
|
|
|
if doc.tag == 'rss': |
|
|
|
return self._extract_rss(url, video_id, doc) |
|
|
|
except xml.etree.ElementTree.ParseError: |
|
|
|
pass |
|
|
|
|
|
|
|
# it's tempting to parse this further, but you would |
|
|
|
# have to take into account all the variations like |
|
|
|
# Video Title - Site Name |
|
|
|