|
@ -1591,6 +1591,8 @@ class DailymotionIE(InfoExtractor): |
|
|
self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id) |
|
|
self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id) |
|
|
|
|
|
|
|
|
def _real_extract(self, url): |
|
|
def _real_extract(self, url): |
|
|
|
|
|
htmlParser = HTMLParser.HTMLParser() |
|
|
|
|
|
|
|
|
# Extract id and simplified title from URL |
|
|
# Extract id and simplified title from URL |
|
|
mobj = re.match(self._VALID_URL, url) |
|
|
mobj = re.match(self._VALID_URL, url) |
|
|
if mobj is None: |
|
|
if mobj is None: |
|
@ -1601,7 +1603,6 @@ class DailymotionIE(InfoExtractor): |
|
|
self._downloader.increment_downloads() |
|
|
self._downloader.increment_downloads() |
|
|
video_id = mobj.group(1) |
|
|
video_id = mobj.group(1) |
|
|
|
|
|
|
|
|
simple_title = mobj.group(2).decode('utf-8') |
|
|
|
|
|
video_extension = 'flv' |
|
|
video_extension = 'flv' |
|
|
|
|
|
|
|
|
# Retrieve video webpage to extract further information |
|
|
# Retrieve video webpage to extract further information |
|
@ -1631,12 +1632,13 @@ class DailymotionIE(InfoExtractor): |
|
|
|
|
|
|
|
|
video_url = mediaURL |
|
|
video_url = mediaURL |
|
|
|
|
|
|
|
|
mobj = re.search(r'(?im)<title>\s*(.+)\s*-\s*Video\s+Dailymotion</title>', webpage) |
|
|
|
|
|
|
|
|
mobj = re.search(r'<meta property="og:title" content="(?P<title>[^"]*)" />', webpage) |
|
|
if mobj is None: |
|
|
if mobj is None: |
|
|
self._downloader.trouble(u'ERROR: unable to extract title') |
|
|
self._downloader.trouble(u'ERROR: unable to extract title') |
|
|
return |
|
|
return |
|
|
video_title = mobj.group(1).decode('utf-8') |
|
|
|
|
|
|
|
|
video_title = htmlParser.unescape(mobj.group('title')).decode('utf-8') |
|
|
video_title = sanitize_title(video_title) |
|
|
video_title = sanitize_title(video_title) |
|
|
|
|
|
simple_title = _simplify_title(video_title) |
|
|
|
|
|
|
|
|
mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage) |
|
|
mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage) |
|
|
if mobj is None: |
|
|
if mobj is None: |
|
|