|
@ -637,70 +637,77 @@ class GenericIE(InfoExtractor): |
|
|
return self.url_result(smotri_url, 'Smotri') |
|
|
return self.url_result(smotri_url, 'Smotri') |
|
|
|
|
|
|
|
|
# Start with something easy: JW Player in SWFObject |
|
|
# Start with something easy: JW Player in SWFObject |
|
|
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) |
|
|
|
|
|
if mobj is None: |
|
|
|
|
|
|
|
|
found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) |
|
|
|
|
|
if not found: |
|
|
# Look for gorilla-vid style embedding |
|
|
# Look for gorilla-vid style embedding |
|
|
mobj = re.search(r'''(?sx) |
|
|
|
|
|
|
|
|
found = re.findall(r'''(?sx) |
|
|
(?: |
|
|
(?: |
|
|
jw_plugins| |
|
|
jw_plugins| |
|
|
JWPlayerOptions| |
|
|
JWPlayerOptions| |
|
|
jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup |
|
|
jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup |
|
|
) |
|
|
) |
|
|
.*?file\s*:\s*["\'](.*?)["\']''', webpage) |
|
|
.*?file\s*:\s*["\'](.*?)["\']''', webpage) |
|
|
if mobj is None: |
|
|
|
|
|
|
|
|
if not found: |
|
|
# Broaden the search a little bit |
|
|
# Broaden the search a little bit |
|
|
mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage) |
|
|
|
|
|
if mobj is None: |
|
|
|
|
|
# Broaden the search a little bit: JWPlayer JS loader |
|
|
|
|
|
mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage) |
|
|
|
|
|
|
|
|
|
|
|
if mobj is None: |
|
|
|
|
|
|
|
|
found = re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage) |
|
|
|
|
|
if not found: |
|
|
|
|
|
# Broaden the search a little bit: JWPlayer JS loader |
|
|
|
|
|
found = re.findall(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage) |
|
|
|
|
|
if not found: |
|
|
# Try to find twitter cards info |
|
|
# Try to find twitter cards info |
|
|
mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage) |
|
|
|
|
|
if mobj is None: |
|
|
|
|
|
|
|
|
found = re.findall(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage) |
|
|
|
|
|
if not found: |
|
|
# We look for Open Graph info: |
|
|
# We look for Open Graph info: |
|
|
# We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am) |
|
|
# We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am) |
|
|
m_video_type = re.search(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage) |
|
|
|
|
|
|
|
|
m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage) |
|
|
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player: |
|
|
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player: |
|
|
if m_video_type is not None: |
|
|
if m_video_type is not None: |
|
|
mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage) |
|
|
|
|
|
if mobj is None: |
|
|
|
|
|
|
|
|
found = re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage) |
|
|
|
|
|
if not found: |
|
|
# HTML5 video |
|
|
# HTML5 video |
|
|
mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL) |
|
|
|
|
|
if mobj is None: |
|
|
|
|
|
mobj = re.search( |
|
|
|
|
|
|
|
|
found = re.findall(r'(?s)<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage) |
|
|
|
|
|
if not found: |
|
|
|
|
|
found = re.findall( |
|
|
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")' |
|
|
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")' |
|
|
r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'([^\']+)\'"', |
|
|
r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'([^\']+)\'"', |
|
|
webpage) |
|
|
webpage) |
|
|
if mobj: |
|
|
|
|
|
new_url = mobj.group(1) |
|
|
|
|
|
|
|
|
if found: |
|
|
|
|
|
new_url = found.group(1) |
|
|
self.report_following_redirect(new_url) |
|
|
self.report_following_redirect(new_url) |
|
|
return { |
|
|
return { |
|
|
'_type': 'url', |
|
|
'_type': 'url', |
|
|
'url': new_url, |
|
|
'url': new_url, |
|
|
} |
|
|
} |
|
|
if mobj is None: |
|
|
|
|
|
|
|
|
if not found: |
|
|
raise ExtractorError('Unsupported URL: %s' % url) |
|
|
raise ExtractorError('Unsupported URL: %s' % url) |
|
|
|
|
|
|
|
|
# It's possible that one of the regexes |
|
|
|
|
|
# matched, but returned an empty group: |
|
|
|
|
|
if mobj.group(1) is None: |
|
|
|
|
|
raise ExtractorError('Did not find a valid video URL at %s' % url) |
|
|
|
|
|
|
|
|
entries = [] |
|
|
|
|
|
for video_url in found: |
|
|
|
|
|
video_url = compat_urlparse.urljoin(url, video_url) |
|
|
|
|
|
video_id = compat_urllib_parse.unquote(os.path.basename(video_url)) |
|
|
|
|
|
|
|
|
video_url = mobj.group(1) |
|
|
|
|
|
video_url = compat_urlparse.urljoin(url, video_url) |
|
|
|
|
|
video_id = compat_urllib_parse.unquote(os.path.basename(video_url)) |
|
|
|
|
|
|
|
|
# Sometimes, jwplayer extraction will result in a YouTube URL |
|
|
|
|
|
if YoutubeIE.suitable(video_url): |
|
|
|
|
|
entries.append(self.url_result(video_url, 'Youtube')) |
|
|
|
|
|
continue |
|
|
|
|
|
|
|
|
# Sometimes, jwplayer extraction will result in a YouTube URL |
|
|
|
|
|
if YoutubeIE.suitable(video_url): |
|
|
|
|
|
return self.url_result(video_url, 'Youtube') |
|
|
|
|
|
|
|
|
# here's a fun little line of code for you: |
|
|
|
|
|
video_id = os.path.splitext(video_id)[0] |
|
|
|
|
|
|
|
|
# here's a fun little line of code for you: |
|
|
|
|
|
video_id = os.path.splitext(video_id)[0] |
|
|
|
|
|
|
|
|
entries.append({ |
|
|
|
|
|
'id': video_id, |
|
|
|
|
|
'url': video_url, |
|
|
|
|
|
'uploader': video_uploader, |
|
|
|
|
|
'title': video_title, |
|
|
|
|
|
}) |
|
|
|
|
|
|
|
|
|
|
|
if len(entries) == 1: |
|
|
|
|
|
return entries[1] |
|
|
|
|
|
else: |
|
|
|
|
|
for num, e in enumerate(entries, start=1): |
|
|
|
|
|
e['title'] = '%s (%d)' % (e['title'], num) |
|
|
|
|
|
return { |
|
|
|
|
|
'_type': 'playlist', |
|
|
|
|
|
'entries': entries, |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
return { |
|
|
|
|
|
'id': video_id, |
|
|
|
|
|
'url': video_url, |
|
|
|
|
|
'uploader': video_uploader, |
|
|
|
|
|
'title': video_title, |
|
|
|
|
|
} |
|
|
|