Conflicts: youtube_dl/extractor/mlb.py totalwebcasting
@ -0,0 +1 @@ | |||
*.swf |
@ -0,0 +1,19 @@ | |||
// input: [["a", "b", "c", "d"]]
// output: ["c", "b", "a", "d"]

// Interpreter fixture: exchanges the first array element with the one at
// index 2 through a private instance method.
package {
public class ArrayAccess {
    public static function main(ar:Array):Array {
        var instance:ArrayAccess = new ArrayAccess();
        return instance.f(ar, 2);
    }

    // Swap ar[0] with the element selected by num. The read index wraps
    // around the array length (modulo); the write index is used as given.
    private function f(ar:Array, num:Number):Array{
        var first:String = ar[0];
        var picked:String = ar[num % ar.length];
        ar[0] = picked;
        ar[num] = first;
        return ar;
    }
}
}
@ -0,0 +1,17 @@ | |||
// input: []
// output: 121

// Interpreter fixture: calls a public method on an instance of a second
// class and returns its result.
package {
public class ClassCall {
    public static function main():int{
        var other:OtherClass = new OtherClass();
        return other.func(100,20);
    }
}
}

// Helper class declared outside the package block.
class OtherClass {
    // Returns the sum of both arguments plus one.
    public function func(x: int, y: int):int {
        return x+y+1;
    }
}
@ -0,0 +1,15 @@ | |||
// input: []
// output: 0

// Interpreter fixture: constructs an instance of an empty class and
// returns a constant.
package {
public class ClassConstruction {
    public static function main():int{
        var created:Foo = new Foo();
        return 0;
    }
}
}

// Empty helper class; only its construction is exercised.
class Foo {

}
@ -0,0 +1,13 @@ | |||
// input: [1, 2]
// output: 3

// Interpreter fixture: chains three local variables; the result equals
// a + b.
package {
public class LocalVars {
    public static function main(a:int, b:int):int{
        var total:int = a + b + b;
        var reduced:int = total - b;
        var result:int = reduced;
        return result;
    }
}
}
@ -0,0 +1,21 @@ | |||
// input: []
// output: 9

// Interpreter fixture: a public method delegates to a private method of
// the same object.
package {
public class PrivateCall {
    public static function main():int{
        var other:OtherClass = new OtherClass();
        return other.func();
    }
}
}

// Helper class declared outside the package block.
class OtherClass {
    // Private implementation; only reachable through func().
    private function pf():int {
        return 9;
    }

    // Public entry point forwarding to the private method.
    public function func():int {
        return this.pf();
    }
}
@ -0,0 +1,13 @@ | |||
// input: [1]
// output: 1

// Interpreter fixture: stores the argument in a static variable and reads
// it back.
package {
public class StaticAssignment {
    // Static slot written and read by main().
    public static var v:int;

    public static function main(a:int):int{
        v = a;
        return v;
    }
}
}
@ -0,0 +1,16 @@ | |||
// input: []
// output: 1

// Interpreter fixture: reads a static variable that was never assigned and
// branches on its truthiness.
package {
public class StaticRetrieval {
    // Never assigned by this fixture.
    public static var v:int;

    public static function main():int{
        if (v) {
            return 0;
        } else {
            return 1;
        }
    }
}
}
@ -0,0 +1,77 @@ | |||
#!/usr/bin/env python | |||
# Allow direct execution | |||
import os | |||
import sys | |||
import unittest | |||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |||
import errno | |||
import io | |||
import json | |||
import re | |||
import subprocess | |||
from youtube_dl.swfinterp import SWFInterpreter | |||
TEST_DIR = os.path.join( | |||
os.path.dirname(os.path.abspath(__file__)), 'swftests') | |||
class TestSWFInterpreter(unittest.TestCase):
    """Test case whose test methods are attached at import time.

    The class body is intentionally empty; one ``test_swf_*`` method per
    ActionScript fixture is added with ``setattr`` by the module code.
    """
def _make_testfunc(testfile): | |||
m = re.match(r'^(.*)\.(as)$', testfile) | |||
if not m: | |||
return | |||
test_id = m.group(1) | |||
def test_func(self): | |||
as_file = os.path.join(TEST_DIR, testfile) | |||
swf_file = os.path.join(TEST_DIR, test_id + '.swf') | |||
if ((not os.path.exists(swf_file)) | |||
or os.path.getmtime(swf_file) < os.path.getmtime(as_file)): | |||
# Recompile | |||
try: | |||
subprocess.check_call(['mxmlc', '-output', swf_file, as_file]) | |||
except OSError as ose: | |||
if ose.errno == errno.ENOENT: | |||
print('mxmlc not found! Skipping test.') | |||
return | |||
raise | |||
with open(swf_file, 'rb') as swf_f: | |||
swf_content = swf_f.read() | |||
swfi = SWFInterpreter(swf_content) | |||
with io.open(as_file, 'r', encoding='utf-8') as as_f: | |||
as_content = as_f.read() | |||
def _find_spec(key): | |||
m = re.search( | |||
r'(?m)^//\s*%s:\s*(.*?)\n' % re.escape(key), as_content) | |||
if not m: | |||
raise ValueError('Cannot find %s in %s' % (key, testfile)) | |||
return json.loads(m.group(1)) | |||
input_args = _find_spec('input') | |||
output = _find_spec('output') | |||
swf_class = swfi.extract_class(test_id) | |||
func = swfi.extract_function(swf_class, 'main') | |||
res = func(input_args) | |||
self.assertEqual(res, output) | |||
test_func.__name__ = str('test_swf_' + test_id) | |||
setattr(TestSWFInterpreter, test_func.__name__, test_func) | |||
for testfile in os.listdir(TEST_DIR): | |||
_make_testfunc(testfile) | |||
if __name__ == '__main__': | |||
unittest.main() |
@ -0,0 +1,139 @@ | |||
# coding: utf-8 | |||
from __future__ import unicode_literals | |||
import re | |||
from .common import InfoExtractor | |||
class AdultSwimIE(InfoExtractor):
    """Extractor for episode pages on video.adultswim.com.

    An episode is returned as a playlist with one entry per segment; each
    entry lists the formats announced by the segment playlist service.
    """
    _VALID_URL = r'https?://video\.adultswim\.com/(?P<path>.+?)(?:\.html)?(?:\?.*)?(?:#.*)?$'
    _TEST = {
        'url': 'http://video.adultswim.com/rick-and-morty/close-rick-counters-of-the-rick-kind.html?x=y#title',
        'playlist': [
            {
                'md5': '4da359ec73b58df4575cd01a610ba5dc',
                'info_dict': {
                    'id': '8a250ba1450996e901453d7f02ca02f5',
                    'ext': 'flv',
                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 1',
                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
                    'uploader': 'Rick and Morty',
                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
                }
            },
            {
                'md5': 'ffbdf55af9331c509d95350bd0cc1819',
                'info_dict': {
                    'id': '8a250ba1450996e901453d7f4bd102f6',
                    'ext': 'flv',
                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 2',
                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
                    'uploader': 'Rick and Morty',
                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
                }
            },
            {
                'md5': 'b92409635540304280b4b6c36bd14a0a',
                'info_dict': {
                    'id': '8a250ba1450996e901453d7fa73c02f7',
                    'ext': 'flv',
                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 3',
                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
                    'uploader': 'Rick and Morty',
                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
                }
            },
            {
                'md5': 'e8818891d60e47b29cd89d7b0278156d',
                'info_dict': {
                    'id': '8a250ba1450996e901453d7fc8ba02f8',
                    'ext': 'flv',
                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 4',
                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
                    'uploader': 'Rick and Morty',
                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
                }
            }
        ]
    }

    # File extension per "bitrate" attribute value; unknown values fall back
    # to 'mp4' in _real_extract.
    _video_extensions = {
        '3500': 'flv',
        '640': 'mp4',
        '150': 'mp4',
        'ipad': 'm3u8',
        'iphone': 'm3u8'
    }
    # (width, height) per "bitrate" attribute value, where known.
    _video_dimensions = {
        '3500': (1280, 720),
        '640': (480, 270),
        '150': (320, 180)
    }

    def _real_extract(self, url):
        """Return a playlist info dict for the episode at ``url``."""
        mobj = re.match(self._VALID_URL, url)
        video_path = mobj.group('path')

        webpage = self._download_webpage(url, video_path)
        episode_id = self._html_search_regex(
            r'<link rel="video_src" href="http://i\.adultswim\.com/adultswim/adultswimtv/tools/swf/viralplayer\.swf\?id=([0-9a-f]+?)"\s*/?\s*>',
            webpage, 'episode_id')
        title = self._og_search_title(webpage)

        index_url = 'http://asfix.adultswim.com/asfix-svc/episodeSearch/getEpisodesByIDs?networkName=AS&ids=%s' % episode_id
        idoc = self._download_xml(
            index_url, title,
            'Downloading episode index', 'Unable to download episode index')

        episode_el = idoc.find('.//episode')
        show_title = episode_el.attrib.get('collectionTitle')
        episode_title = episode_el.attrib.get('title')
        thumbnail = episode_el.attrib.get('thumbnailUrl')
        # The description is optional metadata; do not fail when the element
        # is missing or empty.
        description_el = episode_el.find('./description')
        if description_el is not None and description_el.text:
            description = description_el.text.strip()
        else:
            description = None

        entries = []
        segment_els = episode_el.findall('./segments/segment')

        for part_num, segment_el in enumerate(segment_els):
            segment_id = segment_el.attrib.get('id')
            segment_title = '%s %s part %d' % (show_title, episode_title, part_num + 1)
            # Kept separate from the episode thumbnail so the playlist-level
            # thumbnail is not replaced by the one of the last segment.
            segment_thumbnail = segment_el.attrib.get('thumbnailUrl')
            # NOTE(review): passed through as the raw attribute string; the
            # unit is not visible here - confirm before converting.
            duration = segment_el.attrib.get('duration')

            segment_url = 'http://asfix.adultswim.com/asfix-svc/episodeservices/getCvpPlaylist?networkName=AS&id=%s' % segment_id
            segment_doc = self._download_xml(
                segment_url, segment_title,
                'Downloading segment information', 'Unable to download segment information')

            formats = []
            for file_el in segment_doc.findall('.//files/file'):
                bitrate = file_el.attrib.get('bitrate')
                file_type = file_el.attrib.get('type')
                width, height = self._video_dimensions.get(bitrate, (None, None))
                formats.append({
                    'format_id': '%s-%s' % (bitrate, file_type),
                    'url': file_el.text,
                    'ext': self._video_extensions.get(bitrate, 'mp4'),
                    # The bitrate may be missing or not a number
                    # (for example: 'iphone')
                    'tbr': int(bitrate) if bitrate and bitrate.isdigit() else None,
                    'height': height,
                    'width': width
                })

            self._sort_formats(formats)

            entries.append({
                'id': segment_id,
                'title': segment_title,
                'formats': formats,
                'uploader': show_title,
                'thumbnail': segment_thumbnail,
                'duration': duration,
                'description': description
            })

        return {
            '_type': 'playlist',
            'id': episode_id,
            'display_id': video_path,
            'entries': entries,
            'title': '%s %s' % (show_title, episode_title),
            'description': description,
            'thumbnail': thumbnail
        }
@ -0,0 +1,65 @@ | |||
from __future__ import unicode_literals | |||
import re | |||
from .common import InfoExtractor | |||
from ..utils import ( | |||
parse_iso8601, | |||
str_to_int, | |||
) | |||
class CrackedIE(InfoExtractor):
    """Extractor for video pages on cracked.com."""
    _VALID_URL = r'https?://(?:www\.)?cracked\.com/video_(?P<id>\d+)_[\da-z-]+\.html'
    _TEST = {
        'url': 'http://www.cracked.com/video_19006_4-plot-holes-you-didnt-notice-in-your-favorite-movies.html',
        'md5': '4b29a5eeec292cd5eca6388c7558db9e',
        'info_dict': {
            'id': '19006',
            'ext': 'mp4',
            'title': '4 Plot Holes You Didn\'t Notice in Your Favorite Movies',
            'description': 'md5:3b909e752661db86007d10e5ec2df769',
            'timestamp': 1405659600,
            'upload_date': '20140718',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video page at ``url``."""
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        video_url = self._html_search_regex(
            [r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'],
            webpage, 'video URL')
        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)

        timestamp = self._html_search_regex(
            r'<time datetime="([^"]+)"', webpage, 'upload date', fatal=False)
        if timestamp:
            # The last six characters of the attribute are dropped before
            # parsing.
            timestamp = parse_iso8601(timestamp[:-6])

        raw_views = self._html_search_regex(
            r'<span class="views" id="viewCounts">([\d,\.]+) Views</span>',
            webpage, 'view count', fatal=False)
        view_count = str_to_int(raw_views)
        raw_comments = self._html_search_regex(
            r'<span id="commentCounts">([\d,\.]+)</span>',
            webpage, 'comment count', fatal=False)
        comment_count = str_to_int(raw_comments)

        # The frame size is encoded in the file name as "_<W>X<H>.mp4".
        width = height = None
        size = re.search(r'_(?P<width>\d+)X(?P<height>\d+)\.mp4$', video_url)
        if size:
            width = int(size.group('width'))
            height = int(size.group('height'))

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'view_count': view_count,
            'comment_count': comment_count,
            'height': height,
            'width': width,
        }
@ -0,0 +1,44 @@ | |||
from __future__ import unicode_literals | |||
import re | |||
from .common import InfoExtractor | |||
class DFBIE(InfoExtractor):
    """Extractor for videos on tv.dfb.de."""
    IE_NAME = 'tv.dfb.de'
    _VALID_URL = r'https?://tv\.dfb\.de/video/[^/]+/(?P<id>\d+)'

    _TEST = {
        'url': 'http://tv.dfb.de/video/highlights-des-empfangs-in-berlin/9070/',
        # The md5 is different each time
        'info_dict': {
            'id': '9070',
            'ext': 'flv',
            'title': 'Highlights des Empfangs in Berlin',
            'upload_date': '20140716',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video page at ``url``."""
        video_id = re.match(self._VALID_URL, url).group('id')

        webpage = self._download_webpage(url, video_id)
        player_info = self._download_xml(
            'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id,
            video_id)
        video_info = player_info.find('video')

        # The player XML points at a second document whose <token> element
        # carries the manifest URL and its auth parameter.
        f4m_info = self._download_xml(video_info.find('url').text, video_id)
        token = f4m_info.find('token').attrib
        manifest_url = '%s?hdnea=%s&hdcore=3.2.0' % (token['url'], token['auth'])

        title = video_info.find('title').text
        thumbnail = self._og_search_thumbnail(webpage)
        # The dot-separated date parts are joined in reverse order.
        date_parts = video_info.find('time_date').text.split('.')
        upload_date = ''.join(reversed(date_parts))

        return {
            'id': video_id,
            'title': title,
            'url': manifest_url,
            'ext': 'flv',
            'thumbnail': thumbnail,
            'upload_date': upload_date,
        }
@ -0,0 +1,119 @@ | |||
# encoding: utf-8 | |||
from __future__ import unicode_literals | |||
import re | |||
from .common import InfoExtractor | |||
from ..utils import ( | |||
parse_duration, | |||
unified_strdate, | |||
) | |||
class SapoIE(InfoExtractor):
    """Extractor for videos.sapo.* pages, based on the per-video RSS feed."""
    IE_DESC = 'SAPO Vídeos'
    _VALID_URL = r'https?://(?:(?:v2|www)\.)?videos\.sapo\.(?:pt|cv|ao|mz|tl)/(?P<id>[\da-zA-Z]{20})'

    _TESTS = [
        {
            'url': 'http://videos.sapo.pt/UBz95kOtiWYUMTA5Ghfi',
            'md5': '79ee523f6ecb9233ac25075dee0eda83',
            'note': 'SD video',
            'info_dict': {
                'id': 'UBz95kOtiWYUMTA5Ghfi',
                'ext': 'mp4',
                'title': 'Benfica - Marcas na Hitória',
                'description': 'md5:c9082000a128c3fd57bf0299e1367f22',
                'duration': 264,
                'uploader': 'tiago_1988',
                'upload_date': '20080229',
                'categories': ['benfica', 'cabral', 'desporto', 'futebol', 'geovanni', 'hooijdonk', 'joao', 'karel', 'lisboa', 'miccoli'],
            },
        },
        {
            'url': 'http://videos.sapo.pt/IyusNAZ791ZdoCY5H5IF',
            'md5': '90a2f283cfb49193fe06e861613a72aa',
            'note': 'HD video',
            'info_dict': {
                'id': 'IyusNAZ791ZdoCY5H5IF',
                'ext': 'mp4',
                'title': 'Codebits VII - Report',
                'description': 'md5:6448d6fd81ce86feac05321f354dbdc8',
                'duration': 144,
                'uploader': 'codebits',
                'upload_date': '20140427',
                'categories': ['codebits', 'codebits2014'],
            },
        },
        {
            'url': 'http://v2.videos.sapo.pt/yLqjzPtbTimsn2wWBKHz',
            'md5': 'e5aa7cc0bdc6db9b33df1a48e49a15ac',
            'note': 'v2 video',
            'info_dict': {
                'id': 'yLqjzPtbTimsn2wWBKHz',
                'ext': 'mp4',
                'title': 'Hipnose Condicionativa 4',
                'description': 'md5:ef0481abf8fb4ae6f525088a6dadbc40',
                'duration': 692,
                'uploader': 'sapozen',
                'upload_date': '20090609',
                'categories': ['condicionativa', 'heloisa', 'hipnose', 'miranda', 'sapo', 'zen'],
            },
        },
    ]

    def _real_extract(self, url):
        """Return the info dict for the video page at ``url``."""
        video_id = re.match(self._VALID_URL, url).group('id')

        feed = self._download_xml(
            'http://rd3.videos.sapo.pt/%s/rss2' % video_id, video_id)
        item = feed.find('./channel/item')

        def sapo(tag):
            # Child of the item in the SAPO media-RSS namespace.
            return item.find('./{http://videos.sapo.pt/mrss/}%s' % tag)

        title = item.find('./title').text
        description = sapo('synopse').text
        thumbnail = item.find('./{http://search.yahoo.com/mrss/}content').get('url')
        duration = parse_duration(sapo('time').text)
        uploader = sapo('author').text
        upload_date = unified_strdate(item.find('./pubDate').text)
        view_count = int(sapo('views').text)
        comment_count = int(sapo('comment_count').text)

        # Tags are whitespace-separated; an empty element yields no categories.
        tags = sapo('tags').text
        categories = tags.split() if tags else []

        age_limit = 0
        if sapo('m18').text == 'true':
            age_limit = 18

        video_url = sapo('videoFile').text
        sd_width, sd_height = sapo('videoSize').text.split('x')[:2]

        formats = [{
            'url': video_url,
            'ext': 'mp4',
            'format_id': 'sd',
            'width': int(sd_width),
            'height': int(sd_height),
        }]

        if sapo('HD').text == 'true':
            # The HD rendition is addressed by replacing the trailing
            # "/mov/1" of the SD URL with "/mov/39".
            formats.append({
                'url': re.sub(r'/mov/1$', '/mov/39', video_url),
                'ext': 'mp4',
                'format_id': 'hd',
                'width': 1280,
                'height': 720,
            })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'uploader': uploader,
            'upload_date': upload_date,
            'view_count': view_count,
            'comment_count': comment_count,
            'categories': categories,
            'age_limit': age_limit,
            'formats': formats,
        }
@ -0,0 +1,68 @@ | |||
# coding: utf-8 | |||
from __future__ import unicode_literals | |||
import re | |||
from .common import InfoExtractor | |||
from ..utils import ( | |||
float_or_none, | |||
str_to_int, | |||
parse_duration, | |||
) | |||
class SnotrIE(InfoExtractor):
    """Extractor for video pages on snotr.com."""
    # "https?" (optional "s"); the previous "http?" made the "p" optional
    # instead and therefore never matched https URLs.
    _VALID_URL = r'https?://(?:www\.)?snotr\.com/video/(?P<id>\d+)/([\w]+)'
    _TESTS = [{
        'url': 'http://www.snotr.com/video/13708/Drone_flying_through_fireworks',
        'info_dict': {
            'id': '13708',
            'ext': 'flv',
            'title': 'Drone flying through fireworks!',
            'duration': 247,
            'filesize_approx': 98566144,
            'description': 'A drone flying through Fourth of July Fireworks',
        }
    }, {
        'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10',
        'info_dict': {
            'id': '530',
            'ext': 'flv',
            'title': 'David Letteman - George W. Bush Top 10',
            'duration': 126,
            'filesize_approx': 8912896,
            'description': 'The top 10 George W. Bush moments, brought to you by David Letterman!',
        }
    }]

    def _real_extract(self, url):
        """Return the info dict for the video page at ``url``."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage)

        description = self._og_search_description(webpage)
        # The media URL is derived from the numeric id alone.
        video_url = "http://cdn.videos.snotr.com/%s.flv" % video_id

        view_count = str_to_int(self._html_search_regex(
            r'<p>\n<strong>Views:</strong>\n([\d,\.]+)</p>',
            webpage, 'view count', fatal=False))

        duration = parse_duration(self._html_search_regex(
            r'<p>\n<strong>Length:</strong>\n\s*([0-9:]+).*?</p>',
            webpage, 'duration', fatal=False))

        # The page states the size in megabytes; scale it to bytes.
        filesize_approx = float_or_none(self._html_search_regex(
            r'<p>\n<strong>Filesize:</strong>\n\s*([0-9.]+)\s*megabyte</p>',
            webpage, 'filesize', fatal=False), invscale=1024 * 1024)

        return {
            'id': video_id,
            'description': description,
            'title': title,
            'url': video_url,
            'view_count': view_count,
            'duration': duration,
            'filesize_approx': filesize_approx,
        }
@ -0,0 +1,78 @@ | |||
# coding: utf-8 | |||
from __future__ import unicode_literals | |||
from ..utils import ( | |||
ExtractorError, | |||
compat_urllib_parse, | |||
compat_urllib_request, | |||
) | |||
import re | |||
from .common import InfoExtractor | |||
class SockshareIE(InfoExtractor):
    """Extractor for file pages on sockshare.com.

    The site shows a confirmation form first; its hidden hash is posted back
    to the same URL to obtain the page that contains the download link.
    """
    _VALID_URL = r'https?://(?:www\.)?sockshare\.com/file/(?P<id>[0-9A-Za-z]+)'
    _FILE_DELETED_REGEX = r'This file doesn\'t exist, or has been removed\.</div>'
    _TEST = {
        'url': 'http://www.sockshare.com/file/437BE28B89D799D7',
        'md5': '9d0bf1cfb6dbeaa8d562f6c97506c5bd',
        'info_dict': {
            'id': '437BE28B89D799D7',
            'title': 'big_buck_bunny_720p_surround.avi',
            'ext': 'avi',
            # Raw string: "\." is not a valid escape in a normal literal.
            'thumbnail': r're:^http://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the file page at ``url``.

        Raises ExtractorError (expected) when the page reports that the file
        does not exist.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        # Normalise to the canonical URL; it is also the POST target below.
        url = 'http://sockshare.com/file/%s' % video_id
        webpage = self._download_webpage(url, video_id)

        if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
            raise ExtractorError('Video %s does not exist' % video_id,
                                 expected=True)

        confirm_hash = self._html_search_regex(r'''(?x)<input\s+
            type="hidden"\s+
            value="([^"]*)"\s+
            name="hash"
            ''', webpage, 'hash')

        fields = {
            "hash": confirm_hash,
            "confirm": "Continue as Free User"
        }

        post = compat_urllib_parse.urlencode(fields)
        req = compat_urllib_request.Request(url, post)
        # Apparently, this header is required for confirmation to work.
        req.add_header('Host', 'www.sockshare.com')
        req.add_header('Content-type', 'application/x-www-form-urlencoded')

        webpage = self._download_webpage(
            req, video_id, 'Downloading video page')

        video_url = self._html_search_regex(
            r'<a href="([^"]*)".+class="download_file_link"',
            webpage, 'file url')
        video_url = "http://www.sockshare.com" + video_url
        title = self._html_search_regex(r'<h1>(.+)<strong>', webpage, 'title')
        # The thumbnail is optional metadata; a missing one must not abort
        # an otherwise successful extraction.
        thumbnail = self._html_search_regex(
            r'<img\s+src="([^"]*)".+?name="bg"',
            webpage, 'thumbnail', fatal=False)

        formats = [{
            'format_id': 'sd',
            'url': video_url,
        }]

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }
@ -0,0 +1,609 @@ | |||
from __future__ import unicode_literals | |||
import collections | |||
import io | |||
import zlib | |||
from .utils import ( | |||
compat_str, | |||
ExtractorError, | |||
struct_unpack, | |||
) | |||
def _extract_tags(file_contents):
    """Yield ``(tag_code, tag_data)`` for every tag of an SWF file.

    ``file_contents`` is the raw file as bytes. Both zlib-compressed files
    (signature ``CWS``) and uncompressed files (signature ``FWS``) are
    handled; in either case the first eight bytes are skipped and the rest
    is the tag stream preceded by the frame size, rate and count fields.

    Raises ExtractorError if the signature does not end in ``WS`` and
    NotImplementedError for any other compression marker.
    """
    if file_contents[1:3] != b'WS':
        raise ExtractorError(
            'Not an SWF file; header is %r' % file_contents[:3])

    compression = file_contents[:1]
    if compression == b'C':
        content = zlib.decompress(file_contents[8:])
    elif compression == b'F':
        # Uncompressed file: same layout, nothing to inflate.
        content = file_contents[8:]
    else:
        raise NotImplementedError(
            'Unsupported compression format %r' %
            file_contents[:1])

    # Determine number of bits in framesize rectangle
    framesize_nbits = struct_unpack('!B', content[:1])[0] >> 3
    # 5 bits for the field width plus four fields, rounded up to whole bytes
    framesize_len = (5 + 4 * framesize_nbits + 7) // 8

    # Skip the rectangle and two further 2-byte fields
    pos = framesize_len + 2 + 2
    while pos < len(content):
        header16 = struct_unpack('<H', content[pos:pos + 2])[0]
        pos += 2
        # Upper 10 bits: tag code; lower 6 bits: length
        tag_code = header16 >> 6
        tag_len = header16 & 0x3f
        if tag_len == 0x3f:
            # Long form: the real length follows as a 32-bit integer
            tag_len = struct_unpack('<I', content[pos:pos + 4])[0]
            pos += 4
        assert pos + tag_len <= len(content), \
            ('Tag %d ends at %d+%d - that\'s longer than the file (%d)'
                % (tag_code, pos, tag_len, len(content)))
        yield (tag_code, content[pos:pos + tag_len])
        pos += tag_len
class _AVMClass_Object(object): | |||
def __init__(self, avm_class): | |||
self.avm_class = avm_class | |||
def __repr__(self): | |||
return '%s#%x' % (self.avm_class.name, id(self)) | |||
class _ScopeDict(dict): | |||
def __init__(self, avm_class): | |||
super(_ScopeDict, self).__init__() | |||
self.avm_class = avm_class | |||
def __repr__(self): | |||
return '%s__Scope(%s)' % ( | |||
self.avm_class.name, | |||
super(_ScopeDict, self).__repr__()) | |||
class _AVMClass(object): | |||
def __init__(self, name_idx, name): | |||
self.name_idx = name_idx | |||
self.name = name | |||
self.method_names = {} | |||
self.method_idxs = {} | |||
self.methods = {} | |||
self.method_pyfunctions = {} | |||
self.variables = _ScopeDict(self) | |||
def make_object(self): | |||
return _AVMClass_Object(self) | |||
def __repr__(self): | |||
return '_AVMClass(%s)' % (self.name) | |||
def register_methods(self, methods): | |||
self.method_names.update(methods.items()) | |||
self.method_idxs.update(dict( | |||
(idx, name) | |||
for name, idx in methods.items())) | |||
class _Multiname(object): | |||
def __init__(self, kind): | |||
self.kind = kind | |||
def __repr__(self): | |||
return '[MULTINAME kind: 0x%x]' % self.kind | |||
def _read_int(reader):
    """Read a variable-length unsigned integer from ``reader``.

    At most five bytes are consumed. Each byte contributes its low seven
    bits, least significant group first; a clear high bit ends the number.
    """
    value = 0
    for shift in range(0, 5 * 7, 7):
        raw = reader.read(1)
        assert len(raw) == 1
        byte = struct_unpack('<B', raw)[0]
        value |= (byte & 0x7f) << shift
        if not byte & 0x80:
            break
    return value
def _u30(reader):
    """Read a variable-length integer and assert bits 28-31 are clear."""
    value = _read_int(reader)
    assert value & 0xf0000000 == 0
    return value
_u32 = _read_int | |||
def _s32(reader):
    """Read a variable-length integer and interpret bit 31 as the sign."""
    value = _read_int(reader)
    if value & 0x80000000 == 0:
        return value
    # Two's complement: invert the low 32 bits, add one, negate.
    return - ((value ^ 0xffffffff) + 1)
def _s24(reader):
    """Read a signed little-endian 24-bit integer from ``reader``."""
    raw = reader.read(3)
    assert len(raw) == 3
    # Sign-extend to four bytes so it can be unpacked as a 32-bit integer.
    is_negative = ord(raw[2:3]) >= 0x80
    padding = b'\xff' if is_negative else b'\x00'
    return struct_unpack('<i', raw + padding)[0]
def _read_string(reader):
    """Read a length-prefixed UTF-8 string from ``reader``."""
    byte_len = _u30(reader)
    raw = reader.read(byte_len)
    assert len(raw) == byte_len
    return raw.decode('utf-8')
def _read_bytes(count, reader): | |||
assert count >= 0 | |||
resb = reader.read(count) | |||
assert len(resb) == count | |||
return resb | |||
def _read_byte(reader):
    """Read one byte from ``reader`` and return it as an integer."""
    raw = _read_bytes(1, reader=reader)
    (value,) = struct_unpack('<B', raw)
    return value
class SWFInterpreter(object): | |||
def __init__(self, file_contents):
    """Parse the bytecode of an SWF file.

    ``file_contents`` is the raw SWF file as bytes. The first tag with
    code 82 is located and parsed section by section. Afterwards the
    instance provides ``constant_strings``, ``multinames`` and
    ``_classes_by_name``; every class knows its method names/indices and
    the code of each method body.

    Raises ExtractorError if the file contains no tag with code 82 or
    uses an unsupported trait kind.
    """
    code_tag = next(
        (tag
         for tag_code, tag in _extract_tags(file_contents)
         if tag_code == 82),
        None)
    if code_tag is None:
        # Without the default, a bare StopIteration would leak out here.
        raise ExtractorError('Cannot find bytecode tag (82) in SWF file')
    # Skip four bytes and the NUL-terminated string that follows them.
    p = code_tag.index(b'\0', 4) + 1
    code_reader = io.BytesIO(code_tag[p:])

    # Parse ABC (AVM2 ByteCode)

    # Define a couple convenience methods
    u30 = lambda *args: _u30(*args, reader=code_reader)
    s32 = lambda *args: _s32(*args, reader=code_reader)
    u32 = lambda *args: _u32(*args, reader=code_reader)
    read_bytes = lambda *args: _read_bytes(*args, reader=code_reader)
    read_byte = lambda *args: _read_byte(*args, reader=code_reader)

    # minor_version + major_version
    read_bytes(2 + 2)

    # Constant pool
    # (each table's count includes an implicit entry 0 that is not stored)
    int_count = u30()
    for _c in range(1, int_count):
        s32()
    uint_count = u30()
    for _c in range(1, uint_count):
        u32()
    double_count = u30()
    read_bytes(max(0, (double_count - 1)) * 8)
    string_count = u30()
    self.constant_strings = ['']
    for _c in range(1, string_count):
        s = _read_string(code_reader)
        self.constant_strings.append(s)
    namespace_count = u30()
    for _c in range(1, namespace_count):
        read_bytes(1)  # kind
        u30()  # name
    ns_set_count = u30()
    for _c in range(1, ns_set_count):
        count = u30()
        for _c2 in range(count):
            u30()
    multiname_count = u30()
    # Number of u30 fields that follow each multiname kind
    MULTINAME_SIZES = {
        0x07: 2,  # QName
        0x0d: 2,  # QNameA
        0x0f: 1,  # RTQName
        0x10: 1,  # RTQNameA
        0x11: 0,  # RTQNameL
        0x12: 0,  # RTQNameLA
        0x09: 2,  # Multiname
        0x0e: 2,  # MultinameA
        0x1b: 1,  # MultinameL
        0x1c: 1,  # MultinameLA
    }
    self.multinames = ['']
    for _c in range(1, multiname_count):
        kind = u30()
        assert kind in MULTINAME_SIZES, 'Invalid multiname kind %r' % kind
        if kind == 0x07:
            # Only plain QNames are resolved to their string
            u30()  # namespace_idx
            name_idx = u30()
            self.multinames.append(self.constant_strings[name_idx])
        else:
            self.multinames.append(_Multiname(kind))
            for _c2 in range(MULTINAME_SIZES[kind]):
                u30()

    # Methods
    method_count = u30()
    MethodInfo = collections.namedtuple(
        'MethodInfo',
        ['NEED_ARGUMENTS', 'NEED_REST'])
    method_infos = []
    for method_id in range(method_count):
        param_count = u30()
        u30()  # return type
        for _ in range(param_count):
            u30()  # param type
        u30()  # name index (always 0 for youtube)
        flags = read_byte()
        if flags & 0x08 != 0:
            # Options present
            option_count = u30()
            for _c2 in range(option_count):
                u30()  # val
                read_bytes(1)  # kind
        if flags & 0x80 != 0:
            # Param names present
            for _ in range(param_count):
                u30()  # param name
        mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
        method_infos.append(mi)

    # Metadata
    metadata_count = u30()
    for _c in range(metadata_count):
        u30()  # name
        item_count = u30()
        for _c2 in range(item_count):
            u30()  # key
            u30()  # value

    def parse_traits_info():
        """Consume one trait; return its methods as {name: method index}."""
        trait_name_idx = u30()
        kind_full = read_byte()
        kind = kind_full & 0x0f
        attrs = kind_full >> 4
        methods = {}
        if kind in [0x00, 0x06]:  # Slot or Const
            u30()  # Slot id
            u30()  # type_name_idx
            vindex = u30()
            if vindex != 0:
                read_byte()  # vkind
        elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
            u30()  # disp_id
            method_idx = u30()
            methods[self.multinames[trait_name_idx]] = method_idx
        elif kind == 0x04:  # Class
            u30()  # slot_id
            u30()  # classi
        elif kind == 0x05:  # Function
            u30()  # slot_id
            function_idx = u30()
            # Same orientation as for methods (name -> index); callers
            # pass the result to register_methods, which expects that.
            methods[self.multinames[trait_name_idx]] = function_idx
        else:
            raise ExtractorError('Unsupported trait kind %d' % kind)

        if attrs & 0x4 != 0:  # Metadata present
            metadata_count = u30()
            for _c3 in range(metadata_count):
                u30()  # metadata index

        return methods

    # Classes
    class_count = u30()
    classes = []
    for class_id in range(class_count):
        name_idx = u30()
        cname = self.multinames[name_idx]
        avm_class = _AVMClass(name_idx, cname)
        classes.append(avm_class)

        u30()  # super_name idx
        flags = read_byte()
        if flags & 0x08 != 0:  # Protected namespace is present
            u30()  # protected_ns_idx
        intrf_count = u30()
        for _c2 in range(intrf_count):
            u30()
        u30()  # iinit
        trait_count = u30()
        for _c2 in range(trait_count):
            trait_methods = parse_traits_info()
            avm_class.register_methods(trait_methods)

    assert len(classes) == class_count
    self._classes_by_name = dict((c.name, c) for c in classes)

    # Second per-class table, in the same order as the first
    for avm_class in classes:
        u30()  # cinit
        trait_count = u30()
        for _c2 in range(trait_count):
            trait_methods = parse_traits_info()
            avm_class.register_methods(trait_methods)

    # Scripts
    script_count = u30()
    for _c in range(script_count):
        u30()  # init
        trait_count = u30()
        for _c2 in range(trait_count):
            parse_traits_info()

    # Method bodies
    method_body_count = u30()
    Method = collections.namedtuple('Method', ['code', 'local_count'])
    for _c in range(method_body_count):
        method_idx = u30()
        u30()  # max_stack
        local_count = u30()
        u30()  # init_scope_depth
        u30()  # max_scope_depth
        code_length = u30()
        code = read_bytes(code_length)
        # Attach the body to every class that registered this index
        for avm_class in classes:
            if method_idx in avm_class.method_idxs:
                m = Method(code, local_count)
                avm_class.methods[avm_class.method_idxs[method_idx]] = m
        exception_count = u30()
        for _c2 in range(exception_count):
            u30()  # from
            u30()  # to
            u30()  # target
            u30()  # exc_type
            u30()  # var_name
        trait_count = u30()
        for _c2 in range(trait_count):
            parse_traits_info()

    # The whole tag must have been consumed
    assert p + code_reader.tell() == len(code_tag)
def extract_class(self, class_name): | |||
try: | |||
return self._classes_by_name[class_name] | |||
except KeyError: | |||
raise ExtractorError('Class %r not found' % class_name) | |||
def extract_function(self, avm_class, func_name):
    """Return a Python callable emulating `func_name` of `avm_class`.

    Lookup order:
      1. a callable already stored in avm_class.method_pyfunctions;
      2. if func_name is a key of self._classes_by_name, the result of
         that class's make_object() (an object, not a function);
      3. otherwise the bytecode in avm_class.methods[func_name] is wrapped
         in an interpreter closure, which is stored in
         avm_class.method_pyfunctions and returned.

    The closure takes one sequence of arguments and returns the value on
    top of the stack when a returnvalue opcode is reached.

    Raises ExtractorError when func_name is not cached, not a class name
    and not a known method.  The closure raises NotImplementedError for
    opcodes and properties it does not handle.
    """
    pyfuncs = avm_class.method_pyfunctions
    if func_name in pyfuncs:
        return pyfuncs[func_name]
    if func_name in self._classes_by_name:
        return self._classes_by_name[func_name].make_object()
    if func_name not in avm_class.methods:
        raise ExtractorError('Cannot find function %s.%s' % (
            avm_class.name, func_name))
    method = avm_class.methods[func_name]

    def resfunc(args):
        reader = io.BytesIO(method.code)

        def s24():
            return _s24(reader)

        def u30():
            return _u30(reader)

        def pop_args(count):
            # Popping yields the values in reverse push order; undo that.
            popped = [stack.pop() for _ in range(count)]
            popped.reverse()
            return popped

        def branch(offset):
            # Offsets are relative to the position after the operand.
            reader.seek(reader.tell() + offset)

        def lookup_scope(name, fallback):
            # Innermost (most recently pushed) scope wins.
            for candidate in reversed(scopes):
                if name in candidate:
                    return candidate
            return fallback

        # Register 0 is the class variables, followed by the call
        # arguments and then one empty slot per declared local.
        registers = [avm_class.variables]
        registers.extend(args)
        registers.extend([None] * method.local_count)
        stack = []
        scopes = collections.deque(
            (self._classes_by_name, avm_class.variables))

        while True:
            opcode = _read_byte(reader)
            if opcode in (17, 18):  # iftrue / iffalse
                offset = s24()
                condition = stack.pop()
                if bool(condition) == (opcode == 17):
                    branch(offset)
            elif opcode == 36:  # pushbyte
                stack.append(_read_byte(reader))
            elif opcode == 42:  # dup
                stack.append(stack[-1])
            elif opcode == 44:  # pushstring
                stack.append(self.constant_strings[u30()])
            elif opcode == 48:  # pushscope
                scopes.append(stack.pop())
            elif opcode == 66:  # construct
                pop_args(u30())  # constructor arguments are discarded
                template = stack.pop()
                stack.append(template.avm_class.make_object())
            elif opcode == 70:  # callproperty
                index = u30()
                mname = self.multinames[index]
                call_args = pop_args(u30())
                target = stack.pop()
                if isinstance(target, _AVMClass_Object):
                    callee = self.extract_function(target.avm_class, mname)
                    stack.append(callee(call_args))
                    continue
                if isinstance(target, _ScopeDict):
                    if mname in target.avm_class.method_names:
                        callee = self.extract_function(
                            target.avm_class, mname)
                        outcome = callee(call_args)
                    else:
                        outcome = target[mname]
                    stack.append(outcome)
                    continue
                if isinstance(target, compat_str):
                    if mname == 'split':
                        assert len(call_args) == 1
                        assert isinstance(call_args[0], compat_str)
                        separator = call_args[0]
                        if separator == '':
                            # An empty separator splits into characters
                            outcome = list(target)
                        else:
                            outcome = target.split(separator)
                        stack.append(outcome)
                        continue
                elif isinstance(target, list):
                    if mname == 'slice':
                        assert len(call_args) == 1
                        assert isinstance(call_args[0], int)
                        stack.append(target[call_args[0]:])
                        continue
                    elif mname == 'join':
                        assert len(call_args) == 1
                        assert isinstance(call_args[0], compat_str)
                        stack.append(call_args[0].join(target))
                        continue
                # Nothing above handled this receiver/property pair
                raise NotImplementedError(
                    'Unsupported property %r on %r'
                    % (mname, target))
            elif opcode == 72:  # returnvalue
                return stack.pop()
            elif opcode == 74:  # constructproperty
                index = u30()
                pop_args(u30())  # constructor arguments are discarded
                klass = stack.pop()
                mname = self.multinames[index]  # looked up, not used
                assert isinstance(klass, _AVMClass)
                # The constructor is not actually run for now;
                # it is treated as if it did nothing
                stack.append(klass.make_object())
            elif opcode == 79:  # callpropvoid
                index = u30()
                mname = self.multinames[index]
                pop_args(u30())
                target = stack.pop()
                if mname == 'reverse':
                    assert isinstance(target, list)
                    target.reverse()
                else:
                    raise NotImplementedError(
                        'Unsupported (void) property %r on %r'
                        % (mname, target))
            elif opcode == 86:  # newarray
                stack.append(pop_args(u30()))
            elif opcode == 93:  # findpropstrict
                mname = self.multinames[u30()]
                owner = lookup_scope(mname, scopes[0])
                # Pushes the property value, not the owning scope
                stack.append(owner[mname])
            elif opcode == 94:  # findproperty
                mname = self.multinames[u30()]
                stack.append(lookup_scope(mname, avm_class.variables))
            elif opcode == 96:  # getlex
                mname = self.multinames[u30()]
                owner = lookup_scope(mname, avm_class.variables)
                # It is unclear where static variables get initialized,
                # so a missing name simply reads as None
                stack.append(owner.get(mname))
            elif opcode == 97:  # setproperty
                index = u30()
                new_value = stack.pop()
                key = self.multinames[index]
                if isinstance(key, _Multiname):
                    # The actual key is supplied on the stack
                    key = stack.pop()
                container = stack.pop()
                container[key] = new_value
            elif opcode == 98:  # getlocal
                stack.append(registers[u30()])
            elif opcode == 99:  # setlocal
                slot = u30()
                registers[slot] = stack.pop()
            elif opcode == 102:  # getproperty
                pname = self.multinames[u30()]
                if pname == 'length':
                    container = stack.pop()
                    assert isinstance(container, list)
                    stack.append(len(container))
                else:  # Assume attribute access
                    position = stack.pop()
                    assert isinstance(position, int)
                    container = stack.pop()
                    assert isinstance(container, list)
                    stack.append(container[position])
            elif opcode == 115:  # convert_i
                stack.append(int(stack.pop()))
            elif opcode == 128:  # coerce
                u30()  # operand is read and ignored
            elif opcode == 133:  # coerce_s
                assert isinstance(stack[-1], (type(None), compat_str))
            elif opcode in (160, 161, 164, 175):
                rhs = stack.pop()
                lhs = stack.pop()
                if opcode == 160:  # add
                    outcome = lhs + rhs
                elif opcode == 161:  # subtract
                    outcome = lhs - rhs
                elif opcode == 164:  # modulo
                    outcome = lhs % rhs
                else:  # greaterequals
                    outcome = lhs >= rhs
                stack.append(outcome)
            elif opcode in (208, 209, 210, 211):  # getlocal_0..getlocal_3
                stack.append(registers[opcode - 208])
            elif opcode in (212, 213, 214, 215):  # setlocal_0..setlocal_3
                registers[opcode - 212] = stack.pop()
            else:
                raise NotImplementedError(
                    'Unsupported opcode %d' % opcode)

    avm_class.method_pyfunctions[func_name] = resfunc
    return resfunc
@ -1,2 +1,2 @@ | |||
__version__ = '2014.07.15' | |||
__version__ = '2014.07.22' |