Conflicts: youtube_dl/extractor/mlb.pytotalwebcasting
@ -0,0 +1 @@ | |||||
*.swf |
@ -0,0 +1,19 @@ | |||||
// input: [["a", "b", "c", "d"]] | |||||
// output: ["c", "b", "a", "d"] | |||||
// Fixture for the SWF interpreter tests. The two header comments above give
// the JSON argument list passed to main() and the expected return value.
package { | |||||
public class ArrayAccess { | |||||
// Entry point: creates an instance and returns f(ar, 2).
public static function main(ar:Array):Array { | |||||
var aa:ArrayAccess = new ArrayAccess(); | |||||
return aa.f(ar, 2); | |||||
} | |||||
// Swaps the first element with the element at index num, in place, and
// returns the same array. Note the read uses num % ar.length while the
// write uses num directly.
private function f(ar:Array, num:Number):Array{ | |||||
var x:String = ar[0]; | |||||
var y:String = ar[num % ar.length]; | |||||
ar[0] = y; | |||||
ar[num] = x; | |||||
return ar; | |||||
} | |||||
} | |||||
} |
@ -0,0 +1,17 @@ | |||||
// input: [] | |||||
// output: 121 | |||||
// Fixture for the SWF interpreter tests: calls a public method on an
// instance of a second class declared in the same file.
package { | |||||
public class ClassCall { | |||||
// Entry point: returns OtherClass.func(100, 20).
public static function main():int{ | |||||
var f:OtherClass = new OtherClass(); | |||||
return f.func(100,20); | |||||
} | |||||
} | |||||
} | |||||
class OtherClass { | |||||
// Returns x + y + 1.
public function func(x: int, y: int):int { | |||||
return x+y+1; | |||||
} | |||||
} |
@ -0,0 +1,15 @@ | |||||
// input: [] | |||||
// output: 0 | |||||
// Fixture for the SWF interpreter tests: constructs an instance of an empty
// class and ignores it.
package { | |||||
public class ClassConstruction { | |||||
// Entry point: instantiates Foo and returns the constant 0.
public static function main():int{ | |||||
var f:Foo = new Foo(); | |||||
return 0; | |||||
} | |||||
} | |||||
} | |||||
// Empty helper class; only its construction is exercised.
class Foo { | |||||
} |
@ -0,0 +1,13 @@ | |||||
// input: [1, 2] | |||||
// output: 3 | |||||
// Fixture for the SWF interpreter tests: arithmetic routed through several
// local variables.
package { | |||||
public class LocalVars { | |||||
// Entry point: c = a + b + b, d = c - b, e = d; the result equals a + b.
public static function main(a:int, b:int):int{ | |||||
var c:int = a + b + b; | |||||
var d:int = c - b; | |||||
var e:int = d; | |||||
return e; | |||||
} | |||||
} | |||||
} |
@ -0,0 +1,21 @@ | |||||
// input: [] | |||||
// output: 9 | |||||
// Fixture for the SWF interpreter tests: a public method that delegates to a
// private method through this.
package { | |||||
public class PrivateCall { | |||||
// Entry point: returns OtherClass.func().
public static function main():int{ | |||||
var f:OtherClass = new OtherClass(); | |||||
return f.func(); | |||||
} | |||||
} | |||||
} | |||||
class OtherClass { | |||||
// Private helper; returns the constant 9.
private function pf():int { | |||||
return 9; | |||||
} | |||||
// Public wrapper that returns this.pf().
public function func():int { | |||||
return this.pf(); | |||||
} | |||||
} |
@ -0,0 +1,13 @@ | |||||
// input: [1] | |||||
// output: 1 | |||||
// Fixture for the SWF interpreter tests: writes a static class variable and
// reads it back.
package { | |||||
public class StaticAssignment { | |||||
public static var v:int; | |||||
// Entry point: stores a in the static variable v and returns v.
public static function main(a:int):int{ | |||||
v = a; | |||||
return v; | |||||
} | |||||
} | |||||
} |
@ -0,0 +1,16 @@ | |||||
// input: [] | |||||
// output: 1 | |||||
// Fixture for the SWF interpreter tests: reads a static class variable that
// this file never assigns.
package { | |||||
public class StaticRetrieval { | |||||
public static var v:int; | |||||
// Entry point: returns 0 when v is truthy, otherwise 1. The expected
// output above is 1.
public static function main():int{ | |||||
if (v) { | |||||
return 0; | |||||
} else { | |||||
return 1; | |||||
} | |||||
} | |||||
} | |||||
} |
@ -0,0 +1,77 @@ | |||||
#!/usr/bin/env python | |||||
# Allow direct execution | |||||
import os | |||||
import sys | |||||
import unittest | |||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |||||
import errno | |||||
import io | |||||
import json | |||||
import re | |||||
import subprocess | |||||
from youtube_dl.swfinterp import SWFInterpreter | |||||
# Fixture directory: the "swftests" folder that sits beside this test module.
TEST_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'swftests')
class TestSWFInterpreter(unittest.TestCase):
    """Empty test case; its test methods are attached at import time."""
def _make_testfunc(testfile):
    """Attach a test method for one ``.as`` fixture to TestSWFInterpreter.

    File names that do not end in ``.as`` are ignored.  For a fixture
    ``Name.as`` the generated method is called ``test_swf_Name``.
    """
    name_match = re.match(r'^(.*)\.(as)$', testfile)
    if name_match is None:
        return
    test_id = name_match.group(1)

    def test_func(self):
        as_file = os.path.join(TEST_DIR, testfile)
        swf_file = os.path.join(TEST_DIR, test_id + '.swf')

        # Compile when the SWF is missing or older than its source.
        needs_compile = (
            not os.path.exists(swf_file)
            or os.path.getmtime(swf_file) < os.path.getmtime(as_file))
        if needs_compile:
            try:
                subprocess.check_call(['mxmlc', '-output', swf_file, as_file])
            except OSError as ose:
                if ose.errno != errno.ENOENT:
                    raise
                # The compiler is not installed: report it and pass silently.
                print('mxmlc not found! Skipping test.')
                return

        with open(swf_file, 'rb') as swf_f:
            swf_content = swf_f.read()
        swfi = SWFInterpreter(swf_content)

        with io.open(as_file, 'r', encoding='utf-8') as as_f:
            as_content = as_f.read()

        def _find_spec(key):
            # Specs are JSON values in "// <key>: <json>" comment lines.
            spec_match = re.search(
                r'(?m)^//\s*%s:\s*(.*?)\n' % re.escape(key), as_content)
            if spec_match is None:
                raise ValueError('Cannot find %s in %s' % (key, testfile))
            return json.loads(spec_match.group(1))

        input_args = _find_spec('input')
        output = _find_spec('output')

        swf_class = swfi.extract_class(test_id)
        func = swfi.extract_function(swf_class, 'main')
        res = func(input_args)
        self.assertEqual(res, output)

    test_func.__name__ = str('test_swf_' + test_id)
    setattr(TestSWFInterpreter, test_func.__name__, test_func)
# Generate one test method per entry of the fixture directory; entries that
# are not ".as" files are ignored by _make_testfunc.
for fixture_name in os.listdir(TEST_DIR):
    _make_testfunc(fixture_name)


if __name__ == '__main__':
    unittest.main()
@ -0,0 +1,139 @@ | |||||
# coding: utf-8 | |||||
from __future__ import unicode_literals | |||||
import re | |||||
from .common import InfoExtractor | |||||
class AdultSwimIE(InfoExtractor):
    """Extractor for video.adultswim.com episode pages.

    An episode is returned as a playlist with one entry per segment.
    """
    _VALID_URL = r'https?://video\.adultswim\.com/(?P<path>.+?)(?:\.html)?(?:\?.*)?(?:#.*)?$'
    _TEST = {
        'url': 'http://video.adultswim.com/rick-and-morty/close-rick-counters-of-the-rick-kind.html?x=y#title',
        'playlist': [
            {
                'md5': '4da359ec73b58df4575cd01a610ba5dc',
                'info_dict': {
                    'id': '8a250ba1450996e901453d7f02ca02f5',
                    'ext': 'flv',
                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 1',
                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
                    'uploader': 'Rick and Morty',
                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
                }
            },
            {
                'md5': 'ffbdf55af9331c509d95350bd0cc1819',
                'info_dict': {
                    'id': '8a250ba1450996e901453d7f4bd102f6',
                    'ext': 'flv',
                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 2',
                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
                    'uploader': 'Rick and Morty',
                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
                }
            },
            {
                'md5': 'b92409635540304280b4b6c36bd14a0a',
                'info_dict': {
                    'id': '8a250ba1450996e901453d7fa73c02f7',
                    'ext': 'flv',
                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 3',
                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
                    'uploader': 'Rick and Morty',
                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
                }
            },
            {
                'md5': 'e8818891d60e47b29cd89d7b0278156d',
                'info_dict': {
                    'id': '8a250ba1450996e901453d7fc8ba02f8',
                    'ext': 'flv',
                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 4',
                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
                    'uploader': 'Rick and Morty',
                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
                }
            }
        ]
    }

    # Container extension per "bitrate" attribute; unknown values use mp4.
    _video_extensions = {
        '3500': 'flv',
        '640': 'mp4',
        '150': 'mp4',
        'ipad': 'm3u8',
        'iphone': 'm3u8'
    }
    # (width, height) per "bitrate" attribute, where known.
    _video_dimensions = {
        '3500': (1280, 720),
        '640': (480, 270),
        '150': (320, 180)
    }

    def _real_extract(self, url):
        """Return a playlist dict with one entry per episode segment."""
        mobj = re.match(self._VALID_URL, url)
        video_path = mobj.group('path')

        webpage = self._download_webpage(url, video_path)
        episode_id = self._html_search_regex(
            r'<link rel="video_src" href="http://i\.adultswim\.com/adultswim/adultswimtv/tools/swf/viralplayer.swf\?id=([0-9a-f]+?)"\s*/?\s*>',
            webpage, 'episode_id')
        title = self._og_search_title(webpage)

        index_url = 'http://asfix.adultswim.com/asfix-svc/episodeSearch/getEpisodesByIDs?networkName=AS&ids=%s' % episode_id
        idoc = self._download_xml(
            index_url, title,
            'Downloading episode index', 'Unable to download episode index')

        episode_el = idoc.find('.//episode')
        show_title = episode_el.attrib.get('collectionTitle')
        episode_title = episode_el.attrib.get('title')
        thumbnail = episode_el.attrib.get('thumbnailUrl')
        description = episode_el.find('./description').text.strip()

        entries = []
        segment_els = episode_el.findall('./segments/segment')
        for part_num, segment_el in enumerate(segment_els):
            segment_id = segment_el.attrib.get('id')
            segment_title = '%s %s part %d' % (show_title, episode_title, part_num + 1)
            # Kept in its own variable so the episode-level thumbnail used
            # for the playlist below is not overwritten by the last segment.
            segment_thumbnail = segment_el.attrib.get('thumbnailUrl')
            # NOTE(review): this is the raw attribute string, not a number.
            duration = segment_el.attrib.get('duration')

            segment_url = 'http://asfix.adultswim.com/asfix-svc/episodeservices/getCvpPlaylist?networkName=AS&id=%s' % segment_id
            segment_doc = self._download_xml(
                segment_url, segment_title,
                'Downloading segment information', 'Unable to download segment information')

            formats = []
            for file_el in segment_doc.findall('.//files/file'):
                bitrate = file_el.attrib.get('bitrate')
                file_type = file_el.attrib.get('type')
                width, height = self._video_dimensions.get(bitrate, (None, None))
                formats.append({
                    'format_id': '%s-%s' % (bitrate, file_type),
                    'url': file_el.text,
                    'ext': self._video_extensions.get(bitrate, 'mp4'),
                    # The bitrate may be missing or not a number (for
                    # example: 'iphone')
                    'tbr': int(bitrate) if bitrate and bitrate.isdigit() else None,
                    'height': height,
                    'width': width
                })

            self._sort_formats(formats)

            entries.append({
                'id': segment_id,
                'title': segment_title,
                'formats': formats,
                'uploader': show_title,
                'thumbnail': segment_thumbnail,
                'duration': duration,
                'description': description
            })

        return {
            '_type': 'playlist',
            'id': episode_id,
            'display_id': video_path,
            'entries': entries,
            'title': '%s %s' % (show_title, episode_title),
            'description': description,
            'thumbnail': thumbnail
        }
@ -0,0 +1,65 @@ | |||||
from __future__ import unicode_literals | |||||
import re | |||||
from .common import InfoExtractor | |||||
from ..utils import ( | |||||
parse_iso8601, | |||||
str_to_int, | |||||
) | |||||
class CrackedIE(InfoExtractor):
    """Extractor for video pages on cracked.com."""
    _VALID_URL = r'https?://(?:www\.)?cracked\.com/video_(?P<id>\d+)_[\da-z-]+\.html'
    _TEST = {
        'url': 'http://www.cracked.com/video_19006_4-plot-holes-you-didnt-notice-in-your-favorite-movies.html',
        'md5': '4b29a5eeec292cd5eca6388c7558db9e',
        'info_dict': {
            'id': '19006',
            'ext': 'mp4',
            'title': '4 Plot Holes You Didn\'t Notice in Your Favorite Movies',
            'description': 'md5:3b909e752661db86007d10e5ec2df769',
            'timestamp': 1405659600,
            'upload_date': '20140718',
        }
    }

    def _real_extract(self, url):
        """Scrape the video URL and metadata from the page at *url*."""
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)

        video_url = self._html_search_regex(
            [r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'],
            page, 'video URL')
        title = self._og_search_title(page)
        description = self._og_search_description(page)

        timestamp = self._html_search_regex(
            r'<time datetime="([^"]+)"', page, 'upload date', fatal=False)
        if timestamp:
            # The last six characters are dropped before parsing
            # (presumably a UTC offset such as "+00:00" - TODO confirm).
            timestamp = parse_iso8601(timestamp[:-6])

        views_text = self._html_search_regex(
            r'<span class="views" id="viewCounts">([\d,\.]+) Views</span>',
            page, 'view count', fatal=False)
        view_count = str_to_int(views_text)
        comments_text = self._html_search_regex(
            r'<span id="commentCounts">([\d,\.]+)</span>',
            page, 'comment count', fatal=False)
        comment_count = str_to_int(comments_text)

        # The frame size is taken from a "_<width>X<height>.mp4" URL suffix.
        width = height = None
        size_match = re.search(
            r'_(?P<width>\d+)X(?P<height>\d+)\.mp4$', video_url)
        if size_match:
            width = int(size_match.group('width'))
            height = int(size_match.group('height'))

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'view_count': view_count,
            'comment_count': comment_count,
            'height': height,
            'width': width,
        }
@ -0,0 +1,44 @@ | |||||
from __future__ import unicode_literals | |||||
import re | |||||
from .common import InfoExtractor | |||||
class DFBIE(InfoExtractor):
    """Extractor for videos on tv.dfb.de."""
    IE_NAME = 'tv.dfb.de'
    _VALID_URL = r'https?://tv\.dfb\.de/video/[^/]+/(?P<id>\d+)'

    _TEST = {
        'url': 'http://tv.dfb.de/video/highlights-des-empfangs-in-berlin/9070/',
        # The md5 is different each time
        'info_dict': {
            'id': '9070',
            'ext': 'flv',
            'title': 'Highlights des Empfangs in Berlin',
            'upload_date': '20140716',
        },
    }

    def _real_extract(self, url):
        """Build the tokenised manifest URL and metadata for the video."""
        video_id = re.match(self._VALID_URL, url).group('id')

        webpage = self._download_webpage(url, video_id)
        player_info = self._download_xml(
            'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id,
            video_id)
        video_info = player_info.find('video')

        # The player XML points at a second document carrying the token.
        f4m_info = self._download_xml(video_info.find('url').text, video_id)
        token_el = f4m_info.find('token')
        manifest_url = '%s?hdnea=%s&hdcore=3.2.0' % (
            token_el.attrib['url'], token_el.attrib['auth'])

        title = video_info.find('title').text
        thumbnail = self._og_search_thumbnail(webpage)
        # Reverse the dot-separated date fields and join them.
        date_fields = video_info.find('time_date').text.split('.')
        upload_date = ''.join(reversed(date_fields))

        return {
            'id': video_id,
            'title': title,
            'url': manifest_url,
            'ext': 'flv',
            'thumbnail': thumbnail,
            'upload_date': upload_date,
        }
@ -0,0 +1,119 @@ | |||||
# encoding: utf-8 | |||||
from __future__ import unicode_literals | |||||
import re | |||||
from .common import InfoExtractor | |||||
from ..utils import ( | |||||
parse_duration, | |||||
unified_strdate, | |||||
) | |||||
class SapoIE(InfoExtractor):
    """Extractor for SAPO Vídeos, driven by the per-video RSS feed."""
    IE_DESC = 'SAPO Vídeos'
    _VALID_URL = r'https?://(?:(?:v2|www)\.)?videos\.sapo\.(?:pt|cv|ao|mz|tl)/(?P<id>[\da-zA-Z]{20})'

    _TESTS = [
        {
            'url': 'http://videos.sapo.pt/UBz95kOtiWYUMTA5Ghfi',
            'md5': '79ee523f6ecb9233ac25075dee0eda83',
            'note': 'SD video',
            'info_dict': {
                'id': 'UBz95kOtiWYUMTA5Ghfi',
                'ext': 'mp4',
                'title': 'Benfica - Marcas na Hitória',
                'description': 'md5:c9082000a128c3fd57bf0299e1367f22',
                'duration': 264,
                'uploader': 'tiago_1988',
                'upload_date': '20080229',
                'categories': ['benfica', 'cabral', 'desporto', 'futebol', 'geovanni', 'hooijdonk', 'joao', 'karel', 'lisboa', 'miccoli'],
            },
        },
        {
            'url': 'http://videos.sapo.pt/IyusNAZ791ZdoCY5H5IF',
            'md5': '90a2f283cfb49193fe06e861613a72aa',
            'note': 'HD video',
            'info_dict': {
                'id': 'IyusNAZ791ZdoCY5H5IF',
                'ext': 'mp4',
                'title': 'Codebits VII - Report',
                'description': 'md5:6448d6fd81ce86feac05321f354dbdc8',
                'duration': 144,
                'uploader': 'codebits',
                'upload_date': '20140427',
                'categories': ['codebits', 'codebits2014'],
            },
        },
        {
            'url': 'http://v2.videos.sapo.pt/yLqjzPtbTimsn2wWBKHz',
            'md5': 'e5aa7cc0bdc6db9b33df1a48e49a15ac',
            'note': 'v2 video',
            'info_dict': {
                'id': 'yLqjzPtbTimsn2wWBKHz',
                'ext': 'mp4',
                'title': 'Hipnose Condicionativa 4',
                'description': 'md5:ef0481abf8fb4ae6f525088a6dadbc40',
                'duration': 692,
                'uploader': 'sapozen',
                'upload_date': '20090609',
                'categories': ['condicionativa', 'heloisa', 'hipnose', 'miranda', 'sapo', 'zen'],
            },
        },
    ]

    def _real_extract(self, url):
        """Read the video's rss2 feed and map its item to an info dict."""
        video_id = re.match(self._VALID_URL, url).group('id')

        feed = self._download_xml(
            'http://rd3.videos.sapo.pt/%s/rss2' % video_id, video_id)
        item = feed.find('./channel/item')

        def sapo_text(tag):
            # Text of a child element in the SAPO mrss namespace.
            return item.find('./{http://videos.sapo.pt/mrss/}%s' % tag).text

        title = item.find('./title').text
        description = sapo_text('synopse')
        thumbnail = item.find('./{http://search.yahoo.com/mrss/}content').get('url')
        duration = parse_duration(sapo_text('time'))
        uploader = sapo_text('author')
        upload_date = unified_strdate(item.find('./pubDate').text)
        view_count = int(sapo_text('views'))
        comment_count = int(sapo_text('comment_count'))

        # Tags are whitespace separated; an empty element means no categories.
        tags = sapo_text('tags')
        categories = tags.split() if tags else []
        age_limit = 18 if sapo_text('m18') == 'true' else 0

        video_url = sapo_text('videoFile')
        # "<width>x<height>"
        video_size = sapo_text('videoSize').split('x')

        sd_format = {
            'url': video_url,
            'ext': 'mp4',
            'format_id': 'sd',
            'width': int(video_size[0]),
            'height': int(video_size[1]),
        }
        formats = [sd_format]

        if sapo_text('HD') == 'true':
            # The HD URL is the SD URL with its trailing /mov/1 replaced.
            hd_format = {
                'url': re.sub(r'/mov/1$', '/mov/39', video_url),
                'ext': 'mp4',
                'format_id': 'hd',
                'width': 1280,
                'height': 720,
            }
            formats.append(hd_format)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'uploader': uploader,
            'upload_date': upload_date,
            'view_count': view_count,
            'comment_count': comment_count,
            'categories': categories,
            'age_limit': age_limit,
            'formats': formats,
        }
@ -0,0 +1,68 @@ | |||||
# coding: utf-8 | |||||
from __future__ import unicode_literals | |||||
import re | |||||
from .common import InfoExtractor | |||||
from ..utils import ( | |||||
float_or_none, | |||||
str_to_int, | |||||
parse_duration, | |||||
) | |||||
class SnotrIE(InfoExtractor):
    """Extractor for video pages on snotr.com."""
    # "https?" (not "http?") so that both http:// and https:// URLs match;
    # "http?" would only accept "htt://" and "http://".
    _VALID_URL = r'https?://(?:www\.)?snotr\.com/video/(?P<id>\d+)/([\w]+)'
    _TESTS = [{
        'url': 'http://www.snotr.com/video/13708/Drone_flying_through_fireworks',
        'info_dict': {
            'id': '13708',
            'ext': 'flv',
            'title': 'Drone flying through fireworks!',
            'duration': 247,
            'filesize_approx': 98566144,
            'description': 'A drone flying through Fourth of July Fireworks',
        }
    }, {
        'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10',
        'info_dict': {
            'id': '530',
            'ext': 'flv',
            'title': 'David Letteman - George W. Bush Top 10',
            'duration': 126,
            'filesize_approx': 8912896,
            'description': 'The top 10 George W. Bush moments, brought to you by David Letterman!',
        }
    }]

    def _real_extract(self, url):
        """Return the info dict for the video page at *url*.

        The media URL is derived from the numeric id; the remaining fields
        are scraped from the page and are optional except title and
        description.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage)

        description = self._og_search_description(webpage)
        video_url = "http://cdn.videos.snotr.com/%s.flv" % video_id

        view_count = str_to_int(self._html_search_regex(
            r'<p>\n<strong>Views:</strong>\n([\d,\.]+)</p>',
            webpage, 'view count', fatal=False))

        duration = parse_duration(self._html_search_regex(
            r'<p>\n<strong>Length:</strong>\n\s*([0-9:]+).*?</p>',
            webpage, 'duration', fatal=False))

        # The page reports the size in megabytes; scale it to bytes.
        filesize_approx = float_or_none(self._html_search_regex(
            r'<p>\n<strong>Filesize:</strong>\n\s*([0-9.]+)\s*megabyte</p>',
            webpage, 'filesize', fatal=False), invscale=1024 * 1024)

        return {
            'id': video_id,
            'description': description,
            'title': title,
            'url': video_url,
            'view_count': view_count,
            'duration': duration,
            'filesize_approx': filesize_approx,
        }
@ -0,0 +1,78 @@ | |||||
# coding: utf-8 | |||||
from __future__ import unicode_literals | |||||
from ..utils import ( | |||||
ExtractorError, | |||||
compat_urllib_parse, | |||||
compat_urllib_request, | |||||
) | |||||
import re | |||||
from .common import InfoExtractor | |||||
class SockshareIE(InfoExtractor):
    """Extractor for files hosted on sockshare.com."""
    _VALID_URL = r'https?://(?:www\.)?sockshare\.com/file/(?P<id>[0-9A-Za-z]+)'
    _FILE_DELETED_REGEX = r'This file doesn\'t exist, or has been removed\.</div>'
    _TEST = {
        'url': 'http://www.sockshare.com/file/437BE28B89D799D7',
        'md5': '9d0bf1cfb6dbeaa8d562f6c97506c5bd',
        'info_dict': {
            'id': '437BE28B89D799D7',
            'title': 'big_buck_bunny_720p_surround.avi',
            'ext': 'avi',
            # Raw string: "\." is not a valid escape in a normal literal.
            'thumbnail': r're:^http://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Confirm the "free user" form and return the file's info dict.

        Raises ExtractorError (expected) when the page reports that the
        file does not exist.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        url = 'http://sockshare.com/file/%s' % video_id
        webpage = self._download_webpage(url, video_id)

        if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
            raise ExtractorError('Video %s does not exist' % video_id,
                                 expected=True)

        confirm_hash = self._html_search_regex(r'''(?x)<input\s+
            type="hidden"\s+
            value="([^"]*)"\s+
            name="hash"
            ''', webpage, 'hash')

        fields = {
            "hash": confirm_hash,
            "confirm": "Continue as Free User"
        }

        # Request data must be bytes on Python 3; the urlencoded form is
        # pure ASCII, so encoding is also safe on Python 2.
        post = compat_urllib_parse.urlencode(fields).encode('utf-8')
        req = compat_urllib_request.Request(url, post)
        # Apparently, this header is required for confirmation to work.
        req.add_header('Host', 'www.sockshare.com')
        req.add_header('Content-type', 'application/x-www-form-urlencoded')

        webpage = self._download_webpage(
            req, video_id, 'Downloading video page')

        video_url = self._html_search_regex(
            r'<a href="([^"]*)".+class="download_file_link"',
            webpage, 'file url')
        video_url = "http://www.sockshare.com" + video_url
        title = self._html_search_regex(r'<h1>(.+)<strong>', webpage, 'title')
        # The thumbnail is optional metadata; do not abort when it is absent.
        thumbnail = self._html_search_regex(
            r'<img\s+src="([^"]*)".+?name="bg"',
            webpage, 'thumbnail', fatal=False)

        formats = [{
            'format_id': 'sd',
            'url': video_url,
        }]

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }
@ -0,0 +1,609 @@ | |||||
from __future__ import unicode_literals | |||||
import collections | |||||
import io | |||||
import zlib | |||||
from .utils import ( | |||||
compat_str, | |||||
ExtractorError, | |||||
struct_unpack, | |||||
) | |||||
def _extract_tags(file_contents):
    """Yield ``(tag_code, tag_bytes)`` for every tag in an SWF file.

    file_contents is the complete file as bytes.  Both zlib-compressed
    ("CWS") and uncompressed ("FWS") files are accepted.

    Raises ExtractorError when the signature does not end in "WS" and
    NotImplementedError for any other first byte (e.g. LZMA "ZWS").
    """
    if file_contents[1:3] != b'WS':
        raise ExtractorError(
            'Not an SWF file; header is %r' % file_contents[:3])
    signature = file_contents[:1]
    if signature == b'C':
        # Everything after the 8-byte header is zlib-compressed.
        content = zlib.decompress(file_contents[8:])
    elif signature == b'F':
        # Uncompressed file: same layout, nothing to inflate.
        content = file_contents[8:]
    else:
        raise NotImplementedError(
            'Unsupported compression format %r' %
            file_contents[:1])

    # Determine number of bits in framesize rectangle
    framesize_nbits = struct_unpack('!B', content[:1])[0] >> 3
    framesize_len = (5 + 4 * framesize_nbits + 7) // 8

    # Skip the rectangle and the 2 + 2 header bytes that follow it.
    pos = framesize_len + 2 + 2
    while pos < len(content):
        # Short tag header: upper 10 bits are the code, lower 6 the length.
        header16 = struct_unpack('<H', content[pos:pos + 2])[0]
        pos += 2
        tag_code = header16 >> 6
        tag_len = header16 & 0x3f
        if tag_len == 0x3f:
            # A length of 0x3f means the real length follows as 32 bits.
            tag_len = struct_unpack('<I', content[pos:pos + 4])[0]
            pos += 4
        assert pos + tag_len <= len(content), \
            ('Tag %d ends at %d+%d - that\'s longer than the file (%d)'
                % (tag_code, pos, tag_len, len(content)))
        yield (tag_code, content[pos:pos + tag_len])
        pos += tag_len
class _AVMClass_Object(object):
    """An instance of an AVM class; it only records its class."""

    def __init__(self, avm_class):
        self.avm_class = avm_class

    def __repr__(self):
        # "<class name>#<id in hex>"
        return '{0}#{1:x}'.format(self.avm_class.name, id(self))
class _ScopeDict(dict):
    """A dict that remembers which AVM class it belongs to."""

    def __init__(self, avm_class):
        dict.__init__(self)
        self.avm_class = avm_class

    def __repr__(self):
        contents = dict.__repr__(self)
        return '%s__Scope(%s)' % (self.avm_class.name, contents)
class _AVMClass(object):
    """A class read from the ABC block, with its method tables."""

    def __init__(self, name_idx, name):
        self.name_idx = name_idx
        self.name = name
        # name -> method index, and the inverse mapping
        self.method_names = {}
        self.method_idxs = {}
        self.methods = {}
        self.method_pyfunctions = {}
        self.variables = _ScopeDict(self)

    def make_object(self):
        """Return a new instance object of this class."""
        return _AVMClass_Object(self)

    def __repr__(self):
        return '_AVMClass(%s)' % self.name

    def register_methods(self, methods):
        """Record a ``name -> index`` mapping in both lookup directions."""
        for method_name, method_idx in methods.items():
            self.method_names[method_name] = method_idx
            self.method_idxs[method_idx] = method_name
class _Multiname(object):
    """Placeholder for a multiname entry; only its kind is kept."""

    def __init__(self, kind):
        self.kind = kind

    def __repr__(self):
        return '[MULTINAME kind: 0x{0:x}]'.format(self.kind)
def _read_int(reader):
    """Read a variable-length integer of at most five bytes from reader.

    Each byte contributes its low seven bits, least significant group
    first; a clear high bit ends the number.
    """
    value = 0
    for shift in range(0, 35, 7):
        chunk = reader.read(1)
        assert len(chunk) == 1
        byte = struct_unpack('<B', chunk)[0]
        value |= (byte & 0x7f) << shift
        if not byte & 0x80:
            break
    return value
def _u30(reader):
    """Read a variable-length integer and assert its top four bits are 0."""
    value = _read_int(reader)
    assert not value & 0xf0000000
    return value
_u32 = _read_int | |||||
def _s32(reader):
    """Read a variable-length integer as a signed 32-bit value."""
    raw = _read_int(reader)
    if raw & 0x80000000:
        # Bit 31 set: convert from two's complement.
        return -((raw ^ 0xffffffff) + 1)
    return raw
def _s24(reader):
    """Read a little-endian signed 24-bit integer from reader."""
    raw = reader.read(3)
    assert len(raw) == 3
    # Sign-extend to four bytes so it can be unpacked as a 32-bit int.
    is_negative = ord(raw[2:3]) >= 0x80
    sign_extension = b'\xff' if is_negative else b'\x00'
    return struct_unpack('<i', raw + sign_extension)[0]
def _read_string(reader):
    """Read a length-prefixed UTF-8 string from reader."""
    byte_count = _u30(reader)
    raw = reader.read(byte_count)
    assert len(raw) == byte_count
    return raw.decode('utf-8')
def _read_bytes(count, reader):
    """Read exactly count bytes from reader and return them."""
    assert count >= 0
    data = reader.read(count)
    # A short read means the stream ended early.
    assert len(data) == count
    return data
def _read_byte(reader):
    """Read one byte from reader and return it as an unsigned integer."""
    return struct_unpack('<B', _read_bytes(1, reader=reader))[0]
class SWFInterpreter(object): | |||||
def __init__(self, file_contents):
    """Parse the ABC (AVM2 bytecode) block of an SWF file.

    file_contents is the complete SWF file as bytes.  After construction
    the instance exposes ``constant_strings``, ``multinames`` and
    ``_classes_by_name``; each class carries the code of its methods.

    Raises ExtractorError when the file contains no tag with code 82 or
    uses an unsupported trait kind.
    """
    code_tag = next(
        (tag
         for tag_code, tag in _extract_tags(file_contents)
         if tag_code == 82),
        None)
    if code_tag is None:
        raise ExtractorError('Cannot find a code tag (82) in the SWF file')
    # The bytecode starts after the first NUL byte at or beyond offset 4.
    p = code_tag.index(b'\0', 4) + 1
    code_reader = io.BytesIO(code_tag[p:])

    # Parse ABC (AVM2 ByteCode)

    # Define a couple convenience methods
    u30 = lambda *args: _u30(*args, reader=code_reader)
    s32 = lambda *args: _s32(*args, reader=code_reader)
    u32 = lambda *args: _u32(*args, reader=code_reader)
    read_bytes = lambda *args: _read_bytes(*args, reader=code_reader)
    read_byte = lambda *args: _read_byte(*args, reader=code_reader)

    # minor_version + major_version
    read_bytes(2 + 2)

    # Constant pool
    # Every pool count includes an implicit entry 0 that is not stored,
    # hence the loops start at 1.
    int_count = u30()
    for _c in range(1, int_count):
        s32()
    uint_count = u30()
    for _c in range(1, uint_count):
        u32()
    double_count = u30()
    read_bytes(max(0, (double_count - 1)) * 8)
    string_count = u30()
    self.constant_strings = ['']
    for _c in range(1, string_count):
        s = _read_string(code_reader)
        self.constant_strings.append(s)
    namespace_count = u30()
    for _c in range(1, namespace_count):
        read_bytes(1)  # kind
        u30()  # name
    ns_set_count = u30()
    for _c in range(1, ns_set_count):
        count = u30()
        for _c2 in range(count):
            u30()
    multiname_count = u30()
    # Number of u30 fields that follow each multiname kind
    MULTINAME_SIZES = {
        0x07: 2,  # QName
        0x0d: 2,  # QNameA
        0x0f: 1,  # RTQName
        0x10: 1,  # RTQNameA
        0x11: 0,  # RTQNameL
        0x12: 0,  # RTQNameLA
        0x09: 2,  # Multiname
        0x0e: 2,  # MultinameA
        0x1b: 1,  # MultinameL
        0x1c: 1,  # MultinameLA
    }
    self.multinames = ['']
    for _c in range(1, multiname_count):
        kind = u30()
        assert kind in MULTINAME_SIZES, 'Invalid multiname kind %r' % kind
        if kind == 0x07:
            # Only QNames are resolved to their string; every other kind
            # is kept as an opaque placeholder.
            u30()  # namespace_idx
            name_idx = u30()
            self.multinames.append(self.constant_strings[name_idx])
        else:
            self.multinames.append(_Multiname(kind))
            for _c2 in range(MULTINAME_SIZES[kind]):
                u30()

    # Methods
    method_count = u30()
    MethodInfo = collections.namedtuple(
        'MethodInfo',
        ['NEED_ARGUMENTS', 'NEED_REST'])
    method_infos = []
    for method_id in range(method_count):
        param_count = u30()
        u30()  # return type
        for _ in range(param_count):
            u30()  # param type
        u30()  # name index (always 0 for youtube)
        flags = read_byte()
        if flags & 0x08 != 0:
            # Options present
            option_count = u30()
            for _c2 in range(option_count):
                u30()  # val
                read_bytes(1)  # kind
        if flags & 0x80 != 0:
            # Param names present
            for _ in range(param_count):
                u30()  # param name
        mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
        method_infos.append(mi)

    # Metadata
    metadata_count = u30()
    for _c in range(metadata_count):
        u30()  # name
        item_count = u30()
        for _c2 in range(item_count):
            u30()  # key
            u30()  # value

    def parse_traits_info():
        """Consume one trait; return a ``name -> method index`` dict."""
        trait_name_idx = u30()
        kind_full = read_byte()
        kind = kind_full & 0x0f
        attrs = kind_full >> 4
        methods = {}
        if kind in [0x00, 0x06]:  # Slot or Const
            u30()  # Slot id
            u30()  # type_name_idx
            vindex = u30()
            if vindex != 0:
                read_byte()  # vkind
        elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
            u30()  # disp_id
            method_idx = u30()
            methods[self.multinames[trait_name_idx]] = method_idx
        elif kind == 0x04:  # Class
            u30()  # slot_id
            u30()  # classi
        elif kind == 0x05:  # Function
            u30()  # slot_id
            function_idx = u30()
            # Same orientation as the Method case above: register_methods
            # expects name -> index.
            methods[self.multinames[trait_name_idx]] = function_idx
        else:
            raise ExtractorError('Unsupported trait kind %d' % kind)

        if attrs & 0x4 != 0:  # Metadata present
            metadata_count = u30()
            for _c3 in range(metadata_count):
                u30()  # metadata index

        return methods

    # Classes
    class_count = u30()
    classes = []
    for class_id in range(class_count):
        name_idx = u30()

        cname = self.multinames[name_idx]
        avm_class = _AVMClass(name_idx, cname)
        classes.append(avm_class)

        u30()  # super_name idx
        flags = read_byte()
        if flags & 0x08 != 0:  # Protected namespace is present
            u30()  # protected_ns_idx
        intrf_count = u30()
        for _c2 in range(intrf_count):
            u30()
        u30()  # iinit
        trait_count = u30()
        for _c2 in range(trait_count):
            trait_methods = parse_traits_info()
            avm_class.register_methods(trait_methods)

    assert len(classes) == class_count
    self._classes_by_name = dict((c.name, c) for c in classes)

    # Second pass over the classes: the class-level (static) traits
    for avm_class in classes:
        u30()  # cinit
        trait_count = u30()
        for _c2 in range(trait_count):
            trait_methods = parse_traits_info()
            avm_class.register_methods(trait_methods)

    # Scripts
    script_count = u30()
    for _c in range(script_count):
        u30()  # init
        trait_count = u30()
        for _c2 in range(trait_count):
            parse_traits_info()

    # Method bodies
    method_body_count = u30()
    Method = collections.namedtuple('Method', ['code', 'local_count'])
    for _c in range(method_body_count):
        method_idx = u30()
        u30()  # max_stack
        local_count = u30()
        u30()  # init_scope_depth
        u30()  # max_scope_depth
        code_length = u30()
        code = read_bytes(code_length)
        # Attach the body to every class that registered this method index.
        for avm_class in classes:
            if method_idx in avm_class.method_idxs:
                m = Method(code, local_count)
                avm_class.methods[avm_class.method_idxs[method_idx]] = m
        exception_count = u30()
        for _c2 in range(exception_count):
            u30()  # from
            u30()  # to
            u30()  # target
            u30()  # exc_type
            u30()  # var_name
        trait_count = u30()
        for _c2 in range(trait_count):
            parse_traits_info()

    # The whole tag must have been consumed.
    assert p + code_reader.tell() == len(code_tag)
def extract_class(self, class_name):
    """Return the parsed class registered under *class_name*.

    The lookup is done in ``self._classes_by_name``, the name -> class
    mapping filled in while the ABC data is parsed.

    Raises:
        ExtractorError: if no class with that name is known.
    """
    try:
        return self._classes_by_name[class_name]
    except KeyError:
        raise ExtractorError('Class %r not found' % class_name)
def extract_function(self, avm_class, func_name):
    """Build (and cache) a Python callable for an AVM method.

    Lookup order:

    1. a callable already cached in ``avm_class.method_pyfunctions``;
    2. if *func_name* names a known class, a fresh object of that class
       (``make_object()``) is returned instead of a function;
    3. otherwise the bytecode stored in ``avm_class.methods`` is wrapped
       in an interpreter closure, which is cached and returned.

    The returned closure takes one sequence ``args`` holding the call
    arguments and returns the value popped by the method's
    ``returnvalue`` instruction.

    Raises:
        ExtractorError: if *func_name* is neither cached, a class name,
            nor a method of *avm_class*.
        NotImplementedError: (from the closure) when the bytecode uses
            an opcode or a built-in property that is not handled here.
    """
    if func_name in avm_class.method_pyfunctions:
        return avm_class.method_pyfunctions[func_name]
    if func_name in self._classes_by_name:
        return self._classes_by_name[func_name].make_object()
    if func_name not in avm_class.methods:
        raise ExtractorError('Cannot find function %s.%s' % (
            avm_class.name, func_name))
    m = avm_class.methods[func_name]

    def resfunc(args):
        # Helper functions: readers bound to this call's bytecode stream
        coder = io.BytesIO(m.code)
        s24 = lambda: _s24(coder)
        u30 = lambda: _u30(coder)

        # Register 0 holds the class' variable scope, then come the call
        # arguments; the remaining registers start out as None.
        registers = (
            [avm_class.variables] + list(args) + [None] * m.local_count)
        stack = []
        scopes = collections.deque([
            self._classes_by_name, avm_class.variables])
        while True:
            opcode = _read_byte(coder)
            if opcode == 17:  # iftrue
                offset = s24()
                value = stack.pop()
                if value:
                    # Jump relative to the position just past the operand
                    coder.seek(coder.tell() + offset)
            elif opcode == 18:  # iffalse
                offset = s24()
                value = stack.pop()
                if not value:
                    coder.seek(coder.tell() + offset)
            elif opcode == 36:  # pushbyte
                v = _read_byte(coder)
                stack.append(v)
            elif opcode == 42:  # dup
                value = stack[-1]
                stack.append(value)
            elif opcode == 44:  # pushstring
                idx = u30()
                stack.append(self.constant_strings[idx])
            elif opcode == 48:  # pushscope
                new_scope = stack.pop()
                scopes.append(new_scope)
            elif opcode == 66:  # construct
                arg_count = u30()
                # The constructor is not executed, so its arguments are
                # only removed from the stack.
                for _ in range(arg_count):
                    stack.pop()
                obj = stack.pop()
                res = obj.avm_class.make_object()
                stack.append(res)
            elif opcode == 70:  # callproperty
                index = u30()
                mname = self.multinames[index]
                arg_count = u30()
                # Arguments were pushed left to right, so popping yields
                # them in reverse order.
                call_args = list(reversed(
                    [stack.pop() for _ in range(arg_count)]))
                obj = stack.pop()

                if isinstance(obj, _AVMClass_Object):
                    func = self.extract_function(obj.avm_class, mname)
                    res = func(call_args)
                    stack.append(res)
                    continue
                elif isinstance(obj, _ScopeDict):
                    if mname in obj.avm_class.method_names:
                        func = self.extract_function(obj.avm_class, mname)
                        res = func(call_args)
                    else:
                        res = obj[mname]
                    stack.append(res)
                    continue
                elif isinstance(obj, compat_str):
                    if mname == 'split':
                        assert len(call_args) == 1
                        assert isinstance(call_args[0], compat_str)
                        if call_args[0] == '':
                            # Python's str.split rejects an empty
                            # separator; split into single characters.
                            res = list(obj)
                        else:
                            res = obj.split(call_args[0])
                        stack.append(res)
                        continue
                elif isinstance(obj, list):
                    if mname == 'slice':
                        assert len(call_args) == 1
                        assert isinstance(call_args[0], int)
                        res = obj[call_args[0]:]
                        stack.append(res)
                        continue
                    elif mname == 'join':
                        assert len(call_args) == 1
                        assert isinstance(call_args[0], compat_str)
                        res = call_args[0].join(obj)
                        stack.append(res)
                        continue
                # Reached when no branch above handled the call
                raise NotImplementedError(
                    'Unsupported property %r on %r'
                    % (mname, obj))
            elif opcode == 72:  # returnvalue
                res = stack.pop()
                return res
            elif opcode == 74:  # constructproperty
                index = u30()
                arg_count = u30()
                for _ in range(arg_count):
                    stack.pop()
                obj = stack.pop()

                mname = self.multinames[index]  # resolved, currently unused
                assert isinstance(obj, _AVMClass)

                # We do not actually call the constructor for now;
                # we just pretend it does nothing
                stack.append(obj.make_object())
            elif opcode == 79:  # callpropvoid
                index = u30()
                mname = self.multinames[index]
                arg_count = u30()
                call_args = list(reversed(
                    [stack.pop() for _ in range(arg_count)]))
                obj = stack.pop()
                if mname == 'reverse':
                    assert isinstance(obj, list)
                    obj.reverse()
                else:
                    raise NotImplementedError(
                        'Unsupported (void) property %r on %r'
                        % (mname, obj))
            elif opcode == 86:  # newarray
                arg_count = u30()
                # Popping yields the elements last-first; restore order
                arr = [stack.pop() for _ in range(arg_count)][::-1]
                stack.append(arr)
            elif opcode == 93:  # findpropstrict
                index = u30()
                mname = self.multinames[index]
                # Search from the innermost scope outwards
                for s in reversed(scopes):
                    if mname in s:
                        res = s
                        break
                else:
                    res = scopes[0]
                # Note: the property value itself is pushed, not the
                # scope object that holds it.
                stack.append(res[mname])
            elif opcode == 94:  # findproperty
                index = u30()
                mname = self.multinames[index]
                for s in reversed(scopes):
                    if mname in s:
                        res = s
                        break
                else:
                    res = avm_class.variables
                stack.append(res)
            elif opcode == 96:  # getlex
                index = u30()
                mname = self.multinames[index]
                for s in reversed(scopes):
                    if mname in s:
                        scope = s
                        break
                else:
                    scope = avm_class.variables
                # I cannot find where static variables are initialized
                # so let's just return None
                res = scope.get(mname)
                stack.append(res)
            elif opcode == 97:  # setproperty
                index = u30()
                value = stack.pop()
                idx = self.multinames[index]
                if isinstance(idx, _Multiname):
                    # The actual key is supplied on the stack
                    idx = stack.pop()
                obj = stack.pop()
                obj[idx] = value
            elif opcode == 98:  # getlocal
                index = u30()
                stack.append(registers[index])
            elif opcode == 99:  # setlocal
                index = u30()
                value = stack.pop()
                registers[index] = value
            elif opcode == 102:  # getproperty
                index = u30()
                pname = self.multinames[index]
                if pname == 'length':
                    obj = stack.pop()
                    assert isinstance(obj, list)
                    stack.append(len(obj))
                else:  # Assume attribute access
                    idx = stack.pop()
                    assert isinstance(idx, int)
                    obj = stack.pop()
                    assert isinstance(obj, list)
                    stack.append(obj[idx])
            elif opcode == 115:  # convert_i
                value = stack.pop()
                intvalue = int(value)
                stack.append(intvalue)
            elif opcode == 128:  # coerce
                # Operand is read and ignored; the value is left as is
                u30()
            elif opcode == 133:  # coerce_s
                assert isinstance(stack[-1], (type(None), compat_str))
            elif opcode == 160:  # add
                value2 = stack.pop()
                value1 = stack.pop()
                res = value1 + value2
                stack.append(res)
            elif opcode == 161:  # subtract
                value2 = stack.pop()
                value1 = stack.pop()
                res = value1 - value2
                stack.append(res)
            elif opcode == 164:  # modulo
                value2 = stack.pop()
                value1 = stack.pop()
                res = value1 % value2
                stack.append(res)
            elif opcode == 175:  # greaterequals
                value2 = stack.pop()
                value1 = stack.pop()
                result = value1 >= value2
                stack.append(result)
            elif opcode == 208:  # getlocal_0
                stack.append(registers[0])
            elif opcode == 209:  # getlocal_1
                stack.append(registers[1])
            elif opcode == 210:  # getlocal_2
                stack.append(registers[2])
            elif opcode == 211:  # getlocal_3
                stack.append(registers[3])
            elif opcode == 212:  # setlocal_0
                registers[0] = stack.pop()
            elif opcode == 213:  # setlocal_1
                registers[1] = stack.pop()
            elif opcode == 214:  # setlocal_2
                registers[2] = stack.pop()
            elif opcode == 215:  # setlocal_3
                registers[3] = stack.pop()
            else:
                raise NotImplementedError(
                    'Unsupported opcode %d' % opcode)

    # Cache so repeated lookups return the same closure
    avm_class.method_pyfunctions[func_name] = resfunc
    return resfunc
@ -1,2 +1,2 @@ | |||||
__version__ = '2014.07.15' | |||||
__version__ = '2014.07.22' |