@ -0,0 +1,202 @@ | |||||
# Public names of this module (what `from <module> import *` exposes).
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_decrypt_text']
import base64
from math import ceil
from .utils import bytes_to_intlist, intlist_to_bytes
# Size in bytes of one cipher block; every block-wise function below slices
# its input in chunks of this length.
BLOCK_SIZE_BYTES = 16
def aes_ctr_decrypt(data, key, counter):
    """
    Decrypt with aes in counter mode

    Each counter block is encrypted with the expanded key and the result is
    XORed onto the matching 16-Byte slice of 'data'.

    @param {int[]} data        cipher
    @param {int[]} key         16/24/32-Byte cipher key
    @param {instance} counter  Instance whose next_value function (@returns {int[]}  16-Byte block)
                               returns the next counter block
    @returns {int[]}           decrypted data (same length as 'data')
    """
    round_keys = key_expansion(key)
    total_length = len(data)

    plain = []
    for offset in range(0, total_length, BLOCK_SIZE_BYTES):
        keystream = aes_encrypt(counter.next_value(), round_keys)
        chunk = data[offset:offset + BLOCK_SIZE_BYTES]
        # The last chunk may be short; pad it so a full block is XORed.
        padding = [0] * (BLOCK_SIZE_BYTES - len(chunk))
        plain.extend(xor(chunk + padding, keystream))

    # Drop whatever the padding of the last chunk produced.
    return plain[:total_length]
def key_expansion(data):
    """
    Generate key schedule

    @param {int[]} data  16/24/32-Byte cipher key
    @returns {int[]}     176/208/240-Byte expanded key
    """
    schedule = data[:]  # never modify the caller's key
    key_length = len(schedule)
    target_length = (key_length // 4 + 7) * BLOCK_SIZE_BYTES

    # Number of plain words appended at the end of every iteration.
    if key_length == 32:
        trailing_words = 3
    elif key_length == 24:
        trailing_words = 2
    else:
        trailing_words = 0

    def append_word(word):
        # New word = given word XOR the word located one key length back.
        schedule.extend(xor(word, schedule[-key_length:4 - key_length]))

    rcon_iteration = 1
    while len(schedule) < target_length:
        # First word of the iteration goes through the schedule core.
        append_word(key_schedule_core(schedule[-4:], rcon_iteration))
        rcon_iteration += 1

        for _ in range(3):
            append_word(schedule[-4:])

        # 32-Byte keys get one extra substituted word per iteration.
        if key_length == 32:
            append_word(sub_bytes(schedule[-4:]))

        for _ in range(trailing_words):
            append_word(schedule[-4:])

    # The loop may overshoot; cut to the exact schedule size.
    return schedule[:target_length]
def aes_encrypt(data, expanded_key):
    """
    Encrypt one block with aes

    @param {int[]} data          16-Byte state
    @param {int[]} expanded_key  176/208/240-Byte expanded key
    @returns {int[]}             16-Byte cipher
    """
    # The expanded key holds one 16-Byte round key more than there are rounds.
    rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1

    state = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
    for round_no in range(1, rounds + 1):
        state = shift_rows(sub_bytes(state))
        # The last round skips the column mixing step.
        if round_no < rounds:
            state = mix_columns(state)
        start = round_no * BLOCK_SIZE_BYTES
        state = xor(state, expanded_key[start:start + BLOCK_SIZE_BYTES])

    return state
def aes_decrypt_text(data, password, key_size_bytes):
    """
    Decrypt text
    - The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter
    - The cipher key is retrieved by encrypting the first 16 Byte of 'password'
      with the first 'key_size_bytes' Bytes from 'password' (if necessary filled with 0's);
      the 16-Byte result is then extended with its own leading Bytes until it
      is 'key_size_bytes' long
    - Mode of operation is 'counter'

    @param {str} data                    Base64 encoded string
    @param {str,unicode} password        Password (will be encoded with utf-8)
    @param {int} key_size_bytes          Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit
    @returns {str}                       Decrypted data
    """
    NONCE_LENGTH_BYTES = 8

    data = bytes_to_intlist(base64.b64decode(data))
    password = bytes_to_intlist(password.encode('utf-8'))

    # Truncate or zero-pad the password to exactly key_size_bytes.
    key = password[:key_size_bytes] + [0]*(key_size_bytes - len(password))
    encrypted_key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key))
    # Extend the 16-Byte block to the requested key size.  The previous
    # `block * (key_size_bytes // 16)` yielded only 16 Bytes for a 24-Byte key,
    # silently downgrading 192-Bit decryption to 128-Bit; 16 and 32 Byte keys
    # come out exactly as before.
    key = encrypted_key + encrypted_key[:key_size_bytes - BLOCK_SIZE_BYTES]

    nonce = data[:NONCE_LENGTH_BYTES]
    cipher = data[NONCE_LENGTH_BYTES:]

    class Counter:
        # Initial counter block: the nonce followed by zero Bytes.
        __value = nonce + [0]*(BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)

        def next_value(self):
            # Hand out the current block, then advance the counter by one.
            temp = self.__value
            self.__value = inc(self.__value)
            return temp

    decrypted_data = aes_ctr_decrypt(cipher, key, Counter())
    plaintext = intlist_to_bytes(decrypted_data)

    return plaintext
# Round constants, indexed by `rcon_iteration` in key_schedule_core();
# key_expansion() starts that index at 1.
RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
# Byte substitution table used by sub_bytes(): SBOX[x] replaces the value x.
SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
        0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
        0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
        0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
        0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
        0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
        0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
        0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
        0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
        0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
        0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
        0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
        0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
        0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
        0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
        0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16)
# Coefficients read by mix_column(): entry [row][column] is the multiplier
# (1, 2 or 3) applied to input byte `column` when computing output byte `row`.
MIX_COLUMN_MATRIX = ((2,3,1,1),
                     (1,2,3,1),
                     (1,1,2,3),
                     (3,1,1,2))
def sub_bytes(data):
    """Return a new list with every value of 'data' replaced by its SBOX entry."""
    substituted = []
    for value in data:
        substituted.append(SBOX[value])
    return substituted
def rotate(data):
    """Return a copy of the list 'data' rotated left by one position."""
    head = data[0]
    tail = data[1:]
    return tail + [head]
def key_schedule_core(data, rcon_iteration):
    """
    Transform one key schedule word: rotate it, substitute its bytes and
    XOR the first byte with RCON[rcon_iteration].

    @param {int[]} data          word to transform (not modified)
    @param {int} rcon_iteration  index into RCON
    @returns {int[]}             transformed word
    """
    word = sub_bytes(rotate(data))
    word[0] ^= RCON[rcon_iteration]
    return word
def xor(data1, data2):
    """
    XOR two sequences element by element.

    The result is as long as the shorter input.
    """
    combined = []
    for left, right in zip(data1, data2):
        combined.append(left ^ right)
    return combined
def mix_column(data):
    """
    Combine the four bytes of one column according to MIX_COLUMN_MATRIX.

    @param {int[]} data  4-Byte column
    @returns {int[]}     4-Byte mixed column
    """
    mixed_column = []
    for row in range(4):
        coefficients = MIX_COLUMN_MATRIX[row]
        accumulator = 0
        for column in range(4):
            value = data[column]
            coefficient = coefficients[column]
            term = value
            if coefficient in (2, 3):
                # Doubling: shift left, and fold an overflow back into one byte.
                term <<= 1
                if term > 0xff:
                    term = (term & 0xff) ^ 0x1b
            if coefficient == 3:
                # Tripling is doubling plus (XOR) the original value.
                term ^= value
            accumulator ^= term & 0xff
        mixed_column.append(accumulator)
    return mixed_column
def mix_columns(data):
    """Apply mix_column to each of the four consecutive 4-Byte slices of 'data'."""
    result = []
    for start in range(0, 16, 4):
        result.extend(mix_column(data[start:start + 4]))
    return result
def shift_rows(data):
    """
    Reorder a 16-Byte state: output position column*4+row takes the byte at
    row 'row' of column (column + row) modulo 4.
    """
    return [data[((column + row) & 0b11) * 4 + row]
            for column in range(4)
            for row in range(4)]
def inc(data):
    """
    Return a copy of 'data', read as a big-endian list of bytes, increased by
    one. A list consisting only of 255s wraps around to all zeros.
    """
    result = data[:]  # the input list is left untouched
    index = len(result) - 1
    while index >= 0:
        if result[index] != 255:
            result[index] = result[index] + 1
            break
        # Overflowing byte: reset it and carry into the next higher one.
        result[index] = 0
        index -= 1
    return result
@ -0,0 +1,75 @@ | |||||
import re | |||||
from .common import InfoExtractor | |||||
from ..utils import ( | |||||
compat_HTTPError, | |||||
compat_str, | |||||
compat_urllib_parse, | |||||
compat_urllib_parse_urlparse, | |||||
ExtractorError, | |||||
) | |||||
class AddAnimeIE(InfoExtractor):
    """Information extractor for add-anime.net ``watch_video.php`` pages.

    If the first request for the page fails with an HTTP error, the error body
    is treated as a challenge page: its form action, hidden ``jschl_vc`` value
    and a small arithmetic task are answered, then the page is requested again.
    """

    _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
    IE_NAME = u'AddAnime'
    _TEST = {
        u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
        u'file': u'24MR3YO5SAS9.flv',
        u'md5': u'1036a0e0cd307b95bd8a8c3a5c8cfaf1',
        u'info_dict': {
            u"description": u"One Piece 606",
            u"title": u"One Piece 606"
        }
    }

    def _real_extract(self, url):
        """Return the video info dict (id, url, ext, title, description) for 'url'."""
        video_id = re.match(self._VALID_URL, url).group('video_id')

        try:
            webpage = self._download_webpage(url, video_id)
        except ExtractorError as ee:
            http_error = ee.cause
            if not isinstance(http_error, compat_HTTPError):
                raise

            # The body of the HTTP error carries the challenge form.
            redir_webpage = http_error.read().decode('utf-8')
            action = self._search_regex(
                r'<form id="challenge-form" action="([^"]+)"',
                redir_webpage, u'Redirect form')
            vc = self._search_regex(
                r'<input type="hidden" name="jschl_vc" value="([^"]+)"/>',
                redir_webpage, u'redirect vc value')
            task = re.search(
                r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
                redir_webpage)
            if task is None:
                raise ExtractorError(u'Cannot find redirect math task')
            first, second, third = [int(number) for number in task.groups()]

            # Expected answer: result of the task plus the length of the host name.
            parsed_url = compat_urllib_parse_urlparse(url)
            answer = first + second * third + len(parsed_url.netloc)
            query = compat_urllib_parse.urlencode({
                'jschl_vc': vc, 'jschl_answer': compat_str(answer)})
            confirm_url = (
                parsed_url.scheme + u'://' + parsed_url.netloc +
                action + '?' + query)
            self._download_webpage(
                confirm_url, video_id,
                note=u'Confirming after redirect')

            # With the challenge answered, fetch the real page.
            webpage = self._download_webpage(url, video_id)

        video_url = self._search_regex(r"var normal_video_file = '(.*?)';",
                                       webpage, u'video file URL')
        video_title = self._og_search_title(webpage)
        video_description = self._og_search_description(webpage)

        info = {
            '_type': 'video',
            'id': video_id,
            'url': video_url,
            'ext': 'flv',
            'title': video_title,
            'description': video_description
        }
        return info
@ -0,0 +1,166 @@ | |||||
import re | |||||
import xml.etree.ElementTree | |||||
from .common import InfoExtractor | |||||
from ..utils import ( | |||||
determine_ext, | |||||
) | |||||
class AppleTrailersIE(InfoExtractor):
    """Information extractor for trailers.apple.com movie pages.

    A movie page is turned into a playlist with one entry per trailer listed
    in the page's ``includes/playlists/web.inc`` snippet.
    """

    _VALID_URL = r'https?://(?:www\.)?trailers.apple.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
    _TEST = {
        u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
        u"playlist": [
            {
                u"file": u"manofsteel-trailer4.mov",
                u"md5": u"11874af099d480cc09e103b189805d5f",
                u"info_dict": {
                    u"duration": 111,
                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_11624.jpg",
                    u"title": u"Trailer 4",
                    u"upload_date": u"20130523",
                    u"uploader_id": u"wb",
                },
            },
            {
                u"file": u"manofsteel-trailer3.mov",
                u"md5": u"07a0a262aae5afe68120eed61137ab34",
                u"info_dict": {
                    u"duration": 182,
                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_10793.jpg",
                    u"title": u"Trailer 3",
                    u"upload_date": u"20130417",
                    u"uploader_id": u"wb",
                },
            },
            {
                u"file": u"manofsteel-trailer.mov",
                u"md5": u"e401fde0813008e3307e54b6f384cff1",
                u"info_dict": {
                    u"duration": 148,
                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_8703.jpg",
                    u"title": u"Trailer",
                    u"upload_date": u"20121212",
                    u"uploader_id": u"wb",
                },
            },
            {
                u"file": u"manofsteel-teaser.mov",
                u"md5": u"76b392f2ae9e7c98b22913c10a639c97",
                u"info_dict": {
                    u"duration": 93,
                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_6899.jpg",
                    u"title": u"Teaser",
                    u"upload_date": u"20120721",
                    u"uploader_id": u"wb",
                },
            }
        ]
    }

    def _real_extract(self, url):
        """Return a playlist dict whose entries are the trailers of the movie at 'url'."""
        mobj = re.match(self._VALID_URL, url)
        movie = mobj.group('movie')
        uploader_id = mobj.group('company')

        playlist_url = url.partition(u'?')[0] + u'/includes/playlists/web.inc'
        playlist_snippet = self._download_webpage(playlist_url, movie)
        # Strip the scripts and wrap the snippet in a single root element so
        # it can be handed to the XML parser.
        playlist_cleaned = re.sub(r'(?s)<script>.*?</script>', u'', playlist_snippet)
        playlist_html = u'<html>' + playlist_cleaned + u'</html>'

        # Parsed size pages, keyed by size id, so each is downloaded only once.
        size_cache = {}

        doc = xml.etree.ElementTree.fromstring(playlist_html)
        playlist = []
        for li in doc.findall('./div/ul/li'):
            title = li.find('.//h3').text
            video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
            thumbnail = li.find('.//img').attrib['src']

            # The first <p> holds a MM/DD/YY date; the text after its <br>
            # holds the runtime as minutes:seconds.
            date_el = li.find('.//p')
            upload_date = None
            m = re.search(r':\s?(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<year>[0-9]{2})', date_el.text)
            if m:
                upload_date = u'20' + m.group('year') + m.group('month') + m.group('day')
            runtime_el = date_el.find('./br')
            m = re.search(r':\s?(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime_el.tail)
            duration = None
            if m:
                duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))

            formats = []
            for formats_el in li.findall('.//a'):
                if formats_el.attrib['class'] != 'OverlayPanel':
                    continue
                target = formats_el.attrib['target']

                format_code = formats_el.text
                if 'Automatic' in format_code:
                    continue

                size_q = formats_el.attrib['href']
                size_id = size_q.rpartition('#videos-')[2]
                if size_id not in size_cache:
                    size_url = url + size_q
                    sizepage_html = self._download_webpage(
                        size_url, movie,
                        note=u'Downloading size info %s' % size_id,
                        errnote=u'Error while downloading size info %s' % size_id,
                    )
                    _doc = xml.etree.ElementTree.fromstring(sizepage_html)
                    size_cache[size_id] = _doc

                sizepage_doc = size_cache[size_id]
                links = sizepage_doc.findall('.//{http://www.w3.org/1999/xhtml}ul/{http://www.w3.org/1999/xhtml}li/{http://www.w3.org/1999/xhtml}a')
                for vid_a in links:
                    href = vid_a.get('href')
                    # Only the link belonging to this trailer is of interest.
                    if not href.endswith(target):
                        continue
                    detail_q = href.partition('#')[0]
                    detail_url = url + '/' + detail_q

                    m = re.match(r'includes/(?P<detail_id>[^/]+)/', detail_q)
                    detail_id = m.group('detail_id')

                    detail_html = self._download_webpage(
                        detail_url, movie,
                        note=u'Downloading detail %s %s' % (detail_id, size_id),
                        errnote=u'Error while downloading detail %s %s' % (detail_id, size_id)
                    )
                    detail_doc = xml.etree.ElementTree.fromstring(detail_html)
                    movie_link_el = detail_doc.find('.//{http://www.w3.org/1999/xhtml}a')
                    # NOTE: these asserts are sanity checks on the page layout
                    # and are skipped when Python runs with -O.
                    assert movie_link_el.get('class') == 'movieLink'
                    movie_link = movie_link_el.get('href').partition('?')[0].replace('_', '_h')
                    ext = determine_ext(movie_link)
                    assert ext == 'mov'

                    formats.append({
                        'format': format_code,
                        'ext': ext,
                        'url': movie_link,
                    })

            # 'title' used to be listed twice in this literal; the redundant
            # duplicate key has been dropped.
            info = {
                '_type': 'video',
                'id': video_id,
                'title': title,
                'formats': formats,
                'duration': duration,
                'thumbnail': thumbnail,
                'upload_date': upload_date,
                'uploader_id': uploader_id,
                'user_agent': 'QuickTime compatible (youtube-dl)',
            }
            # TODO: Remove when #980 has been merged
            info['url'] = formats[-1]['url']
            info['ext'] = formats[-1]['ext']

            playlist.append(info)

        return {
            '_type': 'playlist',
            'id': movie,
            'entries': playlist,
        }
@ -0,0 +1,35 @@ | |||||
# coding: utf-8 | |||||
import re | |||||
from .common import InfoExtractor | |||||
class Canalc2IE(InfoExtractor):
    """Information extractor for canalc2.tv ``video.asp`` pages."""

    # The other extractors name themselves through `IE_NAME`; this class only
    # set `_IE_NAME`.  NOTE(review): presumably the base class reads `IE_NAME`,
    # so the old spelling had no effect - confirm against InfoExtractor.
    IE_NAME = 'canalc2.tv'
    _IE_NAME = IE_NAME  # kept for anything still reading the old attribute
    _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui'

    _TEST = {
        u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
        u'file': u'12163.mp4',
        u'md5': u'060158428b650f896c542dfbb3d6487f',
        u'info_dict': {
            u'title': u'Terrasses du Numérique'
        }
    }

    def _real_extract(self, url):
        """Return the video info dict (id, ext, url, title) for 'url'."""
        # The only capture group of _VALID_URL is the numeric video id.
        video_id = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, video_id)
        file_name = self._search_regex(
            r"so\.addVariable\('file','(.*?)'\);",
            webpage, 'file name')
        # The page only names the file; it is served from a fixed host.
        video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name

        title = self._html_search_regex(
            r'class="evenement8">(.*?)</a>', webpage, u'title')

        return {'id': video_id,
                'ext': 'mp4',
                'url': video_url,
                'title': title,
                }
@ -0,0 +1,58 @@ | |||||
import re | |||||
import xml.etree.ElementTree | |||||
from .common import InfoExtractor | |||||
from ..utils import determine_ext | |||||
class CNNIE(InfoExtractor):
    """Information extractor for cnn.com / edition.cnn.com video pages."""

    _VALID_URL = r'''(?x)https?://(edition\.)?cnn\.com/video/(data/.+?|\?)/
        (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''

    _TESTS = [{
        u'url': u'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
        u'file': u'sports_2013_06_09_nadal-1-on-1.cnn.mp4',
        u'md5': u'3e6121ea48df7e2259fe73a0628605c4',
        u'info_dict': {
            u'title': u'Nadal wins 8th French Open title',
            u'description': u'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
        },
    },
    {
        u"url": u"http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
        u"file": u"us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4",
        u"md5": u"b5cc60c60a3477d185af8f19a2a26f4e",
        u"info_dict": {
            u"title": "Student's epic speech stuns new freshmen",
            u"description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\""
        }
    }]

    def _real_extract(self, url):
        """Return the video info dict built from the video's index.xml document."""
        match = re.match(self._VALID_URL, url)
        path = match.group('path')
        page_title = match.group('title')

        info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path
        info_xml = self._download_webpage(info_url, page_title)
        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))

        # Collect (width, height, bitrate, path) tuples; sorting them puts the
        # largest one last.
        candidates = []
        for file_el in info.findall('files/file'):
            parsed = re.match(r'(\d+)x(\d+)(?:_(.*)k)?',file_el.attrib['bitrate'])
            if parsed is None:
                continue
            width = int(parsed.group(1))
            height = int(parsed.group(2))
            bitrate = int(parsed.group(3) or 0)
            candidates.append((width, height, bitrate, file_el.text))
        candidates.sort()
        video_path = candidates[-1][3]
        video_url = 'http://ht.cdn.turner.com/cnn/big%s' % video_path

        # Thumbnails ordered by (height, width); the last one is the largest.
        thumbnails = []
        for image_el in info.findall('images/image'):
            resolution = (int(image_el.attrib['height']),int(image_el.attrib['width']))
            thumbnails.append((resolution, image_el.text))
        thumbnails.sort()
        thumbs_dict = []
        for resolution, thumb_url in thumbnails:
            thumbs_dict.append({'resolution': resolution, 'url': thumb_url})

        return {'id': info.attrib['id'],
                'title': info.find('headline').text,
                'url': video_url,
                'ext': determine_ext(video_url),
                'thumbnail': thumbnails[-1][1],
                'thumbnails': thumbs_dict,
                'description': info.find('description').text,
                }
@ -0,0 +1,76 @@ | |||||
import re | |||||
import json | |||||
from .common import InfoExtractor | |||||
from ..utils import ( | |||||
clean_html, | |||||
get_element_by_id, | |||||
) | |||||
class TechTVMITIE(InfoExtractor):
    """Information extractor for techtv.mit.edu video and embed pages."""

    IE_NAME = u'techtv.mit.edu'
    _VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
        u'file': u'25418.mp4',
        u'md5': u'1f8cb3e170d41fd74add04d3c9330e5f',
        u'info_dict': {
            u'title': u'MIT DNA Learning Center Set',
            u'description': u'md5:82313335e8a8a3f243351ba55bc1b474',
        },
    }

    def _real_extract(self, url):
        """Return the video info dict, using the format with the highest bitrate."""
        video_id = re.match(self._VALID_URL, url).group('id')

        # Title and description come from the video page, everything about
        # the media itself from the embed page.
        webpage = self._download_webpage(
            'http://techtv.mit.edu/videos/%s' % video_id, video_id)
        embed_page = self._download_webpage(
            'http://techtv.mit.edu/embeds/%s/' % video_id, video_id,
            note=u'Downloading embed page')

        base_url = self._search_regex(r'ipadUrl: \'(.+?cloudfront.net/)',
                                      embed_page, u'base url')
        formats_json = self._search_regex(r'bitrates: (\[.+?\])', embed_page,
                                          u'video formats')
        formats = json.loads(formats_json)
        # Ascending bitrate: the best format ends up last.
        formats.sort(key=lambda f: f['bitrate'])

        title = get_element_by_id('edit-title', webpage)
        description = clean_html(get_element_by_id('edit-description', webpage))
        thumbnail = self._search_regex(r'playlist:.*?url: \'(.+?)\'',
                                       embed_page, u'thumbnail', flags=re.DOTALL)

        best_format = formats[-1]
        video_url = base_url + best_format['url'].replace('mp4:', '')

        return {'id': video_id,
                'title': title,
                'url': video_url,
                'ext': 'mp4',
                'description': description,
                'thumbnail': thumbnail,
                }
class MITIE(TechTVMITIE):
    """Information extractor for video.mit.edu watch pages.

    It only locates the first iframe of the page and hands its URL over to
    the TechTVMIT extractor.
    """

    IE_NAME = u'video.mit.edu'
    _VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)'

    _TEST = {
        u'url': u'http://video.mit.edu/watch/the-government-is-profiling-you-13222/',
        u'file': u'21783.mp4',
        u'md5': u'7db01d5ccc1895fc5010e9c9e13648da',
        u'info_dict': {
            u'title': u'The Government is Profiling You',
            u'description': u'md5:ad5795fe1e1623b73620dbfd47df9afd',
        },
    }

    def _real_extract(self, url):
        """Return a url result pointing at the embedded TechTV page."""
        page_title = re.match(self._VALID_URL, url).group('title')
        webpage = self._download_webpage(url, page_title)

        message = '%s: Extracting %s url' % (page_title, TechTVMITIE.IE_NAME)
        self.to_screen(message)

        embed_url = self._search_regex(r'<iframe .*?src="(.+?)"', webpage,
                                       u'embed url')
        return self.url_result(embed_url, ie='TechTVMIT')
@ -0,0 +1,33 @@ | |||||
import re | |||||
import xml.etree.ElementTree | |||||
from .common import InfoExtractor | |||||
from ..utils import find_xpath_attr, compat_str | |||||
class NBCNewsIE(InfoExtractor):
    """Information extractor for nbcnews.com video pages."""

    _VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.nbcnews.com/video/nbc-news/52753292',
        u'file': u'52753292.flv',
        u'md5': u'47abaac93c6eaf9ad37ee6c4463a5179',
        u'info_dict': {
            u'title': u'Crew emerges after four-month Mars food study',
            u'description': u'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
        },
    }

    def _real_extract(self, url):
        """Return the video info dict read from the video's XML description."""
        video_id = re.match(self._VALID_URL, url).group('id')

        info_url = 'http://www.nbcnews.com/id/%s/displaymode/1219' % video_id
        info_xml = self._download_webpage(info_url, video_id)
        document = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
        # All fields of interest live below the <video> element.
        info = document.find('video')

        title = info.find('headline').text
        video_url = find_xpath_attr(info, 'media', 'type', 'flashVideo').text
        description = compat_str(info.find('caption').text)
        thumbnail = find_xpath_attr(info, 'media', 'type', 'thumbnail').text

        return {'id': video_id,
                'title': title,
                'ext': 'flv',
                'url': video_url,
                'description': description,
                'thumbnail': thumbnail,
                }
@ -0,0 +1,90 @@ | |||||
# encoding: utf-8 | |||||
import json | |||||
import re | |||||
from .common import InfoExtractor | |||||
from ..utils import ExtractorError | |||||
class SohuIE(InfoExtractor):
    """Information extractor for tv.sohu.com video pages.

    A video may be split into several parts; a single part is returned as a
    plain video, several parts as a playlist.
    """

    _VALID_URL = r'https?://tv\.sohu\.com/\d+?/n(?P<id>\d+)\.shtml.*?'

    _TEST = {
        u'url': u'http://tv.sohu.com/20130724/n382479172.shtml#super',
        u'file': u'382479172.mp4',
        u'md5': u'bde8d9a6ffd82c63a1eefaef4eeefec7',
        u'info_dict': {
            u'title': u'MV:Far East Movement《The Illest》',
        },
    }

    def _real_extract(self, url):
        """Return a video dict (one part) or a playlist dict (several parts)."""

        def _fetch_data(vid_id):
            # Download and decode the JSON description of one format id.
            base_data_url = u'http://hot.vrs.sohu.com/vrs_flash.action?vid='
            data_url = base_data_url + str(vid_id)
            data_json = self._download_webpage(
                data_url, video_id,
                note=u'Downloading JSON data for ' + str(vid_id))
            return json.loads(data_json)

        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        raw_title = self._html_search_regex(r'(?s)<title>(.+?)</title>',
                                            webpage, u'video title')
        # Keep only what precedes the first '-' of the page title.
        title = raw_title.partition('-')[0].strip()

        vid = self._html_search_regex(r'var vid="(\d+)"', webpage,
                                      u'video path')
        data = _fetch_data(vid)

        QUALITIES = ('ori', 'super', 'high', 'nor')
        # An id of 0 marks a quality that is not available.
        vid_ids = [data['data'][q + 'Vid']
                   for q in QUALITIES
                   if data['data'][q + 'Vid'] != 0]
        if not vid_ids:
            raise ExtractorError(u'No formats available for this video')

        # For now, we just pick the highest available quality
        # NOTE(review): QUALITIES looks ordered best-to-worst, in which case
        # [-1] selects the lowest available quality - confirm before changing.
        vid_id = vid_ids[-1]

        # `vid` is a string taken from the page while the ids in the JSON are
        # compared against the integer 0 above, so a direct `vid == vid_id`
        # could never match and the same JSON was downloaded a second time.
        # Comparing the string forms lets the already fetched data be reused.
        if str(vid_id) == str(vid):
            format_data = data
        else:
            format_data = _fetch_data(vid_id)
        part_count = format_data['data']['totalBlocks']
        allot = format_data['allot']
        prot = format_data['prot']
        clipsURL = format_data['data']['clipsURL']
        su = format_data['data']['su']

        playlist = []
        for i in range(part_count):
            part_url = ('http://%s/?prot=%s&file=%s&new=%s' %
                        (allot, prot, clipsURL[i], su[i]))
            part_str = self._download_webpage(
                part_url, video_id,
                note=u'Downloading part %d of %d' % (i+1, part_count))

            # The answer is a '|'-separated record: field 0 is the URL prefix
            # and field 3 the key appended to the final URL.
            part_info = part_str.split('|')
            video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])

            video_info = {
                'id': '%s_part%02d' % (video_id, i + 1),
                'title': title,
                'url': video_url,
                'ext': 'mp4',
            }
            playlist.append(video_info)

        if len(playlist) == 1:
            # A single part is the whole video: give it the plain video id.
            info = playlist[0]
            info['id'] = video_id
        else:
            info = {
                '_type': 'playlist',
                'entries': playlist,
                'id': video_id,
            }

        return info
@ -0,0 +1,73 @@ | |||||
import json | |||||
import re | |||||
import xml.etree.ElementTree | |||||
from .common import InfoExtractor | |||||
class TriluliluIE(InfoExtractor):
    """Information extractor for trilulilu.ro video pages."""

    _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?trilulilu\.ro/video-(?P<category>[^/]+)/(?P<video_id>[^/]+)'
    _TEST = {
        u"url": u"http://www.trilulilu.ro/video-animatie/big-buck-bunny-1",
        u'file': u"big-buck-bunny-1.mp4",
        u'info_dict': {
            u"title": u"Big Buck Bunny",
            u"description": u":) pentru copilul din noi",
        },
        # Server ignores Range headers (--test)
        u"params": {
            u"skip_download": True
        }
    }

    def _real_extract(self, url):
        """Return the video info dict, including every format the server lists."""
        video_id = re.match(self._VALID_URL, url).group('video_id')

        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage)
        thumbnail = self._og_search_thumbnail(webpage)
        description = self._og_search_description(webpage)

        # The page embeds a JSON object whose 'server', 'hash' and 'userid'
        # values are substituted into the URLs below.
        log_str = self._search_regex(
            r'block_flash_vars[ ]=[ ]({[^}]+})', webpage, u'log info')
        log = json.loads(log_str)

        format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
                      u'video-formats2' % log)
        format_str = self._download_webpage(
            format_url, video_id,
            note=u'Downloading formats',
            errnote=u'Error while downloading formats')
        format_doc = xml.etree.ElementTree.fromstring(format_str)

        # After this substitution a single '%s' remains for the format name.
        video_url_template = (
            u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
            u'&source=site&hash=%(hash)s&username=%(userid)s&'
            u'key=ministhebest&format=%%s&sig=&exp=' %
            log)

        formats = []
        for format_node in format_doc.findall('./formats/format'):
            format_name = format_node.text
            formats.append({
                'format': format_name,
                'url': video_url_template % format_name,
            })

        info = {
            '_type': 'video',
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }

        # TODO: Remove when #980 has been merged
        last_format = formats[-1]
        info['url'] = last_format['url']
        # The extension is the part of the format name before the first '-'.
        info['ext'] = last_format['format'].partition('-')[0]

        return info
@ -1,2 +1,2 @@ | |||||
__version__ = '2013.08.23' | |||||
__version__ = '2013.08.28' |