Browse Source

Merge remote-tracking branch 'upstream/master'

rtmp_test
Pierre Rudloff 11 years ago
parent
commit
847f582290
24 changed files with 557 additions and 54 deletions
  1. +5
    -1
      devscripts/bash-completion.in
  2. +4
    -4
      devscripts/youtube_genalgo.py
  3. +3
    -4
      test/test_download.py
  4. +2
    -2
      youtube_dl/PostProcessor.py
  5. +15
    -3
      youtube_dl/YoutubeDL.py
  6. +15
    -4
      youtube_dl/__init__.py
  7. +202
    -0
      youtube_dl/aes.py
  8. +3
    -0
      youtube_dl/extractor/__init__.py
  9. +0
    -1
      youtube_dl/extractor/addanime.py
  10. +0
    -1
      youtube_dl/extractor/appletrailers.py
  11. +12
    -2
      youtube_dl/extractor/common.py
  12. +2
    -10
      youtube_dl/extractor/generic.py
  13. +6
    -2
      youtube_dl/extractor/ign.py
  14. +4
    -2
      youtube_dl/extractor/kankan.py
  15. +74
    -0
      youtube_dl/extractor/mit.py
  16. +67
    -0
      youtube_dl/extractor/orf.py
  17. +90
    -0
      youtube_dl/extractor/sohu.py
  18. +0
    -3
      youtube_dl/extractor/trilulilu.py
  19. +1
    -1
      youtube_dl/extractor/unistra.py
  20. +0
    -1
      youtube_dl/extractor/wat.py
  21. +14
    -4
      youtube_dl/extractor/youporn.py
  22. +4
    -4
      youtube_dl/extractor/youtube.py
  23. +33
    -4
      youtube_dl/utils.py
  24. +1
    -1
      youtube_dl/version.py

+ 5
- 1
devscripts/bash-completion.in View File

@ -4,8 +4,12 @@ __youtube-dl()
COMPREPLY=() COMPREPLY=()
cur="${COMP_WORDS[COMP_CWORD]}" cur="${COMP_WORDS[COMP_CWORD]}"
opts="{{flags}}" opts="{{flags}}"
keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater"
if [[ ${cur} == * ]] ; then
if [[ ${cur} =~ : ]]; then
COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )
return 0
elif [[ ${cur} == * ]] ; then
COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) ) COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
return 0 return 0
fi fi


+ 4
- 4
devscripts/youtube_genalgo.py View File

@ -20,15 +20,15 @@ tests = [
# 87 # 87
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
"uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"), "uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"),
# 86 - vflh9ybst 2013/08/23
# 86 - vflHOr_nV 2013/08/30
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
"yuioplkjhgfdsazxcvbnm1234567890QWERrYUIOPLKqHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"),
"?;}|[{=+._)(*&^%$#@!MNBqCXZASDFGHJKLPOIUYTREWQ<987654321mnbvcxzasdfghjklpoiuytrew"),
# 85 # 85
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"), ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
# 84 - vflh9ybst 2013/08/23 (sporadic)
# 84 - vflg0g8PQ 2013/08/29 (sporadic)
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
"yuioplkjhgfdsazxcvbnm1234567890QWERrYUIOPLKqHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"),
">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"),
# 83 # 83
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"), ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),


+ 3
- 4
test/test_download.py View File

@ -127,12 +127,11 @@ def generator(test_case):
info_dict = json.load(infof) info_dict = json.load(infof)
for (info_field, expected) in tc.get('info_dict', {}).items(): for (info_field, expected) in tc.get('info_dict', {}).items():
if isinstance(expected, compat_str) and expected.startswith('md5:'): if isinstance(expected, compat_str) and expected.startswith('md5:'):
self.assertEqual(expected, 'md5:' + md5(info_dict.get(info_field)))
got = 'md5:' + md5(info_dict.get(info_field))
else: else:
got = info_dict.get(info_field) got = info_dict.get(info_field)
self.assertEqual(
expected, got,
u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
self.assertEqual(expected, got,
u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
# If checkable fields are missing from the test case, print the info_dict # If checkable fields are missing from the test case, print the info_dict
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value)) test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))


+ 2
- 2
youtube_dl/PostProcessor.py View File

@ -137,7 +137,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
try: try:
FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts) FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
except FFmpegPostProcessorError as err: except FFmpegPostProcessorError as err:
raise AudioConversionError(err.message)
raise AudioConversionError(err.msg)
def run(self, information): def run(self, information):
path = information['filepath'] path = information['filepath']
@ -207,7 +207,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
except: except:
etype,e,tb = sys.exc_info() etype,e,tb = sys.exc_info()
if isinstance(e, AudioConversionError): if isinstance(e, AudioConversionError):
msg = u'audio conversion failed: ' + e.message
msg = u'audio conversion failed: ' + e.msg
else: else:
msg = u'error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg') msg = u'error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg')
raise PostProcessingError(msg) raise PostProcessingError(msg)


+ 15
- 3
youtube_dl/YoutubeDL.py View File

@ -97,6 +97,7 @@ class YoutubeDL(object):
def __init__(self, params): def __init__(self, params):
"""Create a FileDownloader object with the given options.""" """Create a FileDownloader object with the given options."""
self._ies = [] self._ies = []
self._ies_instances = {}
self._pps = [] self._pps = []
self._progress_hooks = [] self._progress_hooks = []
self._download_retcode = 0 self._download_retcode = 0
@ -111,8 +112,21 @@ class YoutubeDL(object):
def add_info_extractor(self, ie): def add_info_extractor(self, ie):
"""Add an InfoExtractor object to the end of the list.""" """Add an InfoExtractor object to the end of the list."""
self._ies.append(ie) self._ies.append(ie)
self._ies_instances[ie.ie_key()] = ie
ie.set_downloader(self) ie.set_downloader(self)
def get_info_extractor(self, ie_key):
"""
Get an instance of an IE with name ie_key, it will try to get one from
the _ies list, if there's no instance it will create a new one and add
it to the extractor list.
"""
ie = self._ies_instances.get(ie_key)
if ie is None:
ie = get_info_extractor(ie_key)()
self.add_info_extractor(ie)
return ie
def add_default_info_extractors(self): def add_default_info_extractors(self):
""" """
Add the InfoExtractors returned by gen_extractors to the end of the list Add the InfoExtractors returned by gen_extractors to the end of the list
@ -294,9 +308,7 @@ class YoutubeDL(object):
''' '''
if ie_key: if ie_key:
ie = get_info_extractor(ie_key)()
ie.set_downloader(self)
ies = [ie]
ies = [self.get_info_extractor(ie_key)]
else: else:
ies = self._ies ies = self._ies


+ 15
- 4
youtube_dl/__init__.py View File

@ -45,6 +45,7 @@ import sys
import warnings import warnings
import platform import platform
from .utils import * from .utils import *
from .update import update_self from .update import update_self
from .version import __version__ from .version import __version__
@ -99,6 +100,16 @@ def parseOpts(overrideArguments=None):
pass pass
return None return None
def _hide_login_info(opts):
opts = list(opts)
for private_opt in ['-p', '--password', '-u', '--username']:
try:
i = opts.index(private_opt)
opts[i+1] = '<PRIVATE>'
except ValueError:
pass
return opts
max_width = 80 max_width = 80
max_help_position = 80 max_help_position = 80
@ -357,9 +368,9 @@ def parseOpts(overrideArguments=None):
argv = systemConf + userConf + commandLineConf argv = systemConf + userConf + commandLineConf
opts, args = parser.parse_args(argv) opts, args = parser.parse_args(argv)
if opts.verbose: if opts.verbose:
sys.stderr.write(u'[debug] System config: ' + repr(systemConf) + '\n')
sys.stderr.write(u'[debug] User config: ' + repr(userConf) + '\n')
sys.stderr.write(u'[debug] Command-line args: ' + repr(commandLineConf) + '\n')
sys.stderr.write(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
sys.stderr.write(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
sys.stderr.write(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
return parser, opts, args return parser, opts, args
@ -611,7 +622,7 @@ def _real_main(argv=None):
sys.exc_clear() sys.exc_clear()
except: except:
pass pass
sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform()) + u'\n')
sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
sys.stderr.write(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n') sys.stderr.write(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
ydl.add_default_info_extractors() ydl.add_default_info_extractors()


+ 202
- 0
youtube_dl/aes.py View File

@ -0,0 +1,202 @@
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_decrypt_text']
import base64
from math import ceil
from .utils import bytes_to_intlist, intlist_to_bytes
BLOCK_SIZE_BYTES = 16
def aes_ctr_decrypt(data, key, counter):
"""
Decrypt with aes in counter mode
@param {int[]} data cipher
@param {int[]} key 16/24/32-Byte cipher key
@param {instance} counter Instance whose next_value function (@returns {int[]} 16-Byte block)
returns the next counter block
@returns {int[]} decrypted data
"""
expanded_key = key_expansion(key)
block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
decrypted_data=[]
for i in range(block_count):
counter_block = counter.next_value()
block = data[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES]
block += [0]*(BLOCK_SIZE_BYTES - len(block))
cipher_counter_block = aes_encrypt(counter_block, expanded_key)
decrypted_data += xor(block, cipher_counter_block)
decrypted_data = decrypted_data[:len(data)]
return decrypted_data
def key_expansion(data):
"""
Generate key schedule
@param {int[]} data 16/24/32-Byte cipher key
@returns {int[]} 176/208/240-Byte expanded key
"""
data = data[:] # copy
rcon_iteration = 1
key_size_bytes = len(data)
expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES
while len(data) < expanded_key_size_bytes:
temp = data[-4:]
temp = key_schedule_core(temp, rcon_iteration)
rcon_iteration += 1
data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
for _ in range(3):
temp = data[-4:]
data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
if key_size_bytes == 32:
temp = data[-4:]
temp = sub_bytes(temp)
data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0):
temp = data[-4:]
data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
data = data[:expanded_key_size_bytes]
return data
def aes_encrypt(data, expanded_key):
"""
Encrypt one block with aes
@param {int[]} data 16-Byte state
@param {int[]} expanded_key 176/208/240-Byte expanded key
@returns {int[]} 16-Byte cipher
"""
rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
for i in range(1, rounds+1):
data = sub_bytes(data)
data = shift_rows(data)
if i != rounds:
data = mix_columns(data)
data = xor(data, expanded_key[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES])
return data
def aes_decrypt_text(data, password, key_size_bytes):
"""
Decrypt text
- The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter
- The cipher key is retrieved by encrypting the first 16 Byte of 'password'
with the first 'key_size_bytes' Bytes from 'password' (if necessary filled with 0's)
- Mode of operation is 'counter'
@param {str} data Base64 encoded string
@param {str,unicode} password Password (will be encoded with utf-8)
@param {int} key_size_bytes Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit
@returns {str} Decrypted data
"""
NONCE_LENGTH_BYTES = 8
data = bytes_to_intlist(base64.b64decode(data))
password = bytes_to_intlist(password.encode('utf-8'))
key = password[:key_size_bytes] + [0]*(key_size_bytes - len(password))
key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES)
nonce = data[:NONCE_LENGTH_BYTES]
cipher = data[NONCE_LENGTH_BYTES:]
class Counter:
__value = nonce + [0]*(BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)
def next_value(self):
temp = self.__value
self.__value = inc(self.__value)
return temp
decrypted_data = aes_ctr_decrypt(cipher, key, Counter())
plaintext = intlist_to_bytes(decrypted_data)
return plaintext
RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16)
MIX_COLUMN_MATRIX = ((2,3,1,1),
(1,2,3,1),
(1,1,2,3),
(3,1,1,2))
def sub_bytes(data):
return [SBOX[x] for x in data]
def rotate(data):
return data[1:] + [data[0]]
def key_schedule_core(data, rcon_iteration):
data = rotate(data)
data = sub_bytes(data)
data[0] = data[0] ^ RCON[rcon_iteration]
return data
def xor(data1, data2):
return [x^y for x, y in zip(data1, data2)]
def mix_column(data):
data_mixed = []
for row in range(4):
mixed = 0
for column in range(4):
addend = data[column]
if MIX_COLUMN_MATRIX[row][column] in (2,3):
addend <<= 1
if addend > 0xff:
addend &= 0xff
addend ^= 0x1b
if MIX_COLUMN_MATRIX[row][column] == 3:
addend ^= data[column]
mixed ^= addend & 0xff
data_mixed.append(mixed)
return data_mixed
def mix_columns(data):
data_mixed = []
for i in range(4):
column = data[i*4 : (i+1)*4]
data_mixed += mix_column(column)
return data_mixed
def shift_rows(data):
data_shifted = []
for column in range(4):
for row in range(4):
data_shifted.append( data[((column + row) & 0b11) * 4 + row] )
return data_shifted
def inc(data):
data = data[:] # copy
for i in range(len(data)-1,-1,-1):
if data[i] == 255:
data[i] = 0
else:
data[i] = data[i] + 1
break
return data

+ 3
- 0
youtube_dl/extractor/__init__.py View File

@ -50,6 +50,7 @@ from .keek import KeekIE
from .liveleak import LiveLeakIE from .liveleak import LiveLeakIE
from .livestream import LivestreamIE from .livestream import LivestreamIE
from .metacafe import MetacafeIE from .metacafe import MetacafeIE
from .mit import TechTVMITIE, MITIE
from .mixcloud import MixcloudIE from .mixcloud import MixcloudIE
from .mtv import MTVIE from .mtv import MTVIE
from .muzu import MuzuTVIE from .muzu import MuzuTVIE
@ -58,6 +59,7 @@ from .myvideo import MyVideoIE
from .nba import NBAIE from .nba import NBAIE
from .nbc import NBCNewsIE from .nbc import NBCNewsIE
from .ooyala import OoyalaIE from .ooyala import OoyalaIE
from .orf import ORFIE
from .pbs import PBSIE from .pbs import PBSIE
from .photobucket import PhotobucketIE from .photobucket import PhotobucketIE
from .pornotube import PornotubeIE from .pornotube import PornotubeIE
@ -69,6 +71,7 @@ from .roxwel import RoxwelIE
from .rtlnow import RTLnowIE from .rtlnow import RTLnowIE
from .sina import SinaIE from .sina import SinaIE
from .slashdot import SlashdotIE from .slashdot import SlashdotIE
from .sohu import SohuIE
from .soundcloud import SoundcloudIE, SoundcloudSetIE from .soundcloud import SoundcloudIE, SoundcloudSetIE
from .spiegel import SpiegelIE from .spiegel import SpiegelIE
from .stanfordoc import StanfordOpenClassroomIE from .stanfordoc import StanfordOpenClassroomIE


+ 0
- 1
youtube_dl/extractor/addanime.py View File

@ -1,4 +1,3 @@
import ast
import re import re
from .common import InfoExtractor from .common import InfoExtractor


+ 0
- 1
youtube_dl/extractor/appletrailers.py View File

@ -4,7 +4,6 @@ import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
ExtractorError,
) )


+ 12
- 2
youtube_dl/extractor/common.py View File

@ -114,6 +114,11 @@ class InfoExtractor(object):
"""Real extraction process. Redefine in subclasses.""" """Real extraction process. Redefine in subclasses."""
pass pass
@classmethod
def ie_key(cls):
"""A string for getting the InfoExtractor with get_info_extractor"""
return cls.__name__[:-2]
@property @property
def IE_NAME(self): def IE_NAME(self):
return type(self).__name__[:-2] return type(self).__name__[:-2]
@ -140,12 +145,17 @@ class InfoExtractor(object):
urlh = self._request_webpage(url_or_request, video_id, note, errnote) urlh = self._request_webpage(url_or_request, video_id, note, errnote)
content_type = urlh.headers.get('Content-Type', '') content_type = urlh.headers.get('Content-Type', '')
webpage_bytes = urlh.read()
m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type) m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
if m: if m:
encoding = m.group(1) encoding = m.group(1)
else: else:
encoding = 'utf-8'
webpage_bytes = urlh.read()
m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]',
webpage_bytes[:1024])
if m:
encoding = m.group(1).decode('ascii')
else:
encoding = 'utf-8'
if self._downloader.params.get('dump_intermediate_pages', False): if self._downloader.params.get('dump_intermediate_pages', False):
try: try:
url = url_or_request.get_full_url() url = url_or_request.get_full_url()


+ 2
- 10
youtube_dl/extractor/generic.py View File

@ -7,8 +7,8 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_urllib_error, compat_urllib_error,
compat_urllib_parse, compat_urllib_parse,
compat_urllib_parse_urlparse,
compat_urllib_request, compat_urllib_request,
compat_urlparse,
ExtractorError, ExtractorError,
) )
@ -163,15 +163,7 @@ class GenericIE(InfoExtractor):
raise ExtractorError(u'Invalid URL: %s' % url) raise ExtractorError(u'Invalid URL: %s' % url)
video_url = compat_urllib_parse.unquote(mobj.group(1)) video_url = compat_urllib_parse.unquote(mobj.group(1))
if video_url.startswith('//'):
video_url = compat_urllib_parse_urlparse(url).scheme + ':' + video_url
if '://' not in video_url:
up = compat_urllib_parse_urlparse(url)
if video_url.startswith('/'):
video_url = up.scheme + '://' + up.netloc + video_url
else: # relative path
video_url = (up.scheme + '://' + up.netloc +
up.path.rpartition('/')[0] + '/' + video_url)
video_url = compat_urlparse.urljoin(url, video_url)
video_id = os.path.basename(video_url) video_id = os.path.basename(video_url)
# here's a fun little line of code for you: # here's a fun little line of code for you:


+ 6
- 2
youtube_dl/extractor/ign.py View File

@ -13,7 +13,7 @@ class IGNIE(InfoExtractor):
Some videos of it.ign.com are also supported Some videos of it.ign.com are also supported
""" """
_VALID_URL = r'https?://.+?\.ign\.com/(?:videos|show_videos)(/.+)?/(?P<name_or_id>.+)'
_VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles)(/.+)?/(?P<name_or_id>.+)'
IE_NAME = u'ign.com' IE_NAME = u'ign.com'
_CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config' _CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config'
@ -41,7 +41,11 @@ class IGNIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
name_or_id = mobj.group('name_or_id') name_or_id = mobj.group('name_or_id')
page_type = mobj.group('type')
webpage = self._download_webpage(url, name_or_id) webpage = self._download_webpage(url, name_or_id)
if page_type == 'articles':
video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, u'video url')
return self.url_result(video_url, ie='IGN')
video_id = self._find_video_id(webpage) video_id = self._find_video_id(webpage)
result = self._get_video_info(video_id) result = self._get_video_info(video_id)
description = self._html_search_regex(self._DESCRIPTION_RE, description = self._html_search_regex(self._DESCRIPTION_RE,
@ -68,7 +72,7 @@ class IGNIE(InfoExtractor):
class OneUPIE(IGNIE): class OneUPIE(IGNIE):
"""Extractor for 1up.com, it uses the ign videos system.""" """Extractor for 1up.com, it uses the ign videos system."""
_VALID_URL = r'https?://gamevideos.1up.com/video/id/(?P<name_or_id>.+)'
_VALID_URL = r'https?://gamevideos.1up.com/(?P<type>video)/id/(?P<name_or_id>.+)'
IE_NAME = '1up.com' IE_NAME = '1up.com'
_DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>' _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'


+ 4
- 2
youtube_dl/extractor/kankan.py View File

@ -21,8 +21,10 @@ class KankanIE(InfoExtractor):
video_id = mobj.group('id') video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._search_regex(r'G_TITLE=[\'"](.+?)[\'"]', webpage, u'video title')
gcid = self._search_regex(r'lurl:[\'"]http://.+?/.+?/(.+?)/', webpage, u'gcid')
title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, u'video title')
surls = re.search(r'surls:\[\'.+?\'\]|lurl:\'.+?\.flv\'', webpage).group(0)
gcids = re.findall(r"http://.+?/.+?/(.+?)/", surls)
gcid = gcids[-1]
video_info_page = self._download_webpage('http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid, video_info_page = self._download_webpage('http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid,
video_id, u'Downloading video url info') video_id, u'Downloading video url info')


+ 74
- 0
youtube_dl/extractor/mit.py View File

@ -0,0 +1,74 @@
import re
import json
from .common import InfoExtractor
from ..utils import (
clean_html,
get_element_by_id,
)
class TechTVMITIE(InfoExtractor):
IE_NAME = u'techtv.mit.edu'
_VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)'
_TEST = {
u'url': u'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
u'file': u'25418.mp4',
u'md5': u'1f8cb3e170d41fd74add04d3c9330e5f',
u'info_dict': {
u'title': u'MIT DNA Learning Center Set',
u'description': u'md5:82313335e8a8a3f243351ba55bc1b474',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
raw_page = self._download_webpage(
'http://techtv.mit.edu/videos/%s' % video_id, video_id)
clean_page = re.compile(u'<!--.*?-->', re.S).sub(u'', raw_page)
base_url = self._search_regex(r'ipadUrl: \'(.+?cloudfront.net/)',
raw_page, u'base url')
formats_json = self._search_regex(r'bitrates: (\[.+?\])', raw_page,
u'video formats')
formats = json.loads(formats_json)
formats = sorted(formats, key=lambda f: f['bitrate'])
title = get_element_by_id('edit-title', clean_page)
description = clean_html(get_element_by_id('edit-description', clean_page))
thumbnail = self._search_regex(r'playlist:.*?url: \'(.+?)\'',
raw_page, u'thumbnail', flags=re.DOTALL)
return {'id': video_id,
'title': title,
'url': base_url + formats[-1]['url'].replace('mp4:', ''),
'ext': 'mp4',
'description': description,
'thumbnail': thumbnail,
}
class MITIE(TechTVMITIE):
IE_NAME = u'video.mit.edu'
_VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)'
_TEST = {
u'url': u'http://video.mit.edu/watch/the-government-is-profiling-you-13222/',
u'file': u'21783.mp4',
u'md5': u'7db01d5ccc1895fc5010e9c9e13648da',
u'info_dict': {
u'title': u'The Government is Profiling You',
u'description': u'md5:ad5795fe1e1623b73620dbfd47df9afd',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
page_title = mobj.group('title')
webpage = self._download_webpage(url, page_title)
self.to_screen('%s: Extracting %s url' % (page_title, TechTVMITIE.IE_NAME))
embed_url = self._search_regex(r'<iframe .*?src="(.+?)"', webpage,
u'embed url')
return self.url_result(embed_url, ie='TechTVMIT')

+ 67
- 0
youtube_dl/extractor/orf.py View File

@ -0,0 +1,67 @@
# coding: utf-8
import re
import xml.etree.ElementTree
import json
from .common import InfoExtractor
from ..utils import (
compat_urlparse,
ExtractorError,
find_xpath_attr,
)
class ORFIE(InfoExtractor):
_VALID_URL = r'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'
_TEST = {
u'url': u'http://tvthek.orf.at/programs/1171769-Wetter-ZIB/episodes/6557323-Wetter',
u'file': u'6566957.flv',
u'info_dict': {
u'title': u'Wetter',
u'description': u'Christa Kummer, Marcus Wadsak und Kollegen präsentieren abwechselnd ihre täglichen Wetterprognosen für Österreich.\r \r Mehr Wetter unter wetter.ORF.at',
},
u'params': {
# It uses rtmp
u'skip_download': True,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('id')
webpage = self._download_webpage(url, playlist_id)
flash_xml = self._search_regex('ORF.flashXML = \'(.+?)\'', webpage, u'flash xml')
flash_xml = compat_urlparse.parse_qs('xml='+flash_xml)['xml'][0]
flash_config = xml.etree.ElementTree.fromstring(flash_xml.encode('utf-8'))
playlist_json = self._search_regex(r'playlist\': \'(\[.*?\])\'', webpage, u'playlist').replace(r'\"','"')
playlist = json.loads(playlist_json)
videos = []
ns = '{http://tempuri.org/XMLSchema.xsd}'
xpath = '%(ns)sPlaylist/%(ns)sItems/%(ns)sItem' % {'ns': ns}
webpage_description = self._og_search_description(webpage)
for (i, (item, info)) in enumerate(zip(flash_config.findall(xpath), playlist), 1):
# Get best quality url
rtmp_url = None
for q in ['Q6A', 'Q4A', 'Q1A']:
video_url = find_xpath_attr(item, '%sVideoUrl' % ns, 'quality', q)
if video_url is not None:
rtmp_url = video_url.text
break
if rtmp_url is None:
raise ExtractorError(u'Couldn\'t get video url: %s' % info['id'])
description = self._html_search_regex(
r'id="playlist_entry_%s".*?<p>(.*?)</p>' % i, webpage,
u'description', default=webpage_description, flags=re.DOTALL)
videos.append({
'_type': 'video',
'id': info['id'],
'title': info['title'],
'url': rtmp_url,
'ext': 'flv',
'description': description,
})
return videos

+ 90
- 0
youtube_dl/extractor/sohu.py View File

@ -0,0 +1,90 @@
# encoding: utf-8
import json
import re
from .common import InfoExtractor
from ..utils import ExtractorError
class SohuIE(InfoExtractor):
_VALID_URL = r'https?://tv\.sohu\.com/\d+?/n(?P<id>\d+)\.shtml.*?'
_TEST = {
u'url': u'http://tv.sohu.com/20130724/n382479172.shtml#super',
u'file': u'382479172.mp4',
u'md5': u'bde8d9a6ffd82c63a1eefaef4eeefec7',
u'info_dict': {
u'title': u'MV:Far East Movement《The Illest》',
},
}
def _real_extract(self, url):
def _fetch_data(vid_id):
base_data_url = u'http://hot.vrs.sohu.com/vrs_flash.action?vid='
data_url = base_data_url + str(vid_id)
data_json = self._download_webpage(
data_url, video_id,
note=u'Downloading JSON data for ' + str(vid_id))
return json.loads(data_json)
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
raw_title = self._html_search_regex(r'(?s)<title>(.+?)</title>',
webpage, u'video title')
title = raw_title.partition('-')[0].strip()
vid = self._html_search_regex(r'var vid="(\d+)"', webpage,
u'video path')
data = _fetch_data(vid)
QUALITIES = ('ori', 'super', 'high', 'nor')
vid_ids = [data['data'][q + 'Vid']
for q in QUALITIES
if data['data'][q + 'Vid'] != 0]
if not vid_ids:
raise ExtractorError(u'No formats available for this video')
# For now, we just pick the highest available quality
vid_id = vid_ids[-1]
format_data = data if vid == vid_id else _fetch_data(vid_id)
part_count = format_data['data']['totalBlocks']
allot = format_data['allot']
prot = format_data['prot']
clipsURL = format_data['data']['clipsURL']
su = format_data['data']['su']
playlist = []
for i in range(part_count):
part_url = ('http://%s/?prot=%s&file=%s&new=%s' %
(allot, prot, clipsURL[i], su[i]))
part_str = self._download_webpage(
part_url, video_id,
note=u'Downloading part %d of %d' % (i+1, part_count))
part_info = part_str.split('|')
video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
video_info = {
'id': '%s_part%02d' % (video_id, i + 1),
'title': title,
'url': video_url,
'ext': 'mp4',
}
playlist.append(video_info)
if len(playlist) == 1:
info = playlist[0]
info['id'] = video_id
else:
info = {
'_type': 'playlist',
'entries': playlist,
'id': video_id,
}
return info

+ 0
- 3
youtube_dl/extractor/trilulilu.py View File

@ -3,9 +3,6 @@ import re
import xml.etree.ElementTree import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import (
ExtractorError,
)
class TriluliluIE(InfoExtractor): class TriluliluIE(InfoExtractor):


+ 1
- 1
youtube_dl/extractor/unistra.py View File

@ -11,7 +11,7 @@ class UnistraIE(InfoExtractor):
u'md5': u'736f605cfdc96724d55bb543ab3ced24', u'md5': u'736f605cfdc96724d55bb543ab3ced24',
u'info_dict': { u'info_dict': {
u'title': u'M!ss Yella', u'title': u'M!ss Yella',
u'description': u'md5:75e8439a3e2981cd5d4b6db232e8fdfc',
u'description': u'md5:104892c71bd48e55d70b902736b81bbf',
}, },
} }


+ 0
- 1
youtube_dl/extractor/wat.py View File

@ -6,7 +6,6 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_urllib_parse,
unified_strdate, unified_strdate,
) )


+ 14
- 4
youtube_dl/extractor/youporn.py View File

@ -12,14 +12,16 @@ from ..utils import (
unescapeHTML, unescapeHTML,
unified_strdate, unified_strdate,
) )
from ..aes import (
aes_decrypt_text
)
class YouPornIE(InfoExtractor): class YouPornIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)' _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'
_TEST = { _TEST = {
u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
u'file': u'505835.mp4', u'file': u'505835.mp4',
u'md5': u'c37ddbaaa39058c76a7e86c6813423c1',
u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89',
u'info_dict': { u'info_dict': {
u"upload_date": u"20101221", u"upload_date": u"20101221",
u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?", u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?",
@ -75,7 +77,15 @@ class YouPornIE(InfoExtractor):
# Get all of the links from the page # Get all of the links from the page
LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">' LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
links = re.findall(LINK_RE, download_list_html) links = re.findall(LINK_RE, download_list_html)
if(len(links) == 0):
# Get link of hd video if available
mobj = re.search(r'var encryptedQuality720URL = \'(?P<encrypted_video_url>[a-zA-Z0-9+/]+={0,2})\';', webpage)
if mobj != None:
encrypted_video_url = mobj.group(u'encrypted_video_url')
video_url = aes_decrypt_text(encrypted_video_url, video_title, 32).decode('utf-8')
links = [video_url] + links
if not links:
raise ExtractorError(u'ERROR: no known formats available for video') raise ExtractorError(u'ERROR: no known formats available for video')
self.to_screen(u'Links found: %d' % len(links)) self.to_screen(u'Links found: %d' % len(links))
@ -112,7 +122,7 @@ class YouPornIE(InfoExtractor):
self._print_formats(formats) self._print_formats(formats)
return return
req_format = self._downloader.params.get('format', None)
req_format = self._downloader.params.get('format', 'best')
self.to_screen(u'Format: %s' % req_format) self.to_screen(u'Format: %s' % req_format)
if req_format is None or req_format == 'best': if req_format is None or req_format == 'best':


+ 4
- 4
youtube_dl/extractor/youtube.py View File

@ -335,7 +335,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
u"info_dict": { u"info_dict": {
u"upload_date": u"20120506", u"upload_date": u"20120506",
u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]", u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c",
u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
u"uploader": u"Icona Pop", u"uploader": u"Icona Pop",
u"uploader_id": u"IconaPop" u"uploader_id": u"IconaPop"
} }
@ -423,11 +423,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
elif len(s) == 87: elif len(s) == 87:
return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:] return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
elif len(s) == 86: elif len(s) == 86:
return s[5:40] + s[3] + s[41:48] + s[0] + s[49:86]
return s[81:73:-1] + s[84] + s[72:58:-1] + s[0] + s[57:35:-1] + s[85] + s[34:0:-1]
elif len(s) == 85: elif len(s) == 85:
return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27] return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
elif len(s) == 84: elif len(s) == 84:
return s[5:40] + s[3] + s[41:48] + s[0] + s[49:84]
return s[81:36:-1] + s[0] + s[35:2:-1]
elif len(s) == 83: elif len(s) == 83:
return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0] return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
elif len(s) == 82: elif len(s) == 82:
@ -1161,7 +1161,7 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
IE_NAME = u'youtube:favorites' IE_NAME = u'youtube:favorites'
IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)' IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
_VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:o?rites)?'
_VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
_LOGIN_REQUIRED = True _LOGIN_REQUIRED = True
def _real_extract(self, url): def _real_extract(self, url):


+ 33
- 4
youtube_dl/utils.py View File

@ -1,19 +1,20 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import datetime
import email.utils
import errno import errno
import gzip import gzip
import io import io
import json import json
import locale import locale
import os import os
import platform
import re import re
import socket
import sys import sys
import traceback import traceback
import zlib import zlib
import email.utils
import socket
import datetime
try: try:
import urllib.request as compat_urllib_request import urllib.request as compat_urllib_request
@ -212,7 +213,7 @@ if sys.version_info >= (2,7):
def find_xpath_attr(node, xpath, key, val): def find_xpath_attr(node, xpath, key, val):
""" Find the xpath xpath[@key=val] """ """ Find the xpath xpath[@key=val] """
assert re.match(r'^[a-zA-Z]+$', key) assert re.match(r'^[a-zA-Z]+$', key)
assert re.match(r'^[a-zA-Z@\s]*$', val)
assert re.match(r'^[a-zA-Z0-9@\s]*$', val)
expr = xpath + u"[@%s='%s']" % (key, val) expr = xpath + u"[@%s='%s']" % (key, val)
return node.find(expr) return node.find(expr)
else: else:
@ -732,3 +733,31 @@ class DateRange(object):
return self.start <= date <= self.end return self.start <= date <= self.end
def __str__(self): def __str__(self):
return '%s - %s' % ( self.start.isoformat(), self.end.isoformat()) return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
def platform_name():
""" Returns the platform name as a compat_str """
res = platform.platform()
if isinstance(res, bytes):
res = res.decode(preferredencoding())
assert isinstance(res, compat_str)
return res
def bytes_to_intlist(bs):
if not bs:
return []
if isinstance(bs[0], int): # Python 3
return list(bs)
else:
return [ord(c) for c in bs]
def intlist_to_bytes(xs):
if not xs:
return b''
if isinstance(chr(0), bytes): # Python 2
return ''.join([chr(x) for x in xs])
else:
return bytes(xs)

+ 1
- 1
youtube_dl/version.py View File

@ -1,2 +1,2 @@
__version__ = '2013.08.28'
__version__ = '2013.08.30'

Loading…
Cancel
Save