From 56c7366547462ecec0536df58971249a8a870ddd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 8 Jul 2013 15:14:27 +0200 Subject: [PATCH 01/36] YoutubeIE: reuse instances of InfoExtractors (closes #998) When a IE is added to the list, it's also added to a dictionary. When a IE is requested it first looks in the dictionary and if there's no instance it will create a new one. That way _real_initialize is only called once for each IE, saving time if it needs to login for example. --- youtube_dl/YoutubeDL.py | 18 +++++++++++++++--- youtube_dl/extractor/common.py | 5 +++++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index d3281fed2..cd3d6ea7b 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -97,6 +97,7 @@ class YoutubeDL(object): def __init__(self, params): """Create a FileDownloader object with the given options.""" self._ies = [] + self._ies_instances = {} self._pps = [] self._progress_hooks = [] self._download_retcode = 0 @@ -111,8 +112,21 @@ class YoutubeDL(object): def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" self._ies.append(ie) + self._ies_instances[ie.ie_key()] = ie ie.set_downloader(self) + def get_info_extractor(self, ie_key): + """ + Get an instance of an IE with name ie_key, it will try to get one from + the _ies list, if there's no instance it will create a new one and add + it to the extractor list. + """ + ie = self._ies_instances.get(ie_key) + if ie is None: + ie = get_info_extractor(ie_key)() + self.add_info_extractor(ie) + return ie + def add_default_info_extractors(self): """ Add the InfoExtractors returned by gen_extractors to the end of the list @@ -294,9 +308,7 @@ class YoutubeDL(object): ''' if ie_key: - ie = get_info_extractor(ie_key)() - ie.set_downloader(self) - ies = [ie] + ies = [self.get_info_extractor(ie_key)] else: ies = self._ies diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 1d98222ce..236c7b12c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -106,6 +106,11 @@ class InfoExtractor(object): """Real extraction process. 
Redefine in subclasses.""" pass + @classmethod + def ie_key(cls): + """A string for getting the InfoExtractor with get_info_extractor""" + return cls.__name__[:-2] + @property def IE_NAME(self): return type(self).__name__[:-2] From 6d3a7d03e14fcbc704bf30d305fb95c5829e55a6 Mon Sep 17 00:00:00 2001 From: huohuarong Date: Fri, 2 Aug 2013 15:26:11 +0800 Subject: [PATCH 02/36] fix bug: kankan extractor not support http://vod.kankan.com/v/70/70309.shtml --- youtube_dl/extractor/kankan.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/kankan.py b/youtube_dl/extractor/kankan.py index 8537ba584..445d46501 100644 --- a/youtube_dl/extractor/kankan.py +++ b/youtube_dl/extractor/kankan.py @@ -21,8 +21,10 @@ class KankanIE(InfoExtractor): video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - title = self._search_regex(r'G_TITLE=[\'"](.+?)[\'"]', webpage, u'video title') - gcid = self._search_regex(r'lurl:[\'"]http://.+?/.+?/(.+?)/', webpage, u'gcid') + title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, u'video title') + surls = re.search(r'surls:\[\'.+?\'\]|lurl:\'.+?\.flv\'', webpage).group(0) + gcids = re.findall(r"http://.+?/.+?/(.+?)/", surls) + gcid = gcids[-1] video_info_page = self._download_webpage('http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid, video_id, u'Downloading video url info') From 6624a2b07dafad4de895b4e84f4595214817518d Mon Sep 17 00:00:00 2001 From: huohuarong Date: Fri, 2 Aug 2013 17:58:46 +0800 Subject: [PATCH 03/36] add an extractor for tv.sohu.com --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/sohu.py | 97 ++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+) create mode 100644 youtube_dl/extractor/sohu.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c20172a53..3a08d676f 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -55,6 +55,7 @@ from .redtube import RedTubeIE from .ringtv import RingTVIE from .roxwel import RoxwelIE from .sina import SinaIE +from .sohu import SohuIE from .soundcloud import SoundcloudIE, SoundcloudSetIE from .spiegel import SpiegelIE from .stanfordoc import StanfordOpenClassroomIE diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py new file mode 100644 index 000000000..830814221 --- /dev/null +++ b/youtube_dl/extractor/sohu.py @@ -0,0 +1,97 @@ +# encoding: utf-8 + +import re +import json +import time +import logging +import urllib2 + +from .common import InfoExtractor +from ..utils import compat_urllib_request + + +class SohuIE(InfoExtractor): + _VALID_URL = r'https?://tv\.sohu\.com/\d+?/n(?P\d+)\.shtml.*?' + + _TEST = { + u'url': u'http://tv.sohu.com/20130724/n382479172.shtml#super', + u'file': u'382479172.flv', + u'md5': u'cc84eed6b6fbf0f2f9a8d3cb9da1939b', + u'info_dict': { + u'title': u'The Illest - Far East Movement Riff Raff', + }, + } + + def _clearn_html(self, string): + tags = re.findall(r'<.+?>', string) + for t in tags: + string = string.replace(t, ' ') + for i in range(2): + spaces = re.findall(r'\s+', string) + for s in spaces: + string = string.replace(s, ' ') + string = string.strip() + return string + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + pattern = r'
\n*?(.+?)\n*?
' + compiled = re.compile(pattern, re.DOTALL) + title = self._search_regex(compiled, webpage, u'video title').strip('\t\n') + title = self._clearn_html(title) + pattern = re.compile(r'var vid="(\d+)"') + result = re.search(pattern, webpage) + if not result: + logging.info('[Sohu] could not get vid') + return None + vid = result.group(1) + logging.info('vid: %s' % vid) + base_url_1 = 'http://hot.vrs.sohu.com/vrs_flash.action?vid=' + url_1 = base_url_1 + vid + logging.info('json url: %s' % url_1) + json_1 = json.loads(urllib2.urlopen(url_1).read()) + # get the highest definition video vid and json infomation. + vids = [] + qualities = ('oriVid', 'superVid', 'highVid', 'norVid') + for vid_name in qualities: + vids.append(json_1['data'][vid_name]) + clearest_vid = 0 + for i, v in enumerate(vids): + if v != 0: + clearest_vid = v + logging.info('quality definition: %s' % qualities[i][:-3]) + break + if not clearest_vid: + logging.warning('could not find valid clearest_vid') + return None + if vid != clearest_vid: + url_1 = '%s%d' % (base_url_1, clearest_vid) + logging.info('highest definition json url: %s' % url_1) + json_1 = json.loads(urllib2.urlopen(url_1).read()) + allot = json_1['allot'] + prot = json_1['prot'] + clipsURL = json_1['data']['clipsURL'] + su = json_1['data']['su'] + num_of_parts = json_1['data']['totalBlocks'] + logging.info('Total parts: %d' % num_of_parts) + base_url_3 = 'http://allot/?prot=prot&file=clipsURL[i]&new=su[i]' + files_info = [] + for i in range(num_of_parts): + middle_url = 'http://%s/?prot=%s&file=%s&new=%s' % (allot, prot, clipsURL[i], su[i]) + logging.info('middle url part %d: %s' % (i, middle_url)) + middle_info = urllib2.urlopen(middle_url).read().split('|') + middle_part_1 = middle_info[0] + download_url = '%s%s?key=%s' % (middle_info[0], su[i], middle_info[3]) + + info = { + 'id': '%s_part%02d' % (video_id, i + 1), + 'title': title, + 'url': download_url, + 'ext': 'mp4', + } + files_info.append(info) + time.sleep(1) + + return files_info From 4ec929dc9b55a2588b4a27e64871c5bfa900bf37 Mon Sep 17 00:00:00 2001 From: huohuarong Date: Sat, 3 Aug 2013 10:29:58 +0800 Subject: [PATCH 04/36] use ..utils/clean_html() --- youtube_dl/extractor/sohu.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py index 830814221..cf0ab5478 100644 --- a/youtube_dl/extractor/sohu.py +++ b/youtube_dl/extractor/sohu.py @@ -7,7 +7,7 @@ import logging import urllib2 from .common import InfoExtractor -from ..utils import compat_urllib_request +from ..utils import compat_urllib_request, clean_html class SohuIE(InfoExtractor): @@ -22,16 +22,6 @@ class SohuIE(InfoExtractor): }, } - def _clearn_html(self, string): - tags = re.findall(r'<.+?>', string) - for t in tags: - string = string.replace(t, ' ') - for i in range(2): - spaces = re.findall(r'\s+', string) - for s in spaces: - string = string.replace(s, ' ') - string = string.strip() - return string def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -40,7 +30,7 @@ class SohuIE(InfoExtractor): pattern = r'
\n*?(.+?)\n*?
' compiled = re.compile(pattern, re.DOTALL) title = self._search_regex(compiled, webpage, u'video title').strip('\t\n') - title = self._clearn_html(title) + title = clean_html(title) pattern = re.compile(r'var vid="(\d+)"') result = re.search(pattern, webpage) if not result: @@ -93,5 +83,8 @@ class SohuIE(InfoExtractor): } files_info.append(info) time.sleep(1) - + if num_of_parts == 1: + info = files_info[0] + info['id'] = video_id + return info return files_info From b5a6d408181c118bf51382f486a2492643ed74ec Mon Sep 17 00:00:00 2001 From: huohuarong Date: Mon, 5 Aug 2013 22:51:54 +0800 Subject: [PATCH 05/36] fix parse title bug --- youtube_dl/extractor/sohu.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py index cf0ab5478..cd049b6f0 100644 --- a/youtube_dl/extractor/sohu.py +++ b/youtube_dl/extractor/sohu.py @@ -27,10 +27,10 @@ class SohuIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - pattern = r'
\n*?(.+?)\n*?
' + pattern = r'(.+?)' compiled = re.compile(pattern, re.DOTALL) - title = self._search_regex(compiled, webpage, u'video title').strip('\t\n') - title = clean_html(title) + title = self._search_regex(compiled, webpage, u'video title') + title = clean_html(title).split('-')[0].strip() pattern = re.compile(r'var vid="(\d+)"') result = re.search(pattern, webpage) if not result: @@ -41,7 +41,8 @@ class SohuIE(InfoExtractor): base_url_1 = 'http://hot.vrs.sohu.com/vrs_flash.action?vid=' url_1 = base_url_1 + vid logging.info('json url: %s' % url_1) - json_1 = json.loads(urllib2.urlopen(url_1).read()) + webpage = self._download_webpage(url_1, vid) + json_1 = json.loads(webpage) # get the highest definition video vid and json infomation. vids = [] qualities = ('oriVid', 'superVid', 'highVid', 'norVid') From d5b00ee6e0ba70fd5d87752e8772fc1c39e4bd59 Mon Sep 17 00:00:00 2001 From: huohuarong Date: Tue, 6 Aug 2013 10:26:57 +0800 Subject: [PATCH 06/36] improve sohu extractor --- youtube_dl/extractor/sohu.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py index cd049b6f0..24fc3a5d7 100644 --- a/youtube_dl/extractor/sohu.py +++ b/youtube_dl/extractor/sohu.py @@ -31,6 +31,7 @@ class SohuIE(InfoExtractor): compiled = re.compile(pattern, re.DOTALL) title = self._search_regex(compiled, webpage, u'video title') title = clean_html(title).split('-')[0].strip() + self.to_screen('Title: %s' % title) pattern = re.compile(r'var vid="(\d+)"') result = re.search(pattern, webpage) if not result: @@ -70,6 +71,7 @@ class SohuIE(InfoExtractor): base_url_3 = 'http://allot/?prot=prot&file=clipsURL[i]&new=su[i]' files_info = [] for i in range(num_of_parts): + self.to_screen('Geting json infomation of part %s/%s' % (i + 1, num_of_parts)) middle_url = 'http://%s/?prot=%s&file=%s&new=%s' % (allot, prot, clipsURL[i], su[i]) logging.info('middle url part %d: %s' % (i, middle_url)) middle_info = urllib2.urlopen(middle_url).read().split('|') From f3bcebb1d2ebf6a69f06b72e1e365bc76970e1e2 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Fri, 9 Aug 2013 18:36:01 +0200 Subject: [PATCH 07/36] add an aes implementation --- youtube_dl/aes.py | 200 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 200 insertions(+) create mode 100644 youtube_dl/aes.py diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py new file mode 100644 index 000000000..2fa9238e3 --- /dev/null +++ b/youtube_dl/aes.py @@ -0,0 +1,200 @@ +__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_decrypt_text'] + +import base64 +from math import ceil + +BLOCK_SIZE_BYTES = 16 + +def aes_ctr_decrypt(data, key, counter): + """ + Decrypt with aes in counter mode + + @param {int[]} data cipher + @param {int[]} key 16/24/32-Byte cipher key + @param {instance} counter Instance whose next_value function (@returns {int[]} 16-Byte block) + returns the next counter block + @returns {int[]} decrypted data + """ + expanded_key = key_expansion(key) + block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + + decrypted_data=[] + for i in range(block_count): + counter_block = counter.next_value() + block = data[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES] + block += [0]*(BLOCK_SIZE_BYTES - len(block)) + + cipher_counter_block = aes_encrypt(counter_block, expanded_key) + decrypted_data += xor(block, cipher_counter_block) + decrypted_data = decrypted_data[:len(data)] + + return decrypted_data + +def key_expansion(data): + """ + Generate key schedule + + @param {int[]} data 16/24/32-Byte cipher key + 
@returns {int[]} 176/208/240-Byte expanded key + """ + data = data[:] # copy + rcon_iteration = 1 + key_size_bytes = len(data) + expanded_key_size_bytes = (key_size_bytes/4 + 7) * BLOCK_SIZE_BYTES + + while len(data) < expanded_key_size_bytes: + temp = data[-4:] + temp = key_schedule_core(temp, rcon_iteration) + rcon_iteration += 1 + data += xor(temp, data[-key_size_bytes : 4-key_size_bytes]) + + for _ in range(3): + temp = data[-4:] + data += xor(temp, data[-key_size_bytes : 4-key_size_bytes]) + + if key_size_bytes == 32: + temp = data[-4:] + temp = sub_bytes(temp) + data += xor(temp, data[-key_size_bytes : 4-key_size_bytes]) + + for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0): + temp = data[-4:] + data += xor(temp, data[-key_size_bytes : 4-key_size_bytes]) + data = data[:expanded_key_size_bytes] + + return data + +def aes_encrypt(data, expanded_key): + """ + Encrypt one block with aes + + @param {int[]} data 16-Byte state + @param {int[]} expanded_key 176/208/240-Byte expanded key + @returns {int[]} 16-Byte cipher + """ + rounds = len(expanded_key) / BLOCK_SIZE_BYTES - 1 + + data = xor(data, expanded_key[:BLOCK_SIZE_BYTES]) + for i in range(1, rounds+1): + data = sub_bytes(data) + data = shift_rows(data) + if i != rounds: + data = mix_columns(data) + data = xor(data, expanded_key[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES]) + + return data + +def aes_decrypt_text(data, password, key_size_bytes): + """ + Decrypt text + - The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter + - The cipher key is retrieved by encrypting the first 16 Byte of 'password' + with the first 'key_size_bytes' Bytes from 'password' (if necessary filled with 0's) + - Mode of operation is 'counter' + + @param {str} data Base64 encoded string + @param {str,unicode} password Password (will be encoded with utf-8) + @param {int} key_size_bytes Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit + @returns {str} Decrypted data + """ + NONCE_LENGTH_BYTES = 8 + + data = map(lambda c: ord(c), base64.b64decode(data)) + password = map(lambda c: ord(c), password.encode('utf-8')) + + key = password[:key_size_bytes] + [0]*(key_size_bytes - len(password)) + key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes / BLOCK_SIZE_BYTES) + + nonce = data[:NONCE_LENGTH_BYTES] + cipher = data[NONCE_LENGTH_BYTES:] + + class Counter: + __value = nonce + [0]*(BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES) + def next_value(self): + temp = self.__value + self.__value = inc(self.__value) + return temp + + decrypted_data = aes_ctr_decrypt(cipher, key, Counter()) + plaintext = ''.join(map(lambda x: chr(x), decrypted_data)) + + return plaintext + +RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36) +SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, + 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, + 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15, + 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75, + 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, + 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF, + 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8, + 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 
0x21, 0x10, 0xFF, 0xF3, 0xD2, + 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73, + 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB, + 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, + 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08, + 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A, + 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, + 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, + 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16) +MIX_COLUMN_MATRIX = ((2,3,1,1), + (1,2,3,1), + (1,1,2,3), + (3,1,1,2)) + +def sub_bytes(data): + return map(lambda x: SBOX[x], data) + +def rotate(data): + return data[1:] + [data[0]] + +def key_schedule_core(data, rcon_iteration): + data = rotate(data) + data = sub_bytes(data) + data[0] = data[0] ^ RCON[rcon_iteration] + + return data + +def xor(data1, data2): + return map(lambda (x,y): x^y, zip(data1, data2)) + +def mix_column(data): + data_mixed = [] + for row in range(4): + mixed = 0 + for column in range(4): + addend = data[column] + if MIX_COLUMN_MATRIX[row][column] in (2,3): + addend <<= 1 + if addend > 0xff: + addend &= 0xff + addend ^= 0x1b + if MIX_COLUMN_MATRIX[row][column] == 3: + addend ^= data[column] + mixed ^= addend & 0xff + data_mixed.append(mixed) + return data_mixed + +def mix_columns(data): + data_mixed = [] + for i in range(4): + column = data[i*4 : (i+1)*4] + data_mixed += mix_column(column) + return data_mixed + +def shift_rows(data): + data_shifted = [] + for column in range(4): + for row in range(4): + data_shifted.append( data[((column + row) & 0b11) * 4 + row] ) + return data_shifted + +def inc(data): + data = data[:] # copy + for i in range(len(data)-1,-1,-1): + if data[i] == 255: + data[i] = 0 + else: + data[i] = data[i] + 1 + break + return data From 97b3656c2e37e45d556816b8f1f15c20d14f1acd Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Fri, 9 Aug 2013 18:37:33 +0200 Subject: [PATCH 08/36] YoupornIE: Add support for hd videos and update Test --- youtube_dl/extractor/youporn.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index d1156bf42..cc9c37027 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -12,14 +12,16 @@ from ..utils import ( unescapeHTML, unified_strdate, ) - +from ..aes import ( + aes_decrypt_text +) class YouPornIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P[0-9]+)/(?P[^/]+)' _TEST = { u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', u'file': u'505835.mp4', - u'md5': u'c37ddbaaa39058c76a7e86c6813423c1', + u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89', u'info_dict': { u"upload_date": u"20101221", u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?", @@ -75,6 +77,14 @@ class YouPornIE(InfoExtractor): # Get all of the links from the page LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">' links = re.findall(LINK_RE, download_list_html) + + # Get link of hd video + encrypted_video_url = self._html_search_regex(r'var encryptedURL = \'(?P<encrypted_video_url>[a-zA-Z0-9+/]+={0,2})\';', + webpage, 
u'encrypted_video_url') + video_url = unicode( aes_decrypt_text(encrypted_video_url, video_title, 32), 'utf-8') + if video_url.split('/')[6].split('_')[0] == u'720p': # only add if 720p to avoid duplicates + links = [video_url] + links + if(len(links) == 0): raise ExtractorError(u'ERROR: no known formats available for video') From a5caba1eb02665cdc982d6be4a933aafd79243de Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 28 Aug 2013 12:47:27 +0200 Subject: [PATCH 09/36] [generic] simply use urljoin --- youtube_dl/extractor/generic.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index bfc9bff49..dc4dea4ad 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -7,8 +7,8 @@ from .common import InfoExtractor from ..utils import ( compat_urllib_error, compat_urllib_parse, - compat_urllib_parse_urlparse, compat_urllib_request, + compat_urlparse, ExtractorError, ) @@ -163,15 +163,7 @@ class GenericIE(InfoExtractor): raise ExtractorError(u'Invalid URL: %s' % url) video_url = compat_urllib_parse.unquote(mobj.group(1)) - if video_url.startswith('//'): - video_url = compat_urllib_parse_urlparse(url).scheme + ':' + video_url - if '://' not in video_url: - up = compat_urllib_parse_urlparse(url) - if video_url.startswith('/'): - video_url = up.scheme + '://' + up.netloc + video_url - else: # relative path - video_url = (up.scheme + '://' + up.netloc + - up.path.rpartition('/')[0] + '/' + video_url) + video_url = compat_urlparse.urljoin(url, video_url) video_id = os.path.basename(video_url) # here's a fun little line of code for you: From ce6a696e4d964aeb27de46a31a899b28d7ca7754 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 28 Aug 2013 12:47:38 +0200 Subject: [PATCH 10/36] Remove unused imports --- youtube_dl/extractor/addanime.py | 1 - youtube_dl/extractor/appletrailers.py | 1 - youtube_dl/extractor/trilulilu.py | 3 --- youtube_dl/extractor/wat.py | 1 - 4 files changed, 6 deletions(-) diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py index 46db8262f..82a785a19 100644 --- a/youtube_dl/extractor/addanime.py +++ b/youtube_dl/extractor/addanime.py @@ -1,4 +1,3 @@ -import ast import re from .common import InfoExtractor diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index b3bdb2955..8b191c196 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -4,7 +4,6 @@ import xml.etree.ElementTree from .common import InfoExtractor from ..utils import ( determine_ext, - ExtractorError, ) diff --git a/youtube_dl/extractor/trilulilu.py b/youtube_dl/extractor/trilulilu.py index 1c46156c7..f278951ba 100644 --- a/youtube_dl/extractor/trilulilu.py +++ b/youtube_dl/extractor/trilulilu.py @@ -3,9 +3,6 @@ import re import xml.etree.ElementTree from .common import InfoExtractor -from ..utils import ( - ExtractorError, -) class TriluliluIE(InfoExtractor): diff --git a/youtube_dl/extractor/wat.py b/youtube_dl/extractor/wat.py index 7d228edac..29c25f0e3 100644 --- a/youtube_dl/extractor/wat.py +++ b/youtube_dl/extractor/wat.py @@ -6,7 +6,6 @@ import re from .common import InfoExtractor from ..utils import ( - compat_urllib_parse, unified_strdate, ) From 67b22dd03686d9e360d87a7751de74b321d3f231 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: 
Wed, 28 Aug 2013 12:51:22 +0200 Subject: [PATCH 11/36] Add extractors for video.mit.edu and techtv.mit.edu (closes #1327) video.mit.edu just embeds the videos from techtv.mit.edu --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/mit.py | 76 ++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 youtube_dl/extractor/mit.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c76b99a81..21e9e5d37 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -50,6 +50,7 @@ from .keek import KeekIE from .liveleak import LiveLeakIE from .livestream import LivestreamIE from .metacafe import MetacafeIE +from .mit import TechTVMITIE, MITIE from .mixcloud import MixcloudIE from .mtv import MTVIE from .muzu import MuzuTVIE diff --git a/youtube_dl/extractor/mit.py b/youtube_dl/extractor/mit.py new file mode 100644 index 000000000..d09d03e36 --- /dev/null +++ b/youtube_dl/extractor/mit.py @@ -0,0 +1,76 @@ +import re +import json + +from .common import InfoExtractor +from ..utils import ( + clean_html, + get_element_by_id, +) + + +class TechTVMITIE(InfoExtractor): + IE_NAME = u'techtv.mit.edu' + _VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)' + + _TEST = { + u'url': u'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set', + u'file': u'25418.mp4', + u'md5': u'1f8cb3e170d41fd74add04d3c9330e5f', + u'info_dict': { + u'title': u'MIT DNA Learning Center Set', + u'description': u'md5:82313335e8a8a3f243351ba55bc1b474', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + webpage = self._download_webpage( + 'http://techtv.mit.edu/videos/%s' % video_id, video_id) + embed_page = self._download_webpage( + 'http://techtv.mit.edu/embeds/%s/' % video_id, video_id, + note=u'Downloading embed page') + + base_url = self._search_regex(r'ipadUrl: \'(.+?cloudfront.net/)', + embed_page, u'base url') + formats_json = self._search_regex(r'bitrates: (\[.+?\])', embed_page, + u'video formats') + formats = json.loads(formats_json) + formats = sorted(formats, key=lambda f: f['bitrate']) + + title = get_element_by_id('edit-title', webpage) + description = clean_html(get_element_by_id('edit-description', webpage)) + thumbnail = self._search_regex(r'playlist:.*?url: \'(.+?)\'', + embed_page, u'thumbnail', flags=re.DOTALL) + + return {'id': video_id, + 'title': title, + 'url': base_url + formats[-1]['url'].replace('mp4:', ''), + 'ext': 'mp4', + 'description': description, + 'thumbnail': thumbnail, + } + + +class MITIE(TechTVMITIE): + IE_NAME = u'video.mit.edu' + _VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)' + + _TEST = { + u'url': u'http://video.mit.edu/watch/the-government-is-profiling-you-13222/', + u'file': u'21783.mp4', + u'md5': u'7db01d5ccc1895fc5010e9c9e13648da', + u'info_dict': { + u'title': u'The Government is Profiling You', + u'description': u'md5:ad5795fe1e1623b73620dbfd47df9afd', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + page_title = mobj.group('title') + webpage = self._download_webpage(url, page_title) + self.to_screen('%s: Extracting %s url' % (page_title, TechTVMITIE.IE_NAME)) + embed_url = self._search_regex(r'<iframe .*?src="(.+?)"', webpage, + u'embed url') + return self.url_result(embed_url, ie='TechTVMIT') From c496ca96e7639e5dd0020074b7ada18c2bd4ae3e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 28 Aug 2013 12:57:10 +0200 
Subject: [PATCH 12/36] Fix platform name in Python 2 with --verbose (Closes #1228) --- youtube_dl/__init__.py | 3 ++- youtube_dl/utils.py | 17 ++++++++++++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index bc6a6d180..b33a18a26 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -45,6 +45,7 @@ import sys import warnings import platform + from .utils import * from .update import update_self from .version import __version__ @@ -611,7 +612,7 @@ def _real_main(argv=None): sys.exc_clear() except: pass - sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform()) + u'\n') + sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n') sys.stderr.write(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n') ydl.add_default_info_extractors() diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index be788cf5a..64ab30910 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1,19 +1,20 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +import datetime +import email.utils import errno import gzip import io import json import locale import os +import platform import re +import socket import sys import traceback import zlib -import email.utils -import socket -import datetime try: import urllib.request as compat_urllib_request @@ -732,3 +733,13 @@ class DateRange(object): return self.start <= date <= self.end def __str__(self): return '%s - %s' % ( self.start.isoformat(), self.end.isoformat()) + + +def platform_name(): + """ Returns the platform name as a compat_str """ + res = platform.platform() + if isinstance(res, bytes): + res = res.decode(preferredencoding()) + + assert isinstance(res, compat_str) + return res From 8ae97d76eee1bf9e9098797db3be2d7b816196b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Wed, 28 Aug 2013 13:37:31 +0200 Subject: [PATCH 13/36] PostProcessingError holds the message in the 'msg' property, not in 'message' (fixes #1323) Causes DeprecationWarning: http://www.python.org/dev/peps/pep-0352/ --- youtube_dl/PostProcessor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py index c02ed7148..ae56d2082 100644 --- a/youtube_dl/PostProcessor.py +++ b/youtube_dl/PostProcessor.py @@ -137,7 +137,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): try: FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts) except FFmpegPostProcessorError as err: - raise AudioConversionError(err.message) + raise AudioConversionError(err.msg) def run(self, information): path = information['filepath'] @@ -207,7 +207,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): except: etype,e,tb = sys.exc_info() if isinstance(e, AudioConversionError): - msg = u'audio conversion failed: ' + e.message + msg = u'audio conversion failed: ' + e.msg else: msg = u'error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg') raise PostProcessingError(msg) From f143d86ad2fc0633d8e2da598cf21e73ff0f2872 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 28 Aug 2013 13:59:08 +0200 Subject: [PATCH 14/36] [sohu] Handle encoding, and fix tests --- youtube_dl/extractor/common.py | 9 ++- youtube_dl/extractor/sohu.py | 131 ++++++++++++++++----------------- 2 files changed, 71 insertions(+), 69 deletions(-) diff --git a/youtube_dl/extractor/common.py 
b/youtube_dl/extractor/common.py index 77a13aea5..a2986cebe 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -145,12 +145,17 @@ class InfoExtractor(object): urlh = self._request_webpage(url_or_request, video_id, note, errnote) content_type = urlh.headers.get('Content-Type', '') + webpage_bytes = urlh.read() m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type) if m: encoding = m.group(1) else: - encoding = 'utf-8' - webpage_bytes = urlh.read() + m = re.search(br'<meta[^>]+charset="?([^"]+)[ /">]', + webpage_bytes[:1024]) + if m: + encoding = m.group(1).decode('ascii') + else: + encoding = 'utf-8' if self._downloader.params.get('dump_intermediate_pages', False): try: url = url_or_request.get_full_url() diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py index 24fc3a5d7..77bb0a8dc 100644 --- a/youtube_dl/extractor/sohu.py +++ b/youtube_dl/extractor/sohu.py @@ -1,13 +1,10 @@ # encoding: utf-8 -import re import json -import time -import logging -import urllib2 +import re from .common import InfoExtractor -from ..utils import compat_urllib_request, clean_html +from ..utils import ExtractorError class SohuIE(InfoExtractor): @@ -15,79 +12,79 @@ class SohuIE(InfoExtractor): _TEST = { u'url': u'http://tv.sohu.com/20130724/n382479172.shtml#super', - u'file': u'382479172.flv', - u'md5': u'cc84eed6b6fbf0f2f9a8d3cb9da1939b', + u'file': u'382479172.mp4', + u'md5': u'bde8d9a6ffd82c63a1eefaef4eeefec7', u'info_dict': { - u'title': u'The Illest - Far East Movement Riff Raff', + u'title': u'MV:Far East Movement《The Illest》', }, } - def _real_extract(self, url): + + def _fetch_data(vid_id): + base_data_url = u'http://hot.vrs.sohu.com/vrs_flash.action?vid=' + data_url = base_data_url + str(vid_id) + data_json = self._download_webpage( + data_url, video_id, + note=u'Downloading JSON data for ' + str(vid_id)) + return json.loads(data_json) + mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) - pattern = r'<title>(.+?)' - compiled = re.compile(pattern, re.DOTALL) - title = self._search_regex(compiled, webpage, u'video title') - title = clean_html(title).split('-')[0].strip() - self.to_screen('Title: %s' % title) - pattern = re.compile(r'var vid="(\d+)"') - result = re.search(pattern, webpage) - if not result: - logging.info('[Sohu] could not get vid') - return None - vid = result.group(1) - logging.info('vid: %s' % vid) - base_url_1 = 'http://hot.vrs.sohu.com/vrs_flash.action?vid=' - url_1 = base_url_1 + vid - logging.info('json url: %s' % url_1) - webpage = self._download_webpage(url_1, vid) - json_1 = json.loads(webpage) - # get the highest definition video vid and json infomation. 
- vids = [] - qualities = ('oriVid', 'superVid', 'highVid', 'norVid') - for vid_name in qualities: - vids.append(json_1['data'][vid_name]) - clearest_vid = 0 - for i, v in enumerate(vids): - if v != 0: - clearest_vid = v - logging.info('quality definition: %s' % qualities[i][:-3]) - break - if not clearest_vid: - logging.warning('could not find valid clearest_vid') - return None - if vid != clearest_vid: - url_1 = '%s%d' % (base_url_1, clearest_vid) - logging.info('highest definition json url: %s' % url_1) - json_1 = json.loads(urllib2.urlopen(url_1).read()) - allot = json_1['allot'] - prot = json_1['prot'] - clipsURL = json_1['data']['clipsURL'] - su = json_1['data']['su'] - num_of_parts = json_1['data']['totalBlocks'] - logging.info('Total parts: %d' % num_of_parts) - base_url_3 = 'http://allot/?prot=prot&file=clipsURL[i]&new=su[i]' - files_info = [] - for i in range(num_of_parts): - self.to_screen('Geting json infomation of part %s/%s' % (i + 1, num_of_parts)) - middle_url = 'http://%s/?prot=%s&file=%s&new=%s' % (allot, prot, clipsURL[i], su[i]) - logging.info('middle url part %d: %s' % (i, middle_url)) - middle_info = urllib2.urlopen(middle_url).read().split('|') - middle_part_1 = middle_info[0] - download_url = '%s%s?key=%s' % (middle_info[0], su[i], middle_info[3]) + raw_title = self._html_search_regex(r'(?s)(.+?)', + webpage, u'video title') + title = raw_title.partition('-')[0].strip() - info = { + vid = self._html_search_regex(r'var vid="(\d+)"', webpage, + u'video path') + data = _fetch_data(vid) + + QUALITIES = ('ori', 'super', 'high', 'nor') + vid_ids = [data['data'][q + 'Vid'] + for q in QUALITIES + if data['data'][q + 'Vid'] != 0] + if not vid_ids: + raise ExtractorError(u'No formats available for this video') + + # For now, we just pick the highest available quality + vid_id = vid_ids[-1] + + format_data = data if vid == vid_id else _fetch_data(vid_id) + part_count = format_data['data']['totalBlocks'] + allot = format_data['allot'] + prot = format_data['prot'] + clipsURL = format_data['data']['clipsURL'] + su = format_data['data']['su'] + + playlist = [] + for i in range(part_count): + part_url = ('http://%s/?prot=%s&file=%s&new=%s' % + (allot, prot, clipsURL[i], su[i])) + part_str = self._download_webpage( + part_url, video_id, + note=u'Downloading part %d of %d' % (i+1, part_count)) + + part_info = part_str.split('|') + video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3]) + + video_info = { 'id': '%s_part%02d' % (video_id, i + 1), 'title': title, - 'url': download_url, + 'url': video_url, 'ext': 'mp4', } - files_info.append(info) - time.sleep(1) - if num_of_parts == 1: - info = files_info[0] + playlist.append(video_info) + + if len(playlist) == 1: + info = playlist[0] info['id'] = video_id - return info - return files_info + else: + info = { + '_type': 'playlist', + 'entries': playlist, + 'id': video_id, + } + + return info From 48ea9cea77e7ea24ee867027f03ca37dd1b935d8 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 28 Aug 2013 14:28:55 +0200 Subject: [PATCH 15/36] Allow changes to run under Python 3 --- youtube_dl/aes.py | 18 ++++++++++-------- youtube_dl/extractor/youporn.py | 12 ++++++++---- youtube_dl/utils.py | 10 ++++++++++ 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py index 2fa9238e3..278f8bb82 100644 --- a/youtube_dl/aes.py +++ b/youtube_dl/aes.py @@ -3,6 +3,8 @@ __all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_decrypt_text' import base64 from math import ceil +from 
.utils import bytes_to_intlist + BLOCK_SIZE_BYTES = 16 def aes_ctr_decrypt(data, key, counter): @@ -16,7 +18,7 @@ def aes_ctr_decrypt(data, key, counter): @returns {int[]} decrypted data """ expanded_key = key_expansion(key) - block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + block_count = int(ceil(float(len(data)) // BLOCK_SIZE_BYTES)) decrypted_data=[] for i in range(block_count): @@ -40,7 +42,7 @@ def key_expansion(data): data = data[:] # copy rcon_iteration = 1 key_size_bytes = len(data) - expanded_key_size_bytes = (key_size_bytes/4 + 7) * BLOCK_SIZE_BYTES + expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES while len(data) < expanded_key_size_bytes: temp = data[-4:] @@ -72,7 +74,7 @@ def aes_encrypt(data, expanded_key): @param {int[]} expanded_key 176/208/240-Byte expanded key @returns {int[]} 16-Byte cipher """ - rounds = len(expanded_key) / BLOCK_SIZE_BYTES - 1 + rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1 data = xor(data, expanded_key[:BLOCK_SIZE_BYTES]) for i in range(1, rounds+1): @@ -99,11 +101,11 @@ def aes_decrypt_text(data, password, key_size_bytes): """ NONCE_LENGTH_BYTES = 8 - data = map(lambda c: ord(c), base64.b64decode(data)) - password = map(lambda c: ord(c), password.encode('utf-8')) + data = bytes_to_intlist(base64.b64decode(data)) + password = bytes_to_intlist(password.encode('utf-8')) key = password[:key_size_bytes] + [0]*(key_size_bytes - len(password)) - key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes / BLOCK_SIZE_BYTES) + key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES) nonce = data[:NONCE_LENGTH_BYTES] cipher = data[NONCE_LENGTH_BYTES:] @@ -143,7 +145,7 @@ MIX_COLUMN_MATRIX = ((2,3,1,1), (3,1,1,2)) def sub_bytes(data): - return map(lambda x: SBOX[x], data) + return [SBOX[x] for x in data] def rotate(data): return data[1:] + [data[0]] @@ -156,7 +158,7 @@ def key_schedule_core(data, rcon_iteration): return data def xor(data1, data2): - return map(lambda (x,y): x^y, zip(data1, data2)) + return [x^y for x, y in zip(data1, data2)] def mix_column(data): data_mixed = [] diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index cc9c37027..19360e273 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -5,6 +5,7 @@ import sys from .common import InfoExtractor from ..utils import ( + compat_str, compat_urllib_parse_urlparse, compat_urllib_request, @@ -79,13 +80,16 @@ class YouPornIE(InfoExtractor): links = re.findall(LINK_RE, download_list_html) # Get link of hd video - encrypted_video_url = self._html_search_regex(r'var encryptedURL = \'(?P[a-zA-Z0-9+/]+={0,2})\';', + encrypted_video_url = self._html_search_regex( + r'var encrypted(?:Quality[0-9]+)?URL = \'(?P[a-zA-Z0-9+/]+={0,2})\';', webpage, u'encrypted_video_url') - video_url = unicode( aes_decrypt_text(encrypted_video_url, video_title, 32), 'utf-8') + video_url = aes_decrypt_text(encrypted_video_url, video_title, 32) + print(video_url) + assert isinstance(video_url, compat_str) if video_url.split('/')[6].split('_')[0] == u'720p': # only add if 720p to avoid duplicates links = [video_url] + links - if(len(links) == 0): + if not links: raise ExtractorError(u'ERROR: no known formats available for video') self.to_screen(u'Links found: %d' % len(links)) @@ -122,7 +126,7 @@ class YouPornIE(InfoExtractor): self._print_formats(formats) return - req_format = self._downloader.params.get('format', None) + req_format = 
self._downloader.params.get('format', 'best') self.to_screen(u'Format: %s' % req_format) if req_format is None or req_format == 'best': diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 59eeaf4a8..07b40da6c 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -708,3 +708,13 @@ class DateRange(object): return self.start <= date <= self.end def __str__(self): return '%s - %s' % ( self.start.isoformat(), self.end.isoformat()) + + +def bytes_to_intlist(bs): + if not bs: + return [] + if isinstance(bs[0], int): # Python 3 + return list(bs) + else: + return [ord(c) for c in bs] + From 920ef0779b6bcd5131e237e5c2ca28361f6d45d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 20 Jul 2013 12:49:24 +0200 Subject: [PATCH 16/36] Hide the password and username in verbose mode (closes #1089) --- youtube_dl/__init__.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index b33a18a26..431460c57 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -100,6 +100,16 @@ def parseOpts(overrideArguments=None): pass return None + def _hide_login_info(opts): + opts = list(opts) + for private_opt in ['-p', '--password', '-u', '--username']: + try: + i = opts.index(private_opt) + opts[i+1] = '' + except ValueError: + pass + return opts + max_width = 80 max_help_position = 80 @@ -358,9 +368,9 @@ def parseOpts(overrideArguments=None): argv = systemConf + userConf + commandLineConf opts, args = parser.parse_args(argv) if opts.verbose: - sys.stderr.write(u'[debug] System config: ' + repr(systemConf) + '\n') - sys.stderr.write(u'[debug] User config: ' + repr(userConf) + '\n') - sys.stderr.write(u'[debug] Command-line args: ' + repr(commandLineConf) + '\n') + sys.stderr.write(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n') + sys.stderr.write(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n') + sys.stderr.write(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n') return parser, opts, args From cba892fa1fd6a7f1278e637c338921c5ae236840 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Wed, 28 Aug 2013 15:59:07 +0200 Subject: [PATCH 17/36] Add intlist_to_bytes to utils.py --- youtube_dl/utils.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 07b40da6c..ee8df6a5b 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -718,3 +718,10 @@ def bytes_to_intlist(bs): else: return [ord(c) for c in bs] +def intlist_to_bytes(xs): + if not xs: + return b'' + if isinstance(chr(0), bytes): # Python 2 + return ''.join([chr(x) for x in xs]) + else: + return bytes(xs) From 6e74bc41ca07bda56107cfff9ceb98d6f8d28e53 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Wed, 28 Aug 2013 16:01:43 +0200 Subject: [PATCH 18/36] Fix division bug in aes.py --- youtube_dl/aes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py index 278f8bb82..9913d59a4 100644 --- a/youtube_dl/aes.py +++ b/youtube_dl/aes.py @@ -18,7 +18,7 @@ def aes_ctr_decrypt(data, key, counter): @returns {int[]} decrypted data """ expanded_key = key_expansion(key) - block_count = int(ceil(float(len(data)) // BLOCK_SIZE_BYTES)) + block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) decrypted_data=[] for i in range(block_count): From 0012690aae977d76e9162e2334989498366a8e94 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Wed, 28 Aug 2013 16:03:35 +0200 
Subject: [PATCH 19/36] Let aes_decrypt_text return bytes instead of unicode --- youtube_dl/aes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py index 9913d59a4..9a0c93fa6 100644 --- a/youtube_dl/aes.py +++ b/youtube_dl/aes.py @@ -3,7 +3,7 @@ __all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_decrypt_text' import base64 from math import ceil -from .utils import bytes_to_intlist +from .utils import bytes_to_intlist, intlist_to_bytes BLOCK_SIZE_BYTES = 16 @@ -118,7 +118,7 @@ def aes_decrypt_text(data, password, key_size_bytes): return temp decrypted_data = aes_ctr_decrypt(cipher, key, Counter()) - plaintext = ''.join(map(lambda x: chr(x), decrypted_data)) + plaintext = intlist_to_bytes(decrypted_data) return plaintext From 878e83c5a4c84c7abbf3484366e76fbe906c8947 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Wed, 28 Aug 2013 16:04:48 +0200 Subject: [PATCH 20/36] YoupornIE: Clean up extraction of hd video --- youtube_dl/extractor/youporn.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index 19360e273..c85fd4b5a 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -5,7 +5,6 @@ import sys from .common import InfoExtractor from ..utils import ( - compat_str, compat_urllib_parse_urlparse, compat_urllib_request, @@ -79,14 +78,11 @@ class YouPornIE(InfoExtractor): LINK_RE = r'(?s)' links = re.findall(LINK_RE, download_list_html) - # Get link of hd video - encrypted_video_url = self._html_search_regex( - r'var encrypted(?:Quality[0-9]+)?URL = \'(?P[a-zA-Z0-9+/]+={0,2})\';', - webpage, u'encrypted_video_url') - video_url = aes_decrypt_text(encrypted_video_url, video_title, 32) - print(video_url) - assert isinstance(video_url, compat_str) - if video_url.split('/')[6].split('_')[0] == u'720p': # only add if 720p to avoid duplicates + # Get link of hd video if available + mobj = re.search(r'var encryptedQuality720URL = \'(?P[a-zA-Z0-9+/]+={0,2})\';', webpage) + if mobj != None: + encrypted_video_url = mobj.group(u'encrypted_video_url') + video_url = aes_decrypt_text(encrypted_video_url, video_title, 32).decode('utf-8') links = [video_url] + links if not links: From 2891932bf0a01acc025246438f890dca57f91c6b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 28 Aug 2013 19:00:17 +0200 Subject: [PATCH 21/36] release 2013.08.28.1 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 0b56e48dc..2ba75258d 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.08.28' +__version__ = '2013.08.28.1' From b5ba7b9dcfed5ded96c841a0ebbbf12132de838f Mon Sep 17 00:00:00 2001 From: Jeff Smith Date: Wed, 28 Aug 2013 14:00:59 -0500 Subject: [PATCH 22/36] Fix MIT extractor for Python 2.6 The HTML for the MIT page does not parse cleanly for Python 2.6 due to script tags within an actual script element. The offending piece is inside a comment block, so removing all such comment blocks fixes the parsing. 
--- youtube_dl/extractor/mit.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/mit.py b/youtube_dl/extractor/mit.py index d09d03e36..52be9232f 100644 --- a/youtube_dl/extractor/mit.py +++ b/youtube_dl/extractor/mit.py @@ -25,23 +25,21 @@ class TechTVMITIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - webpage = self._download_webpage( + raw_page = self._download_webpage( 'http://techtv.mit.edu/videos/%s' % video_id, video_id) - embed_page = self._download_webpage( - 'http://techtv.mit.edu/embeds/%s/' % video_id, video_id, - note=u'Downloading embed page') + clean_page = re.compile(u'', re.S).sub(u'', raw_page) base_url = self._search_regex(r'ipadUrl: \'(.+?cloudfront.net/)', - embed_page, u'base url') - formats_json = self._search_regex(r'bitrates: (\[.+?\])', embed_page, + raw_page, u'base url') + formats_json = self._search_regex(r'bitrates: (\[.+?\])', raw_page, u'video formats') formats = json.loads(formats_json) formats = sorted(formats, key=lambda f: f['bitrate']) - title = get_element_by_id('edit-title', webpage) - description = clean_html(get_element_by_id('edit-description', webpage)) + title = get_element_by_id('edit-title', clean_page) + description = clean_html(get_element_by_id('edit-description', clean_page)) thumbnail = self._search_regex(r'playlist:.*?url: \'(.+?)\'', - embed_page, u'thumbnail', flags=re.DOTALL) + raw_page, u'thumbnail', flags=re.DOTALL) return {'id': video_id, 'title': title, From 0d75ae2ce313c5738b2bdd9602ab3cc15e78810d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 29 Aug 2013 11:35:15 +0200 Subject: [PATCH 23/36] Fix detection of the webpage charset if it's declared using ' instead of " Like in "" --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a2986cebe..77726ee24 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -150,7 +150,7 @@ class InfoExtractor(object): if m: encoding = m.group(1) else: - m = re.search(br']+charset="?([^"]+)[ /">]', + m = re.search(br']+charset=[\'"]?([^\'")]+)[ /\'">]', webpage_bytes[:1024]) if m: encoding = m.group(1).decode('ascii') From b7052e508787e49aa1e141a15f16284bbf1f634b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 29 Aug 2013 12:15:45 +0200 Subject: [PATCH 24/36] Also print the field that fails if it is a md5 checksum --- test/test_download.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/test/test_download.py b/test/test_download.py index 21cb2e694..23a66254d 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -127,12 +127,11 @@ def generator(test_case): info_dict = json.load(infof) for (info_field, expected) in tc.get('info_dict', {}).items(): if isinstance(expected, compat_str) and expected.startswith('md5:'): - self.assertEqual(expected, 'md5:' + md5(info_dict.get(info_field))) + got = 'md5:' + md5(info_dict.get(info_field)) else: got = info_dict.get(info_field) - self.assertEqual( - expected, got, - u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got)) + self.assertEqual(expected, got, + u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got)) # If checkable fields are missing from the test case, print the info_dict test_info_dict = dict((key, value 
if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value)) From c7bf7366bc0d4d1c4fc9c81ee5d33bf3c3512aa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 29 Aug 2013 13:41:59 +0200 Subject: [PATCH 25/36] Update descriptions checksum for some test for Unistra and Youtube --- youtube_dl/extractor/unistra.py | 2 +- youtube_dl/extractor/youtube.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/unistra.py b/youtube_dl/extractor/unistra.py index 5ba0a9061..516e18914 100644 --- a/youtube_dl/extractor/unistra.py +++ b/youtube_dl/extractor/unistra.py @@ -11,7 +11,7 @@ class UnistraIE(InfoExtractor): u'md5': u'736f605cfdc96724d55bb543ab3ced24', u'info_dict': { u'title': u'M!ss Yella', - u'description': u'md5:75e8439a3e2981cd5d4b6db232e8fdfc', + u'description': u'md5:104892c71bd48e55d70b902736b81bbf', }, } diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 8e486afd0..4038af256 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -335,7 +335,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): u"info_dict": { u"upload_date": u"20120506", u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]", - u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c", + u"description": u"md5:3e2666e0a55044490499ea45fe9037b7", u"uploader": u"Icona Pop", u"uploader_id": u"IconaPop" } From 545434670b7b055a7f0ff82b76ee7acbb3d07dd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 29 Aug 2013 19:16:07 +0200 Subject: [PATCH 26/36] Add an extractor for orf.at (closes #1346) Make find_xpath_attr also accept numbers in the value --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/orf.py | 65 ++++++++++++++++++++++++++++++++ youtube_dl/utils.py | 2 +- 3 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 youtube_dl/extractor/orf.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 6b5037c8c..90f1a4418 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -59,6 +59,7 @@ from .myvideo import MyVideoIE from .nba import NBAIE from .nbc import NBCNewsIE from .ooyala import OoyalaIE +from .orf import ORFIE from .pbs import PBSIE from .photobucket import PhotobucketIE from .pornotube import PornotubeIE diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py new file mode 100644 index 000000000..8da0a2c8e --- /dev/null +++ b/youtube_dl/extractor/orf.py @@ -0,0 +1,65 @@ +import re +import xml.etree.ElementTree +import json + +from .common import InfoExtractor +from ..utils import ( + compat_urlparse, + ExtractorError, + find_xpath_attr, +) + +class ORFIE(InfoExtractor): + _VALID_URL = r'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P\d+)' + + _TEST = { + u'url': u'http://tvthek.orf.at/programs/1171769-Wetter-ZIB/episodes/6557323-Wetter', + u'file': u'6566957.flv', + u'info_dict': { + u'title': u'Wetter', + u'description': u'Christa Kummer, Marcus Wadsak und Kollegen präsentieren abwechselnd ihre täglichen Wetterprognosen für Österreich.\r \r Mehr Wetter unter wetter.ORF.at', + }, + u'params': { + # It uses rtmp + u'skip_download': True, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + playlist_id = mobj.group('id') + webpage = self._download_webpage(url, playlist_id) + + flash_xml = self._search_regex('ORF.flashXML = \'(.+?)\'', webpage, u'flash 
xml') + flash_xml = compat_urlparse.parse_qs('xml='+flash_xml)['xml'][0] + flash_config = xml.etree.ElementTree.fromstring(flash_xml.encode('utf-8')) + playlist_json = self._search_regex(r'playlist\': \'(\[.*?\])\'', webpage, u'playlist').replace(r'\"','"') + playlist = json.loads(playlist_json) + + videos = [] + ns = '{http://tempuri.org/XMLSchema.xsd}' + xpath = '%(ns)sPlaylist/%(ns)sItems/%(ns)sItem' % {'ns': ns} + webpage_description = self._og_search_description(webpage) + for (i, (item, info)) in enumerate(zip(flash_config.findall(xpath), playlist), 1): + # Get best quality url + rtmp_url = None + for q in ['Q6A', 'Q4A', 'Q1A']: + video_url = find_xpath_attr(item, '%sVideoUrl' % ns, 'quality', q) + if video_url is not None: + rtmp_url = video_url.text + break + if rtmp_url is None: + raise ExtractorError(u'Couldn\'t get video url: %s' % info['id']) + description = self._html_search_regex( + r'id="playlist_entry_%s".*?
<p>(.*?)</p>
' % i, webpage, + u'description', default=webpage_description, flags=re.DOTALL) + videos.append({ + '_type': 'video', + 'id': info['id'], + 'title': info['title'], + 'url': rtmp_url, + 'ext': 'flv', + 'description': description, + }) + + return videos diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index b3d0f64ea..201802cee 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -213,7 +213,7 @@ if sys.version_info >= (2,7): def find_xpath_attr(node, xpath, key, val): """ Find the xpath xpath[@key=val] """ assert re.match(r'^[a-zA-Z]+$', key) - assert re.match(r'^[a-zA-Z@\s]*$', val) + assert re.match(r'^[a-zA-Z0-9@\s]*$', val) expr = xpath + u"[@%s='%s']" % (key, val) return node.find(expr) else: From 8928491074095ec4da84be9c7d5ff4f1c0f98400 Mon Sep 17 00:00:00 2001 From: Jeff Smith Date: Thu, 29 Aug 2013 12:51:38 -0500 Subject: [PATCH 27/36] Fix orf.at extractor by adding file coding mark --- youtube_dl/extractor/orf.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index 8da0a2c8e..41ef8e992 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -1,3 +1,5 @@ +# coding: utf-8 + import re import xml.etree.ElementTree import json From f1fb2d12b32910c641f27096e585513c5f97f9ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 29 Aug 2013 21:39:36 +0200 Subject: [PATCH 28/36] [ign] extract videos from articles pages --- youtube_dl/extractor/ign.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py index 62abab655..263959716 100644 --- a/youtube_dl/extractor/ign.py +++ b/youtube_dl/extractor/ign.py @@ -13,7 +13,7 @@ class IGNIE(InfoExtractor): Some videos of it.ign.com are also supported """ - _VALID_URL = r'https?://.+?\.ign\.com/(?:videos|show_videos)(/.+)?/(?P.+)' + _VALID_URL = r'https?://.+?\.ign\.com/(?Pvideos|show_videos|articles)(/.+)?/(?P.+)' IE_NAME = u'ign.com' _CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config' @@ -41,7 +41,11 @@ class IGNIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) name_or_id = mobj.group('name_or_id') + page_type = mobj.group('type') webpage = self._download_webpage(url, name_or_id) + if page_type == 'articles': + video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, u'video url') + return self.url_result(video_url, ie='IGN') video_id = self._find_video_id(webpage) result = self._get_video_info(video_id) description = self._html_search_regex(self._DESCRIPTION_RE, From ee80d66727d7b194e595fa7e0c19c40cc4adb408 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 29 Aug 2013 21:51:09 +0200 Subject: [PATCH 29/36] [ign] update 1up extractor to work with the updated IGNIE --- youtube_dl/extractor/ign.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py index 263959716..b1c84278a 100644 --- a/youtube_dl/extractor/ign.py +++ b/youtube_dl/extractor/ign.py @@ -72,7 +72,7 @@ class IGNIE(InfoExtractor): class OneUPIE(IGNIE): """Extractor for 1up.com, it uses the ign videos system.""" - _VALID_URL = r'https?://gamevideos.1up.com/video/id/(?P.+)' + _VALID_URL = r'https?://gamevideos.1up.com/(?Pvideo)/id/(?P.+)' IE_NAME = '1up.com' _DESCRIPTION_RE = r'
<div id="vid_summary">(.+?)</div>
' From 52e1eea18bae4771137abd888830036c40b6eaa2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 29 Aug 2013 22:33:58 +0200 Subject: [PATCH 30/36] [youtube] update algo for length 86 (fixes #1349) --- devscripts/youtube_genalgo.py | 4 ++-- youtube_dl/extractor/youtube.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py index 917e8f79d..b06416e93 100644 --- a/devscripts/youtube_genalgo.py +++ b/devscripts/youtube_genalgo.py @@ -20,9 +20,9 @@ tests = [ # 87 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<", "uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"), - # 86 - vflh9ybst 2013/08/23 + # 86 - vflg0g8PQ 2013/08/29 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<", - "yuioplkjhgfdsazxcvbnm1234567890QWERrYUIOPLKqHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"), + ">/?;}|[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"), # 85 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<", ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"), diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4038af256..3a07df027 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -423,7 +423,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): elif len(s) == 87: return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:] elif len(s) == 86: - return s[5:40] + s[3] + s[41:48] + s[0] + s[49:86] + return s[83:36:-1] + s[0] + s[35:2:-1] elif len(s) == 85: return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27] elif len(s) == 84: From 23b00bc0e4ae7d85876409fad59d95ce29b00d82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 29 Aug 2013 22:44:29 +0200 Subject: [PATCH 31/36] [youtube] update algo for length 84 Only appears sometimes, nearly identical to length 86. 
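As a quick sanity check, a minimal sketch pairing the new length-84 rule below with the test vector this patch adds to devscripts/youtube_genalgo.py (only the string and slice expression from the patch are used):

    # Scrambled length-84 test signature from devscripts/youtube_genalgo.py.
    s = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"
    # New length-84 rule: reverse the tail, keep s[0], then reverse most of the
    # head; five characters of the scrambled signature are dropped outright.
    unscrambled = s[81:36:-1] + s[0] + s[35:2:-1]
    assert len(unscrambled) == 79
    assert unscrambled.startswith(">?;}[{=+-_")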
--- devscripts/youtube_genalgo.py | 4 ++-- youtube_dl/extractor/youtube.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py index b06416e93..13df535c7 100644 --- a/devscripts/youtube_genalgo.py +++ b/devscripts/youtube_genalgo.py @@ -26,9 +26,9 @@ tests = [ # 85 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<", ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"), - # 84 - vflh9ybst 2013/08/23 (sporadic) + # 84 - vflg0g8PQ 2013/08/29 (sporadic) ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<", - "yuioplkjhgfdsazxcvbnm1234567890QWERrYUIOPLKqHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"), + ">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"), # 83 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<", ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"), diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 3a07df027..9e2373bd5 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -427,7 +427,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): elif len(s) == 85: return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27] elif len(s) == 84: - return s[5:40] + s[3] + s[41:48] + s[0] + s[49:84] + return s[81:36:-1] + s[0] + s[35:2:-1] elif len(s) == 83: return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0] elif len(s) == 82: From 3243d0f7b669128c91c64816a9ca3502ae4e4094 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 29 Aug 2013 23:29:34 +0200 Subject: [PATCH 32/36] release 2013.08.29 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 2ba75258d..c28320181 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.08.28.1' +__version__ = '2013.08.29' From 9193c1eededf996378307a0c60bb1e7eeaa1b9da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 30 Aug 2013 20:11:53 +0200 Subject: [PATCH 33/36] Add youtube keywords to the bash completion script --- devscripts/bash-completion.in | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/devscripts/bash-completion.in b/devscripts/bash-completion.in index 3b99a9614..bd10f63c2 100644 --- a/devscripts/bash-completion.in +++ b/devscripts/bash-completion.in @@ -4,8 +4,12 @@ __youtube-dl() COMPREPLY=() cur="${COMP_WORDS[COMP_CWORD]}" opts="{{flags}}" + keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater" - if [[ ${cur} == * ]] ; then + if [[ ${cur} =~ : ]]; then + COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) ) + return 0 + elif [[ ${cur} == * ]] ; then COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) ) return 0 fi From c7a7750d3b825e3f7aa9cd3617a76d2f56c1387d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 30 Aug 2013 20:13:05 +0200 Subject: [PATCH 34/36] [youtube] Fix typo in the _VALID_URL for YoutubeFavouritesIE, it was intended to also match :ytfavourites --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 9e2373bd5..00e2b320a 100644 --- 
a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1161,7 +1161,7 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor): class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): IE_NAME = u'youtube:favorites' IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)' - _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:o?rites)?' + _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?' _LOGIN_REQUIRED = True def _real_extract(self, url): From 2e756879f156139c68c3557bc65a7ab1ac31137d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 30 Aug 2013 20:49:51 +0200 Subject: [PATCH 35/36] [youtube] update algo for length 86 --- devscripts/youtube_genalgo.py | 4 ++-- youtube_dl/extractor/youtube.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py index 13df535c7..97a0d7290 100644 --- a/devscripts/youtube_genalgo.py +++ b/devscripts/youtube_genalgo.py @@ -20,9 +20,9 @@ tests = [ # 87 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<", "uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"), - # 86 - vflg0g8PQ 2013/08/29 + # 86 - vflHOr_nV 2013/08/30 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<", - ">/?;}|[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"), + "?;}|[{=+._)(*&^%$#@!MNBqCXZASDFGHJKLPOIUYTREWQ<987654321mnbvcxzasdfghjklpoiuytrew"), # 85 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<", ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"), diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 00e2b320a..810ce6f5d 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -423,7 +423,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): elif len(s) == 87: return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:] elif len(s) == 86: - return s[83:36:-1] + s[0] + s[35:2:-1] + return s[81:73:-1] + s[84] + s[72:58:-1] + s[0] + s[57:35:-1] + s[85] + s[34:0:-1] elif len(s) == 85: return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27] elif len(s) == 84: From 10f5c016ec6262e5d29327e97fe4f3d1127ccdff Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 30 Aug 2013 21:02:07 +0200 Subject: [PATCH 36/36] release 2013.08.30 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index c28320181..b6284c6d6 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.08.29' +__version__ = '2013.08.30'
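For reference, a minimal standalone sketch showing how two of the youtube.py changes in this series can be sanity-checked: the final length-86 signature rule from PATCH 35/36 against the test vector it ships with, and the corrected favourites keyword matching from PATCH 34/36. Only strings and patterns that appear in the patches above are used.

    import re

    # Scrambled length-86 test signature from devscripts/youtube_genalgo.py.
    s = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"
    # Length-86 rule from PATCH 35/36: reversed runs of the scrambled string
    # with s[84], s[0] and s[85] spliced back in between them.
    unscrambled = s[81:73:-1] + s[84] + s[72:58:-1] + s[0] + s[57:35:-1] + s[85] + s[34:0:-1]
    assert len(unscrambled) == 81
    assert unscrambled.startswith("?;}|[{=+.")

    # _VALID_URL of YoutubeFavouritesIE from PATCH 34/36: both spellings match.
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    assert re.match(_VALID_URL, ':ytfavourites') is not None
    assert re.match(_VALID_URL, ':ytfavorites') is not None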