Browse Source

[extractor/common] Improve _form_hidden_inputs and rename to _hidden_inputs

totalwebcasting
Sergey M․ 10 years ago
parent
commit
f8da79f828
11 changed files with 21 additions and 14 deletions
  1. +11
    -4
      youtube_dl/extractor/common.py
  2. +1
    -1
      youtube_dl/extractor/gorillavid.py
  3. +1
    -1
      youtube_dl/extractor/hostingbulk.py
  4. +1
    -1
      youtube_dl/extractor/played.py
  5. +1
    -1
      youtube_dl/extractor/primesharetv.py
  6. +1
    -1
      youtube_dl/extractor/promptfile.py
  7. +1
    -1
      youtube_dl/extractor/shared.py
  8. +1
    -1
      youtube_dl/extractor/twitch.py
  9. +1
    -1
      youtube_dl/extractor/vimeo.py
  10. +1
    -1
      youtube_dl/extractor/vk.py
  11. +1
    -1
      youtube_dl/extractor/vodlocker.py

+ 11
- 4
youtube_dl/extractor/common.py View File

@ -706,10 +706,17 @@ class InfoExtractor(object):
'twitter card player') 'twitter card player')
@staticmethod @staticmethod
def _form_hidden_inputs(html):
return dict(re.findall(
r'<input\s+type="hidden"\s+name="([^"]+)"\s+(?:id="[^"]+"\s+)?value="([^"]*)"',
html))
def _hidden_inputs(html):
return dict([
(input.group('name'), input.group('value')) for input in re.finditer(
r'''(?x)
<input\s+
type=(?P<q_hidden>["\'])hidden(?P=q_hidden)\s+
name=(?P<q_name>["\'])(?P<name>.+?)(?P=q_name)\s+
(?:id=(?P<q_id>["\']).+?(?P=q_id)\s+)?
value=(?P<q_value>["\'])(?P<value>.*?)(?P=q_value)
''', html)
])
def _sort_formats(self, formats, field_preference=None): def _sort_formats(self, formats, field_preference=None):
if not formats: if not formats:


+ 1
- 1
youtube_dl/extractor/gorillavid.py View File

@ -78,7 +78,7 @@ class GorillaVidIE(InfoExtractor):
if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None: if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None:
raise ExtractorError('Video %s does not exist' % video_id, expected=True) raise ExtractorError('Video %s does not exist' % video_id, expected=True)
fields = self._form_hidden_inputs(webpage)
fields = self._hidden_inputs(webpage)
if fields['op'] == 'download1': if fields['op'] == 'download1':
countdown = int_or_none(self._search_regex( countdown = int_or_none(self._search_regex(


+ 1
- 1
youtube_dl/extractor/hostingbulk.py View File

@ -58,7 +58,7 @@ class HostingBulkIE(InfoExtractor):
r'<img src="([^"]+)".+?class="pic"', r'<img src="([^"]+)".+?class="pic"',
webpage, 'thumbnail', fatal=False) webpage, 'thumbnail', fatal=False)
fields = self._form_hidden_inputs(webpage)
fields = self._hidden_inputs(webpage)
request = compat_urllib_request.Request(url, urlencode_postdata(fields)) request = compat_urllib_request.Request(url, urlencode_postdata(fields))
request.add_header('Content-type', 'application/x-www-form-urlencoded') request.add_header('Content-type', 'application/x-www-form-urlencoded')


+ 1
- 1
youtube_dl/extractor/played.py View File

@ -38,7 +38,7 @@ class PlayedIE(InfoExtractor):
if m_error: if m_error:
raise ExtractorError(m_error.group('msg'), expected=True) raise ExtractorError(m_error.group('msg'), expected=True)
data = self._form_hidden_inputs(orig_webpage)
data = self._hidden_inputs(orig_webpage)
self._sleep(2, video_id) self._sleep(2, video_id)


+ 1
- 1
youtube_dl/extractor/primesharetv.py View File

@ -29,7 +29,7 @@ class PrimeShareTVIE(InfoExtractor):
if '>File not exist<' in webpage: if '>File not exist<' in webpage:
raise ExtractorError('Video %s does not exist' % video_id, expected=True) raise ExtractorError('Video %s does not exist' % video_id, expected=True)
fields = self._form_hidden_inputs(webpage)
fields = self._hidden_inputs(webpage)
headers = { headers = {
'Referer': url, 'Referer': url,


+ 1
- 1
youtube_dl/extractor/promptfile.py View File

@ -35,7 +35,7 @@ class PromptFileIE(InfoExtractor):
raise ExtractorError('Video %s does not exist' % video_id, raise ExtractorError('Video %s does not exist' % video_id,
expected=True) expected=True)
fields = self._form_hidden_inputs(webpage)
fields = self._hidden_inputs(webpage)
post = compat_urllib_parse.urlencode(fields) post = compat_urllib_parse.urlencode(fields)
req = compat_urllib_request.Request(url, post) req = compat_urllib_request.Request(url, post)
req.add_header('Content-type', 'application/x-www-form-urlencoded') req.add_header('Content-type', 'application/x-www-form-urlencoded')


+ 1
- 1
youtube_dl/extractor/shared.py View File

@ -34,7 +34,7 @@ class SharedIE(InfoExtractor):
raise ExtractorError( raise ExtractorError(
'Video %s does not exist' % video_id, expected=True) 'Video %s does not exist' % video_id, expected=True)
download_form = self._form_hidden_inputs(webpage)
download_form = self._hidden_inputs(webpage)
request = compat_urllib_request.Request( request = compat_urllib_request.Request(
url, compat_urllib_parse.urlencode(download_form)) url, compat_urllib_parse.urlencode(download_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.add_header('Content-Type', 'application/x-www-form-urlencoded')


+ 1
- 1
youtube_dl/extractor/twitch.py View File

@ -59,7 +59,7 @@ class TwitchBaseIE(InfoExtractor):
login_page = self._download_webpage( login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page') self._LOGIN_URL, None, 'Downloading login page')
login_form = self._form_hidden_inputs(login_page)
login_form = self._hidden_inputs(login_page)
login_form.update({ login_form.update({
'login': username.encode('utf-8'), 'login': username.encode('utf-8'),


+ 1
- 1
youtube_dl/extractor/vimeo.py View File

@ -452,7 +452,7 @@ class VimeoChannelIE(InfoExtractor):
password = self._downloader.params.get('videopassword', None) password = self._downloader.params.get('videopassword', None)
if password is None: if password is None:
raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True) raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)
fields = self._form_hidden_inputs(login_form)
fields = self._hidden_inputs(login_form)
token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token') token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token')
fields['token'] = token fields['token'] = token
fields['password'] = password fields['password'] = password


+ 1
- 1
youtube_dl/extractor/vk.py View File

@ -168,7 +168,7 @@ class VKIE(InfoExtractor):
login_page = self._download_webpage( login_page = self._download_webpage(
'https://vk.com', None, 'Downloading login page') 'https://vk.com', None, 'Downloading login page')
login_form = self._form_hidden_inputs(login_page)
login_form = self._hidden_inputs(login_page)
login_form.update({ login_form.update({
'email': username.encode('cp1251'), 'email': username.encode('cp1251'),


+ 1
- 1
youtube_dl/extractor/vodlocker.py View File

@ -26,7 +26,7 @@ class VodlockerIE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
fields = self._form_hidden_inputs(webpage)
fields = self._hidden_inputs(webpage)
if fields['op'] == 'download1': if fields['op'] == 'download1':
self._sleep(3, video_id) # they do detect when requests happen too fast! self._sleep(3, video_id) # they do detect when requests happen too fast!


Loading…
Cancel
Save