|
@ -6,6 +6,7 @@ import hashlib |
|
|
import json |
|
|
import json |
|
|
import netrc |
|
|
import netrc |
|
|
import os |
|
|
import os |
|
|
|
|
|
import random |
|
|
import re |
|
|
import re |
|
|
import socket |
|
|
import socket |
|
|
import sys |
|
|
import sys |
|
@ -39,6 +40,8 @@ from ..utils import ( |
|
|
ExtractorError, |
|
|
ExtractorError, |
|
|
fix_xml_ampersands, |
|
|
fix_xml_ampersands, |
|
|
float_or_none, |
|
|
float_or_none, |
|
|
|
|
|
GeoRestrictedError, |
|
|
|
|
|
GeoUtils, |
|
|
int_or_none, |
|
|
int_or_none, |
|
|
js_to_json, |
|
|
js_to_json, |
|
|
parse_iso8601, |
|
|
parse_iso8601, |
|
@ -320,17 +323,25 @@ class InfoExtractor(object): |
|
|
_real_extract() methods and define a _VALID_URL regexp. |
|
|
_real_extract() methods and define a _VALID_URL regexp. |
|
|
Probably, they should also be added to the list of extractors. |
|
|
Probably, they should also be added to the list of extractors. |
|
|
|
|
|
|
|
|
|
|
|
_BYPASS_GEO attribute may be set to False in order to disable |
|
|
|
|
|
geo restriction bypass mechanisms for a particular extractor. |
|
|
|
|
|
Though it won't disable explicit geo restriction bypass based on |
|
|
|
|
|
country code provided with bypass_geo_restriction_as_country. |
|
|
|
|
|
|
|
|
Finally, the _WORKING attribute should be set to False for broken IEs |
|
|
Finally, the _WORKING attribute should be set to False for broken IEs |
|
|
in order to warn the users and skip the tests. |
|
|
in order to warn the users and skip the tests. |
|
|
""" |
|
|
""" |
|
|
|
|
|
|
|
|
_ready = False |
|
|
_ready = False |
|
|
_downloader = None |
|
|
_downloader = None |
|
|
|
|
|
_x_forwarded_for_ip = None |
|
|
|
|
|
_BYPASS_GEO = True |
|
|
_WORKING = True |
|
|
_WORKING = True |
|
|
|
|
|
|
|
|
def __init__(self, downloader=None): |
|
|
def __init__(self, downloader=None): |
|
|
"""Constructor. Receives an optional downloader.""" |
|
|
"""Constructor. Receives an optional downloader.""" |
|
|
self._ready = False |
|
|
self._ready = False |
|
|
|
|
|
self._x_forwarded_for_ip = None |
|
|
self.set_downloader(downloader) |
|
|
self.set_downloader(downloader) |
|
|
|
|
|
|
|
|
@classmethod |
|
|
@classmethod |
|
@ -359,6 +370,10 @@ class InfoExtractor(object): |
|
|
|
|
|
|
|
|
def initialize(self): |
|
|
def initialize(self): |
|
|
"""Initializes an instance (authentication, etc).""" |
|
|
"""Initializes an instance (authentication, etc).""" |
|
|
|
|
|
if not self._x_forwarded_for_ip: |
|
|
|
|
|
country_code = self._downloader.params.get('bypass_geo_restriction_as_country', None) |
|
|
|
|
|
if country_code: |
|
|
|
|
|
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) |
|
|
if not self._ready: |
|
|
if not self._ready: |
|
|
self._real_initialize() |
|
|
self._real_initialize() |
|
|
self._ready = True |
|
|
self._ready = True |
|
@ -366,8 +381,22 @@ class InfoExtractor(object): |
|
|
def extract(self, url): |
|
|
def extract(self, url): |
|
|
"""Extracts URL information and returns it in list of dicts.""" |
|
|
"""Extracts URL information and returns it in list of dicts.""" |
|
|
try: |
|
|
try: |
|
|
self.initialize() |
|
|
|
|
|
return self._real_extract(url) |
|
|
|
|
|
|
|
|
for _ in range(2): |
|
|
|
|
|
try: |
|
|
|
|
|
self.initialize() |
|
|
|
|
|
return self._real_extract(url) |
|
|
|
|
|
except GeoRestrictedError as e: |
|
|
|
|
|
if (not self._downloader.params.get('bypass_geo_restriction_as_country', None) and |
|
|
|
|
|
self._BYPASS_GEO and |
|
|
|
|
|
self._downloader.params.get('bypass_geo_restriction', True) and |
|
|
|
|
|
not self._x_forwarded_for_ip and |
|
|
|
|
|
e.countries): |
|
|
|
|
|
self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(e.countries)) |
|
|
|
|
|
if self._x_forwarded_for_ip: |
|
|
|
|
|
self.report_warning( |
|
|
|
|
|
'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip) |
|
|
|
|
|
continue |
|
|
|
|
|
raise |
|
|
except ExtractorError: |
|
|
except ExtractorError: |
|
|
raise |
|
|
raise |
|
|
except compat_http_client.IncompleteRead as e: |
|
|
except compat_http_client.IncompleteRead as e: |
|
@ -434,6 +463,15 @@ class InfoExtractor(object): |
|
|
if isinstance(url_or_request, (compat_str, str)): |
|
|
if isinstance(url_or_request, (compat_str, str)): |
|
|
url_or_request = url_or_request.partition('#')[0] |
|
|
url_or_request = url_or_request.partition('#')[0] |
|
|
|
|
|
|
|
|
|
|
|
# Some sites check X-Forwarded-For HTTP header in order to figure out |
|
|
|
|
|
# the origin of the client behind proxy. This allows bypassing geo |
|
|
|
|
|
# restriction by faking this header's value to IP that belongs to some |
|
|
|
|
|
# geo unrestricted country. We will do so once we encounter any |
|
|
|
|
|
# geo restriction error. |
|
|
|
|
|
if self._x_forwarded_for_ip: |
|
|
|
|
|
if 'X-Forwarded-For' not in headers: |
|
|
|
|
|
headers['X-Forwarded-For'] = self._x_forwarded_for_ip |
|
|
|
|
|
|
|
|
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query) |
|
|
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query) |
|
|
if urlh is False: |
|
|
if urlh is False: |
|
|
assert not fatal |
|
|
assert not fatal |
|
@ -609,10 +647,8 @@ class InfoExtractor(object): |
|
|
expected=True) |
|
|
expected=True) |
|
|
|
|
|
|
|
|
@staticmethod |
|
|
@staticmethod |
|
|
def raise_geo_restricted(msg='This video is not available from your location due to geo restriction'): |
|
|
|
|
|
raise ExtractorError( |
|
|
|
|
|
'%s. You might want to use --proxy to workaround.' % msg, |
|
|
|
|
|
expected=True) |
|
|
|
|
|
|
|
|
def raise_geo_restricted(msg='This video is not available from your location due to geo restriction', countries=None): |
|
|
|
|
|
raise GeoRestrictedError(msg, countries=countries) |
|
|
|
|
|
|
|
|
# Methods for following #608 |
|
|
# Methods for following #608 |
|
|
@staticmethod |
|
|
@staticmethod |
|
|