Browse Source

[commonmistakes] Detect BOMs at the beginning of URLs

Reported at https://bugzilla.redhat.com/show_bug.cgi?id=1093517 .
totalwebcasting
Philipp Hagemeister 10 years ago
parent
commit
c73fae1e2e
2 changed files with 18 additions and 1 deletion
  1. +1
    -1
      youtube_dl/extractor/__init__.py
  2. +17
    -0
      youtube_dl/extractor/commonmistakes.py

+ 1
- 1
youtube_dl/extractor/__init__.py View File

@ -74,7 +74,7 @@ from .collegehumor import CollegeHumorIE
from .collegerama import CollegeRamaIE
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
from .comcarcoff import ComCarCoffIE
from .commonmistakes import CommonMistakesIE
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
from .condenast import CondeNastIE
from .cracked import CrackedIE
from .criterion import CriterionIE


+ 17
- 0
youtube_dl/extractor/commonmistakes.py View File

@ -27,3 +27,20 @@ class CommonMistakesIE(InfoExtractor):
if not self._downloader.params.get('verbose'):
msg += ' Add -v to the command line to see what arguments and configuration youtube-dl got.'
raise ExtractorError(msg, expected=True)
class UnicodeBOMIE(InfoExtractor):
    """Catch URLs that begin with a Unicode Byte Order Mark (U+FEFF).

    ``_VALID_URL`` splits the input into the leading BOM (group ``bom``) and
    everything after it (group ``id``).  Extraction emits a warning and hands
    the remainder back through ``url_result``.
    """

    # NOTE(review): presumably a False description keeps this helper out of
    # the user-visible extractor listing -- confirm against InfoExtractor.
    IE_DESC = False
    _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'
    _TESTS = [{
        'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Warn about the leading BOM and return a result for the rest of *url*."""
        # NOTE(review): _match_id is assumed to return the ``id`` group of
        # _VALID_URL, i.e. the URL without its BOM -- verify in InfoExtractor.
        stripped_url = self._match_id(url)
        warning = (
            'Your URL starts with a Byte Order Mark (BOM). '
            'Removing the BOM and looking for "%s" ...' % stripped_url)
        self.report_warning(warning)
        return self.url_result(stripped_url)

Loading…
Cancel
Save