[commonmistakes] Detect BOMs at the beginning of URLs

Reported at https://bugzilla.redhat.com/show_bug.cgi?id=1093517 .
10 years ago · c73fae1e2e
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -74,7 +74,7 @@ from .collegehumor import CollegeHumorIE
 from .collegerama import CollegeRamaIE
 from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
 from .comcarcoff import ComCarCoffIE
 from .commonmistakes import CommonMistakesIE
 from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
 from .condenast import CondeNastIE
 from .cracked import CrackedIE
 from .criterion import CriterionIE
--- a/youtube_dl/extractor/commonmistakes.py
+++ b/youtube_dl/extractor/commonmistakes.py
@@ -27,3 +27,20 @@ class CommonMistakesIE(InfoExtractor):
        if not self._downloader.params.get('verbose'):
            msg += ' Add -v to the command line to see what arguments and configuration youtube-dl got.'
        raise ExtractorError(msg, expected=True)


 class UnicodeBOMIE(InfoExtractor):
        """Pseudo-extractor for URLs that begin with a Unicode Byte Order Mark.

        Matches any URL whose first character is U+FEFF, warns the user about
        it, and hands the remainder of the URL back as a URL result.
        """

        # NOTE(review): IE_DESC = False presumably keeps this helper out of the
        # user-facing extractor descriptions -- confirm against InfoExtractor.
        IE_DESC = False
        # Group 'bom' is the leading U+FEFF; group 'id' is everything after it.
        _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'

        _TESTS = [{
            'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
            'only_matching': True,
        }]

        def _real_extract(self, url):
            """Warn about the leading BOM and redirect to the URL without it."""
            # NOTE(review): _match_id is inherited; presumably it returns the
            # 'id' group of _VALID_URL, i.e. the URL minus the BOM -- confirm.
            url_without_bom = self._match_id(url)
            warning = (
                'Your URL starts with a Byte Order Mark (BOM). '
                'Removing the BOM and looking for "%s" ...' % url_without_bom)
            self.report_warning(warning)
            return self.url_result(url_without_bom)