Browse Source

[utils] Transliterate "þ" as "th" (#20897)

Despite visual similarity "þ" is unrelated to "p".
It is normally transliterated as "th":

    $ echo þ-Þ | iconv -t ASCII//TRANSLIT
    th-TH
master
Jakub Wilk 6 years ago
committed by Sergey M
parent
commit
fd35d8cdfd
2 changed files with 3 additions and 3 deletions
  1. +1
    -1
      test/test_utils.py
  2. +2
    -2
      youtube_dl/utils.py

+ 1
- 1
test/test_utils.py View File

@ -183,7 +183,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(sanitize_filename(
'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', restricted=True),
'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYPssaaaaaaaeceeeeiiiionooooooooeuuuuuypy')
'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYTHssaaaaaaaeceeeeiiiionooooooooeuuuuuythy')
def test_sanitize_ids(self):
self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')


+ 2
- 2
youtube_dl/utils.py View File

@ -125,8 +125,8 @@ KNOWN_EXTENSIONS = (
# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'],
'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy')))
itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
DATE_FORMATS = (
'%d %B %Y',


Loading…
Cancel
Save