
Merge branch 'master' into totalwebcasting

Branch: totalwebcasting
Author: Filippo Valsorda, 7 years ago (committed by GitHub)
Commit: 97bc05116e
GPG signature: no known key found for this signature in database (GPG key ID: 4AEE18F83AFDEB23)

488 changed files with 29,324 additions and 10,030 deletions
  1. .github/ISSUE_TEMPLATE.md (+8, -8)
  2. .github/ISSUE_TEMPLATE_tmpl.md (+6, -6)
  3. .github/PULL_REQUEST_TEMPLATE.md (+1, -0)
  4. .gitignore (+2, -1)
  5. .travis.yml (+18, -9)
  6. AUTHORS (+40, -0)
  7. CONTRIBUTING.md (+9, -7)
  8. ChangeLog (+1766, -0)
  9. MANIFEST.in (+4, -2)
  10. Makefile (+24, -9)
  11. README.md (+166, -142)
  12. devscripts/check-porn.py (+2, -2)
  13. devscripts/install_jython.sh (+5, -0)
  14. devscripts/make_lazy_extractors.py (+2, -1)
  15. devscripts/prepare_manpage.py (+1, -1)
  16. devscripts/run_tests.sh (+22, -0)
  17. docs/supportedsites.md (+157, -50)
  18. setup.py (+4, -2)
  19. test/test_InfoExtractor.py (+611, -1)
  20. test/test_YoutubeDL.py (+90, -3)
  21. test/test_aes.py (+8, -1)
  22. test/test_compat.py (+3, -3)
  23. test/test_download.py (+36, -7)
  24. test/test_options.py (+26, -0)
  25. test/test_subtitles.py (+4, -4)
  26. test/test_utils.py (+192, -4)
  27. test/test_youtube_chapters.py (+275, -0)
  28. test/testdata/f4m/custom_base_url.f4m (+10, -0)
  29. test/testdata/m3u8/pluzz_francetv_11507.m3u8 (+14, -0)
  30. test/testdata/m3u8/teamcoco_11995.m3u8 (+16, -0)
  31. test/testdata/m3u8/toggle_mobile_12211.m3u8 (+13, -0)
  32. test/testdata/m3u8/twitch_vod.m3u8 (+20, -0)
  33. test/testdata/m3u8/vidio.m3u8 (+10, -0)
  34. test/testdata/mpd/float_duration.mpd (+18, -0)
  35. test/testdata/mpd/urls_only.mpd (+218, -0)
  36. youtube_dl/YoutubeDL.py (+291, -85)
  37. youtube_dl/__init__.py (+30, -9)
  38. youtube_dl/aes.py (+28, -0)
  39. youtube_dl/cache.py (+6, -3)
  40. youtube_dl/compat.py (+86, -12)
  41. youtube_dl/downloader/__init__.py (+3, -0)
  42. youtube_dl/downloader/common.py (+28, -25)
  43. youtube_dl/downloader/dash.py (+19, -31)
  44. youtube_dl/downloader/external.py (+37, -3)
  45. youtube_dl/downloader/f4m.py (+27, -31)
  46. youtube_dl/downloader/fragment.py (+124, -15)
  47. youtube_dl/downloader/hls.py (+57, -30)
  48. youtube_dl/downloader/http.py (+195, -151)
  49. youtube_dl/downloader/ism.py (+10, -24)
  50. youtube_dl/downloader/rtmp.py (+1, -1)
  51. youtube_dl/extractor/abc.py (+41, -9)
  52. youtube_dl/extractor/abcnews.py (+15, -5)
  53. youtube_dl/extractor/abcotvs.py (+1, -1)
  54. youtube_dl/extractor/acast.py (+12, -11)
  55. youtube_dl/extractor/addanime.py (+2, -1)
  56. youtube_dl/extractor/adn.py (+150, -0)
  57. youtube_dl/extractor/adobepass.py (+113, -18)
  58. youtube_dl/extractor/adultswim.py (+104, -177)
  59. youtube_dl/extractor/aenetworks.py (+37, -11)
  60. youtube_dl/extractor/afreecatv.py (+175, -38)
  61. youtube_dl/extractor/airmozilla.py (+10, -18)
  62. youtube_dl/extractor/aliexpress.py (+53, -0)
  63. youtube_dl/extractor/aljazeera.py (+6, -3)
  64. youtube_dl/extractor/allocine.py (+33, -11)
  65. youtube_dl/extractor/amcnetworks.py (+33, -13)
  66. youtube_dl/extractor/americastestkitchen.py (+85, -0)
  67. youtube_dl/extractor/amp.py (+19, -7)
  68. youtube_dl/extractor/animeondemand.py (+44, -22)
  69. youtube_dl/extractor/anvato.py (+70, -14)
  70. youtube_dl/extractor/aparat.py (+30, -19)
  71. youtube_dl/extractor/appleconnect.py (+2, -2)
  72. youtube_dl/extractor/appletrailers.py (+5, -4)
  73. youtube_dl/extractor/archiveorg.py (+4, -4)
  74. youtube_dl/extractor/ard.py (+16, -7)
  75. youtube_dl/extractor/arkena.py (+1, -2)
  76. youtube_dl/extractor/arte.py (+19, -3)
  77. youtube_dl/extractor/asiancrush.py (+93, -0)
  78. youtube_dl/extractor/atresplayer.py (+13, -8)
  79. youtube_dl/extractor/atvat.py (+73, -0)
  80. youtube_dl/extractor/audioboom.py (+2, -2)
  81. youtube_dl/extractor/aws.py (+78, -0)
  82. youtube_dl/extractor/azmedien.py (+213, -0)
  83. youtube_dl/extractor/azubu.py (+0, -140)
  84. youtube_dl/extractor/bambuser.py (+1, -1)
  85. youtube_dl/extractor/bandcamp.py (+128, -11)
  86. youtube_dl/extractor/bbc.py (+74, -9)
  87. youtube_dl/extractor/beampro.py (+188, -0)
  88. youtube_dl/extractor/beeg.py (+13, -6)
  89. youtube_dl/extractor/bellmedia.py (+10, -2)
  90. youtube_dl/extractor/bilibili.py (+149, -13)
  91. youtube_dl/extractor/bleacherreport.py (+3, -7)
  92. youtube_dl/extractor/bloomberg.py (+8, -3)
  93. youtube_dl/extractor/bostonglobe.py (+72, -0)
  94. youtube_dl/extractor/bpb.py (+9, -4)
  95. youtube_dl/extractor/br.py (+144, -4)
  96. youtube_dl/extractor/brightcove.py (+138, -62)
  97. youtube_dl/extractor/buzzfeed.py (+4, -3)
  98. youtube_dl/extractor/byutv.py (+18, -53)
  99. youtube_dl/extractor/canalc2.py (+1, -4)
  100. youtube_dl/extractor/canalplus.py (+40, -20)

.github/ISSUE_TEMPLATE.md (+8, -8)

@@ -1,16 +1,16 @@
  ## Please follow the guide below
  - You will be asked some questions and requested to provide some information, please read them **carefully** and answer honestly
- - Put an `x` into all the boxes [ ] relevant to your *issue* (like that [x])
- - Use *Preview* tab to see how your issue will actually look like
+ - Put an `x` into all the boxes [ ] relevant to your *issue* (like this: `[x]`)
+ - Use the *Preview* tab to see what your issue will actually look like
  ---
- ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.10*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.10**
+ ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.12.31*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+ - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.12.31**
  ### Before submitting an *issue* make sure you have:
- - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
+ - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
  - [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones
  ### What is the purpose of your *issue*?

@@ -28,14 +28,14 @@
  ### If the purpose of this *issue* is a *bug report*, *site support request* or you are not completely sure provide the full verbose output as follows:
- Add `-v` flag to **your command line** you run youtube-dl with, copy the **whole** output and insert it here. It should look similar to one below (replace it with **your** log inserted between triple ```):
+ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl -v <your command line>`), copy the **whole** output and insert it here. It should look similar to one below (replace it with **your** log inserted between triple ```):
  ```
- $ youtube-dl -v <your command line>
  [debug] System config: []
  [debug] User config: []
  [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
  [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
- [debug] youtube-dl version 2017.01.10
+ [debug] youtube-dl version 2017.12.31
  [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
  [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
  [debug] Proxy map: {}


+ 6
.github/ISSUE_TEMPLATE_tmpl.md (+6, -6)

@@ -1,16 +1,16 @@
  ## Please follow the guide below
  - You will be asked some questions and requested to provide some information, please read them **carefully** and answer honestly
- - Put an `x` into all the boxes [ ] relevant to your *issue* (like that [x])
- - Use *Preview* tab to see how your issue will actually look like
+ - Put an `x` into all the boxes [ ] relevant to your *issue* (like this: `[x]`)
+ - Use the *Preview* tab to see what your issue will actually look like
  ---
- ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *%(version)s*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+ ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *%(version)s*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
  - [ ] I've **verified** and **I assure** that I'm running youtube-dl **%(version)s**
  ### Before submitting an *issue* make sure you have:
- - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
+ - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
  - [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones
  ### What is the purpose of your *issue*?

@@ -28,9 +28,9 @@
  ### If the purpose of this *issue* is a *bug report*, *site support request* or you are not completely sure provide the full verbose output as follows:
- Add `-v` flag to **your command line** you run youtube-dl with, copy the **whole** output and insert it here. It should look similar to one below (replace it with **your** log inserted between triple ```):
+ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl -v <your command line>`), copy the **whole** output and insert it here. It should look similar to one below (replace it with **your** log inserted between triple ```):
  ```
- $ youtube-dl -v <your command line>
  [debug] System config: []
  [debug] User config: []
  [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']


.github/PULL_REQUEST_TEMPLATE.md (+1, -0)

@@ -9,6 +9,7 @@
  ### Before submitting a *pull request* make sure you have:
  - [ ] At least skimmed through [adding new extractor tutorial](https://github.com/rg3/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/rg3/youtube-dl#youtube-dl-coding-conventions) sections
  - [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests
+ - [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8)
  ### In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options:
  - [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/)


.gitignore (+2, -1)

@@ -22,6 +22,7 @@ cover/
  updates_key.pem
  *.egg-info
  *.srt
+ *.ttml
  *.sbv
  *.vtt
  *.flv

@@ -35,8 +36,8 @@ updates_key.pem
  *.mkv
  *.swf
  *.part
+ *.ytdl
  *.swp
- test/testdata
  test/local_parameters.json
  .tox
  youtube-dl.zsh


.travis.yml (+18, -9)

@@ -6,13 +6,22 @@ python:
    - "3.3"
    - "3.4"
    - "3.5"
+   - "3.6"
+   - "pypy"
+   - "pypy3"
  sudo: false
- script: nosetests test --verbose
- notifications:
-   email:
-     - filippo.valsorda@gmail.com
-     - yasoob.khld@gmail.com
- #  irc:
- #    channels:
- #      - "irc.freenode.org#youtube-dl"
- #    skip_join: true
+ env:
+   - YTDL_TEST_SET=core
+   - YTDL_TEST_SET=download
+ matrix:
+   include:
+     - env: JYTHON=true; YTDL_TEST_SET=core
+     - env: JYTHON=true; YTDL_TEST_SET=download
+   fast_finish: true
+   allow_failures:
+     - env: YTDL_TEST_SET=download
+     - env: JYTHON=true; YTDL_TEST_SET=core
+     - env: JYTHON=true; YTDL_TEST_SET=download
+ before_install:
+   - if [ "$JYTHON" == "true" ]; then ./devscripts/install_jython.sh; export PATH="$HOME/jython/bin:$PATH"; fi
+ script: ./devscripts/run_tests.sh

AUTHORS (+40, -0)

@@ -191,3 +191,43 @@ Rich Leeper
  Zhong Jianxin
  Thor77
  Mattias Wadman
+ Arjan Verwer
+ Costy Petrisor
+ Logan B
+ Alex Seiler
+ Vijay Singh
+ Paul Hartmann
+ Stephen Chen
+ Fabian Stahl
+ Bagira
+ Odd Stråbø
+ Philip Herzog
+ Thomas Christlieb
+ Marek Rusinowski
+ Tobias Gruetzmacher
+ Olivier Bilodeau
+ Lars Vierbergen
+ Juanjo Benages
+ Xiao Di Guan
+ Thomas Winant
+ Daniel Twardowski
+ Jeremie Jarosh
+ Gerard Rovira
+ Marvin Ewald
+ Frédéric Bournival
+ Timendum
+ gritstub
+ Adam Voss
+ Mike Fährmann
+ Jan Kundrát
+ Giuseppe Fabiano
+ Örn Guðjónsson
+ Parmjit Virk
+ Genki Sky
+ Ľuboš Katrinec
+ Corey Nicholson
+ Ashutosh Chaudhary
+ John Dong
+ Tatsuyuki Ishi
+ Daniel Weber
+ Kay Bouché

CONTRIBUTING.md (+9, -7)

@@ -3,7 +3,7 @@
  $ youtube-dl -v <your command line>
  [debug] System config: []
  [debug] User config: []
- [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
+ [debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKcj']
  [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
  [debug] youtube-dl version 2015.12.06
  [debug] Git HEAD: 135392e

@@ -34,7 +34,7 @@ For bug reports, this means that your report should contain the *complete* outpu
  If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
- **Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `http://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `http://www.youtube.com/`) is *not* an example URL.
+ **Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL.
  ### Are you using the latest version?

@@ -70,7 +70,7 @@ It may sound strange, but some bug reports we receive are completely unrelated t
  # DEVELOPER INSTRUCTIONS
- Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
+ Most users do not need to build youtube-dl and can [download the builds](https://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
  To run youtube-dl as a developer, you don't need to build anything either. Simply execute

@@ -82,6 +82,8 @@ To run the test, simply invoke your favorite test runner, or execute a test file
  python test/test_download.py
  nosetests
+ See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.
  If you want to create a build of youtube-dl yourself, you'll need
  * python

@@ -118,7 +120,7 @@ After you have ensured this site is distributing its content legally, you can fo
  class YourExtractorIE(InfoExtractor):
      _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
      _TEST = {
-         'url': 'http://yourextractor.com/watch/42',
+         'url': 'https://yourextractor.com/watch/42',
          'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
          'info_dict': {
              'id': '42',

@@ -149,10 +151,10 @@ After you have ensured this site is distributing its content legally, you can fo
  }
  ```
  5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
- 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
+ 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
  7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
- 8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
- 9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
+ 8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
+ 9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
  $ git add youtube_dl/extractor/extractors.py
  $ git add youtube_dl/extractor/yourextractor.py
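Assembled for reference, the extractor skeleton that the tutorial steps quoted in this diff build up looks roughly like the following. The site, URL pattern and regexes are placeholders; the helpers (`_match_id`, `_download_webpage`, `_html_search_regex`) come from `youtube_dl/extractor/common.py`. This is a sketch, not a working extractor for any real site.

```python
# Illustrative only: a bare-bones extractor in the style the tutorial describes.
from __future__ import unicode_literals

from .common import InfoExtractor


class YourExtractorIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://yourextractor.com/watch/42',
        'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
        'info_dict': {
            'id': '42',
            'ext': 'mp4',
            'title': 'Video title goes here',
        },
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # A real extractor would pull these from the page markup or a JSON API.
        title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
        video_url = self._html_search_regex(
            r'data-video-url="([^"]+)"', webpage, 'video URL')
        return {
            'id': video_id,
            'title': title,
            'url': video_url,
        }
```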


ChangeLog (+1766, -0)

(File diff suppressed because it is too large.)


MANIFEST.in (+4, -2)

@@ -1,7 +1,9 @@
  include README.md
- include test/*.py
- include test/*.json
+ include LICENSE
+ include AUTHORS
+ include ChangeLog
  include youtube-dl.bash-completion
  include youtube-dl.fish
  include youtube-dl.1
  recursive-include docs Makefile conf.py *.rst
+ recursive-include test *

Makefile (+24, -9)

@@ -1,7 +1,7 @@
  all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
  clean:
- 	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
+ 	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.ytdl *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
  	find . -name "*.pyc" -delete
  	find . -name "*.class" -delete

@@ -36,8 +36,17 @@ test:
  ot: offlinetest
+ # Keep this list in sync with devscripts/run_tests.sh
  offlinetest: codetest
- 	$(PYTHON) -m nose --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py --exclude test_socks.py
+ 	$(PYTHON) -m nose --verbose test \
+ 		--exclude test_age_restriction.py \
+ 		--exclude test_download.py \
+ 		--exclude test_iqiyi_sdk_interpreter.py \
+ 		--exclude test_socks.py \
+ 		--exclude test_subtitles.py \
+ 		--exclude test_write_annotations.py \
+ 		--exclude test_youtube_lists.py \
+ 		--exclude test_youtube_signature.py
  tar: youtube-dl.tar.gz

@@ -46,8 +55,15 @@ tar: youtube-dl.tar.gz
  pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 youtube-dl.fish
  youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
- 	zip --quiet youtube-dl youtube_dl/*.py youtube_dl/*/*.py
- 	zip --quiet --junk-paths youtube-dl youtube_dl/__main__.py
+ 	mkdir -p zip
+ 	for d in youtube_dl youtube_dl/downloader youtube_dl/extractor youtube_dl/postprocessor ; do \
+ 	  mkdir -p zip/$$d ;\
+ 	  cp -pPR $$d/*.py zip/$$d/ ;\
+ 	done
+ 	touch -t 200001010101 zip/youtube_dl/*.py zip/youtube_dl/*/*.py
+ 	mv zip/youtube_dl/__main__.py zip/
+ 	cd zip ; zip -q ../youtube-dl youtube_dl/*.py youtube_dl/*/*.py __main__.py
+ 	rm -rf zip
  	echo '#!$(PYTHON)' > youtube-dl
  	cat youtube-dl.zip >> youtube-dl
  	rm youtube-dl.zip

@@ -94,20 +110,19 @@ _EXTRACTOR_FILES = $(shell find youtube_dl/extractor -iname '*.py' -and -not -in
  youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
  	$(PYTHON) devscripts/make_lazy_extractors.py $@
- youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish ChangeLog
+ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish ChangeLog AUTHORS
  	@tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
  		--exclude '*.DS_Store' \
  		--exclude '*.kate-swp' \
  		--exclude '*.pyc' \
  		--exclude '*.pyo' \
  		--exclude '*~' \
- 		--exclude '__pycache' \
+ 		--exclude '__pycache__' \
  		--exclude '.git' \
- 		--exclude 'testdata' \
  		--exclude 'docs/_build' \
  		-- \
  		bin devscripts test youtube_dl docs \
- 		ChangeLog LICENSE README.md README.txt \
+ 		ChangeLog AUTHORS LICENSE README.md README.txt \
  		Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion \
- 		youtube-dl.zsh youtube-dl.fish setup.py \
+ 		youtube-dl.zsh youtube-dl.fish setup.py setup.cfg \
  		youtube-dl
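The reworked `youtube-dl` target above builds the self-executing zip by hand: staged copies of the packages, a fixed `touch` timestamp so the archive is reproducible, then an `echo '#!$(PYTHON)'` line prepended to the zip. As a rough illustration of the same shebang-plus-zip idea (not the project's build script; the paths are hypothetical), Python's standard-library `zipapp` module does the equivalent in one call:

```python
# Illustrative only: build an executable archive the way the Makefile's
# youtube-dl target does (zip with __main__.py at the root, shebang prepended).
import zipapp

zipapp.create_archive(
    'zip',                              # hypothetical staging dir with youtube_dl/ and __main__.py
    target='youtube-dl',                # resulting single-file executable
    interpreter='/usr/bin/env python',  # becomes the #! line, like echo '#!$(PYTHON)'
)
```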

README.md (+166, -142)

@@ -1,3 +1,5 @@
+ [![Build Status](https://travis-ci.org/rg3/youtube-dl.svg?branch=master)](https://travis-ci.org/rg3/youtube-dl)
  youtube-dl - download videos from youtube.com or other video platforms
  - [INSTALLATION](#installation)

@@ -25,7 +27,7 @@ If you do not have curl, you can alternatively use a recent wget:
  sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
  sudo chmod a+rx /usr/local/bin/youtube-dl
- Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`).
+ Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](https://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`).
  You can also use pip:

@@ -33,7 +35,7 @@ You can also use pip:
  This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information.
- OS X users can install youtube-dl with [Homebrew](http://brew.sh/):
+ OS X users can install youtube-dl with [Homebrew](https://brew.sh/):
  brew install youtube-dl

@@ -88,8 +90,6 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
  --mark-watched Mark videos watched (YouTube only)
  --no-mark-watched Do not mark videos watched (YouTube only)
  --no-color Do not emit color codes in output
- --abort-on-unavailable-fragment Abort downloading when some fragment is not
- available
  ## Network Options:
  --proxy URL Use the specified HTTP/HTTPS/SOCKS proxy.

@@ -99,16 +99,23 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
  string (--proxy "") for direct connection
  --socket-timeout SECONDS Time to wait before giving up, in seconds
  --source-address IP Client-side IP address to bind to
- (experimental)
  -4, --force-ipv4 Make all connections via IPv4
- (experimental)
  -6, --force-ipv6 Make all connections via IPv6
- (experimental)
+ ## Geo Restriction:
  --geo-verification-proxy URL Use this proxy to verify the IP address for
  some geo-restricted sites. The default
  proxy specified by --proxy (or none, if the
  options is not present) is used for the
- actual downloading. (experimental)
+ actual downloading.
+ --geo-bypass Bypass geographic restriction via faking
+ X-Forwarded-For HTTP header (experimental)
+ --no-geo-bypass Do not bypass geographic restriction via
+ faking X-Forwarded-For HTTP header
+ (experimental)
+ --geo-bypass-country CODE Force bypass geographic restriction with
+ explicitly provided two-letter ISO 3166-2
+ country code (experimental)
  ## Video Selection:
  --playlist-start NUMBER Playlist video to start at (default is 1)
@@ -139,17 +146,19 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
  COUNT views
  --max-views COUNT Do not download any videos with more than
  COUNT views
- --match-filter FILTER Generic video filter (experimental).
- Specify any key (see help for -o for a list
- of available keys) to match if the key is
+ --match-filter FILTER Generic video filter. Specify any key (see
+ the "OUTPUT TEMPLATE" for a list of
+ available keys) to match if the key is
  present, !key to check if the key is not
- present,key > NUMBER (like "comment_count >
- 12", also works with >=, <, <=, !=, =) to
- compare against a number, and & to require
- multiple matches. Values which are not
- known are excluded unless you put a
- question mark (?) after the operator.For
- example, to only match videos that have
+ present, key > NUMBER (like "comment_count
+ > 12", also works with >=, <, <=, !=, =) to
+ compare against a number, key = 'LITERAL'
+ (like "uploader = 'Mike Smith'", also works
+ with !=) to match against a string literal
+ and & to require multiple matches. Values
+ which are not known are excluded unless you
+ put a question mark (?) after the operator.
+ For example, to only match videos that have
  been liked more than 100 times and disliked
  less than 50 times (or the dislike
  functionality is not available at the given
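The reworded `--match-filter` help above also applies when youtube-dl is embedded: the same filter string can, if I read the codebase correctly, be compiled with `match_filter_func` from `youtube_dl.utils` and passed as the `match_filter` option. A minimal sketch (option plumbing may differ between versions):

```python
# Sketch: apply the filter from the help text above programmatically.
import youtube_dl
from youtube_dl.utils import match_filter_func

ydl_opts = {
    # Only videos liked more than 100 times and disliked fewer than 50 times
    # (or where the dislike count is unknown), as described above.
    'match_filter': match_filter_func('like_count > 100 & dislike_count <? 50'),
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```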
@@ -174,10 +183,15 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
  -R, --retries RETRIES Number of retries (default is 10), or
  "infinite".
  --fragment-retries RETRIES Number of retries for a fragment (default
- is 10), or "infinite" (DASH and hlsnative
- only)
- --skip-unavailable-fragments Skip unavailable fragments (DASH and
- hlsnative only)
+ is 10), or "infinite" (DASH, hlsnative and
+ ISM)
+ --skip-unavailable-fragments Skip unavailable fragments (DASH, hlsnative
+ and ISM)
+ --abort-on-unavailable-fragment Abort downloading when some fragment is not
+ available
+ --keep-fragments Keep downloaded fragments on disk after
+ downloading is finished; fragments are
+ erased by default
  --buffer-size SIZE Size of download buffer (e.g. 1024 or 16K)
  (default is 1024)
  --no-resize-buffer Do not automatically adjust the buffer

@@ -185,6 +199,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
  automatically resized from an initial value
  of SIZE.
  --playlist-reverse Download playlist videos in reverse order
+ --playlist-random Download playlist videos in random order
  --xattr-set-filesize Set file xattribute ytdl.filesize with
  expected file size (experimental)
  --hls-prefer-native Use the native HLS downloader instead of

@@ -207,19 +222,11 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
  --id Use only video ID in file name
  -o, --output TEMPLATE Output filename template, see the "OUTPUT
  TEMPLATE" for all the info
- --autonumber-size NUMBER Specify the number of digits in
- %(autonumber)s when it is present in output
- filename template or --auto-number option
- is given
+ --autonumber-start NUMBER Specify the start value for %(autonumber)s
+ (default is 1)
  --restrict-filenames Restrict filenames to only ASCII
  characters, and avoid "&" and spaces in
  filenames
- -A, --auto-number [deprecated; use -o
- "%(autonumber)s-%(title)s.%(ext)s" ] Number
- downloaded files starting from 00000
- -t, --title [deprecated] Use title in file name
- (default)
- -l, --literal [deprecated] Alias of --title
  -w, --no-overwrites Do not overwrite files
  -c, --continue Force resume of partially downloaded files.
  By default, youtube-dl will resume

@@ -272,8 +279,8 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
  --get-filename Simulate, quiet but print output filename
  --get-format Simulate, quiet but print output format
  -j, --dump-json Simulate, quiet but print JSON information.
- See --output for a description of available
- keys.
+ See the "OUTPUT TEMPLATE" for a description
+ of available keys.
  -J, --dump-single-json Simulate, quiet but print JSON information
  for each command-line argument. If the URL
  refers to a playlist, dump the whole

@@ -373,8 +380,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
  (requires ffmpeg or avconv and ffprobe or
  avprobe)
  --audio-format FORMAT Specify audio format: "best", "aac",
- "vorbis", "mp3", "m4a", "opus", or "wav";
- "best" by default
+ "flac", "mp3", "m4a", "opus", "vorbis", or
+ "wav"; "best" by default; No effect without
+ -x
  --audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert
  a value between 0 (better) and 9 (worse)
  for VBR or a specific bitrate like 128K

@@ -394,12 +402,14 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
  --add-metadata Write metadata to the video file
  --metadata-from-title FORMAT Parse additional metadata like song title /
  artist from the video title. The format
- syntax is the same as --output, the parsed
- parameters replace existing values.
- Additional templates: %(album)s,
- %(artist)s. Example: --metadata-from-title
- "%(artist)s - %(title)s" matches a title
- like "Coldplay - Paradise"
+ syntax is the same as --output. Regular
+ expression with named capture groups may
+ also be used. The parsed parameters replace
+ existing values. Example: --metadata-from-
+ title "%(artist)s - %(title)s" matches a
+ title like "Coldplay - Paradise". Example
+ (regex): --metadata-from-title
+ "(?P<artist>.+?) - (?P<title>.+)"
  --xattrs Write metadata to the video file's xattrs
  (using dublin core and xdg standards)
  --fixup POLICY Automatically correct known faults of the
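The regex form of `--metadata-from-title` documented in the hunk above relies on Python named capture groups: each group name becomes a metadata field. A tiny stand-alone illustration of that parsing step (not youtube-dl's actual implementation):

```python
import re

# The README's regex example applied to its sample title.
match = re.match(r'(?P<artist>.+?) - (?P<title>.+)', 'Coldplay - Paradise')
if match:
    print(match.groupdict())  # {'artist': 'Coldplay', 'title': 'Paradise'}
```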
@@ -419,7 +429,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
  syntax. Example: --exec 'adb push {}
  /sdcard/Music/ && rm {}'
  --convert-subs FORMAT Convert the subtitles to other format
- (currently supported: srt|ass|vtt)
+ (currently supported: srt|ass|vtt|lrc)
  # CONFIGURATION

@@ -450,7 +460,7 @@ You can also use `--config-location` if you want to use custom configuration fil
  ### Authentication with `.netrc` file
- You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by only you:
+ You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by only you:
  ```
  touch $HOME/.netrc
  chmod a-rwx,u+rw $HOME/.netrc

@@ -466,7 +476,10 @@ machine twitch login my_twitch_account_name password my_twitch_password
  ```
  To activate authentication with the `.netrc` file you should pass `--netrc` to youtube-dl or place it in the [configuration file](#configuration).
- On Windows you may also need to setup the `%HOME%` environment variable manually.
+ On Windows you may also need to setup the `%HOME%` environment variable manually. For example:
+ ```
+ set HOME=%USERPROFILE%
+ ```
  # OUTPUT TEMPLATE

@@ -474,87 +487,96 @@ The `-o` option allows users to indicate a template for the output file names.
  **tl;dr:** [navigate me to examples](#output-template-examples).
- The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:
- - `id`: Video identifier
- - `title`: Video title
- - `url`: Video URL
- - `ext`: Video filename extension
- - `alt_title`: A secondary title of the video
- - `display_id`: An alternative identifier for the video
- - `uploader`: Full name of the video uploader
- - `license`: License name the video is licensed under
- - `creator`: The creator of the video
- - `release_date`: The date (YYYYMMDD) when the video was released
- - `timestamp`: UNIX timestamp of the moment the video became available
- - `upload_date`: Video upload date (YYYYMMDD)
- - `uploader_id`: Nickname or id of the video uploader
- - `location`: Physical location where the video was filmed
- - `duration`: Length of the video in seconds
- - `view_count`: How many users have watched the video on the platform
- - `like_count`: Number of positive ratings of the video
- - `dislike_count`: Number of negative ratings of the video
- - `repost_count`: Number of reposts of the video
- - `average_rating`: Average rating give by users, the scale used depends on the webpage
- - `comment_count`: Number of comments on the video
- - `age_limit`: Age restriction for the video (years)
- - `format`: A human-readable description of the format
- - `format_id`: Format code specified by `--format`
- - `format_note`: Additional info about the format
- - `width`: Width of the video
- - `height`: Height of the video
- - `resolution`: Textual description of width and height
- - `tbr`: Average bitrate of audio and video in KBit/s
- - `abr`: Average audio bitrate in KBit/s
- - `acodec`: Name of the audio codec in use
- - `asr`: Audio sampling rate in Hertz
- - `vbr`: Average video bitrate in KBit/s
- - `fps`: Frame rate
- - `vcodec`: Name of the video codec in use
- - `container`: Name of the container format
- - `filesize`: The number of bytes, if known in advance
- - `filesize_approx`: An estimate for the number of bytes
- - `protocol`: The protocol that will be used for the actual download
- - `extractor`: Name of the extractor
- - `extractor_key`: Key name of the extractor
- - `epoch`: Unix epoch when creating the file
- - `autonumber`: Five-digit number that will be increased with each download, starting at zero
- - `playlist`: Name or id of the playlist that contains the video
- - `playlist_index`: Index of the video in the playlist padded with leading zeros according to the total length of the playlist
- - `playlist_id`: Playlist identifier
- - `playlist_title`: Playlist title
+ The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operations. Allowed names along with sequence type are:
+ - `id` (string): Video identifier
+ - `title` (string): Video title
+ - `url` (string): Video URL
+ - `ext` (string): Video filename extension
+ - `alt_title` (string): A secondary title of the video
+ - `display_id` (string): An alternative identifier for the video
+ - `uploader` (string): Full name of the video uploader
+ - `license` (string): License name the video is licensed under
+ - `creator` (string): The creator of the video
+ - `release_date` (string): The date (YYYYMMDD) when the video was released
+ - `timestamp` (numeric): UNIX timestamp of the moment the video became available
+ - `upload_date` (string): Video upload date (YYYYMMDD)
+ - `uploader_id` (string): Nickname or id of the video uploader
+ - `location` (string): Physical location where the video was filmed
+ - `duration` (numeric): Length of the video in seconds
+ - `view_count` (numeric): How many users have watched the video on the platform
+ - `like_count` (numeric): Number of positive ratings of the video
+ - `dislike_count` (numeric): Number of negative ratings of the video
+ - `repost_count` (numeric): Number of reposts of the video
+ - `average_rating` (numeric): Average rating give by users, the scale used depends on the webpage
+ - `comment_count` (numeric): Number of comments on the video
+ - `age_limit` (numeric): Age restriction for the video (years)
+ - `is_live` (boolean): Whether this video is a live stream or a fixed-length video
+ - `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL
+ - `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL
+ - `format` (string): A human-readable description of the format
+ - `format_id` (string): Format code specified by `--format`
+ - `format_note` (string): Additional info about the format
+ - `width` (numeric): Width of the video
+ - `height` (numeric): Height of the video
+ - `resolution` (string): Textual description of width and height
+ - `tbr` (numeric): Average bitrate of audio and video in KBit/s
+ - `abr` (numeric): Average audio bitrate in KBit/s
+ - `acodec` (string): Name of the audio codec in use
+ - `asr` (numeric): Audio sampling rate in Hertz
+ - `vbr` (numeric): Average video bitrate in KBit/s
+ - `fps` (numeric): Frame rate
+ - `vcodec` (string): Name of the video codec in use
+ - `container` (string): Name of the container format
+ - `filesize` (numeric): The number of bytes, if known in advance
+ - `filesize_approx` (numeric): An estimate for the number of bytes
+ - `protocol` (string): The protocol that will be used for the actual download
+ - `extractor` (string): Name of the extractor
+ - `extractor_key` (string): Key name of the extractor
+ - `epoch` (numeric): Unix epoch when creating the file
+ - `autonumber` (numeric): Five-digit number that will be increased with each download, starting at zero
+ - `playlist` (string): Name or id of the playlist that contains the video
+ - `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist
+ - `playlist_id` (string): Playlist identifier
+ - `playlist_title` (string): Playlist title
+ - `playlist_uploader` (string): Full name of the playlist uploader
+ - `playlist_uploader_id` (string): Nickname or id of the playlist uploader
  Available for the video that belongs to some logical chapter or section:
- - `chapter`: Name or title of the chapter the video belongs to
- - `chapter_number`: Number of the chapter the video belongs to
- - `chapter_id`: Id of the chapter the video belongs to
+ - `chapter` (string): Name or title of the chapter the video belongs to
+ - `chapter_number` (numeric): Number of the chapter the video belongs to
+ - `chapter_id` (string): Id of the chapter the video belongs to
  Available for the video that is an episode of some series or programme:
- - `series`: Title of the series or programme the video episode belongs to
- - `season`: Title of the season the video episode belongs to
- - `season_number`: Number of the season the video episode belongs to
- - `season_id`: Id of the season the video episode belongs to
- - `episode`: Title of the video episode
- - `episode_number`: Number of the video episode within a season
- - `episode_id`: Id of the video episode
+ - `series` (string): Title of the series or programme the video episode belongs to
+ - `season` (string): Title of the season the video episode belongs to
+ - `season_number` (numeric): Number of the season the video episode belongs to
+ - `season_id` (string): Id of the season the video episode belongs to
+ - `episode` (string): Title of the video episode
+ - `episode_number` (numeric): Number of the video episode within a season
+ - `episode_id` (string): Id of the video episode
  Available for the media that is a track or a part of a music album:
- - `track`: Title of the track
- - `track_number`: Number of the track within an album or a disc
- - `track_id`: Id of the track
- - `artist`: Artist(s) of the track
- - `genre`: Genre(s) of the track
- - `album`: Title of the album the track belongs to
- - `album_type`: Type of the album
- - `album_artist`: List of all artists appeared on the album
- - `disc_number`: Number of the disc or other physical medium the track belongs to
- - `release_year`: Year (YYYY) when the album was released
+ - `track` (string): Title of the track
+ - `track_number` (numeric): Number of the track within an album or a disc
+ - `track_id` (string): Id of the track
+ - `artist` (string): Artist(s) of the track
+ - `genre` (string): Genre(s) of the track
+ - `album` (string): Title of the album the track belongs to
+ - `album_type` (string): Type of the album
+ - `album_artist` (string): List of all artists appeared on the album
+ - `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
+ - `release_year` (numeric): Year (YYYY) when the album was released
  Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with `NA`.
  For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj`, this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory.
+ For numeric sequences you can use numeric related formatting, for example, `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`.
  Output templates can also contain arbitrary hierarchical path, e.g. `-o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s'` which will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you.
  To use percent literals in an output template use `%%`. To output to stdout use `-o -`.
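The rewritten section above points out that output templates are ordinary Python %-formatting expressions over the metadata dict, which is what makes type-aware forms like `%(view_count)05d` work. A self-contained illustration with made-up values:

```python
# Plain %-formatting over a dict of fields, the mechanism output templates use.
info = {
    'title': 'youtube-dl test video',
    'id': 'BaW_jenozKcj',
    'ext': 'mp4',
    'view_count': 42,
    'playlist_index': 3,
}

print('%(title)s-%(id)s.%(ext)s' % info)           # youtube-dl test video-BaW_jenozKcj.mp4
print('%(view_count)05d' % info)                   # 00042
print('%(playlist_index)03d - %(title)s' % info)   # 003 - youtube-dl test video
```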
@ -569,7 +591,7 @@ If you are using an output template inside a Windows batch file then you must es
#### Output template examples #### Output template examples
Note on Windows you may need to use double quotes instead of single.
Note that on Windows you may need to use double quotes instead of single.
```bash ```bash
$ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc $ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc
@ -588,7 +610,7 @@ $ youtube-dl -o '%(uploader)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)
$ youtube-dl -u user -p password -o '~/MyVideos/%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s' https://www.udemy.com/java-tutorial/ $ youtube-dl -u user -p password -o '~/MyVideos/%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s' https://www.udemy.com/java-tutorial/
# Download entire series season keeping each series and each season in separate directory under C:/MyVideos # Download entire series season keeping each series and each season in separate directory under C:/MyVideos
$ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" http://videomore.ru/kino_v_detalayah/5_sezon/367617
$ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" https://videomore.ru/kino_v_detalayah/5_sezon/367617
# Stream the video being downloaded to stdout # Stream the video being downloaded to stdout
$ youtube-dl -o - BaW_jenozKc $ youtube-dl -o - BaW_jenozKc
@ -639,7 +661,7 @@ Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begin
- `acodec`: Name of the audio codec in use - `acodec`: Name of the audio codec in use
- `vcodec`: Name of the video codec in use - `vcodec`: Name of the video codec in use
- `container`: Name of the container format - `container`: Name of the container format
- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `m3u8`, or `m3u8_native`)
- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
- `format_id`: A short description of the format
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by the particular extractor, i.e. the metadata offered by the video hoster.
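As a rough illustration of these fields combined with the comparison operators above (assuming the video actually offers such a format):

```bash
# Prefer the best mp4 format that is delivered over plain HTTP(S)
$ youtube-dl -f 'best[ext=mp4][protocol^=http]' BaW_jenozKc
```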
@ -656,7 +678,7 @@ If you want to preserve the old format selection behavior (prior to youtube-dl 2
#### Format selection examples
Note on Windows you may need to use double quotes instead of single.
Note that on Windows you may need to use double quotes instead of single.
```bash
# Download best mp4 format available or any other best if no mp4 available
@ -701,17 +723,17 @@ $ youtube-dl --dateafter 20000101 --datebefore 20091231
### How do I update youtube-dl?
If you've followed [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`).
If you've followed [our manual installation instructions](https://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`).
If you have used pip, a simple `sudo pip install -U youtube-dl` is sufficient to update.
If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. Simply go to http://yt-dl.org/ to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distribution serves a really outdated version. You can (and should) complain to your distribution in their bugtracker or support forum.
If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. Simply go to https://yt-dl.org to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distribution serves a really outdated version. You can (and should) complain to your distribution in their bugtracker or support forum.
As a last resort, you can also uninstall the version installed by your package manager and follow our manual installation instructions. For that, remove the distribution's package, with a line like
sudo apt-get remove -y youtube-dl
Afterwards, simply follow [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html):
Afterwards, simply follow [our manual installation instructions](https://rg3.github.io/youtube-dl/download.html):
```
sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl
@ -751,11 +773,11 @@ Apparently YouTube requires you to pass a CAPTCHA test if you download too much.
youtube-dl works fine on its own on most sites. However, if you want to convert video/audio, you'll need [avconv](https://libav.org/) or [ffmpeg](https://www.ffmpeg.org/). On some sites - most notably YouTube - videos can be retrieved in a higher quality format without sound. youtube-dl will detect whether avconv/ffmpeg is present and automatically pick the best option.
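For example, a typical task that relies on ffmpeg/avconv being installed is audio extraction; a minimal sketch with the usual test video:

```bash
# Extract the audio track and convert it to mp3 via ffmpeg/avconv
$ youtube-dl -x --audio-format mp3 BaW_jenozKc
```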
Videos or video formats streamed via RTMP protocol can only be downloaded when [rtmpdump](https://rtmpdump.mplayerhq.hu/) is installed. Downloading MMS and RTSP videos requires either [mplayer](http://mplayerhq.hu/) or [mpv](https://mpv.io/) to be installed.
Videos or video formats streamed via RTMP protocol can only be downloaded when [rtmpdump](https://rtmpdump.mplayerhq.hu/) is installed. Downloading MMS and RTSP videos requires either [mplayer](https://mplayerhq.hu/) or [mpv](https://mpv.io/) to be installed.
### I have downloaded a video but how can I play it?
Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org/) or [mplayer](http://www.mplayerhq.hu/).
Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](https://www.videolan.org/) or [mplayer](https://www.mplayerhq.hu/).
### I extracted a video URL with `-g`, but it does not play on another machine / in my web browser.
@ -830,10 +852,10 @@ Use the `-o` to specify an [output template](#output-template), for example `-o
### How do I download a video starting with a `-`?
Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the options with `--`:
Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the options with `--`:
youtube-dl -- -wNyEUrxzFU
youtube-dl "http://www.youtube.com/watch?v=-wNyEUrxzFU"
youtube-dl "https://www.youtube.com/watch?v=-wNyEUrxzFU"
### How do I pass cookies to youtube-dl?
@ -841,15 +863,15 @@ Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
In order to extract cookies from your browser, use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox).
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, Mac OS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
Passing cookies to youtube-dl is a good way to work around login when a particular extractor does not implement it explicitly. Another use case is working around [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) some websites require you to solve in particular cases in order to get access (e.g. YouTube, CloudFlare).
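A minimal sketch, assuming you have exported your cookies to an illustrative path (the URL is a placeholder):

```bash
# Reuse exported browser cookies for a site that needs a logged-in session
$ youtube-dl --cookies /path/to/cookies.txt 'https://www.example.com/video/1234567'
```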
### How do I stream directly to media player?
You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe former to latter. For example, streaming to [vlc](http://www.videolan.org/) can be achieved with:
You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe former to latter. For example, streaming to [vlc](https://www.videolan.org/) can be achieved with:
youtube-dl -o - "http://www.youtube.com/watch?v=BaW_jenozKcj" | vlc -
youtube-dl -o - "https://www.youtube.com/watch?v=BaW_jenozKcj" | vlc -
### How do I download only new videos from a playlist?
@ -869,7 +891,7 @@ When youtube-dl detects an HLS video, it can download it either with the built-i
When youtube-dl knows that one particular downloader works better for a given website, that downloader will be picked. Otherwise, youtube-dl will pick the best downloader for general compatibility, which at the moment happens to be ffmpeg. This choice may change in future versions of youtube-dl, with improvements of the built-in downloader and/or ffmpeg.
In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](http://rg3.github.io/youtube-dl/supportedsites.html) cannot mandate one specific downloader.
In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](https://rg3.github.io/youtube-dl/supportedsites.html)) cannot mandate one specific downloader.
If you put either `--hls-prefer-native` or `--hls-prefer-ffmpeg` into your configuration, a different subset of videos will fail to download correctly. Instead, it is much better to [file an issue](https://yt-dl.org/bug) or a pull request which details why the native or the ffmpeg HLS downloader is a better choice for your use case.
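If you only need to override the downloader for a single problematic video rather than globally, a one-off invocation keeps the change local (the URL is a placeholder):

```bash
# Force the native HLS downloader for this run only
$ youtube-dl --hls-prefer-native 'https://www.example.com/video/1234567'
```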
@ -895,7 +917,7 @@ Feel free to bump the issue from time to time by writing a small comment ("Issue
### How can I detect whether a given URL is supported by youtube-dl?
For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from https://example.com/video/1234567 to https://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
@ -909,7 +931,7 @@ youtube-dl is an open-source project manned by too few volunteers, so we'd rathe
# DEVELOPER INSTRUCTIONS
Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
Most users do not need to build youtube-dl and can [download the builds](https://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
To run youtube-dl as a developer, you don't need to build anything either. Simply execute
@ -921,6 +943,8 @@ To run the test, simply invoke your favorite test runner, or execute a test file
python test/test_download.py
nosetests
See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.
If you want to create a build of youtube-dl yourself, you'll need
* python
@ -957,7 +981,7 @@ After you have ensured this site is distributing its content legally, you can fo
class YourExtractorIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
    _TEST = {
'url': 'http://yourextractor.com/watch/42',
'url': 'https://yourextractor.com/watch/42',
'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
'info_dict': {
'id': '42',
@ -988,10 +1012,10 @@ After you have ensured this site is distributing its content legally, you can fo
}
```
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
$ git add youtube_dl/extractor/extractors.py
$ git add youtube_dl/extractor/yourextractor.py
@ -1147,10 +1171,10 @@ import youtube_dl
ydl_opts = {}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```
Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L129-L279). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
@ -1186,19 +1210,19 @@ ydl_opts = {
'progress_hooks': [my_hook],
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```
# BUGS
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](http://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
**Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
```
$ youtube-dl -v <your command line>
[debug] System config: []
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2015.12.06
[debug] Git HEAD: 135392e
@ -1229,7 +1253,7 @@ For bug reports, this means that your report should contain the *complete* outpu
If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `http://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `http://www.youtube.com/`) is *not* an example URL.
**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL.
### Are you using the latest version?


+ 2
- 2
devscripts/check-porn.py

@ -14,7 +14,7 @@ import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import get_testcases
from test.helper import gettestcases
from youtube_dl.utils import compat_urllib_parse_urlparse
from youtube_dl.utils import compat_urllib_request
@ -24,7 +24,7 @@ if len(sys.argv) > 1:
else:
METHOD = 'EURISTIC'
for test in get_testcases():
for test in gettestcases():
if METHOD == 'EURISTIC':
try:
webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()


+ 5
- 0
devscripts/install_jython.sh

@ -0,0 +1,5 @@
#!/bin/bash
wget http://central.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar
java -jar jython-installer-2.7.1.jar -s -d "$HOME/jython"
$HOME/jython/bin/jython -m pip install nose

+ 2
- 1
devscripts/make_lazy_extractors.py

@ -1,6 +1,7 @@
from __future__ import unicode_literals, print_function
from inspect import getsource
import io
import os
from os.path import dirname as dirn
import sys
@ -95,5 +96,5 @@ module_contents.append(
module_src = '\n'.join(module_contents) + '\n'
with open(lazy_extractors_filename, 'wt') as f:
with io.open(lazy_extractors_filename, 'wt', encoding='utf-8') as f:
f.write(module_src)

+ 1
- 1
devscripts/prepare_manpage.py

@ -8,7 +8,7 @@ import re
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
README_FILE = os.path.join(ROOT_DIR, 'README.md')
PREFIX = '''%YOUTUBE-DL(1)
PREFIX = r'''%YOUTUBE-DL(1)
# NAME


+ 22
- 0
devscripts/run_tests.sh

@ -0,0 +1,22 @@
#!/bin/bash
# Keep this list in sync with the `offlinetest` target in Makefile
DOWNLOAD_TESTS="age_restriction|download|iqiyi_sdk_interpreter|socks|subtitles|write_annotations|youtube_lists|youtube_signature"
test_set=""
multiprocess_args=""
case "$YTDL_TEST_SET" in
core)
test_set="-I test_($DOWNLOAD_TESTS)\.py"
;;
download)
test_set="-I test_(?!$DOWNLOAD_TESTS).+\.py"
multiprocess_args="--processes=4 --process-timeout=540"
;;
*)
break
;;
esac
nosetests test --verbose $test_set $multiprocess_args

+ 157
- 50
docs/supportedsites.md

@ -3,14 +3,15 @@
- **1up.com** - **1up.com**
- **20min** - **20min**
- **220.ro** - **220.ro**
- **22tracks:genre**
- **22tracks:track**
- **23video**
- **24video** - **24video**
- **3qsdn**: 3Q SDN - **3qsdn**: 3Q SDN
- **3sat** - **3sat**
- **4tube** - **4tube**
- **56.com** - **56.com**
- **5min** - **5min**
- **6play**
- **7plus**
- **8tracks** - **8tracks**
- **91porn** - **91porn**
- **9c9media** - **9c9media**
@ -27,21 +28,25 @@
- **acast** - **acast**
- **acast:channel** - **acast:channel**
- **AddAnime** - **AddAnime**
- **ADN**: Anime Digital Network
- **AdobeTV** - **AdobeTV**
- **AdobeTVChannel** - **AdobeTVChannel**
- **AdobeTVShow** - **AdobeTVShow**
- **AdobeTVVideo** - **AdobeTVVideo**
- **AdultSwim** - **AdultSwim**
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
- **AfreecaTV**: afreecatv.com
- **afreecatv**: afreecatv.com
- **AirMozilla** - **AirMozilla**
- **AliExpressLive**
- **AlJazeera** - **AlJazeera**
- **Allocine** - **Allocine**
- **AlphaPorno** - **AlphaPorno**
- **AMCNetworks** - **AMCNetworks**
- **anderetijden**: npo.nl and ntr.nl
- **AmericasTestKitchen**
- **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **AnimeOnDemand** - **AnimeOnDemand**
- **anitube.se** - **anitube.se**
- **Anvato**
- **AnySex** - **AnySex**
- **Aparat** - **Aparat**
- **AppleConnect** - **AppleConnect**
@ -63,8 +68,11 @@
- **arte.tv:info** - **arte.tv:info**
- **arte.tv:magazine** - **arte.tv:magazine**
- **arte.tv:playlist** - **arte.tv:playlist**
- **AsianCrush**
- **AsianCrushPlaylist**
- **AtresPlayer** - **AtresPlayer**
- **ATTTechChannel** - **ATTTechChannel**
- **ATVAt**
- **AudiMedia** - **AudiMedia**
- **AudioBoom** - **AudioBoom**
- **audiomack** - **audiomack**
@ -74,13 +82,16 @@
- **awaan:live** - **awaan:live**
- **awaan:season** - **awaan:season**
- **awaan:video** - **awaan:video**
- **Azubu**
- **AzubuLive**
- **AZMedien**: AZ Medien videos
- **AZMedienPlaylist**: AZ Medien playlists
- **AZMedienShowPlaylist**: AZ Medien show playlists
- **BaiduVideo**: 百度视频 - **BaiduVideo**: 百度视频
- **bambuser** - **bambuser**
- **bambuser:channel** - **bambuser:channel**
- **Bandcamp** - **Bandcamp**
- **Bandcamp:album** - **Bandcamp:album**
- **Bandcamp:weekly**
- **bangumi.bilibili.com**: BiliBili番剧
- **bbc**: BBC - **bbc**: BBC
- **bbc.co.uk**: BBC iPlayer - **bbc.co.uk**: BBC iPlayer
- **bbc.co.uk:article**: BBC articles - **bbc.co.uk:article**: BBC articles
@ -101,23 +112,25 @@
- **blinkx** - **blinkx**
- **Bloomberg** - **Bloomberg**
- **BokeCC** - **BokeCC**
- **BostonGlobe**
- **Bpb**: Bundeszentrale für politische Bildung - **Bpb**: Bundeszentrale für politische Bildung
- **BR**: Bayerischer Rundfunk Mediathek
- **BR**: Bayerischer Rundfunk
- **BravoTV** - **BravoTV**
- **Break** - **Break**
- **brightcove:legacy** - **brightcove:legacy**
- **brightcove:new** - **brightcove:new**
- **BRMediathek**: Bayerischer Rundfunk Mediathek
- **bt:article**: Bergens Tidende Articles - **bt:article**: Bergens Tidende Articles
- **bt:vestlendingen**: Bergens Tidende - Vestlendingen - **bt:vestlendingen**: Bergens Tidende - Vestlendingen
- **BuzzFeed** - **BuzzFeed**
- **BYUtv** - **BYUtv**
- **BYUtvEvent**
- **Camdemy** - **Camdemy**
- **CamdemyFolder** - **CamdemyFolder**
- **CamWithHer** - **CamWithHer**
- **canalc2.tv** - **canalc2.tv**
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv - **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
- **Canvas** - **Canvas**
- **CanvasEen**: canvas.be and een.be
- **CarambaTV** - **CarambaTV**
- **CarambaTVPage** - **CarambaTVPage**
- **CartoonNetwork** - **CartoonNetwork**
@ -135,6 +148,7 @@
- **CCTV**: 央视网 - **CCTV**: 央视网
- **CDA** - **CDA**
- **CeskaTelevize** - **CeskaTelevize**
- **CeskaTelevizePorady**
- **channel9**: Channel 9 - **channel9**: Channel 9
- **CharlieRose** - **CharlieRose**
- **Chaturbate** - **Chaturbate**
@ -142,8 +156,9 @@
- **chirbit** - **chirbit**
- **chirbit:profile** - **chirbit:profile**
- **Cinchcast** - **Cinchcast**
- **Clipfish**
- **CJSW**
- **cliphunter** - **cliphunter**
- **Clippit**
- **ClipRs** - **ClipRs**
- **Clipsyndicate** - **Clipsyndicate**
- **CloserToTruth** - **CloserToTruth**
@ -156,13 +171,13 @@
- **CNN** - **CNN**
- **CNNArticle** - **CNNArticle**
- **CNNBlogs** - **CNNBlogs**
- **CollegeRama**
- **ComCarCoff** - **ComCarCoff**
- **ComedyCentral** - **ComedyCentral**
- **ComedyCentralFullEpisodes** - **ComedyCentralFullEpisodes**
- **ComedyCentralShortname** - **ComedyCentralShortname**
- **ComedyCentralTV** - **ComedyCentralTV**
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
- **Corus**
- **Coub** - **Coub**
- **Cracked** - **Cracked**
- **Crackle** - **Crackle**
@ -183,7 +198,8 @@
- **dailymotion** - **dailymotion**
- **dailymotion:playlist** - **dailymotion:playlist**
- **dailymotion:user** - **dailymotion:user**
- **DailymotionCloud**
- **DaisukiMotto**
- **DaisukiMottoPlaylist**
- **daum.net** - **daum.net**
- **daum.net:clip** - **daum.net:clip**
- **daum.net:playlist** - **daum.net:playlist**
@ -198,15 +214,22 @@
- **Digiteka** - **Digiteka**
- **Discovery** - **Discovery**
- **DiscoveryGo** - **DiscoveryGo**
- **DiscoveryGoPlaylist**
- **DiscoveryNetworksDe**
- **DiscoveryVR**
- **Disney**
- **Dotsub** - **Dotsub**
- **DouyuShow**
- **DouyuTV**: 斗鱼 - **DouyuTV**: 斗鱼
- **DPlay** - **DPlay**
- **DPlayIt**
- **dramafever** - **dramafever**
- **dramafever:series** - **dramafever:series**
- **DRBonanza** - **DRBonanza**
- **Dropbox** - **Dropbox**
- **DrTuber** - **DrTuber**
- **DRTV**
- **drtv**
- **drtv:live**
- **Dumpert** - **Dumpert**
- **dvtv**: http://video.aktualne.cz/ - **dvtv**: http://video.aktualne.cz/
- **dw** - **dw**
@ -215,11 +238,13 @@
- **EbaumsWorld** - **EbaumsWorld**
- **EchoMsk** - **EchoMsk**
- **egghead:course**: egghead.io course - **egghead:course**: egghead.io course
- **egghead:lesson**: egghead.io lesson
- **eHow** - **eHow**
- **Einthusan** - **Einthusan**
- **eitb.tv** - **eitb.tv**
- **EllenTV**
- **EllenTV:clips**
- **EllenTube**
- **EllenTubePlaylist**
- **EllenTubeVideo**
- **ElPais**: El País - **ElPais**: El País
- **Embedly** - **Embedly**
- **EMPFlix** - **EMPFlix**
@ -230,6 +255,7 @@
- **ESPN** - **ESPN**
- **ESPNArticle** - **ESPNArticle**
- **EsriVideo** - **EsriVideo**
- **ETOnline**
- **Europa** - **Europa**
- **EveryonesMixtape** - **EveryonesMixtape**
- **ExpoTV** - **ExpoTV**
@ -241,8 +267,10 @@
- **fc2** - **fc2**
- **fc2:embed** - **fc2:embed**
- **Fczenit** - **Fczenit**
- **fernsehkritik.tv**
- **Firstpost**
- **filmon**
- **filmon:channel**
- **Filmweb**
- **FiveThirtyEight**
- **FiveTV** - **FiveTV**
- **Flickr** - **Flickr**
- **Flipagram** - **Flipagram**
@ -256,23 +284,27 @@
- **foxnews:article** - **foxnews:article**
- **foxnews:insider** - **foxnews:insider**
- **FoxSports** - **FoxSports**
- **france2.fr:generation-quoi**
- **france2.fr:generation-what**
- **FranceCulture** - **FranceCulture**
- **FranceInter** - **FranceInter**
- **francetv**: France 2, 3, 4, 5 and Ô
- **FranceTV**
- **FranceTVEmbed**
- **francetvinfo.fr** - **francetvinfo.fr**
- **Freesound** - **Freesound**
- **freespeech.org** - **freespeech.org**
- **FreshLive**
- **Funimation** - **Funimation**
- **Funk**
- **FunnyOrDie** - **FunnyOrDie**
- **Fusion** - **Fusion**
- **Fux**
- **FXNetworks** - **FXNetworks**
- **GameInformer** - **GameInformer**
- **GameOne** - **GameOne**
- **gameone:playlist** - **gameone:playlist**
- **Gamersyde**
- **GameSpot** - **GameSpot**
- **GameStar** - **GameStar**
- **Gaskrank**
- **Gazeta** - **Gazeta**
- **GDCVault** - **GDCVault**
- **generic**: Generic downloader that works on some sites - **generic**: Generic downloader that works on some sites
@ -283,22 +315,22 @@
- **Globo** - **Globo**
- **GloboArticle** - **GloboArticle**
- **Go** - **Go**
- **Go90**
- **GodTube** - **GodTube**
- **GodTV**
- **Golem** - **Golem**
- **GoogleDrive** - **GoogleDrive**
- **Goshgay** - **Goshgay**
- **GPUTechConf** - **GPUTechConf**
- **Groupon** - **Groupon**
- **Hark** - **Hark**
- **HBO**
- **HBOEpisode**
- **hbo**
- **hbo:episode**
- **HearThisAt** - **HearThisAt**
- **Heise** - **Heise**
- **HellPorno** - **HellPorno**
- **Helsinki**: helsinki.fi - **Helsinki**: helsinki.fi
- **HentaiStigma** - **HentaiStigma**
- **HGTV**
- **hetklokhuis**
- **hgtv.com:show** - **hgtv.com:show**
- **HistoricFilms** - **HistoricFilms**
- **history:topic**: History.com Topic - **history:topic**: History.com Topic
@ -308,6 +340,7 @@
- **HornBunny** - **HornBunny**
- **HotNewHipHop** - **HotNewHipHop**
- **HotStar** - **HotStar**
- **hotstar:playlist**
- **Howcast** - **Howcast**
- **HowStuffWorks** - **HowStuffWorks**
- **HRTi** - **HRTi**
@ -328,10 +361,13 @@
- **InfoQ** - **InfoQ**
- **Instagram** - **Instagram**
- **instagram:user**: Instagram user profile - **instagram:user**: Instagram user profile
- **Internazionale**
- **InternetVideoArchive** - **InternetVideoArchive**
- **IPrima** - **IPrima**
- **iqiyi**: 爱奇艺 - **iqiyi**: 爱奇艺
- **Ir90Tv** - **Ir90Tv**
- **ITTF**
- **ITV**
- **ivi**: ivi.ru - **ivi**: ivi.ru
- **ivi:compilation**: ivi.ru compilations - **ivi:compilation**: ivi.ru compilations
- **ivideon**: Ivideon TV - **ivideon**: Ivideon TV
@ -340,9 +376,11 @@
- **Jamendo** - **Jamendo**
- **JamendoAlbum** - **JamendoAlbum**
- **JeuxVideo** - **JeuxVideo**
- **Joj**
- **Jove** - **Jove**
- **jpopsuki.tv** - **jpopsuki.tv**
- **JWPlatform** - **JWPlatform**
- **Kakao**
- **Kaltura** - **Kaltura**
- **Kamcord** - **Kamcord**
- **KanalPlay**: Kanal 5/9/11 Play - **KanalPlay**: Kanal 5/9/11 Play
@ -386,6 +424,7 @@
- **limelight:channel_list** - **limelight:channel_list**
- **LiTV** - **LiTV**
- **LiveLeak** - **LiveLeak**
- **LiveLeakEmbed**
- **livestream** - **livestream**
- **livestream:original** - **livestream:original**
- **LnkGo** - **LnkGo**
@ -402,9 +441,16 @@
- **MakerTV** - **MakerTV**
- **mangomolo:live** - **mangomolo:live**
- **mangomolo:video** - **mangomolo:video**
- **ManyVids**
- **massengeschmack.tv**
- **MatchTV** - **MatchTV**
- **MDR**: MDR.DE and KiKA - **MDR**: MDR.DE and KiKA
- **media.ccc.de** - **media.ccc.de**
- **Medialaan**
- **Mediaset**
- **Mediasite**
- **Medici**
- **megaphone.fm**: megaphone.fm embedded players
- **Meipai**: 美拍 - **Meipai**: 美拍
- **MelonVOD** - **MelonVOD**
- **META** - **META**
@ -422,6 +468,8 @@
- **mixcloud:playlist** - **mixcloud:playlist**
- **mixcloud:stream** - **mixcloud:stream**
- **mixcloud:user** - **mixcloud:user**
- **Mixer:live**
- **Mixer:vod**
- **MLB** - **MLB**
- **Mnet** - **Mnet**
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
@ -435,11 +483,11 @@
- **MovieFap** - **MovieFap**
- **Moviezine** - **Moviezine**
- **MovingImage** - **MovingImage**
- **MPORA**
- **MSN** - **MSN**
- **mtg**: MTG services - **mtg**: MTG services
- **mtv** - **mtv**
- **mtv.de** - **mtv.de**
- **mtv81**
- **mtv:video** - **mtv:video**
- **mtvservices:embedded** - **mtvservices:embedded**
- **MuenchenTV**: münchen.tv - **MuenchenTV**: münchen.tv
@ -452,7 +500,6 @@
- **MySpace:album** - **MySpace:album**
- **MySpass** - **MySpass**
- **Myvi** - **Myvi**
- **myvideo** (Currently broken)
- **MyVidster** - **MyVidster**
- **n-tv.de** - **n-tv.de**
- **natgeo** - **natgeo**
@ -479,9 +526,13 @@
- **netease:song**: 网易云音乐 - **netease:song**: 网易云音乐
- **Netzkino** - **Netzkino**
- **Newgrounds** - **Newgrounds**
- **NewgroundsPlaylist**
- **Newstube** - **Newstube**
- **NextMedia**: 蘋果日報 - **NextMedia**: 蘋果日報
- **NextMediaActionNews**: 蘋果日報 - 動新聞 - **NextMediaActionNews**: 蘋果日報 - 動新聞
- **NextTV**: 壹電視
- **Nexx**
- **NexxEmbed**
- **nfb**: National Film Board of Canada - **nfb**: National Film Board of Canada
- **nfl.com** - **nfl.com**
- **NhkVod** - **NhkVod**
@ -491,25 +542,28 @@
- **nhl.com:videocenter:category**: NHL videocenter category - **nhl.com:videocenter:category**: NHL videocenter category
- **nick.com** - **nick.com**
- **nick.de** - **nick.de**
- **nickelodeon:br**
- **nickelodeonru**
- **nicknight** - **nicknight**
- **niconico**: ニコニコ動画 - **niconico**: ニコニコ動画
- **NiconicoPlaylist** - **NiconicoPlaylist**
- **Nintendo** - **Nintendo**
- **njoy**: N-JOY - **njoy**: N-JOY
- **njoy:embed** - **njoy:embed**
- **NJPWWorld**: 新日本プロレスワールド
- **NobelPrize** - **NobelPrize**
- **Noco** - **Noco**
- **NonkTube**
- **Noovo**
- **Normalboots** - **Normalboots**
- **NosVideo** - **NosVideo**
- **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz - **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
- **nowness** - **nowness**
- **nowness:playlist** - **nowness:playlist**
- **nowness:series** - **nowness:series**
- **NowTV** (Currently broken)
- **NowTVList**
- **nowvideo**: NowVideo - **nowvideo**: NowVideo
- **Noz** - **Noz**
- **npo**: npo.nl and ntr.nl
- **npo**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **npo.nl:live** - **npo.nl:live**
- **npo.nl:radio** - **npo.nl:radio**
- **npo.nl:radio:fragment** - **npo.nl:radio:fragment**
@ -520,6 +574,7 @@
- **NRKTV**: NRK TV and NRK Radio - **NRKTV**: NRK TV and NRK Radio
- **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte - **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte
- **NRKTVEpisodes** - **NRKTVEpisodes**
- **NRKTVSeries**
- **ntv.ru** - **ntv.ru**
- **Nuvid** - **Nuvid**
- **NYTimes** - **NYTimes**
@ -531,24 +586,31 @@
- **OktoberfestTV** - **OktoberfestTV**
- **on.aol.com** - **on.aol.com**
- **OnDemandKorea** - **OnDemandKorea**
- **onet.pl**
- **onet.tv** - **onet.tv**
- **onet.tv:channel** - **onet.tv:channel**
- **OnetMVP**
- **OnionStudios** - **OnionStudios**
- **Ooyala** - **Ooyala**
- **OoyalaExternal** - **OoyalaExternal**
- **Openload** - **Openload**
- **OraTV** - **OraTV**
- **orf:fm4**: radio FM4 - **orf:fm4**: radio FM4
- **orf:fm4:story**: fm4.orf.at stories
- **orf:iptv**: iptv.ORF.at - **orf:iptv**: iptv.ORF.at
- **orf:oe1**: Radio Österreich 1 - **orf:oe1**: Radio Österreich 1
- **orf:tvthek**: ORF TVthek - **orf:tvthek**: ORF TVthek
- **PacktPub**
- **PacktPubCourse**
- **PandaTV**: 熊猫TV - **PandaTV**: 熊猫TV
- **pandora.tv**: 판도라TV - **pandora.tv**: 판도라TV
- **parliamentlive.tv**: UK parliament videos - **parliamentlive.tv**: UK parliament videos
- **Patreon** - **Patreon**
- **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC) - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona 
PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
- **pcmag** - **pcmag**
- **PearVideo**
- **People** - **People**
- **PerformGroup**
- **periscope**: Periscope - **periscope**: Periscope
- **periscope:user**: Periscope user videos - **periscope:user**: Periscope user videos
- **PhilharmonieDeParis**: Philharmonie de Paris - **PhilharmonieDeParis**: Philharmonie de Paris
@ -565,12 +627,14 @@
- **pluralsight** - **pluralsight**
- **pluralsight:course** - **pluralsight:course**
- **plus.google**: Google Plus - **plus.google**: Google Plus
- **pluzz.francetv.fr**
- **podomatic** - **podomatic**
- **Pokemon** - **Pokemon**
- **PolskieRadio** - **PolskieRadio**
- **PolskieRadioCategory** - **PolskieRadioCategory**
- **PopcornTV**
- **PornCom** - **PornCom**
- **PornerBros**
- **PornFlip**
- **PornHd** - **PornHd**
- **PornHub**: PornHub and Thumbzilla - **PornHub**: PornHub and Thumbzilla
- **PornHubPlaylist** - **PornHubPlaylist**
@ -578,6 +642,7 @@
- **Pornotube** - **Pornotube**
- **PornoVoisines** - **PornoVoisines**
- **PornoXO** - **PornoXO**
- **PornTube**
- **PressTV** - **PressTV**
- **PrimeShareTV** - **PrimeShareTV**
- **PromptFile** - **PromptFile**
@ -598,9 +663,14 @@
- **radiofrance** - **radiofrance**
- **RadioJavan** - **RadioJavan**
- **Rai** - **Rai**
- **RaiTV**
- **RaiPlay**
- **RaiPlayLive**
- **RaiPlayPlaylist**
- **RBMARadio** - **RBMARadio**
- **RDS**: RDS.ca - **RDS**: RDS.ca
- **RedBullTV**
- **Reddit**
- **RedditR**
- **RedTube** - **RedTube**
- **RegioTV** - **RegioTV**
- **RENTV** - **RENTV**
@ -622,7 +692,9 @@
- **rte**: Raidió Teilifís Éireann TV - **rte**: Raidió Teilifís Éireann TV
- **rte:radio**: Raidió Teilifís Éireann radio - **rte:radio**: Raidió Teilifís Éireann radio
- **rtl.nl**: rtl.nl and rtlxl.nl - **rtl.nl**: rtl.nl and rtlxl.nl
- **RTL2**
- **rtl2**
- **rtl2:you**
- **rtl2:you:series**
- **RTP** - **RTP**
- **RTS**: RTS.ch - **RTS**: RTS.ch
- **rtve.es:alacarta**: RTVE a la carta - **rtve.es:alacarta**: RTVE a la carta
@ -638,34 +710,38 @@
- **rutube:embed**: Rutube embedded videos - **rutube:embed**: Rutube embedded videos
- **rutube:movie**: Rutube movies - **rutube:movie**: Rutube movies
- **rutube:person**: Rutube person videos - **rutube:person**: Rutube person videos
- **rutube:playlist**: Rutube playlists
- **RUTV**: RUTV.RU - **RUTV**: RUTV.RU
- **Ruutu** - **Ruutu**
- **Ruv**
- **safari**: safaribooksonline.com online video - **safari**: safaribooksonline.com online video
- **safari:api** - **safari:api**
- **safari:course**: safaribooksonline.com online courses - **safari:course**: safaribooksonline.com online courses
- **Sandia**: Sandia National Laboratories
- **Sapo**: SAPO Vídeos - **Sapo**: SAPO Vídeos
- **savefrom.net** - **savefrom.net**
- **SBS**: sbs.com.au - **SBS**: sbs.com.au
- **schooltv** - **schooltv**
- **SciVee**
- **screen.yahoo:search**: Yahoo screen search - **screen.yahoo:search**: Yahoo screen search
- **Screencast** - **Screencast**
- **ScreencastOMatic** - **ScreencastOMatic**
- **scrippsnetworks:watch**
- **Seeker** - **Seeker**
- **SenateISVP** - **SenateISVP**
- **SendtoNews** - **SendtoNews**
- **ServingSys** - **ServingSys**
- **Servus**
- **Sexu** - **Sexu**
- **Shahid** - **Shahid**
- **ShahidShow**
- **Shared**: shared.sx - **Shared**: shared.sx
- **ShowRoomLive** - **ShowRoomLive**
- **Sina** - **Sina**
- **SixPlay**
- **SkylineWebcams**
- **skynewsarabia:article** - **skynewsarabia:article**
- **skynewsarabia:video** - **skynewsarabia:video**
- **SkySports** - **SkySports**
- **Slideshare** - **Slideshare**
- **SlidesLive**
- **Slutload** - **Slutload**
- **smotri**: Smotri.com - **smotri**: Smotri.com
- **smotri:broadcast**: Smotri.com broadcasts - **smotri:broadcast**: Smotri.com broadcasts
@ -678,6 +754,7 @@
- **soundcloud:playlist** - **soundcloud:playlist**
- **soundcloud:search**: Soundcloud search - **soundcloud:search**: Soundcloud search
- **soundcloud:set** - **soundcloud:set**
- **soundcloud:trackstation**
- **soundcloud:user** - **soundcloud:user**
- **soundgasm** - **soundgasm**
- **soundgasm:profile** - **soundgasm:profile**
@ -693,10 +770,10 @@
- **Spiegeltv** - **Spiegeltv**
- **Spike** - **Spike**
- **Sport5** - **Sport5**
- **SportBox**
- **SportBoxEmbed** - **SportBoxEmbed**
- **SportDeutschland** - **SportDeutschland**
- **Sportschau** - **Sportschau**
- **Sprout**
- **sr:mediathek**: Saarländischer Rundfunk - **sr:mediathek**: Saarländischer Rundfunk
- **SRGSSR** - **SRGSSR**
- **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
@ -704,9 +781,11 @@
- **Steam** - **Steam**
- **Stitcher** - **Stitcher**
- **Streamable** - **Streamable**
- **Streamango**
- **streamcloud.eu** - **streamcloud.eu**
- **StreamCZ** - **StreamCZ**
- **StreetVoice** - **StreetVoice**
- **StretchInternet**
- **SunPorno** - **SunPorno**
- **SVT** - **SVT**
- **SVTPlay**: SVT Play and Öppet arkiv - **SVTPlay**: SVT Play and Öppet arkiv
@ -717,13 +796,13 @@
- **Tagesschau** - **Tagesschau**
- **tagesschau:player** - **tagesschau:player**
- **Tass** - **Tass**
- **TastyTrade**
- **TBS** - **TBS**
- **TDSLifeway** - **TDSLifeway**
- **teachertube**: teachertube.com videos - **teachertube**: teachertube.com videos
- **teachertube:user:collection**: teachertube.com user and collection videos - **teachertube:user:collection**: teachertube.com user and collection videos
- **TeachingChannel** - **TeachingChannel**
- **Teamcoco** - **Teamcoco**
- **TeamFourStar**
- **TechTalks** - **TechTalks**
- **techtv.mit.edu** - **techtv.mit.edu**
- **ted** - **ted**
@@ -744,51 +823,57 @@
- **TheScene**
- **TheSixtyOne**
- **TheStar**
- **TheSun**
- **TheWeatherChannel**
- **ThisAmericanLife**
- **ThisAV**
- **ThisOldHouse**
- **tinypic**: tinypic.com videos
- **tlc.de**
- **TMZ**
- **TMZArticle**
- **TNAFlix**
- **TNAFlixNetworkEmbed**
- **toggle**
- **ToonGoggles**
- **Tosh**: Tosh.0
- **tou.tv**
- **Toypics**: Toypics user profile
- **Toypics**: Toypics video
- **ToypicsUser**: Toypics user profile
- **TrailerAddict** (Currently broken)
- **Trilulilu**
- **TruTV**
- **Tube8**
- **TubiTv**
- **tudou**
- **tudou:album**
- **tudou:playlist**
- **Tumblr**
- **tunein:clip**
- **tunein:program**
- **tunein:station**
- **tunein:topic**
- **TunePk**
- **Turbo**
- **Tutv**
- **tv.dfb.de**
- **TV2**
- **tv2.hu**
- **TV2Article**
- **TV3**
- **TV4**: tv4.se and tv4play.se
- **TV5MondePlus**: TV5MONDE+
- **TVA**
- **TVANouvelles**
- **TVANouvellesArticle**
- **TVC**
- **TVCArticle**
- **tvigle**: Интернет-телевидение Tvigle.ru
- **tvland.com**
- **TVN24**
- **TVNoe**
- **TVNow**
- **TVNowList**
- **tvp**: Telewizja Polska
- **tvp:embed**: Telewizja Polska
- **tvp:series**
- **TVPlayer**
- **Tweakers**
- **twitch:chapter**
- **twitch:clips**
@@ -806,11 +891,16 @@
- **udemy**
- **udemy:course**
- **UDNEmbed**: 聯合影音
- **UFCTV**
- **UKTVPlay**
- **umg:de**: Universal Music Deutschland
- **Unistra**
- **Unity**
- **uol.com.br**
- **uplynk**
- **uplynk:preplay**
- **Upskill**
- **UpskillCourse**
- **Urort**: NRK P3 Urørt
- **URPlay**
- **USANetwork**
@@ -830,9 +920,10 @@
- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
- **vh1.com**
- **Viafree**
- **Vice**
- **vice**
- **vice:article**
- **vice:show**
- **Viceland**
- **ViceShow**
- **Vidbit**
- **Viddler**
- **Videa**
@@ -845,13 +936,14 @@
- **videomore:season**
- **videomore:video**
- **VideoPremium**
- **VideoPress**
- **videoweed**: VideoWeed
- **Vidio**
- **vidme**
- **vidme:user**
- **vidme:user:likes**
- **Vidzi**
- **vier**
- **vier**: vier.be and vijf.be
- **vier:videos**
- **ViewLift**
- **ViewLiftEmbed**
@@ -879,13 +971,23 @@
- **vk:uservideos**: VK - User's Videos
- **vk:wallpost**
- **vlive**
- **vlive:channel**
- **vlive:playlist**
- **Vodlocker**
- **VODPl**
- **VODPlatform**
- **VoiceRepublic**
- **Voot**
- **VoxMedia**
- **VoxMediaVolume**
- **Vporn**
- **vpro**: npo.nl and ntr.nl
- **VRT**
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **Vrak**
- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
- **VrtNU**: VrtNU.be
- **vrv**
- **vrv:series**
- **VShare**
- **vube**: Vube.com
- **VuClip**
- **VVVVID**
@@ -895,6 +997,7 @@
- **washingtonpost**
- **washingtonpost:article**
- **wat.tv**
- **WatchBox**
- **WatchIndianPorn**: Watch Indian Porn
- **WDR**
- **wdr:mobile**
@@ -906,14 +1009,15 @@
- **wholecloud**: WholeCloud
- **Wimp**
- **Wistia**
- **wnl**: npo.nl and ntr.nl
- **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **WorldStarHipHop**
- **wrzuta.pl**
- **wrzuta.pl:playlist**
- **WSJ**: Wall Street Journal
- **WSJArticle**
- **XBef**
- **XboxClips**
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me
- **XHamster**
- **XHamsterEmbed**
- **xiami:album**: 虾米音乐 - 专辑
@@ -929,7 +1033,7 @@
- **XVideos**
- **XXXYMovies**
- **Yahoo**: Yahoo screen and movies
- **Yam**: 蕃薯藤yam天空部落
- **YandexDisk**
- **yandexmusic:album**: Яндекс.Музыка - Альбом
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
- **yandexmusic:track**: Яндекс.Музыка - Трек
@@ -939,6 +1043,9 @@
- **YouJizz**
- **youku**: 优酷
- **youku:show**
- **YouNowChannel**
- **YouNowLive**
- **YouNowMoment**
- **YouPorn**
- **YourUpload**
- **youtube**: YouTube.com
@@ -952,12 +1059,12 @@
- **youtube:search**: YouTube.com searches
- **youtube:search:date**: YouTube.com searches, newest videos first
- **youtube:search_url**: YouTube.com search URLs
- **youtube:shared**
- **youtube:show**: YouTube.com (multi-season) shows
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
- **Zapiks**
- **Zaq1**
- **ZDF**
- **ZDFChannel**
- **zingmp3**: mp3.zing.vn

+ 4
- 2
setup.py

@@ -107,8 +107,9 @@ setup(
url='https://github.com/rg3/youtube-dl',
author='Ricardo Garcia',
author_email='ytdl@yt-dl.org',
maintainer='Philipp Hagemeister',
maintainer_email='phihag@phihag.de',
maintainer='Sergey M.',
maintainer_email='dstftw@gmail.com',
license='Unlicense',
packages=[
'youtube_dl',
'youtube_dl.extractor', 'youtube_dl.downloader',
@@ -130,6 +131,7 @@ setup(
'Programming Language :: Python :: 3.3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
],
cmdclass={'build_lazy_extractors': build_lazy_extractors},


+ 611
- 1
test/test_InfoExtractor.py

@@ -3,12 +3,14 @@
from __future__ import unicode_literals
# Allow direct execution
import io
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL
from test.helper import FakeYDL, expect_dict, expect_value
from youtube_dl.compat import compat_etree_fromstring
from youtube_dl.extractor.common import InfoExtractor
from youtube_dl.extractor import YoutubeIE, get_info_extractor
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
@@ -84,6 +86,614 @@ class TestInfoExtractor(unittest.TestCase):
self.assertRaises(ExtractorError, self.ie._download_json, uri, None)
self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
def test_extract_jwplayer_data_realworld(self):
# from http://www.suffolk.edu/sjc/
expect_dict(
self,
self.ie._extract_jwplayer_data(r'''
<script type='text/javascript'>
jwplayer('my-video').setup({
file: 'rtmp://192.138.214.154/live/sjclive',
fallback: 'true',
width: '95%',
aspectratio: '16:9',
primary: 'flash',
mediaid:'XEgvuql4'
});
</script>
''', None, require_title=False),
{
'id': 'XEgvuql4',
'formats': [{
'url': 'rtmp://192.138.214.154/live/sjclive',
'ext': 'flv'
}]
})
# from https://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary/
expect_dict(
self,
self.ie._extract_jwplayer_data(r'''
<script type="text/javascript">
jwplayer("mediaplayer").setup({
'videoid': "7564",
'width': "100%",
'aspectratio': "16:9",
'stretching': "exactfit",
'autostart': 'false',
'flashplayer': "https://t04.vipstreamservice.com/jwplayer/v5.10/player.swf",
'file': "https://cdn.pornoxo.com/key=MF+oEbaxqTKb50P-w9G3nA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/4b2157147afe5efa93ce1978e0265289c193874e02597.flv",
'image': "https://t03.vipstreamservice.com/thumbs/pxo-full/2009-12/14/a4b2157147afe5efa93ce1978e0265289c193874e02597.flv-full-13.jpg",
'filefallback': "https://cdn.pornoxo.com/key=9ZPsTR5EvPLQrBaak2MUGA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/m_4b2157147afe5efa93ce1978e0265289c193874e02597.mp4",
'logo.hide': true,
'skin': "https://t04.vipstreamservice.com/jwplayer/skin/modieus-blk.zip",
'plugins': "https://t04.vipstreamservice.com/jwplayer/dock/dockableskinnableplugin.swf",
'dockableskinnableplugin.piclink': "/index.php?key=ajax-videothumbsn&vid=7564&data=2009-12--14--4b2157147afe5efa93ce1978e0265289c193874e02597.flv--17370",
'controlbar': 'bottom',
'modes': [
{type: 'flash', src: 'https://t04.vipstreamservice.com/jwplayer/v5.10/player.swf'}
],
'provider': 'http'
});
//noinspection JSAnnotator
invideo.setup({
adsUrl: "/banner-iframe/?zoneId=32",
adsUrl2: "",
autostart: false
});
</script>
''', 'dummy', require_title=False),
{
'thumbnail': 'https://t03.vipstreamservice.com/thumbs/pxo-full/2009-12/14/a4b2157147afe5efa93ce1978e0265289c193874e02597.flv-full-13.jpg',
'formats': [{
'url': 'https://cdn.pornoxo.com/key=MF+oEbaxqTKb50P-w9G3nA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/4b2157147afe5efa93ce1978e0265289c193874e02597.flv',
'ext': 'flv'
}]
})
# from http://www.indiedb.com/games/king-machine/videos
expect_dict(
self,
self.ie._extract_jwplayer_data(r'''
<script>
jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/\/www.indiedb.com\/","displaytitle":false,"autostart":false,"repeat":false,"title":"king machine trailer 1","sharing":{"link":"http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1","code":"<iframe width=\"560\" height=\"315\" src=\"http:\/\/www.indiedb.com\/media\/iframe\/1522983\" frameborder=\"0\" allowfullscreen><\/iframe><br><a href=\"http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1\">king machine trailer 1 - Indie DB<\/a>"},"related":{"file":"http:\/\/rss.indiedb.com\/media\/recommended\/1522983\/feed\/rss.xml","dimensions":"160x120","onclick":"link"},"sources":[{"file":"http:\/\/cdn.dbolical.com\/cache\/videos\/games\/1\/50\/49678\/encode_mp4\/king-machine-trailer.mp4","label":"360p SD","default":"true"},{"file":"http:\/\/cdn.dbolical.com\/cache\/videos\/games\/1\/50\/49678\/encode720p_mp4\/king-machine-trailer.mp4","label":"720p HD"}],"image":"http:\/\/media.indiedb.com\/cache\/images\/games\/1\/50\/49678\/thumb_620x2000\/king-machine-trailer.mp4.jpg","advertising":{"client":"vast","tag":"http:\/\/ads.intergi.com\/adrawdata\/3.0\/5205\/4251742\/0\/1013\/ADTECH;cors=yes;width=560;height=315;referring_url=http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1;content_url=http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1;media_id=1522983;title=king+machine+trailer+1;device=__DEVICE__;model=__MODEL__;os=Windows+OS;osversion=__OSVERSION__;ua=__UA__;ip=109.171.17.81;uniqueid=1522983;tags=__TAGS__;number=58cac25928151;time=1489683033"},"width":620,"height":349}).once("play", function(event) {
videoAnalytics("play");
}).once("complete", function(event) {
videoAnalytics("completed");
});
</script>
''', 'dummy'),
{
'title': 'king machine trailer 1',
'thumbnail': 'http://media.indiedb.com/cache/images/games/1/50/49678/thumb_620x2000/king-machine-trailer.mp4.jpg',
'formats': [{
'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode_mp4/king-machine-trailer.mp4',
'height': 360,
'ext': 'mp4'
}, {
'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode720p_mp4/king-machine-trailer.mp4',
'height': 720,
'ext': 'mp4'
}]
})
def test_parse_m3u8_formats(self):
_TEST_CASES = [
(
# https://github.com/rg3/youtube-dl/issues/11507
# http://pluzz.francetv.fr/videos/le_ministere.html
'pluzz_francetv_11507',
'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
[{
'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_0_av.m3u8?null=0',
'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
'ext': 'mp4',
'format_id': '180',
'protocol': 'm3u8',
'acodec': 'mp4a.40.2',
'vcodec': 'avc1.66.30',
'tbr': 180,
'width': 256,
'height': 144,
}, {
'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_1_av.m3u8?null=0',
'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
'ext': 'mp4',
'format_id': '303',
'protocol': 'm3u8',
'acodec': 'mp4a.40.2',
'vcodec': 'avc1.66.30',
'tbr': 303,
'width': 320,
'height': 180,
}, {
'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_2_av.m3u8?null=0',
'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
'ext': 'mp4',
'format_id': '575',
'protocol': 'm3u8',
'acodec': 'mp4a.40.2',
'vcodec': 'avc1.66.30',
'tbr': 575,
'width': 512,
'height': 288,
}, {
'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_3_av.m3u8?null=0',
'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
'ext': 'mp4',
'format_id': '831',
'protocol': 'm3u8',
'acodec': 'mp4a.40.2',
'vcodec': 'avc1.77.30',
'tbr': 831,
'width': 704,
'height': 396,
}, {
'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_4_av.m3u8?null=0',
'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
'ext': 'mp4',
'protocol': 'm3u8',
'format_id': '1467',
'acodec': 'mp4a.40.2',
'vcodec': 'avc1.77.30',
'tbr': 1467,
'width': 1024,
'height': 576,
}]
),
(
# https://github.com/rg3/youtube-dl/issues/11995
# http://teamcoco.com/video/clueless-gamer-super-bowl-for-honor
'teamcoco_11995',
'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
[{
'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-160k_v4.m3u8',
'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
'ext': 'mp4',
'format_id': 'audio-0-Default',
'protocol': 'm3u8',
'vcodec': 'none',
}, {
'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8',
'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
'ext': 'mp4',
'format_id': 'audio-1-Default',
'protocol': 'm3u8',
'vcodec': 'none',
}, {
'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8',
'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
'ext': 'mp4',
'format_id': '71',
'protocol': 'm3u8',
'acodec': 'mp4a.40.5',
'vcodec': 'none',
'tbr': 71,
}, {
'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-400k_v4.m3u8',
'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
'ext': 'mp4',
'format_id': '413',
'protocol': 'm3u8',
'acodec': 'none',
'vcodec': 'avc1.42001e',
'tbr': 413,
'width': 400,
'height': 224,
}, {
'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-400k_v4.m3u8',
'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
'ext': 'mp4',
'format_id': '522',
'protocol': 'm3u8',
'acodec': 'none',
'vcodec': 'avc1.42001e',
'tbr': 522,
'width': 400,
'height': 224,
}, {
'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-1m_v4.m3u8',
'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
'ext': 'mp4',
'format_id': '1205',
'protocol': 'm3u8',
'acodec': 'none',
'vcodec': 'avc1.4d001e',
'tbr': 1205,
'width': 640,
'height': 360,
}, {
'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-2m_v4.m3u8',
'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
'ext': 'mp4',
'format_id': '2374',
'protocol': 'm3u8',
'acodec': 'none',
'vcodec': 'avc1.4d001f',
'tbr': 2374,
'width': 1024,
'height': 576,
}]
),
(
# https://github.com/rg3/youtube-dl/issues/12211
# http://video.toggle.sg/en/series/whoopie-s-world/ep3/478601
'toggle_mobile_12211',
'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
[{
'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_sa2ntrdg/name/a.mp4/index.m3u8',
'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
'ext': 'mp4',
'format_id': 'audio-English',
'protocol': 'm3u8',
'language': 'eng',
'vcodec': 'none',
}, {
'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_r7y0nitg/name/a.mp4/index.m3u8',
'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
'ext': 'mp4',
'format_id': 'audio-Undefined',
'protocol': 'm3u8',
'language': 'und',
'vcodec': 'none',
}, {
'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_qlk9hlzr/name/a.mp4/index.m3u8',
'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
'ext': 'mp4',
'format_id': '155',
'protocol': 'm3u8',
'tbr': 155.648,
'width': 320,
'height': 180,
}, {
'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_oefackmi/name/a.mp4/index.m3u8',
'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
'ext': 'mp4',
'format_id': '502',
'protocol': 'm3u8',
'tbr': 502.784,
'width': 480,
'height': 270,
}, {
'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_vyg9pj7k/name/a.mp4/index.m3u8',
'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
'ext': 'mp4',
'format_id': '827',
'protocol': 'm3u8',
'tbr': 827.392,
'width': 640,
'height': 360,
}, {
'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_50n4psvx/name/a.mp4/index.m3u8',
'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
'ext': 'mp4',
'format_id': '1396',
'protocol': 'm3u8',
'tbr': 1396.736,
'width': 854,
'height': 480,
}]
),
(
# http://www.twitch.tv/riotgames/v/6528877
'twitch_vod',
'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
[{
'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/audio_only/index-muted-HM49I092CC.m3u8',
'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
'ext': 'mp4',
'format_id': 'Audio Only',
'protocol': 'm3u8',
'acodec': 'mp4a.40.2',
'vcodec': 'none',
'tbr': 182.725,
}, {
'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/mobile/index-muted-HM49I092CC.m3u8',
'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
'ext': 'mp4',
'format_id': 'Mobile',
'protocol': 'm3u8',
'acodec': 'mp4a.40.2',
'vcodec': 'avc1.42C00D',
'tbr': 280.474,
'width': 400,
'height': 226,
}, {
'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/low/index-muted-HM49I092CC.m3u8',
'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
'ext': 'mp4',
'format_id': 'Low',
'protocol': 'm3u8',
'acodec': 'mp4a.40.2',
'vcodec': 'avc1.42C01E',
'tbr': 628.347,
'width': 640,
'height': 360,
}, {
'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/medium/index-muted-HM49I092CC.m3u8',
'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
'ext': 'mp4',
'format_id': 'Medium',
'protocol': 'm3u8',
'acodec': 'mp4a.40.2',
'vcodec': 'avc1.42C01E',
'tbr': 893.387,
'width': 852,
'height': 480,
}, {
'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/high/index-muted-HM49I092CC.m3u8',
'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
'ext': 'mp4',
'format_id': 'High',
'protocol': 'm3u8',
'acodec': 'mp4a.40.2',
'vcodec': 'avc1.42C01F',
'tbr': 1603.789,
'width': 1280,
'height': 720,
}, {
'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/chunked/index-muted-HM49I092CC.m3u8',
'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
'ext': 'mp4',
'format_id': 'Source',
'protocol': 'm3u8',
'acodec': 'mp4a.40.2',
'vcodec': 'avc1.100.31',
'tbr': 3214.134,
'width': 1280,
'height': 720,
}]
),
(
# http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015
# EXT-X-STREAM-INF tag with NAME attribute that is not defined
# in HLS specification
'vidio',
'https://www.vidio.com/videos/165683/playlist.m3u8',
[{
'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b300.mp4.m3u8',
'manifest_url': 'https://www.vidio.com/videos/165683/playlist.m3u8',
'ext': 'mp4',
'format_id': '270p 3G',
'protocol': 'm3u8',
'tbr': 300,
'width': 480,
'height': 270,
}, {
'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b600.mp4.m3u8',
'manifest_url': 'https://www.vidio.com/videos/165683/playlist.m3u8',
'ext': 'mp4',
'format_id': '360p SD',
'protocol': 'm3u8',
'tbr': 600,
'width': 640,
'height': 360,
}, {
'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b1200.mp4.m3u8',
'manifest_url': 'https://www.vidio.com/videos/165683/playlist.m3u8',
'ext': 'mp4',
'format_id': '720p HD',
'protocol': 'm3u8',
'tbr': 1200,
'width': 1280,
'height': 720,
}]
)
]
for m3u8_file, m3u8_url, expected_formats in _TEST_CASES:
with io.open('./test/testdata/m3u8/%s.m3u8' % m3u8_file,
mode='r', encoding='utf-8') as f:
formats = self.ie._parse_m3u8_formats(
f.read(), m3u8_url, ext='mp4')
self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None)
def test_parse_mpd_formats(self):
_TEST_CASES = [
(
# https://github.com/rg3/youtube-dl/issues/13919
# Also tests duplicate representation ids, see
# https://github.com/rg3/youtube-dl/issues/15111
'float_duration',
'http://unknown/manifest.mpd',
[{
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'm4a',
'format_id': '318597',
'format_note': 'DASH audio',
'protocol': 'http_dash_segments',
'acodec': 'mp4a.40.2',
'vcodec': 'none',
'tbr': 61.587,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': '318597',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'none',
'vcodec': 'avc1.42001f',
'tbr': 318.597,
'width': 340,
'height': 192,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': '638590',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'none',
'vcodec': 'avc1.42001f',
'tbr': 638.59,
'width': 512,
'height': 288,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': '1022565',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'none',
'vcodec': 'avc1.4d001f',
'tbr': 1022.565,
'width': 688,
'height': 384,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': '2046506',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'none',
'vcodec': 'avc1.4d001f',
'tbr': 2046.506,
'width': 1024,
'height': 576,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': '3998017',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'none',
'vcodec': 'avc1.640029',
'tbr': 3998.017,
'width': 1280,
'height': 720,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': '5997485',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'none',
'vcodec': 'avc1.640032',
'tbr': 5997.485,
'width': 1920,
'height': 1080,
}]
), (
# https://github.com/rg3/youtube-dl/pull/14844
'urls_only',
'http://unknown/manifest.mpd',
[{
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': 'h264_aac_144p_m4s',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'mp4a.40.2',
'vcodec': 'avc3.42c01e',
'tbr': 200,
'width': 256,
'height': 144,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': 'h264_aac_240p_m4s',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'mp4a.40.2',
'vcodec': 'avc3.42c01e',
'tbr': 400,
'width': 424,
'height': 240,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': 'h264_aac_360p_m4s',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'mp4a.40.2',
'vcodec': 'avc3.42c01e',
'tbr': 800,
'width': 640,
'height': 360,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': 'h264_aac_480p_m4s',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'mp4a.40.2',
'vcodec': 'avc3.42c01e',
'tbr': 1200,
'width': 856,
'height': 480,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': 'h264_aac_576p_m4s',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'mp4a.40.2',
'vcodec': 'avc3.42c01e',
'tbr': 1600,
'width': 1024,
'height': 576,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': 'h264_aac_720p_m4s',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'mp4a.40.2',
'vcodec': 'avc3.42c01e',
'tbr': 2400,
'width': 1280,
'height': 720,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': 'h264_aac_1080p_m4s',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'mp4a.40.2',
'vcodec': 'avc3.42c01e',
'tbr': 4400,
'width': 1920,
'height': 1080,
}]
)
]
for mpd_file, mpd_url, expected_formats in _TEST_CASES:
with io.open('./test/testdata/mpd/%s.mpd' % mpd_file,
mode='r', encoding='utf-8') as f:
formats = self.ie._parse_mpd_formats(
compat_etree_fromstring(f.read().encode('utf-8')),
mpd_url=mpd_url)
self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None)
def test_parse_f4m_formats(self):
_TEST_CASES = [
(
# https://github.com/rg3/youtube-dl/issues/14660
'custom_base_url',
'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m',
[{
'manifest_url': 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m',
'ext': 'flv',
'format_id': '2148',
'protocol': 'f4m',
'tbr': 2148,
'width': 1280,
'height': 720,
}]
),
]
for f4m_file, f4m_url, expected_formats in _TEST_CASES:
with io.open('./test/testdata/f4m/%s.f4m' % f4m_file,
mode='r', encoding='utf-8') as f:
formats = self.ie._parse_f4m_formats(
compat_etree_fromstring(f.read().encode('utf-8')),
f4m_url, None)
self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None)
if __name__ == '__main__':
unittest.main()

+ 90
- 3
test/test_YoutubeDL.py

@@ -1,4 +1,5 @@
#!/usr/bin/env python
# coding: utf-8
from __future__ import unicode_literals
@@ -40,6 +41,7 @@ def _make_result(formats, **kwargs):
'id': 'testid',
'title': 'testttitle',
'extractor': 'testex',
'extractor_key': 'TestEx',
}
res.update(**kwargs)
return res
@@ -369,6 +371,19 @@ class TestFormatSelection(unittest.TestCase):
ydl = YDL({'format': 'best[height>360]'})
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
def test_format_selection_issue_10083(self):
# See https://github.com/rg3/youtube-dl/issues/10083
formats = [
{'format_id': 'regular', 'height': 360, 'url': TEST_URL},
{'format_id': 'video', 'height': 720, 'acodec': 'none', 'url': TEST_URL},
{'format_id': 'audio', 'vcodec': 'none', 'url': TEST_URL},
]
info_dict = _make_result(formats)
ydl = YDL({'format': 'best[height>360]/bestvideo[height>360]+bestaudio'})
ydl.process_ie_result(info_dict.copy())
self.assertEqual(ydl.downloaded_info_dicts[0]['format_id'], 'video+audio')
def test_invalid_format_specs(self):
def assert_syntax_error(format_spec):
ydl = YDL({'format': format_spec})
@@ -447,6 +462,23 @@ class TestFormatSelection(unittest.TestCase):
pass
self.assertEqual(ydl.downloaded_info_dicts, [])
def test_default_format_spec(self):
ydl = YDL({'simulate': True})
self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best')
ydl = YDL({})
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
ydl = YDL({'simulate': True})
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'bestvideo+bestaudio/best')
ydl = YDL({'outtmpl': '-'})
self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
ydl = YDL({})
self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo+bestaudio/best')
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
class TestYoutubeDL(unittest.TestCase):
def test_subtitles(self):
@@ -525,6 +557,9 @@ class TestYoutubeDL(unittest.TestCase):
'id': '1234',
'ext': 'mp4',
'width': None,
'height': 1080,
'title1': '$PATH',
'title2': '%PATH%',
}
def fname(templ):
@@ -534,16 +569,33 @@ class TestYoutubeDL(unittest.TestCase):
self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4')
# Replace missing fields with 'NA'
self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4')
self.assertEqual(fname('%(height)d.%(ext)s'), '1080.mp4')
self.assertEqual(fname('%(height)6d.%(ext)s'), ' 1080.mp4')
self.assertEqual(fname('%(height)-6d.%(ext)s'), '1080 .mp4')
self.assertEqual(fname('%(height)06d.%(ext)s'), '001080.mp4')
self.assertEqual(fname('%(height) 06d.%(ext)s'), ' 01080.mp4')
self.assertEqual(fname('%(height) 06d.%(ext)s'), ' 01080.mp4')
self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4')
self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4')
self.assertEqual(fname('%(height) 0 6d.%(ext)s'), ' 01080.mp4')
self.assertEqual(fname('%%'), '%')
self.assertEqual(fname('%%%%'), '%%')
self.assertEqual(fname('%%(height)06d.%(ext)s'), '%(height)06d.mp4')
self.assertEqual(fname('%(width)06d.%(ext)s'), 'NA.mp4')
self.assertEqual(fname('%(width)06d.%%(ext)s'), 'NA.%(ext)s')
self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4')
self.assertEqual(fname('Hello %(title1)s'), 'Hello $PATH')
self.assertEqual(fname('Hello %(title2)s'), 'Hello %PATH%')
def test_format_note(self):
ydl = YoutubeDL()
self.assertEqual(ydl._format_note({}), '')
assertRegexpMatches(self, ydl._format_note({
'vbr': 10,
}), '^\s*10k$')
}), r'^\s*10k$')
assertRegexpMatches(self, ydl._format_note({
'fps': 30,
}), '^30fps$')
}), r'^30fps$')
def test_postprocessors(self):
filename = 'post-processor-testfile.mp4'
@@ -606,6 +658,8 @@ class TestYoutubeDL(unittest.TestCase):
'duration': 30,
'filesize': 10 * 1024,
'playlist_id': '42',
'uploader': "變態妍字幕版 太妍 тест",
'creator': "тест ' 123 ' тест--",
}
second = {
'id': '2',
@@ -616,6 +670,7 @@ class TestYoutubeDL(unittest.TestCase):
'description': 'foo',
'filesize': 5 * 1024,
'playlist_id': '43',
'uploader': "тест 123",
}
videos = [first, second]
@@ -656,6 +711,26 @@ class TestYoutubeDL(unittest.TestCase):
res = get_videos(f)
self.assertEqual(res, ['1'])
f = match_filter_func('uploader = "變態妍字幕版 太妍 тест"')
res = get_videos(f)
self.assertEqual(res, ['1'])
f = match_filter_func('uploader != "變態妍字幕版 太妍 тест"')
res = get_videos(f)
self.assertEqual(res, ['2'])
f = match_filter_func('creator = "тест \' 123 \' тест--"')
res = get_videos(f)
self.assertEqual(res, ['1'])
f = match_filter_func("creator = 'тест \\' 123 \\' тест--'")
res = get_videos(f)
self.assertEqual(res, ['1'])
f = match_filter_func(r"creator = 'тест \' 123 \' тест--' & duration > 30")
res = get_videos(f)
self.assertEqual(res, [])
def test_playlist_items_selection(self):
entries = [{
'id': compat_str(i),
@@ -701,6 +776,12 @@ class TestYoutubeDL(unittest.TestCase):
result = get_ids({'playlist_items': '10'})
self.assertEqual(result, [])
result = get_ids({'playlist_items': '3-10'})
self.assertEqual(result, [3, 4])
result = get_ids({'playlist_items': '2-4,3-4,3'})
self.assertEqual(result, [2, 3, 4])
def test_urlopen_no_file_protocol(self):
# see https://github.com/rg3/youtube-dl/issues/8227
ydl = YDL()
@@ -717,6 +798,8 @@ class TestYoutubeDL(unittest.TestCase):
'_type': 'url_transparent',
'url': 'foo2:',
'ie_key': 'Foo2',
'title': 'foo1 title',
'id': 'foo1_id',
}
class Foo2IE(InfoExtractor):
@@ -733,7 +816,7 @@ class TestYoutubeDL(unittest.TestCase):
_VALID_URL = r'foo3:'
def _real_extract(self, url):
return _make_result([{'url': TEST_URL}])
return _make_result([{'url': TEST_URL}], title='foo3 title')
ydl.add_info_extractor(Foo1IE(ydl))
ydl.add_info_extractor(Foo2IE(ydl))
@@ -741,6 +824,10 @@ class TestYoutubeDL(unittest.TestCase):
ydl.extract_info('foo1:')
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['url'], TEST_URL)
self.assertEqual(downloaded['title'], 'foo1 title')
self.assertEqual(downloaded['id'], 'testid')
self.assertEqual(downloaded['extractor'], 'testex')
self.assertEqual(downloaded['extractor_key'], 'TestEx')
if __name__ == '__main__':


+ 8
- 1
test/test_aes.py

@@ -8,7 +8,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_decrypt_text
from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_cbc_encrypt, aes_decrypt_text
from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes
import base64
@@ -34,6 +34,13 @@ class TestAES(unittest.TestCase):
decrypted = intlist_to_bytes(aes_cbc_decrypt(data, self.key, self.iv))
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
def test_cbc_encrypt(self):
data = bytes_to_intlist(self.secret_msg)
encrypted = intlist_to_bytes(aes_cbc_encrypt(data, self.key, self.iv))
self.assertEqual(
encrypted,
b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd")
def test_decrypt_text(self):
password = intlist_to_bytes(self.key).decode('utf-8')
encrypted = base64.b64encode(


+ 3
- 3
test/test_compat.py

@@ -27,11 +27,11 @@ from youtube_dl.compat import (
class TestCompat(unittest.TestCase):
def test_compat_getenv(self):
test_str = 'тест'
compat_setenv('YOUTUBE-DL-TEST', test_str)
self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str)
compat_setenv('YOUTUBE_DL_COMPAT_GETENV', test_str)
self.assertEqual(compat_getenv('YOUTUBE_DL_COMPAT_GETENV'), test_str)
def test_compat_setenv(self):
test_var = 'YOUTUBE-DL-TEST'
test_var = 'YOUTUBE_DL_COMPAT_SETENV'
test_str = 'тест'
compat_setenv(test_var, test_str)
compat_getenv(test_var)


+ 36
- 7
test/test_download.py

@@ -65,15 +65,31 @@ defs = gettestcases()
class TestDownload(unittest.TestCase):
# Parallel testing in nosetests. See
# http://nose.readthedocs.org/en/latest/doc_tests/test_multiprocess/multiprocess.html
_multiprocess_shared_ = True
maxDiff = None
def __str__(self):
"""Identify each test with the `add_ie` attribute, if available."""
def strclass(cls):
"""From 2.7's unittest; 2.6 had _strclass so we can't import it."""
return '%s.%s' % (cls.__module__, cls.__name__)
add_ie = getattr(self, self._testMethodName).add_ie
return '%s (%s)%s:' % (self._testMethodName,
strclass(self.__class__),
' [%s]' % add_ie if add_ie else '')
def setUp(self):
self.defs = defs
# Dynamically generate tests
def generator(test_case):
def generator(test_case, tname):
def test_template(self):
ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
@@ -102,6 +118,7 @@ def generator(test_case):
return
params = get_params(test_case.get('params', {}))
params['outtmpl'] = tname + '_' + params['outtmpl']
if is_playlist and 'playlist' not in test_case:
params.setdefault('extract_flat', 'in_playlist')
params.setdefault('skip_download', True)
@@ -134,7 +151,7 @@ def generator(test_case):
try_num = 1
while True:
try:
# We're not using .download here sine that is just a shim
# We're not using .download here since that is just a shim
# for outside error handling, and returns the exit code
# instead of the result dict.
res_dict = ydl.extract_info(
@@ -146,7 +163,7 @@ def generator(test_case):
raise
if try_num == RETRIES:
report_warning('Failed due to network errors, skipping...')
report_warning('%s failed due to network errors, skipping...' % tname)
return
print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num))
@@ -182,7 +199,16 @@ def generator(test_case):
self.assertEqual(
test_case['playlist_duration_sum'], got_duration)
for tc in test_cases:
# Generalize both playlists and single videos to unified format for
# simplicity
if 'entries' not in res_dict:
res_dict['entries'] = [res_dict]
for tc_num, tc in enumerate(test_cases):
tc_res_dict = res_dict['entries'][tc_num]
# First, check test cases' data against extracted data alone
expect_info_dict(self, tc_res_dict, tc.get('info_dict', {}))
# Now, check downloaded file consistency
tc_filename = get_tc_filename(tc)
if not test_case.get('params', {}).get('skip_download', False):
self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
@@ -199,14 +225,15 @@ def generator(test_case):
format_bytes(got_fsize)))
if 'md5' in tc:
md5_for_file = _file_md5(tc_filename)
self.assertEqual(md5_for_file, tc['md5'])
self.assertEqual(tc['md5'], md5_for_file)
# Finally, check test cases' data again but this time against
# extracted data from info JSON file written during processing
info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
self.assertTrue(
os.path.exists(info_json_fn),
'Missing info file %s' % info_json_fn)
with io.open(info_json_fn, encoding='utf-8') as infof:
info_dict = json.load(infof)
expect_info_dict(self, info_dict, tc.get('info_dict', {}))
finally:
try_rm_tcs_files()
@@ -221,13 +248,15 @@ def generator(test_case):
# And add them to TestDownload
for n, test_case in enumerate(defs):
test_method = generator(test_case)
tname = 'test_' + str(test_case['name'])
i = 1
while hasattr(TestDownload, tname):
tname = 'test_%s_%d' % (test_case['name'], i)
i += 1
test_method = generator(test_case, tname)
test_method.__name__ = str(tname)
ie_list = test_case.get('add_ie')
test_method.add_ie = ie_list and ','.join(ie_list)
setattr(TestDownload, test_method.__name__, test_method)
del test_method
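The loop above builds one test method per entry in the download test definitions and attaches it to TestDownload at import time. As a minimal standalone sketch of the same pattern (the names make_case and TestGenerated are illustrative only and are not part of this change):

import unittest


def make_case(name, url):
    # The closure captures the per-case parameters; unittest then discovers
    # the attached method exactly like a hand-written test_* method.
    def test_template(self):
        self.assertTrue(url.startswith('http'), '%s: unexpected URL %r' % (name, url))
    test_template.__name__ = str('test_' + name)
    return test_template


class TestGenerated(unittest.TestCase):
    pass


for case_name, case_url in [('example', 'http://example.com/video')]:
    method = make_case(case_name, case_url)
    setattr(TestGenerated, method.__name__, method)
    del method

if __name__ == '__main__':
    unittest.main()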


+ 26
- 0
test/test_options.py

@@ -0,0 +1,26 @@
# coding: utf-8
from __future__ import unicode_literals
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.options import _hide_login_info
class TestOptions(unittest.TestCase):
def test_hide_login_info(self):
self.assertEqual(_hide_login_info(['-u', 'foo', '-p', 'bar']),
['-u', 'PRIVATE', '-p', 'PRIVATE'])
self.assertEqual(_hide_login_info(['-u']), ['-u'])
self.assertEqual(_hide_login_info(['-u', 'foo', '-u', 'bar']),
['-u', 'PRIVATE', '-u', 'PRIVATE'])
self.assertEqual(_hide_login_info(['--username=foo']),
['--username=PRIVATE'])
if __name__ == '__main__':
unittest.main()
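These assertions only describe the masking behaviour from the outside. As a rough standalone sketch consistent with the expected values above (the real helper lives in youtube_dl/options.py and may cover more options and edge cases), it could look like this:

import re

PRIVATE_OPTS = ['-p', '--password', '-u', '--username', '--video-password']


def hide_login_info(opts):
    # Mask credential values so they cannot leak into verbose/debug output.
    eqre = re.compile('^(?P<key>' + '|'.join(re.escape(o) for o in PRIVATE_OPTS) + ')=.+$')

    def scrub(opt):
        m = eqre.match(opt)
        return m.group('key') + '=PRIVATE' if m else opt

    opts = [scrub(opt) for opt in opts]
    for idx, opt in enumerate(opts):
        if opt in PRIVATE_OPTS and idx + 1 < len(opts):
            opts[idx + 1] = 'PRIVATE'
    return opts


print(hide_login_info(['-u', 'foo', '-p', 'bar']))  # ['-u', 'PRIVATE', '-p', 'PRIVATE']
print(hide_login_info(['--username=foo']))          # ['--username=PRIVATE']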

+ 4
- 4
test/test_subtitles.py

@@ -21,7 +21,7 @@ from youtube_dl.extractor import (
NPOIE,
ComedyCentralIE,
NRKTVIE,
RaiTVIE,
RaiPlayIE,
VikiIE,
ThePlatformIE,
ThePlatformFeedIE,
@@ -258,9 +258,9 @@ class TestNRKSubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2')
class TestRaiSubtitles(BaseTestSubtitles):
url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
IE = RaiTVIE
class TestRaiPlaySubtitles(BaseTestSubtitles):
url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
IE = RaiPlayIE
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True


+ 192
- 4
test/test_utils.py

@@ -34,6 +34,9 @@ from youtube_dl.utils import (
find_xpath_attr,
fix_xml_ampersands,
get_element_by_class,
get_element_by_attribute,
get_elements_by_class,
get_elements_by_attribute,
InAdvancePagedList,
intlist_to_bytes,
is_html,
@@ -41,6 +44,7 @@ from youtube_dl.utils import (
limit_length,
mimetype2ext,
month_by_name,
multipart_encode,
ohdave_rsa_encrypt,
OnDemandPagedList,
orderedSet,
@@ -49,9 +53,11 @@ from youtube_dl.utils import (
parse_filesize,
parse_count,
parse_iso8601,
pkcs1pad,
read_batch_urls,
sanitize_filename,
sanitize_path,
expand_path,
prepend_extension,
replace_extension,
remove_start,
@@ -91,6 +97,9 @@ from youtube_dl.utils import (
from youtube_dl.compat import (
compat_chr,
compat_etree_fromstring,
compat_getenv,
compat_os_name,
compat_setenv,
compat_urlparse,
compat_parse_qs,
)
@@ -210,6 +219,18 @@ class TestUtil(unittest.TestCase):
self.assertEqual(sanitize_path('./abc'), 'abc')
self.assertEqual(sanitize_path('./../abc'), '..\\abc')
def test_expand_path(self):
def env(var):
return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var)
compat_setenv('YOUTUBE_DL_EXPATH_PATH', 'expanded')
self.assertEqual(expand_path(env('YOUTUBE_DL_EXPATH_PATH')), 'expanded')
self.assertEqual(expand_path(env('HOME')), compat_getenv('HOME'))
self.assertEqual(expand_path('~'), compat_getenv('HOME'))
self.assertEqual(
expand_path('~/%s' % env('YOUTUBE_DL_EXPATH_PATH')),
'%s/expanded' % compat_getenv('HOME'))
def test_prepend_extension(self):
self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext')
@@ -258,6 +279,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unescapeHTML('&#47;'), '/')
self.assertEqual(unescapeHTML('&eacute;'), 'é')
self.assertEqual(unescapeHTML('&#2013266066;'), '&#2013266066;')
self.assertEqual(unescapeHTML('&a&quot;'), '&a"')
# HTML5 entities
self.assertEqual(unescapeHTML('&period;&apos;'), '.\'')
@@ -295,6 +317,9 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227')
self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
self.assertEqual(unified_strdate('Feb 7, 2016 at 6:35 pm'), '20160207')
self.assertEqual(unified_strdate('July 15th, 2013'), '20130715')
self.assertEqual(unified_strdate('September 1st, 2013'), '20130901')
self.assertEqual(unified_strdate('Sep 2nd, 2013'), '20130902')
def test_unified_timestamps(self):
self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
@@ -316,6 +341,9 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None)
self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500)
self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100)
self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361)
self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140)
def test_determine_ext(self):
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
@@ -423,7 +451,9 @@ class TestUtil(unittest.TestCase):
def test_shell_quote(self):
args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')]
self.assertEqual(shell_quote(args), """ffmpeg -i 'ñ€ß'"'"'.mp4'""")
self.assertEqual(
shell_quote(args),
"""ffmpeg -i 'ñ€ß'"'"'.mp4'""" if compat_os_name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''')
def test_str_to_int(self):
self.assertEqual(str_to_int('123,456'), 123456)
@@ -448,6 +478,9 @@ class TestUtil(unittest.TestCase):
def test_urljoin(self):
self.assertEqual(urljoin('http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
self.assertEqual(urljoin(b'http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
self.assertEqual(urljoin('http://foo.de/', b'/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
self.assertEqual(urljoin(b'http://foo.de/', b'/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
self.assertEqual(urljoin('//foo.de/', '/a/b/c.txt'), '//foo.de/a/b/c.txt')
self.assertEqual(urljoin('http://foo.de/', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt')
self.assertEqual(urljoin('http://foo.de', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
@@ -507,6 +540,8 @@ class TestUtil(unittest.TestCase):
self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
self.assertEqual(parse_duration('87 Min.'), 5220)
self.assertEqual(parse_duration('PT1H0.040S'), 3600.04)
self.assertEqual(parse_duration('PT00H03M30SZ'), 210)
self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88)
def test_fix_xml_ampersands(self):
self.assertEqual(
@@ -593,6 +628,16 @@ class TestUtil(unittest.TestCase):
'http://example.com/path', {'test': '第二行тест'})),
query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
def test_multipart_encode(self):
self.assertEqual(
multipart_encode({b'field': b'value'}, boundary='AAAAAA')[0],
b'--AAAAAA\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--AAAAAA--\r\n')
self.assertEqual(
multipart_encode({'欄位'.encode('utf-8'): ''.encode('utf-8')}, boundary='AAAAAA')[0],
b'--AAAAAA\r\nContent-Disposition: form-data; name="\xe6\xac\x84\xe4\xbd\x8d"\r\n\r\n\xe5\x80\xbc\r\n--AAAAAA--\r\n')
self.assertRaises(
ValueError, multipart_encode, {b'field': b'value'}, boundary='value')
def test_dict_get(self):
FALSE_VALUES = {
'none': None,
@@ -640,6 +685,14 @@ class TestUtil(unittest.TestCase):
d = json.loads(stripped)
self.assertEqual(d, {'status': 'success'})
stripped = strip_jsonp('window.cb && window.cb({"status": "success"});')
d = json.loads(stripped)
self.assertEqual(d, {'status': 'success'})
stripped = strip_jsonp('window.cb && cb({"status": "success"});')
d = json.loads(stripped)
self.assertEqual(d, {'status': 'success'})
def test_uppercase_escape(self): def test_uppercase_escape(self):
self.assertEqual(uppercase_escape(''), '') self.assertEqual(uppercase_escape(''), '')
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐') self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
@ -781,12 +834,27 @@ class TestUtil(unittest.TestCase):
on = js_to_json('["abc", "def",]') on = js_to_json('["abc", "def",]')
self.assertEqual(json.loads(on), ['abc', 'def']) self.assertEqual(json.loads(on), ['abc', 'def'])
on = js_to_json('[/*comment\n*/"abc"/*comment\n*/,/*comment\n*/"def",/*comment\n*/]')
self.assertEqual(json.loads(on), ['abc', 'def'])
on = js_to_json('[//comment\n"abc" //comment\n,//comment\n"def",//comment\n]')
self.assertEqual(json.loads(on), ['abc', 'def'])
on = js_to_json('{"abc": "def",}') on = js_to_json('{"abc": "def",}')
self.assertEqual(json.loads(on), {'abc': 'def'}) self.assertEqual(json.loads(on), {'abc': 'def'})
on = js_to_json('{/*comment\n*/"abc"/*comment\n*/:/*comment\n*/"def"/*comment\n*/,/*comment\n*/}')
self.assertEqual(json.loads(on), {'abc': 'def'})
on = js_to_json('{ 0: /* " \n */ ",]" , }')
self.assertEqual(json.loads(on), {'0': ',]'})
on = js_to_json('{ /*comment\n*/0/*comment\n*/: /* " \n */ ",]" , }')
self.assertEqual(json.loads(on), {'0': ',]'})
on = js_to_json('{ 0: // comment\n1 }')
self.assertEqual(json.loads(on), {'0': 1})
on = js_to_json(r'["<p>x<\/p>"]') on = js_to_json(r'["<p>x<\/p>"]')
self.assertEqual(json.loads(on), ['<p>x</p>']) self.assertEqual(json.loads(on), ['<p>x</p>'])
@ -796,15 +864,27 @@ class TestUtil(unittest.TestCase):
on = js_to_json("['a\\\nb']") on = js_to_json("['a\\\nb']")
self.assertEqual(json.loads(on), ['ab']) self.assertEqual(json.loads(on), ['ab'])
on = js_to_json("/*comment\n*/[/*comment\n*/'a\\\nb'/*comment\n*/]/*comment\n*/")
self.assertEqual(json.loads(on), ['ab'])
on = js_to_json('{0xff:0xff}')
self.assertEqual(json.loads(on), {'255': 255})
on = js_to_json('{/*comment\n*/0xff/*comment\n*/:/*comment\n*/0xff/*comment\n*/}')
self.assertEqual(json.loads(on), {'255': 255})
on = js_to_json('{077:077}')
self.assertEqual(json.loads(on), {'63': 63})
on = js_to_json('{/*comment\n*/077/*comment\n*/:/*comment\n*/077/*comment\n*/}')
self.assertEqual(json.loads(on), {'63': 63})
on = js_to_json('{42:42}')
self.assertEqual(json.loads(on), {'42': 42})
on = js_to_json('{/*comment\n*/42/*comment\n*/:/*comment\n*/42/*comment\n*/}')
self.assertEqual(json.loads(on), {'42': 42})
def test_extract_attributes(self):
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
@@ -842,10 +922,13 @@ class TestUtil(unittest.TestCase):
supports_outside_bmp = False
if supports_outside_bmp:
self.assertEqual(extract_attributes('<e x="Smile &#128512;!">'), {'x': 'Smile \U0001f600!'})
# Malformed HTML should not break attributes extraction on older Python
self.assertEqual(extract_attributes('<mal"formed/>'), {})
def test_clean_html(self):
self.assertEqual(clean_html('a:\nb'), 'a: b')
self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
self.assertEqual(clean_html('a<br>\xa0b'), 'a\nb')
def test_intlist_to_bytes(self):
self.assertEqual(
@@ -855,7 +938,7 @@ class TestUtil(unittest.TestCase):
def test_args_to_str(self):
self.assertEqual(
args_to_str(['foo', 'ba/r', '-baz', '2 be', '']),
'foo ba/r -baz \'2 be\' \'\''
'foo ba/r -baz \'2 be\' \'\'' if compat_os_name != 'nt' else 'foo ba/r -baz "2 be" ""'
)
def test_parse_filesize(self):
@@ -983,7 +1066,7 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
<p begin="3" dur="-1">Ignored, three</p>
</div>
</body>
</tt>'''
</tt>'''.encode('utf-8')
srt_data = '''1
00:00:00,000 --> 00:00:01,000
The following line contains Chinese characters and special symbols
@@ -1008,7 +1091,7 @@ Line
<p begin="0" end="1">The first line</p>
</div>
</body>
</tt>'''
</tt>'''.encode('utf-8')
srt_data = '''1
00:00:00,000 --> 00:00:01,000
The first line
@@ -1016,6 +1099,67 @@ The first line
'''
self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data)
dfxp_data_with_style = '''<?xml version="1.0" encoding="utf-8"?>
<tt xmlns="http://www.w3.org/2006/10/ttaf1" xmlns:ttp="http://www.w3.org/2006/10/ttaf1#parameter" ttp:timeBase="media" xmlns:tts="http://www.w3.org/2006/10/ttaf1#style" xml:lang="en" xmlns:ttm="http://www.w3.org/2006/10/ttaf1#metadata">
<head>
<styling>
<style id="s2" style="s0" tts:color="cyan" tts:fontWeight="bold" />
<style id="s1" style="s0" tts:color="yellow" tts:fontStyle="italic" />
<style id="s3" style="s0" tts:color="lime" tts:textDecoration="underline" />
<style id="s0" tts:backgroundColor="black" tts:fontStyle="normal" tts:fontSize="16" tts:fontFamily="sansSerif" tts:color="white" />
</styling>
</head>
<body tts:textAlign="center" style="s0">
<div>
<p begin="00:00:02.08" id="p0" end="00:00:05.84">default style<span tts:color="red">custom style</span></p>
<p style="s2" begin="00:00:02.08" id="p0" end="00:00:05.84"><span tts:color="lime">part 1<br /></span><span tts:color="cyan">part 2</span></p>
<p style="s3" begin="00:00:05.84" id="p1" end="00:00:09.56">line 3<br />part 3</p>
<p style="s1" tts:textDecoration="underline" begin="00:00:09.56" id="p2" end="00:00:12.36"><span style="s2" tts:color="lime">inner<br /> </span>style</p>
</div>
</body>
</tt>'''.encode('utf-8')
srt_data = '''1
00:00:02,080 --> 00:00:05,839
<font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font>
2
00:00:02,080 --> 00:00:05,839
<b><font color="cyan" face="sansSerif" size="16"><font color="lime">part 1
</font>part 2</font></b>
3
00:00:05,839 --> 00:00:09,560
<u><font color="lime">line 3
part 3</font></u>
4
00:00:09,560 --> 00:00:12,359
<i><u><font color="yellow"><font color="lime">inner
</font>style</font></u></i>
'''
self.assertEqual(dfxp2srt(dfxp_data_with_style), srt_data)
dfxp_data_non_utf8 = '''<?xml version="1.0" encoding="UTF-16"?>
<tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
<body>
<div xml:lang="en">
<p begin="0" end="1">Line 1</p>
<p begin="1" end="2"></p>
</div>
</body>
</tt>'''.encode('utf-16')
srt_data = '''1
00:00:00,000 --> 00:00:01,000
Line 1
2
00:00:01,000 --> 00:00:02,000
'''
self.assertEqual(dfxp2srt(dfxp_data_non_utf8), srt_data)
def test_cli_option(self):
self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128'])
self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), [])
@@ -1061,6 +1205,10 @@ The first line
cli_bool_option(
{'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
['--check-certificate=true'])
self.assertEqual(
cli_bool_option(
{}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
[])
def test_ohdave_rsa_encrypt(self):
N = 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd
@@ -1070,6 +1218,14 @@ The first line
ohdave_rsa_encrypt(b'aa111222', e, N),
'726664bd9a23fd0c70f9f1b84aab5e3905ce1e45a584e9cbcf9bcc7510338fc1986d6c599ff990d923aa43c51c0d9013cd572e13bc58f4ae48f2ed8c0b0ba881')
def test_pkcs1pad(self):
data = [1, 2, 3]
padded_data = pkcs1pad(data, 32)
self.assertEqual(padded_data[:2], [0, 2])
self.assertEqual(padded_data[28:], [0, 1, 2, 3])
self.assertRaises(ValueError, pkcs1pad, data, 8)
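For reference, a sketch of the block layout these assertions imply (inferred from the test, not taken from the implementation itself):

from youtube_dl.utils import pkcs1pad

# pkcs1pad([1, 2, 3], 32) is expected to yield a 32-element int list shaped like
# [0x00, 0x02, <26 padding ints>, 0x00, 1, 2, 3], i.e. PKCS#1 v1.5 type-2 padding,
# and to raise ValueError when the block is too small to hold the data plus padding.
padded = pkcs1pad([1, 2, 3], 32)
assert padded[:2] == [0, 2] and padded[28:] == [0, 1, 2, 3] and len(padded) == 32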
def test_encode_base_n(self):
self.assertEqual(encode_base_n(0, 30), '0')
self.assertEqual(encode_base_n(80, 30), '2k')
@@ -1093,6 +1249,38 @@ The first line
self.assertEqual(get_element_by_class('foo', html), 'nice')
self.assertEqual(get_element_by_class('no-such-class', html), None)
def test_get_element_by_attribute(self):
html = '''
<span class="foo bar">nice</span>
'''
self.assertEqual(get_element_by_attribute('class', 'foo bar', html), 'nice')
self.assertEqual(get_element_by_attribute('class', 'foo', html), None)
self.assertEqual(get_element_by_attribute('class', 'no-such-foo', html), None)
html = '''
<div itemprop="author" itemscope>foo</div>
'''
self.assertEqual(get_element_by_attribute('itemprop', 'author', html), 'foo')
def test_get_elements_by_class(self):
html = '''
<span class="foo bar">nice</span><span class="foo bar">also nice</span>
'''
self.assertEqual(get_elements_by_class('foo', html), ['nice', 'also nice'])
self.assertEqual(get_elements_by_class('no-such-class', html), [])
def test_get_elements_by_attribute(self):
html = '''
<span class="foo bar">nice</span><span class="foo bar">also nice</span>
'''
self.assertEqual(get_elements_by_attribute('class', 'foo bar', html), ['nice', 'also nice'])
self.assertEqual(get_elements_by_attribute('class', 'foo', html), [])
self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), [])
if __name__ == '__main__':
unittest.main()

+ 275
- 0
test/test_youtube_chapters.py

@@ -0,0 +1,275 @@
#!/usr/bin/env python
# coding: utf-8
from __future__ import unicode_literals
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import expect_value
from youtube_dl.extractor import YoutubeIE
class TestYoutubeChapters(unittest.TestCase):
_TEST_CASES = [
(
# https://www.youtube.com/watch?v=A22oy8dFjqc
# pattern: 00:00 - <title>
'''This is the absolute ULTIMATE experience of Queen's set at LIVE AID, this is the best video mixed to the absolutely superior stereo radio broadcast. This vastly superior audio mix takes a huge dump on all of the official mixes. Best viewed in 1080p. ENJOY! ***MAKE SURE TO READ THE DESCRIPTION***<br /><a href="#" onclick="yt.www.watch.player.seekTo(00*60+36);return false;">00:36</a> - Bohemian Rhapsody<br /><a href="#" onclick="yt.www.watch.player.seekTo(02*60+42);return false;">02:42</a> - Radio Ga Ga<br /><a href="#" onclick="yt.www.watch.player.seekTo(06*60+53);return false;">06:53</a> - Ay Oh!<br /><a href="#" onclick="yt.www.watch.player.seekTo(07*60+34);return false;">07:34</a> - Hammer To Fall<br /><a href="#" onclick="yt.www.watch.player.seekTo(12*60+08);return false;">12:08</a> - Crazy Little Thing Called Love<br /><a href="#" onclick="yt.www.watch.player.seekTo(16*60+03);return false;">16:03</a> - We Will Rock You<br /><a href="#" onclick="yt.www.watch.player.seekTo(17*60+18);return false;">17:18</a> - We Are The Champions<br /><a href="#" onclick="yt.www.watch.player.seekTo(21*60+12);return false;">21:12</a> - Is This The World We Created...?<br /><br />Short song analysis:<br /><br />- "Bohemian Rhapsody": Although it's a short medley version, it's one of the best performances of the ballad section, with Freddie nailing the Bb4s with the correct studio phrasing (for the first time ever!).<br /><br />- "Radio Ga Ga": Although it's missing one chorus, this is one of - if not the best - the best versions ever, Freddie nails all the Bb4s and sounds very clean! Spike Edney's Roland Jupiter 8 also really shines through on this mix, compared to the DVD releases!<br /><br />- "Audience Improv": A great improv, Freddie sounds strong and confident. You gotta love when he sustains that A4 for 4 seconds!<br /><br />- "Hammer To Fall": Despite missing a verse and a chorus, it's a strong version (possibly the best ever). Freddie sings the song amazingly, and even ad-libs a C#5 and a C5! Also notice how heavy Brian's guitar sounds compared to the thin DVD mixes - it roars!<br /><br />- "Crazy Little Thing Called Love": A great version, the crowd loves the song, the jam is great as well! Only downside to this is the slight feedback issues.<br /><br />- "We Will Rock You": Although cut down to the 1st verse and chorus, Freddie sounds strong. He nails the A4, and the solo from Dr. May is brilliant!<br /><br />- "We Are the Champions": Perhaps the high-light of the performance - Freddie is very daring on this version, he sustains the pre-chorus Bb4s, nails the 1st C5, belts great A4s, but most importantly: He nails the chorus Bb4s, in all 3 choruses! This is the only time he has ever done so! It has to be said though, the last one sounds a bit rough, but that's a side effect of belting high notes for the past 18 minutes, with nodules AND laryngitis!<br /><br />- "Is This The World We Created... ?": Freddie and Brian perform a beautiful version of this, and it is one of the best versions ever. It's both sad and hilarious that a couple of BBC engineers are talking over the song, one of them being completely oblivious of the fact that he is interrupting the performance, on live television... 
Which was being televised to almost 2 billion homes.<br /><br /><br />All rights go to their respective owners!<br />-----Copyright Disclaimer Under Section 107 of the Copyright Act 1976, allowance is made for fair use for purposes such as criticism, comment, news reporting, teaching, scholarship, and research. Fair use is a use permitted by copyright statute that might otherwise be infringing. Non-profit, educational or personal use tips the balance in favor of fair use''',
1477,
[{
'start_time': 36,
'end_time': 162,
'title': 'Bohemian Rhapsody',
}, {
'start_time': 162,
'end_time': 413,
'title': 'Radio Ga Ga',
}, {
'start_time': 413,
'end_time': 454,
'title': 'Ay Oh!',
}, {
'start_time': 454,
'end_time': 728,
'title': 'Hammer To Fall',
}, {
'start_time': 728,
'end_time': 963,
'title': 'Crazy Little Thing Called Love',
}, {
'start_time': 963,
'end_time': 1038,
'title': 'We Will Rock You',
}, {
'start_time': 1038,
'end_time': 1272,
'title': 'We Are The Champions',
}, {
'start_time': 1272,
'end_time': 1477,
'title': 'Is This The World We Created...?',
}]
),
(
# https://www.youtube.com/watch?v=ekYlRhALiRQ
# pattern: <num>. <title> 0:00
'1. Those Beaten Paths of Confusion <a href="#" onclick="yt.www.watch.player.seekTo(0*60+00);return false;">0:00</a><br />2. Beyond the Shadows of Emptiness & Nothingness <a href="#" onclick="yt.www.watch.player.seekTo(11*60+47);return false;">11:47</a><br />3. Poison Yourself...With Thought <a href="#" onclick="yt.www.watch.player.seekTo(26*60+30);return false;">26:30</a><br />4. The Agents of Transformation <a href="#" onclick="yt.www.watch.player.seekTo(35*60+57);return false;">35:57</a><br />5. Drowning in the Pain of Consciousness <a href="#" onclick="yt.www.watch.player.seekTo(44*60+32);return false;">44:32</a><br />6. Deny the Disease of Life <a href="#" onclick="yt.www.watch.player.seekTo(53*60+07);return false;">53:07</a><br /><br />More info/Buy: http://crepusculonegro.storenvy.com/products/257645-cn-03-arizmenda-within-the-vacuum-of-infinity<br /><br />No copyright is intended. The rights to this video are assumed by the owner and its affiliates.',
4009,
[{
'start_time': 0,
'end_time': 707,
'title': '1. Those Beaten Paths of Confusion',
}, {
'start_time': 707,
'end_time': 1590,
'title': '2. Beyond the Shadows of Emptiness & Nothingness',
}, {
'start_time': 1590,
'end_time': 2157,
'title': '3. Poison Yourself...With Thought',
}, {
'start_time': 2157,
'end_time': 2672,
'title': '4. The Agents of Transformation',
}, {
'start_time': 2672,
'end_time': 3187,
'title': '5. Drowning in the Pain of Consciousness',
}, {
'start_time': 3187,
'end_time': 4009,
'title': '6. Deny the Disease of Life',
}]
),
(
# https://www.youtube.com/watch?v=WjL4pSzog9w
# pattern: 00:00 <title>
'<a href="https://arizmenda.bandcamp.com/merch/despairs-depths-descended-cd" class="yt-uix-servicelink " data-target-new-window="True" data-servicelink="CDAQ6TgYACITCNf1raqT2dMCFdRjGAod_o0CBSj4HQ" data-url="https://arizmenda.bandcamp.com/merch/despairs-depths-descended-cd" rel="nofollow noopener" target="_blank">https://arizmenda.bandcamp.com/merch/...</a><br /><br /><a href="#" onclick="yt.www.watch.player.seekTo(00*60+00);return false;">00:00</a> Christening Unborn Deformities <br /><a href="#" onclick="yt.www.watch.player.seekTo(07*60+08);return false;">07:08</a> Taste of Purity<br /><a href="#" onclick="yt.www.watch.player.seekTo(16*60+16);return false;">16:16</a> Sculpting Sins of a Universal Tongue<br /><a href="#" onclick="yt.www.watch.player.seekTo(24*60+45);return false;">24:45</a> Birth<br /><a href="#" onclick="yt.www.watch.player.seekTo(31*60+24);return false;">31:24</a> Neves<br /><a href="#" onclick="yt.www.watch.player.seekTo(37*60+55);return false;">37:55</a> Libations in Limbo',
2705,
[{
'start_time': 0,
'end_time': 428,
'title': 'Christening Unborn Deformities',
}, {
'start_time': 428,
'end_time': 976,
'title': 'Taste of Purity',
}, {
'start_time': 976,
'end_time': 1485,
'title': 'Sculpting Sins of a Universal Tongue',
}, {
'start_time': 1485,
'end_time': 1884,
'title': 'Birth',
}, {
'start_time': 1884,
'end_time': 2275,
'title': 'Neves',
}, {
'start_time': 2275,
'end_time': 2705,
'title': 'Libations in Limbo',
}]
),
(
# https://www.youtube.com/watch?v=o3r1sn-t3is
# pattern: <title> 00:00 <note>
'Download this show in MP3: <a href="http://sh.st/njZKK" class="yt-uix-servicelink " data-url="http://sh.st/njZKK" data-target-new-window="True" data-servicelink="CDAQ6TgYACITCK3j8_6o2dMCFVDCGAoduVAKKij4HQ" rel="nofollow noopener" target="_blank">http://sh.st/njZKK</a><br /><br />Setlist:<br />I-E-A-I-A-I-O <a href="#" onclick="yt.www.watch.player.seekTo(00*60+45);return false;">00:45</a><br />Suite-Pee <a href="#" onclick="yt.www.watch.player.seekTo(4*60+26);return false;">4:26</a> (Incomplete)<br />Attack <a href="#" onclick="yt.www.watch.player.seekTo(5*60+31);return false;">5:31</a> (First live performance since 2011)<br />Prison Song <a href="#" onclick="yt.www.watch.player.seekTo(8*60+42);return false;">8:42</a><br />Know <a href="#" onclick="yt.www.watch.player.seekTo(12*60+32);return false;">12:32</a> (First live performance since 2011)<br />Aerials <a href="#" onclick="yt.www.watch.player.seekTo(15*60+32);return false;">15:32</a><br />Soldier Side - Intro <a href="#" onclick="yt.www.watch.player.seekTo(19*60+13);return false;">19:13</a><br />B.Y.O.B. <a href="#" onclick="yt.www.watch.player.seekTo(20*60+09);return false;">20:09</a><br />Soil <a href="#" onclick="yt.www.watch.player.seekTo(24*60+32);return false;">24:32</a><br />Darts <a href="#" onclick="yt.www.watch.player.seekTo(27*60+48);return false;">27:48</a><br />Radio/Video <a href="#" onclick="yt.www.watch.player.seekTo(30*60+38);return false;">30:38</a><br />Hypnotize <a href="#" onclick="yt.www.watch.player.seekTo(35*60+05);return false;">35:05</a><br />Temper <a href="#" onclick="yt.www.watch.player.seekTo(38*60+08);return false;">38:08</a> (First live performance since 1999)<br />CUBErt <a href="#" onclick="yt.www.watch.player.seekTo(41*60+00);return false;">41:00</a><br />Needles <a href="#" onclick="yt.www.watch.player.seekTo(42*60+57);return false;">42:57</a><br />Deer Dance <a href="#" onclick="yt.www.watch.player.seekTo(46*60+27);return false;">46:27</a><br />Bounce <a href="#" onclick="yt.www.watch.player.seekTo(49*60+38);return false;">49:38</a><br />Suggestions <a href="#" onclick="yt.www.watch.player.seekTo(51*60+25);return false;">51:25</a><br />Psycho <a href="#" onclick="yt.www.watch.player.seekTo(53*60+52);return false;">53:52</a><br />Chop Suey! <a href="#" onclick="yt.www.watch.player.seekTo(58*60+13);return false;">58:13</a><br />Lonely Day <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+01*60+15);return false;">1:01:15</a><br />Question! <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+04*60+14);return false;">1:04:14</a><br />Lost in Hollywood <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+08*60+10);return false;">1:08:10</a><br />Vicinity of Obscenity <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+13*60+40);return false;">1:13:40</a>(First live performance since 2012)<br />Forest <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+16*60+17);return false;">1:16:17</a><br />Cigaro <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+20*60+02);return false;">1:20:02</a><br />Toxicity <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+23*60+57);return false;">1:23:57</a>(with Chino Moreno)<br />Sugar <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+27*60+53);return false;">1:27:53</a>',
5640,
[{
'start_time': 45,
'end_time': 266,
'title': 'I-E-A-I-A-I-O',
}, {
'start_time': 266,
'end_time': 331,
'title': 'Suite-Pee (Incomplete)',
}, {
'start_time': 331,
'end_time': 522,
'title': 'Attack (First live performance since 2011)',
}, {
'start_time': 522,
'end_time': 752,
'title': 'Prison Song',
}, {
'start_time': 752,
'end_time': 932,
'title': 'Know (First live performance since 2011)',
}, {
'start_time': 932,
'end_time': 1153,
'title': 'Aerials',
}, {
'start_time': 1153,
'end_time': 1209,
'title': 'Soldier Side - Intro',
}, {
'start_time': 1209,
'end_time': 1472,
'title': 'B.Y.O.B.',
}, {
'start_time': 1472,
'end_time': 1668,
'title': 'Soil',
}, {
'start_time': 1668,
'end_time': 1838,
'title': 'Darts',
}, {
'start_time': 1838,
'end_time': 2105,
'title': 'Radio/Video',
}, {
'start_time': 2105,
'end_time': 2288,
'title': 'Hypnotize',
}, {
'start_time': 2288,
'end_time': 2460,
'title': 'Temper (First live performance since 1999)',
}, {
'start_time': 2460,
'end_time': 2577,
'title': 'CUBErt',
}, {
'start_time': 2577,
'end_time': 2787,
'title': 'Needles',
}, {
'start_time': 2787,
'end_time': 2978,
'title': 'Deer Dance',
}, {
'start_time': 2978,
'end_time': 3085,
'title': 'Bounce',
}, {
'start_time': 3085,
'end_time': 3232,
'title': 'Suggestions',
}, {
'start_time': 3232,
'end_time': 3493,
'title': 'Psycho',
}, {
'start_time': 3493,
'end_time': 3675,
'title': 'Chop Suey!',
}, {
'start_time': 3675,
'end_time': 3854,
'title': 'Lonely Day',
}, {
'start_time': 3854,
'end_time': 4090,
'title': 'Question!',
}, {
'start_time': 4090,
'end_time': 4420,
'title': 'Lost in Hollywood',
}, {
'start_time': 4420,
'end_time': 4577,
'title': 'Vicinity of Obscenity (First live performance since 2012)',
}, {
'start_time': 4577,
'end_time': 4802,
'title': 'Forest',
}, {
'start_time': 4802,
'end_time': 5037,
'title': 'Cigaro',
}, {
'start_time': 5037,
'end_time': 5273,
'title': 'Toxicity (with Chino Moreno)',
}, {
'start_time': 5273,
'end_time': 5640,
'title': 'Sugar',
}]
),
(
# https://www.youtube.com/watch?v=PkYLQbsqCE8
# pattern: <num> - <title> [<latinized title>] 0:00:00
'''Затемно (Zatemno) is an Obscure Black Metal Band from Russia.<br /><br />"Во прах (Vo prakh)'' Into The Ashes", Debut mini-album released may 6, 2016, by Death Knell Productions<br />Released on 6 panel digipak CD, limited to 100 copies only<br />And digital format on Bandcamp<br /><br />Tracklist<br /><br />1 - Во прах [Vo prakh] <a href="#" onclick="yt.www.watch.player.seekTo(0*3600+00*60+00);return false;">0:00:00</a><br />2 - Искупление [Iskupleniye] <a href="#" onclick="yt.www.watch.player.seekTo(0*3600+08*60+10);return false;">0:08:10</a><br />3 - Из серпов луны...[Iz serpov luny] <a href="#" onclick="yt.www.watch.player.seekTo(0*3600+14*60+30);return false;">0:14:30</a><br /><br />Links:<br /><a href="https://deathknellprod.bandcamp.com/album/--2" class="yt-uix-servicelink " data-target-new-window="True" data-url="https://deathknellprod.bandcamp.com/album/--2" data-servicelink="CC8Q6TgYACITCNP234Kr2dMCFcNxGAodQqsIwSj4HQ" target="_blank" rel="nofollow noopener">https://deathknellprod.bandcamp.com/a...</a><br /><a href="https://www.facebook.com/DeathKnellProd/" class="yt-uix-servicelink " data-target-new-window="True" data-url="https://www.facebook.com/DeathKnellProd/" data-servicelink="CC8Q6TgYACITCNP234Kr2dMCFcNxGAodQqsIwSj4HQ" target="_blank" rel="nofollow noopener">https://www.facebook.com/DeathKnellProd/</a><br /><br /><br />I don't have any right about this artifact, my only intention is to spread the music of the band, all rights are reserved to the Затемно (Zatemno) and his producers, Death Knell Productions.<br /><br />------------------------------------------------------------------<br /><br />Subscribe for more videos like this.<br />My link: <a href="https://web.facebook.com/AttackOfTheDragons" class="yt-uix-servicelink " data-target-new-window="True" data-url="https://web.facebook.com/AttackOfTheDragons" data-servicelink="CC8Q6TgYACITCNP234Kr2dMCFcNxGAodQqsIwSj4HQ" target="_blank" rel="nofollow noopener">https://web.facebook.com/AttackOfTheD...</a>''',
1138,
[{
'start_time': 0,
'end_time': 490,
'title': '1 - Во прах [Vo prakh]',
}, {
'start_time': 490,
'end_time': 870,
'title': '2 - Искупление [Iskupleniye]',
}, {
'start_time': 870,
'end_time': 1138,
'title': '3 - Из серпов луны...[Iz serpov luny]',
}]
),
(
# https://www.youtube.com/watch?v=xZW70zEasOk
# time point more than duration
'''● LCS Spring finals: Saturday and Sunday from <a href="#" onclick="yt.www.watch.player.seekTo(13*60+30);return false;">13:30</a> outside the venue! <br />● PAX East: Fri, Sat & Sun - more info in tomorrows video on the main channel!''',
283,
[]
),
]
def test_youtube_chapters(self):
for description, duration, expected_chapters in self._TEST_CASES:
ie = YoutubeIE()
expect_value(
self, ie._extract_chapters(description, duration),
expected_chapters, None)
if __name__ == '__main__':
unittest.main()
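As context for the expected values above, a small illustrative helper (not part of the test suite) that renders such a chapter list; the dict keys are exactly those asserted in _TEST_CASES:

def format_chapters(chapters):
    # Each chapter is a {'start_time': ..., 'end_time': ..., 'title': ...} dict.
    lines = []
    for chapter in chapters:
        lines.append('%s - %s: %s' % (
            chapter['start_time'], chapter['end_time'], chapter['title']))
    return '\n'.join(lines)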

+ 10
- 0
test/testdata/f4m/custom_base_url.f4m

@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<manifest xmlns="http://ns.adobe.com/f4m/1.0">
<streamType>recorded</streamType>
<baseURL>http://vod.livestream.com/events/0000000000673980/</baseURL>
<duration>269.293</duration>
<bootstrapInfo profile="named" id="bootstrap_1">AAAAm2Fic3QAAAAAAAAAAQAAAAPoAAAAAAAEG+0AAAAAAAAAAAAAAAAAAQAAABlhc3J0AAAAAAAAAAABAAAAAQAAAC4BAAAAVmFmcnQAAAAAAAAD6AAAAAAEAAAAAQAAAAAAAAAAAAAXcAAAAC0AAAAAAAQHQAAAE5UAAAAuAAAAAAAEGtUAAAEYAAAAAAAAAAAAAAAAAAAAAAA=</bootstrapInfo>
<media url="b90f532f-b0f6-4f4e-8289-706d490b2fd8_2292" bootstrapInfoId="bootstrap_1" bitrate="2148" width="1280" height="720" videoCodec="avc1.4d401f" audioCodec="mp4a.40.2">
<metadata>AgAKb25NZXRhRGF0YQgAAAAIAAhkdXJhdGlvbgBAcNSwIMSbpgAFd2lkdGgAQJQAAAAAAAAABmhlaWdodABAhoAAAAAAAAAJZnJhbWVyYXRlAEA4/7DoLwW3AA12aWRlb2RhdGFyYXRlAECe1DLgjcobAAx2aWRlb2NvZGVjaWQAQBwAAAAAAAAADWF1ZGlvZGF0YXJhdGUAQGSimlvaPKQADGF1ZGlvY29kZWNpZABAJAAAAAAAAAAACQ==</metadata>
</media>
</manifest>

+ 14
- 0
test/testdata/m3u8/pluzz_francetv_11507.m3u8

@@ -0,0 +1,14 @@
#EXTM3U
#EXT-X-VERSION:5
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Francais",DEFAULT=NO,FORCED=NO,URI="http://replayftv-pmd.francetv.fr/subtitles/2017/16/156589847-1492488987.m3u8",LANGUAGE="fra"
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aac",LANGUAGE="fra",NAME="Francais",DEFAULT=YES, AUTOSELECT=YES
#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=180000,RESOLUTION=256x144,CODECS="avc1.66.30, mp4a.40.2"
http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_0_av.m3u8?null=0
#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=303000,RESOLUTION=320x180,CODECS="avc1.66.30, mp4a.40.2"
http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_1_av.m3u8?null=0
#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=575000,RESOLUTION=512x288,CODECS="avc1.66.30, mp4a.40.2"
http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_2_av.m3u8?null=0
#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=831000,RESOLUTION=704x396,CODECS="avc1.77.30, mp4a.40.2"
http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_3_av.m3u8?null=0
#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=1467000,RESOLUTION=1024x576,CODECS="avc1.77.30, mp4a.40.2"
http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_4_av.m3u8?null=0

+ 16
- 0
test/testdata/m3u8/teamcoco_11995.m3u8

@@ -0,0 +1,16 @@
#EXTM3U
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio-0",NAME="Default",AUTOSELECT=YES,DEFAULT=YES,URI="hls/CONAN_020217_Highlight_show-audio-160k_v4.m3u8"
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio-1",NAME="Default",AUTOSELECT=YES,DEFAULT=YES,URI="hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8"
#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=37862000,CODECS="avc1.4d001f",URI="hls/CONAN_020217_Highlight_show-2m_iframe.m3u8"
#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=18750000,CODECS="avc1.4d001e",URI="hls/CONAN_020217_Highlight_show-1m_iframe.m3u8"
#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=6535000,CODECS="avc1.42001e",URI="hls/CONAN_020217_Highlight_show-400k_iframe.m3u8"
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=2374000,RESOLUTION=1024x576,CODECS="avc1.4d001f,mp4a.40.2",AUDIO="audio-0"
hls/CONAN_020217_Highlight_show-2m_v4.m3u8
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1205000,RESOLUTION=640x360,CODECS="avc1.4d001e,mp4a.40.2",AUDIO="audio-0"
hls/CONAN_020217_Highlight_show-1m_v4.m3u8
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=522000,RESOLUTION=400x224,CODECS="avc1.42001e,mp4a.40.2",AUDIO="audio-0"
hls/CONAN_020217_Highlight_show-400k_v4.m3u8
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=413000,RESOLUTION=400x224,CODECS="avc1.42001e,mp4a.40.5",AUDIO="audio-1"
hls/CONAN_020217_Highlight_show-400k_v4.m3u8
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=71000,CODECS="mp4a.40.5",AUDIO="audio-1"
hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8

+ 13
- 0
test/testdata/m3u8/toggle_mobile_12211.m3u8

@@ -0,0 +1,13 @@
#EXTM3U
#EXT-X-VERSION:4
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",LANGUAGE="eng",NAME="English",URI="http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_sa2ntrdg/name/a.mp4/index.m3u8"
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",LANGUAGE="und",NAME="Undefined",URI="http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_r7y0nitg/name/a.mp4/index.m3u8"
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=155648,RESOLUTION=320x180,AUDIO="audio"
http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_qlk9hlzr/name/a.mp4/index.m3u8
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=502784,RESOLUTION=480x270,AUDIO="audio"
http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_oefackmi/name/a.mp4/index.m3u8
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=827392,RESOLUTION=640x360,AUDIO="audio"
http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_vyg9pj7k/name/a.mp4/index.m3u8
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1396736,RESOLUTION=854x480,AUDIO="audio"
http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_50n4psvx/name/a.mp4/index.m3u8

+ 20
- 0
test/testdata/m3u8/twitch_vod.m3u8

@@ -0,0 +1,20 @@
#EXTM3U
#EXT-X-TWITCH-INFO:ORIGIN="s3",CLUSTER="edgecast_vod",REGION="EU",MANIFEST-CLUSTER="edgecast_vod",USER-IP="109.171.17.81"
#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="chunked",NAME="Source",AUTOSELECT=YES,DEFAULT=YES
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=3214134,CODECS="avc1.100.31,mp4a.40.2",RESOLUTION="1280x720",VIDEO="chunked"
https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/chunked/index-muted-HM49I092CC.m3u8
#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="high",NAME="High",AUTOSELECT=YES,DEFAULT=YES
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1603789,CODECS="avc1.42C01F,mp4a.40.2",RESOLUTION="1280x720",VIDEO="high"
https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/high/index-muted-HM49I092CC.m3u8
#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="medium",NAME="Medium",AUTOSELECT=YES,DEFAULT=YES
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=893387,CODECS="avc1.42C01E,mp4a.40.2",RESOLUTION="852x480",VIDEO="medium"
https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/medium/index-muted-HM49I092CC.m3u8
#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="low",NAME="Low",AUTOSELECT=YES,DEFAULT=YES
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=628347,CODECS="avc1.42C01E,mp4a.40.2",RESOLUTION="640x360",VIDEO="low"
https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/low/index-muted-HM49I092CC.m3u8
#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="mobile",NAME="Mobile",AUTOSELECT=YES,DEFAULT=YES
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=280474,CODECS="avc1.42C00D,mp4a.40.2",RESOLUTION="400x226",VIDEO="mobile"
https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/mobile/index-muted-HM49I092CC.m3u8
#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="audio_only",NAME="Audio Only",AUTOSELECT=NO,DEFAULT=NO
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=182725,CODECS="mp4a.40.2",VIDEO="audio_only"
https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/audio_only/index-muted-HM49I092CC.m3u8

+ 10
- 0
test/testdata/m3u8/vidio.m3u8

@@ -0,0 +1,10 @@
#EXTM3U
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=300000,RESOLUTION=480x270,NAME="270p 3G"
https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b300.mp4.m3u8
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=600000,RESOLUTION=640x360,NAME="360p SD"
https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b600.mp4.m3u8
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1200000,RESOLUTION=1280x720,NAME="720p HD"
https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b1200.mp4.m3u8

+ 18
- 0
test/testdata/mpd/float_duration.mpd

@@ -0,0 +1,18 @@
<?xml version="1.0" encoding="UTF-8"?>
<MPD xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="urn:mpeg:dash:schema:mpd:2011" type="static" minBufferTime="PT2S" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" mediaPresentationDuration="PT6014S">
<Period bitstreamSwitching="true">
<AdaptationSet mimeType="audio/mp4" codecs="mp4a.40.2" startWithSAP="1" segmentAlignment="true">
<SegmentTemplate timescale="1000000" presentationTimeOffset="0" initialization="ai_$RepresentationID$.mp4d" media="a_$RepresentationID$_$Number$.mp4d" duration="2000000.0" startNumber="0"></SegmentTemplate>
<Representation id="318597" bandwidth="61587"></Representation>
</AdaptationSet>
<AdaptationSet mimeType="video/mp4" startWithSAP="1" segmentAlignment="true">
<SegmentTemplate timescale="1000000" presentationTimeOffset="0" initialization="vi_$RepresentationID$.mp4d" media="v_$RepresentationID$_$Number$.mp4d" duration="2000000.0" startNumber="0"></SegmentTemplate>
<Representation id="318597" codecs="avc1.42001f" width="340" height="192" bandwidth="318597"></Representation>
<Representation id="638590" codecs="avc1.42001f" width="512" height="288" bandwidth="638590"></Representation>
<Representation id="1022565" codecs="avc1.4d001f" width="688" height="384" bandwidth="1022565"></Representation>
<Representation id="2046506" codecs="avc1.4d001f" width="1024" height="576" bandwidth="2046506"></Representation>
<Representation id="3998017" codecs="avc1.640029" width="1280" height="720" bandwidth="3998017"></Representation>
<Representation id="5997485" codecs="avc1.640032" width="1920" height="1080" bandwidth="5997485"></Representation>
</AdaptationSet>
</Period>
</MPD>

+ 218
- 0
test/testdata/mpd/urls_only.mpd

@@ -0,0 +1,218 @@
<?xml version="1.0" ?>
<MPD maxSegmentDuration="PT0H0M10.000S" mediaPresentationDuration="PT0H4M1.728S" minBufferTime="PT1.500S" profiles="urn:mpeg:dash:profile:isoff-main:2011" type="static" xmlns="urn:mpeg:dash:schema:mpd:2011">
<Period duration="PT0H4M1.728S">
<AdaptationSet bitstreamSwitching="true" lang="und" maxHeight="1080" maxWidth="1920" par="16:9" segmentAlignment="true">
<ContentComponent contentType="video" id="1"/>
<Representation audioSamplingRate="44100" bandwidth="200000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="144" id="h264_aac_144p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="256">
<SegmentList duration="10000" timescale="1000">
<Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/init/432f65a0.mp4"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/0/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/1/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/2/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/3/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/4/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/5/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/6/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/7/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/8/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/9/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/10/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/11/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/12/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/13/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/14/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/15/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/16/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/17/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/18/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/19/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/20/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/21/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/22/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/23/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/24/432f65a0.m4s"/>
</SegmentList>
</Representation>
<Representation audioSamplingRate="44100" bandwidth="400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="240" id="h264_aac_240p_m4s" mimeType="video/mp4" sar="160:159" startWithSAP="1" width="424">
<SegmentList duration="10000" timescale="1000">
<Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/init/432f65a0.mp4"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/0/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/1/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/2/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/3/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/4/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/5/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/6/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/7/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/8/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/9/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/10/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/11/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/12/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/13/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/14/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/15/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/16/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/17/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/18/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/19/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/20/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/21/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/22/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/23/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/24/432f65a0.m4s"/>
</SegmentList>
</Representation>
<Representation audioSamplingRate="44100" bandwidth="800000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="360" id="h264_aac_360p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="640">
<SegmentList duration="10000" timescale="1000">
<Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/init/432f65a0.mp4"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/0/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/1/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/2/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/3/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/4/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/5/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/6/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/7/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/8/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/9/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/10/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/11/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/12/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/13/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/14/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/15/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/16/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/17/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/18/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/19/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/20/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/21/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/22/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/23/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/24/432f65a0.m4s"/>
</SegmentList>
</Representation>
<Representation audioSamplingRate="44100" bandwidth="1200000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="480" id="h264_aac_480p_m4s" mimeType="video/mp4" sar="320:321" startWithSAP="1" width="856">
<SegmentList duration="10000" timescale="1000">
<Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/init/432f65a0.mp4"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/0/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/1/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/2/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/3/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/4/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/5/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/6/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/7/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/8/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/9/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/10/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/11/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/12/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/13/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/14/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/15/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/16/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/17/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/18/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/19/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/20/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/21/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/22/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/23/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/24/432f65a0.m4s"/>
</SegmentList>
</Representation>
<Representation audioSamplingRate="44100" bandwidth="1600000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="576" id="h264_aac_576p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1024">
<SegmentList duration="10000" timescale="1000">
<Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/init/432f65a0.mp4"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/0/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/1/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/2/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/3/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/4/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/5/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/6/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/7/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/8/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/9/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/10/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/11/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/12/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/13/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/14/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/15/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/16/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/17/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/18/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/19/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/20/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/21/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/22/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/23/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/24/432f65a0.m4s"/>
</SegmentList>
</Representation>
<Representation audioSamplingRate="44100" bandwidth="2400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="720" id="h264_aac_720p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1280">
<SegmentList duration="10000" timescale="1000">
<Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/init/432f65a0.mp4"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/0/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/1/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/2/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/3/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/4/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/5/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/6/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/7/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/8/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/9/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/10/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/11/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/12/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/13/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/14/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/15/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/16/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/17/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/18/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/19/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/20/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/21/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/22/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/23/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/24/432f65a0.m4s"/>
</SegmentList>
</Representation>
<Representation audioSamplingRate="44100" bandwidth="4400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="1080" id="h264_aac_1080p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1920">
<SegmentList duration="10000" timescale="1000">
<Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/init/432f65a0.mp4"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/0/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/1/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/2/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/3/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/4/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/5/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/6/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/7/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/8/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/9/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/10/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/11/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/12/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/13/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/14/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/15/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/16/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/17/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/18/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/19/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/20/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/21/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/22/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/23/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/24/432f65a0.m4s"/>
</SegmentList>
</Representation>
</AdaptationSet>
</Period>
</MPD>

+ 291
- 85
youtube_dl/YoutubeDL.py

@ -24,14 +24,17 @@ import sys
import time import time
import tokenize import tokenize
import traceback import traceback
import random
from string import ascii_letters
from .compat import ( from .compat import (
compat_basestring, compat_basestring,
compat_cookiejar, compat_cookiejar,
compat_expanduser,
compat_get_terminal_size, compat_get_terminal_size,
compat_http_client, compat_http_client,
compat_kwargs, compat_kwargs,
compat_numeric_types,
compat_os_name, compat_os_name,
compat_str, compat_str,
compat_tokenize_tokenize, compat_tokenize_tokenize,
@ -52,12 +55,17 @@ from .utils import (
encode_compat_str, encode_compat_str,
encodeFilename, encodeFilename,
error_to_compat_str, error_to_compat_str,
expand_path,
ExtractorError, ExtractorError,
format_bytes, format_bytes,
formatSeconds, formatSeconds,
GeoRestrictedError,
int_or_none,
ISO3166Utils,
locked_file, locked_file,
make_HTTPS_handler, make_HTTPS_handler,
MaxDownloadsReached, MaxDownloadsReached,
orderedSet,
PagedList, PagedList,
parse_filesize, parse_filesize,
PerRequestProxyHandler, PerRequestProxyHandler,
@ -85,6 +93,7 @@ from .utils import (
) )
from .cache import Cache from .cache import Cache
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
from .extractor.openload import PhantomJSwrapper
from .downloader import get_suitable_downloader from .downloader import get_suitable_downloader
from .downloader.rtmp import rtmpdump_version from .downloader.rtmp import rtmpdump_version
from .postprocessor import ( from .postprocessor import (
@ -159,6 +168,7 @@ class YoutubeDL(object):
playlistend: Playlist item to end at. playlistend: Playlist item to end at.
playlist_items: Specific indices of playlist to download. playlist_items: Specific indices of playlist to download.
playlistreverse: Download playlist items in reverse order. playlistreverse: Download playlist items in reverse order.
playlistrandom: Download playlist items in random order.
matchtitle: Download only matching titles. matchtitle: Download only matching titles.
rejecttitle: Reject downloads for matching titles. rejecttitle: Reject downloads for matching titles.
logger: Log messages to a logging.Logger instance. logger: Log messages to a logging.Logger instance.
@ -270,6 +280,12 @@ class YoutubeDL(object):
If it returns None, the video is downloaded. If it returns None, the video is downloaded.
match_filter_func in utils.py is one example for this. match_filter_func in utils.py is one example for this.
no_color: Do not emit color codes in output. no_color: Do not emit color codes in output.
geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
HTTP header (experimental)
geo_bypass_country:
Two-letter ISO 3166-2 country code that will be used for
explicit geographic restriction bypassing via faking
X-Forwarded-For HTTP header (experimental)
The following options determine which downloader is picked: The following options determine which downloader is picked:
external_downloader: Executable of the external downloader to call. external_downloader: Executable of the external downloader to call.
@ -289,8 +305,25 @@ class YoutubeDL(object):
otherwise prefer avconv. otherwise prefer avconv.
postprocessor_args: A list of additional command-line arguments for the postprocessor_args: A list of additional command-line arguments for the
postprocessor. postprocessor.
The following options are used by the Youtube extractor:
youtube_include_dash_manifest: If True (default), DASH manifests and related
data will be downloaded and processed by extractor.
You can reduce network I/O by disabling it if you don't
care about DASH.
""" """
_NUMERIC_FIELDS = set((
'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
'timestamp', 'upload_year', 'upload_month', 'upload_day',
'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
'average_rating', 'comment_count', 'age_limit',
'start_time', 'end_time',
'chapter_number', 'season_number', 'episode_number',
'track_number', 'disc_number', 'release_year',
'playlist_index',
))
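The new options documented above (geo_bypass, geo_bypass_country, playlistrandom, youtube_include_dash_manifest) are consumed as plain keys of the params dict passed to YoutubeDL. A minimal usage sketch, assuming this patched youtube_dl is importable; the URL is a placeholder:

    from youtube_dl import YoutubeDL

    ydl_opts = {
        'geo_bypass': True,                       # fake X-Forwarded-For header (experimental)
        'geo_bypass_country': 'DE',               # explicit two-letter country code
        'playlistrandom': True,                   # download playlist entries in random order
        'youtube_include_dash_manifest': False,   # skip DASH manifests to save network I/O
    }

    with YoutubeDL(ydl_opts) as ydl:
        ydl.download(['https://www.youtube.com/watch?v=PLACEHOLDER'])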
params = None params = None
_ies = [] _ies = []
_pps = [] _pps = []
@ -317,11 +350,21 @@ class YoutubeDL(object):
self.params.update(params) self.params.update(params)
self.cache = Cache(self) self.cache = Cache(self)
if self.params.get('cn_verification_proxy') is not None:
self.report_warning('--cn-verification-proxy is deprecated. Use --geo-verification-proxy instead.')
def check_deprecated(param, option, suggestion):
if self.params.get(param) is not None:
self.report_warning(
'%s is deprecated. Use %s instead.' % (option, suggestion))
return True
return False
if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
if self.params.get('geo_verification_proxy') is None: if self.params.get('geo_verification_proxy') is None:
self.params['geo_verification_proxy'] = self.params['cn_verification_proxy'] self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N is the number of digits')
check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
if params.get('bidi_workaround', False): if params.get('bidi_workaround', False):
try: try:
import pty import pty
@ -349,10 +392,10 @@ class YoutubeDL(object):
else: else:
raise raise
if (sys.version_info >= (3,) and sys.platform != 'win32' and
if (sys.platform != 'win32' and
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
not params.get('restrictfilenames', False)): not params.get('restrictfilenames', False)):
# On Python 3, the Unicode filesystem API will throw errors (#1474)
# Unicode filesystem API will throw errors (#1474, #13027)
self.report_warning( self.report_warning(
'Assuming --restrict-filenames since file system encoding ' 'Assuming --restrict-filenames since file system encoding '
'cannot encode all characters. ' 'cannot encode all characters. '
@ -477,24 +520,25 @@ class YoutubeDL(object):
def to_console_title(self, message): def to_console_title(self, message):
if not self.params.get('consoletitle', False): if not self.params.get('consoletitle', False):
return return
if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
# c_wchar_p() might not be necessary if `message` is
# already of type unicode()
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
if compat_os_name == 'nt':
if ctypes.windll.kernel32.GetConsoleWindow():
# c_wchar_p() might not be necessary if `message` is
# already of type unicode()
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
elif 'TERM' in os.environ: elif 'TERM' in os.environ:
self._write_string('\033]0;%s\007' % message, self._screen_file) self._write_string('\033]0;%s\007' % message, self._screen_file)
def save_console_title(self): def save_console_title(self):
if not self.params.get('consoletitle', False): if not self.params.get('consoletitle', False):
return return
if 'TERM' in os.environ:
if compat_os_name != 'nt' and 'TERM' in os.environ:
# Save the title on stack # Save the title on stack
self._write_string('\033[22;0t', self._screen_file) self._write_string('\033[22;0t', self._screen_file)
def restore_console_title(self): def restore_console_title(self):
if not self.params.get('consoletitle', False): if not self.params.get('consoletitle', False):
return return
if 'TERM' in os.environ:
if compat_os_name != 'nt' and 'TERM' in os.environ:
# Restore the title from stack # Restore the title from stack
self._write_string('\033[23;0t', self._screen_file) self._write_string('\033[23;0t', self._screen_file)
@ -583,10 +627,7 @@ class YoutubeDL(object):
autonumber_size = self.params.get('autonumber_size') autonumber_size = self.params.get('autonumber_size')
if autonumber_size is None: if autonumber_size is None:
autonumber_size = 5 autonumber_size = 5
autonumber_templ = '%0' + str(autonumber_size) + 'd'
template_dict['autonumber'] = autonumber_templ % self._num_downloads
if template_dict.get('playlist_index') is not None:
template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
if template_dict.get('resolution') is None: if template_dict.get('resolution') is None:
if template_dict.get('width') and template_dict.get('height'): if template_dict.get('width') and template_dict.get('height'):
template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height']) template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
@ -598,15 +639,64 @@ class YoutubeDL(object):
sanitize = lambda k, v: sanitize_filename( sanitize = lambda k, v: sanitize_filename(
compat_str(v), compat_str(v),
restricted=self.params.get('restrictfilenames'), restricted=self.params.get('restrictfilenames'),
is_id=(k == 'id'))
template_dict = dict((k, sanitize(k, v))
is_id=(k == 'id' or k.endswith('_id')))
template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
for k, v in template_dict.items() for k, v in template_dict.items()
if v is not None and not isinstance(v, (list, tuple, dict))) if v is not None and not isinstance(v, (list, tuple, dict)))
template_dict = collections.defaultdict(lambda: 'NA', template_dict) template_dict = collections.defaultdict(lambda: 'NA', template_dict)
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
tmpl = compat_expanduser(outtmpl)
filename = tmpl % template_dict
# For fields playlist_index and autonumber convert all occurrences
# of %(field)s to %(field)0Nd for backward compatibility
field_size_compat_map = {
'playlist_index': len(str(template_dict['n_entries'])),
'autonumber': autonumber_size,
}
FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
if mobj:
outtmpl = re.sub(
FIELD_SIZE_COMPAT_RE,
r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
outtmpl)
# Missing numeric fields used together with integer presentation types
# in format specification will break the argument substitution since
# string 'NA' is returned for missing fields. We will patch output
# template for missing fields to meet string presentation type.
for numeric_field in self._NUMERIC_FIELDS:
if numeric_field not in template_dict:
# As of [1] format syntax is:
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
FORMAT_RE = r'''(?x)
(?<!%)
%
\({0}\) # mapping key
(?:[#0\-+ ]+)? # conversion flags (optional)
(?:\d+)? # minimum field width (optional)
(?:\.\d+)? # precision (optional)
[hlL]? # length modifier (optional)
[diouxXeEfFgGcrs%] # conversion type
'''
outtmpl = re.sub(
FORMAT_RE.format(numeric_field),
r'%({0})s'.format(numeric_field), outtmpl)
# expand_path translates '%%' into '%' and '$$' into '$'
# correspondingly that is not what we want since we need to keep
# '%%' intact for template dict substitution step. Working around
# with boundary-alike separator hack.
sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
# outtmpl should be expand_path'ed before template dict substitution
# because meta fields may contain env variables we don't want to
# be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
# title "Hello $PATH", we don't want `$PATH` to be expanded.
filename = expand_path(outtmpl).replace(sep, '') % template_dict
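A rough standalone sketch of the template patching described in the comments above; the field names and values are invented for illustration. A missing numeric field such as view_count is rewritten from %(view_count)05d to %(view_count)s so that the literal 'NA' placeholder no longer breaks the substitution:

    import collections
    import re

    # Invented example template and (incomplete) metadata
    outtmpl = '%(title)s-%(view_count)05d.%(ext)s'
    template_dict = {'title': 'clip', 'ext': 'mp4'}  # 'view_count' is missing

    FORMAT_RE = r'''(?x)
        (?<!%)
        %
        \({0}\)              # mapping key
        (?:[#0\-+ ]+)?       # conversion flags (optional)
        (?:\d+)?             # minimum field width (optional)
        (?:\.\d+)?           # precision (optional)
        [hlL]?               # length modifier (optional)
        [diouxXeEfFgGcrs%]   # conversion type
    '''

    if 'view_count' not in template_dict:
        # Fall back to string presentation so 'NA' can be substituted safely
        outtmpl = re.sub(FORMAT_RE.format('view_count'), r'%(view_count)s', outtmpl)

    template_dict = collections.defaultdict(lambda: 'NA', template_dict)
    print(outtmpl % template_dict)  # clip-NA.mp4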
# Temporary fix for #4787 # Temporary fix for #4787
# 'Treat' all problem characters by passing filename through preferredencoding # 'Treat' all problem characters by passing filename through preferredencoding
# to workaround encoding issues with subprocess on python2 @ Windows # to workaround encoding issues with subprocess on python2 @ Windows
@ -705,6 +795,14 @@ class YoutubeDL(object):
return self.process_ie_result(ie_result, download, extra_info) return self.process_ie_result(ie_result, download, extra_info)
else: else:
return ie_result return ie_result
except GeoRestrictedError as e:
msg = e.msg
if e.countries:
msg += '\nThis video is available in %s.' % ', '.join(
map(ISO3166Utils.short2full, e.countries))
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
self.report_error(msg)
break
except ExtractorError as e: # An error we somewhat expected except ExtractorError as e: # An error we somewhat expected
self.report_error(compat_str(e), e.format_traceback()) self.report_error(compat_str(e), e.format_traceback())
break break
@ -762,19 +860,32 @@ class YoutubeDL(object):
ie_result['url'], ie_key=ie_result.get('ie_key'), ie_result['url'], ie_key=ie_result.get('ie_key'),
extra_info=extra_info, download=False, process=False) extra_info=extra_info, download=False, process=False)
# extract_info may return None when ignoreerrors is enabled and
# extraction failed with an error, don't crash and return early
# in this case
if not info:
return info
force_properties = dict( force_properties = dict(
(k, v) for k, v in ie_result.items() if v is not None) (k, v) for k, v in ie_result.items() if v is not None)
for f in ('_type', 'url', 'ie_key'):
for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
if f in force_properties: if f in force_properties:
del force_properties[f] del force_properties[f]
new_result = info.copy() new_result = info.copy()
new_result.update(force_properties) new_result.update(force_properties)
assert new_result.get('_type') != 'url_transparent'
# Extracted info may not be a video result (i.e.
# info.get('_type', 'video') != 'video') but rather a url or
# url_transparent. In such cases outer metadata (from ie_result)
# should be propagated to the inner one (info). For this to happen
# _type of info should be overridden with url_transparent. This
# fixes issue from https://github.com/rg3/youtube-dl/pull/11163.
if new_result.get('_type') == 'url':
new_result['_type'] = 'url_transparent'
return self.process_ie_result( return self.process_ie_result(
new_result, download=download, extra_info=extra_info) new_result, download=download, extra_info=extra_info)
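A toy illustration of the metadata propagation described in the comment above (all values invented): non-None outer fields win, identity fields are left alone, and the merged result is re-typed as url_transparent so it gets processed again:

    # Toy illustration; the dictionaries are invented for the example.
    ie_result = {'_type': 'url_transparent', 'url': 'http://example.com/x',
                 'title': 'Outer title', 'ie_key': 'Example'}
    info = {'_type': 'url', 'url': 'http://example.com/real', 'title': None, 'id': '42'}

    force_properties = dict((k, v) for k, v in ie_result.items() if v is not None)
    for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
        force_properties.pop(f, None)

    new_result = info.copy()
    new_result.update(force_properties)
    if new_result.get('_type') == 'url':
        new_result['_type'] = 'url_transparent'

    # Inner URL and id kept, outer title propagated, _type is now 'url_transparent'
    print(new_result)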
elif result_type == 'playlist' or result_type == 'multi_video':
elif result_type in ('playlist', 'multi_video'):
# We process each entry in the playlist # We process each entry in the playlist
playlist = ie_result.get('title') or ie_result.get('id') playlist = ie_result.get('title') or ie_result.get('id')
self.to_screen('[download] Downloading playlist: %s' % playlist) self.to_screen('[download] Downloading playlist: %s' % playlist)
@ -798,15 +909,25 @@ class YoutubeDL(object):
yield int(item) yield int(item)
else: else:
yield int(string_segment) yield int(string_segment)
playlistitems = iter_playlistitems(playlistitems_str)
playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
ie_entries = ie_result['entries'] ie_entries = ie_result['entries']
def make_playlistitems_entries(list_ie_entries):
num_entries = len(list_ie_entries)
return [
list_ie_entries[i - 1] for i in playlistitems
if -num_entries <= i - 1 < num_entries]
def report_download(num_entries):
self.to_screen(
'[%s] playlist %s: Downloading %d videos' %
(ie_result['extractor'], playlist, num_entries))
if isinstance(ie_entries, list): if isinstance(ie_entries, list):
n_all_entries = len(ie_entries) n_all_entries = len(ie_entries)
if playlistitems: if playlistitems:
entries = [
ie_entries[i - 1] for i in playlistitems
if -n_all_entries <= i - 1 < n_all_entries]
entries = make_playlistitems_entries(ie_entries)
else: else:
entries = ie_entries[playliststart:playlistend] entries = ie_entries[playliststart:playlistend]
n_entries = len(entries) n_entries = len(entries)
@ -824,31 +945,38 @@ class YoutubeDL(object):
entries = ie_entries.getslice( entries = ie_entries.getslice(
playliststart, playlistend) playliststart, playlistend)
n_entries = len(entries) n_entries = len(entries)
self.to_screen(
'[%s] playlist %s: Downloading %d videos' %
(ie_result['extractor'], playlist, n_entries))
report_download(n_entries)
else: # iterable else: # iterable
if playlistitems: if playlistitems:
entry_list = list(ie_entries)
entries = [entry_list[i - 1] for i in playlistitems]
entries = make_playlistitems_entries(list(itertools.islice(
ie_entries, 0, max(playlistitems))))
else: else:
entries = list(itertools.islice( entries = list(itertools.islice(
ie_entries, playliststart, playlistend)) ie_entries, playliststart, playlistend))
n_entries = len(entries) n_entries = len(entries)
self.to_screen(
'[%s] playlist %s: Downloading %d videos' %
(ie_result['extractor'], playlist, n_entries))
report_download(n_entries)
if self.params.get('playlistreverse', False): if self.params.get('playlistreverse', False):
entries = entries[::-1] entries = entries[::-1]
if self.params.get('playlistrandom', False):
random.shuffle(entries)
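How the playlist selection above plays out, sketched standalone; the parser below is a simplified stand-in for iter_playlistitems and the entries are invented:

    import random

    def iter_playlistitems(spec):
        # simplified: '1-3,7' -> 1, 2, 3, 7
        for segment in spec.split(','):
            if '-' in segment:
                start, end = segment.split('-')
                for item in range(int(start), int(end) + 1):
                    yield item
            else:
                yield int(segment)

    entries = ['video-%d' % n for n in range(1, 11)]
    # orderedSet-style deduplication while keeping the requested order
    playlistitems = list(dict.fromkeys(iter_playlistitems('1-3,7,3')))

    num_entries = len(entries)
    selected = [entries[i - 1] for i in playlistitems if -num_entries <= i - 1 < num_entries]
    random.shuffle(selected)  # what 'playlistrandom' does to the final order
    print(selected)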
x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
for i, entry in enumerate(entries, 1): for i, entry in enumerate(entries, 1):
self.to_screen('[download] Downloading video %s of %s' % (i, n_entries)) self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
# This __x_forwarded_for_ip thing is a bit ugly but requires
# minimal changes
if x_forwarded_for:
entry['__x_forwarded_for_ip'] = x_forwarded_for
extra = { extra = {
'n_entries': n_entries, 'n_entries': n_entries,
'playlist': playlist, 'playlist': playlist,
'playlist_id': ie_result.get('id'), 'playlist_id': ie_result.get('id'),
'playlist_title': ie_result.get('title'), 'playlist_title': ie_result.get('title'),
'playlist_uploader': ie_result.get('uploader'),
'playlist_uploader_id': ie_result.get('uploader_id'),
'playlist_index': i + playliststart, 'playlist_index': i + playliststart,
'extractor': ie_result['extractor'], 'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'], 'webpage_url': ie_result['webpage_url'],
@ -952,6 +1080,30 @@ class YoutubeDL(object):
return op(actual_value, comparison_value) return op(actual_value, comparison_value)
return _filter return _filter
def _default_format_spec(self, info_dict, download=True):
def can_merge():
merger = FFmpegMergerPP(self)
return merger.available and merger.can_merge()
def prefer_best():
if self.params.get('simulate', False):
return False
if not download:
return False
if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
return True
if info_dict.get('is_live'):
return True
if not can_merge():
return True
return False
req_format_list = ['bestvideo+bestaudio', 'best']
if prefer_best():
req_format_list.reverse()
return '/'.join(req_format_list)
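In words: keep the 'bestvideo+bestaudio/best' default whenever the two streams can actually be merged into the output; prefer plain 'best' when writing to stdout, downloading a live stream, or when ffmpeg/avconv cannot merge; simulation and non-download runs keep the merge-first spec. A condensed sketch with the merge check stubbed out as a boolean:

    DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'

    def default_format_spec(params, info_dict, download=True, can_merge=True):
        def prefer_best():
            if params.get('simulate', False) or not download:
                return False
            return (params.get('outtmpl', DEFAULT_OUTTMPL) == '-' or
                    bool(info_dict.get('is_live')) or
                    not can_merge)
        req_format_list = ['bestvideo+bestaudio', 'best']
        if prefer_best():
            req_format_list.reverse()
        return '/'.join(req_format_list)

    print(default_format_spec({}, {}))                 # bestvideo+bestaudio/best
    print(default_format_spec({'outtmpl': '-'}, {}))   # best/bestvideo+bestaudio
    print(default_format_spec({}, {'is_live': True}))  # best/bestvideo+bestaudio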
def build_format_selector(self, format_spec): def build_format_selector(self, format_spec):
def syntax_error(note, start): def syntax_error(note, start):
message = ( message = (
@ -1228,6 +1380,11 @@ class YoutubeDL(object):
if cookies: if cookies:
res['Cookie'] = cookies res['Cookie'] = cookies
if 'X-Forwarded-For' not in res:
x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
if x_forwarded_for_ip:
res['X-Forwarded-For'] = x_forwarded_for_ip
return res return res
def _calc_cookies(self, info_dict): def _calc_cookies(self, info_dict):
@ -1243,9 +1400,28 @@ class YoutubeDL(object):
if 'title' not in info_dict: if 'title' not in info_dict:
raise ExtractorError('Missing "title" field in extractor result') raise ExtractorError('Missing "title" field in extractor result')
if not isinstance(info_dict['id'], compat_str):
self.report_warning('"id" field is not a string - forcing string conversion')
info_dict['id'] = compat_str(info_dict['id'])
def report_force_conversion(field, field_not, conversion):
self.report_warning(
'"%s" field is not %s - forcing %s conversion, there is an error in extractor'
% (field, field_not, conversion))
def sanitize_string_field(info, string_field):
field = info.get(string_field)
if field is None or isinstance(field, compat_str):
return
report_force_conversion(string_field, 'a string', 'string')
info[string_field] = compat_str(field)
def sanitize_numeric_fields(info):
for numeric_field in self._NUMERIC_FIELDS:
field = info.get(numeric_field)
if field is None or isinstance(field, compat_numeric_types):
continue
report_force_conversion(numeric_field, 'numeric', 'int')
info[numeric_field] = int_or_none(field)
sanitize_string_field(info_dict, 'id')
sanitize_numeric_fields(info_dict)
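A toy illustration of this coercion; the info dict and the int_or_none stand-in are simplified for the example:

    NUMERIC_FIELDS = ('duration', 'view_count')

    def int_or_none(v):
        # simplified stand-in for youtube_dl.utils.int_or_none
        try:
            return int(v)
        except (TypeError, ValueError):
            return None

    info = {'id': 12345, 'duration': '613', 'view_count': 'n/a'}
    if not isinstance(info['id'], str):
        info['id'] = str(info['id'])                 # "id" forced to a string
    for field in NUMERIC_FIELDS:
        if not isinstance(info[field], (int, float)):
            info[field] = int_or_none(info[field])   # unparsable values become None
    print(info)  # {'id': '12345', 'duration': 613, 'view_count': None}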
if 'playlist' not in info_dict: if 'playlist' not in info_dict:
# It isn't part of a playlist # It isn't part of a playlist
@ -1326,16 +1502,28 @@ class YoutubeDL(object):
if not formats: if not formats:
raise ExtractorError('No video formats found!') raise ExtractorError('No video formats found!')
def is_wellformed(f):
url = f.get('url')
if not url:
self.report_warning(
'"url" field is missing or empty - skipping format, '
'there is an error in extractor')
return False
if isinstance(url, bytes):
sanitize_string_field(f, 'url')
return True
# Filter out malformed formats for better extraction robustness
formats = list(filter(is_wellformed, formats))
formats_dict = {} formats_dict = {}
# We check that all the formats have the format and format_id fields # We check that all the formats have the format and format_id fields
for i, format in enumerate(formats): for i, format in enumerate(formats):
if 'url' not in format:
raise ExtractorError('Missing "url" key in result (index %d)' % i)
sanitize_string_field(format, 'format_id')
sanitize_numeric_fields(format)
format['url'] = sanitize_url(format['url']) format['url'] = sanitize_url(format['url'])
if format.get('format_id') is None:
if not format.get('format_id'):
format['format_id'] = compat_str(i) format['format_id'] = compat_str(i)
else: else:
# Sanitize format_id from characters used in format selector expression # Sanitize format_id from characters used in format selector expression
@ -1363,13 +1551,16 @@ class YoutubeDL(object):
format['ext'] = determine_ext(format['url']).lower() format['ext'] = determine_ext(format['url']).lower()
# Automatically determine protocol if missing (useful for format # Automatically determine protocol if missing (useful for format
# selection purposes) # selection purposes)
if 'protocol' not in format:
if format.get('protocol') is None:
format['protocol'] = determine_protocol(format) format['protocol'] = determine_protocol(format)
# Add HTTP headers, so that external programs can use them from the # Add HTTP headers, so that external programs can use them from the
# json output # json output
full_format_info = info_dict.copy() full_format_info = info_dict.copy()
full_format_info.update(format) full_format_info.update(format)
format['http_headers'] = self._calc_headers(full_format_info) format['http_headers'] = self._calc_headers(full_format_info)
# Remove private housekeeping stuff
if '__x_forwarded_for_ip' in info_dict:
del info_dict['__x_forwarded_for_ip']
# TODO Central sorting goes here # TODO Central sorting goes here
@ -1385,14 +1576,10 @@ class YoutubeDL(object):
req_format = self.params.get('format') req_format = self.params.get('format')
if req_format is None: if req_format is None:
req_format_list = []
if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
not info_dict.get('is_live')):
merger = FFmpegMergerPP(self)
if merger.available and merger.can_merge():
req_format_list.append('bestvideo+bestaudio')
req_format_list.append('best')
req_format = '/'.join(req_format_list)
req_format = self._default_format_spec(info_dict, download=download)
if self.params.get('verbose'):
self.to_stdout('[debug] Default format spec: %s' % req_format)
format_selector = self.build_format_selector(req_format) format_selector = self.build_format_selector(req_format)
# While in format selection we may need to have an access to the original # While in format selection we may need to have an access to the original
@ -1544,12 +1731,17 @@ class YoutubeDL(object):
if filename is None: if filename is None:
return return
try:
dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
if dn and not os.path.exists(dn):
os.makedirs(dn)
except (OSError, IOError) as err:
self.report_error('unable to create directory ' + error_to_compat_str(err))
def ensure_dir_exists(path):
try:
dn = os.path.dirname(path)
if dn and not os.path.exists(dn):
os.makedirs(dn)
return True
except (OSError, IOError) as err:
self.report_error('unable to create directory ' + error_to_compat_str(err))
return False
if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
return return
if self.params.get('writedescription', False): if self.params.get('writedescription', False):
@ -1592,29 +1784,30 @@ class YoutubeDL(object):
ie = self.get_info_extractor(info_dict['extractor_key']) ie = self.get_info_extractor(info_dict['extractor_key'])
for sub_lang, sub_info in subtitles.items(): for sub_lang, sub_info in subtitles.items():
sub_format = sub_info['ext'] sub_format = sub_info['ext']
if sub_info.get('data') is not None:
sub_data = sub_info['data']
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
else: else:
try:
sub_data = ie._download_webpage(
sub_info['url'], info_dict['id'], note=False)
except ExtractorError as err:
self.report_warning('Unable to download subtitle for "%s": %s' %
(sub_lang, error_to_compat_str(err.cause)))
continue
try:
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
if sub_info.get('data') is not None:
try:
# Use newline='' to prevent conversion of newline characters
# See https://github.com/rg3/youtube-dl/issues/10268
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
subfile.write(sub_info['data'])
except (OSError, IOError):
self.report_error('Cannot write subtitles file ' + sub_filename)
return
else: else:
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
# Use newline='' to prevent conversion of newline characters
# See https://github.com/rg3/youtube-dl/issues/10268
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
subfile.write(sub_data)
except (OSError, IOError):
self.report_error('Cannot write subtitles file ' + sub_filename)
return
try:
sub_data = ie._request_webpage(
sub_info['url'], info_dict['id'], note=False).read()
with io.open(encodeFilename(sub_filename), 'wb') as subfile:
subfile.write(sub_data)
except (ExtractorError, IOError, OSError, ValueError) as err:
self.report_warning('Unable to download subtitle for "%s": %s' %
(sub_lang, error_to_compat_str(err)))
continue
if self.params.get('writeinfojson', False): if self.params.get('writeinfojson', False):
infofn = replace_extension(filename, 'info.json', info_dict.get('ext')) infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
@ -1687,8 +1880,11 @@ class YoutubeDL(object):
for f in requested_formats: for f in requested_formats:
new_info = dict(info_dict) new_info = dict(info_dict)
new_info.update(f) new_info.update(f)
fname = self.prepare_filename(new_info)
fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
fname = prepend_extension(
self.prepare_filename(new_info),
'f%s' % f['format_id'], new_info['ext'])
if not ensure_dir_exists(fname):
return
downloaded.append(fname) downloaded.append(fname)
partial_success = dl(fname, new_info) partial_success = dl(fname, new_info)
success = success and partial_success success = success and partial_success
@ -1755,7 +1951,7 @@ class YoutubeDL(object):
info_dict.get('protocol') == 'm3u8' and info_dict.get('protocol') == 'm3u8' and
self.params.get('hls_prefer_native')): self.params.get('hls_prefer_native')):
if fixup_policy == 'warn': if fixup_policy == 'warn':
self.report_warning('%s: malformated aac bitstream.' % (
self.report_warning('%s: malformed AAC bitstream detected.' % (
info_dict['id'])) info_dict['id']))
elif fixup_policy == 'detect_or_warn': elif fixup_policy == 'detect_or_warn':
fixup_pp = FFmpegFixupM3u8PP(self) fixup_pp = FFmpegFixupM3u8PP(self)
@ -1764,7 +1960,7 @@ class YoutubeDL(object):
info_dict['__postprocessors'].append(fixup_pp) info_dict['__postprocessors'].append(fixup_pp)
else: else:
self.report_warning( self.report_warning(
'%s: malformated aac bitstream. %s'
'%s: malformed AAC bitstream detected. %s'
% (info_dict['id'], INSTALL_FFMPEG_MESSAGE)) % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
else: else:
assert fixup_policy in ('ignore', 'never') assert fixup_policy in ('ignore', 'never')
@ -1780,6 +1976,7 @@ class YoutubeDL(object):
"""Download a given list of URLs.""" """Download a given list of URLs."""
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
if (len(url_list) > 1 and if (len(url_list) > 1 and
outtmpl != '-' and
'%' not in outtmpl and '%' not in outtmpl and
self.params.get('max_downloads') != 1): self.params.get('max_downloads') != 1):
raise SameFileError(outtmpl) raise SameFileError(outtmpl)
@ -2036,11 +2233,20 @@ class YoutubeDL(object):
sys.exc_clear() sys.exc_clear()
except Exception: except Exception:
pass pass
self._write_string('[debug] Python version %s - %s\n' % (
platform.python_version(), platform_name()))
def python_implementation():
impl_name = platform.python_implementation()
if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
return impl_name
self._write_string('[debug] Python version %s (%s) - %s\n' % (
platform.python_version(), python_implementation(),
platform_name()))
exe_versions = FFmpegPostProcessor.get_versions(self) exe_versions = FFmpegPostProcessor.get_versions(self)
exe_versions['rtmpdump'] = rtmpdump_version() exe_versions['rtmpdump'] = rtmpdump_version()
exe_versions['phantomjs'] = PhantomJSwrapper._version()
exe_str = ', '.join( exe_str = ', '.join(
'%s %s' % (exe, v) '%s %s' % (exe, v)
for exe, v in sorted(exe_versions.items()) for exe, v in sorted(exe_versions.items())
@ -2077,7 +2283,7 @@ class YoutubeDL(object):
if opts_cookiefile is None: if opts_cookiefile is None:
self.cookiejar = compat_cookiejar.CookieJar() self.cookiejar = compat_cookiejar.CookieJar()
else: else:
opts_cookiefile = compat_expanduser(opts_cookiefile)
opts_cookiefile = expand_path(opts_cookiefile)
self.cookiejar = compat_cookiejar.MozillaCookieJar( self.cookiejar = compat_cookiejar.MozillaCookieJar(
opts_cookiefile) opts_cookiefile)
if os.access(opts_cookiefile, os.R_OK): if os.access(opts_cookiefile, os.R_OK):


+ 30
- 9
youtube_dl/__init__.py

@ -16,7 +16,6 @@ from .options import (
parseOpts, parseOpts,
) )
from .compat import ( from .compat import (
compat_expanduser,
compat_getpass, compat_getpass,
compat_shlex_split, compat_shlex_split,
workaround_optparse_bug9161, workaround_optparse_bug9161,
@ -26,6 +25,7 @@ from .utils import (
decodeOption, decodeOption,
DEFAULT_OUTTMPL, DEFAULT_OUTTMPL,
DownloadError, DownloadError,
expand_path,
match_filter_func, match_filter_func,
MaxDownloadsReached, MaxDownloadsReached,
preferredencoding, preferredencoding,
@ -88,7 +88,7 @@ def _real_main(argv=None):
batchfd = sys.stdin batchfd = sys.stdin
else: else:
batchfd = io.open( batchfd = io.open(
compat_expanduser(opts.batchfile),
expand_path(opts.batchfile),
'r', encoding='utf-8', errors='ignore') 'r', encoding='utf-8', errors='ignore')
batch_urls = read_batch_urls(batchfd) batch_urls = read_batch_urls(batchfd)
if opts.verbose: if opts.verbose:
@ -133,6 +133,12 @@ def _real_main(argv=None):
parser.error('TV Provider account username missing\n') parser.error('TV Provider account username missing\n')
if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid): if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
parser.error('using output template conflicts with using title, video ID or auto number') parser.error('using output template conflicts with using title, video ID or auto number')
if opts.autonumber_size is not None:
if opts.autonumber_size <= 0:
parser.error('auto number size must be positive')
if opts.autonumber_start is not None:
if opts.autonumber_start < 0:
parser.error('auto number start must be positive or 0')
if opts.usetitle and opts.useid: if opts.usetitle and opts.useid:
parser.error('using title conflicts with using video ID') parser.error('using title conflicts with using video ID')
if opts.username is not None and opts.password is None: if opts.username is not None and opts.password is None:
@ -190,7 +196,7 @@ def _real_main(argv=None):
if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart: if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
raise ValueError('Playlist end must be greater than playlist start') raise ValueError('Playlist end must be greater than playlist start')
if opts.extractaudio: if opts.extractaudio:
if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
if opts.audioformat not in ['best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
parser.error('invalid audio format specified') parser.error('invalid audio format specified')
if opts.audioquality: if opts.audioquality:
opts.audioquality = opts.audioquality.strip('k').strip('K') opts.audioquality = opts.audioquality.strip('k').strip('K')
@ -200,7 +206,7 @@ def _real_main(argv=None):
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'avi']: if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'avi']:
parser.error('invalid video recode format specified') parser.error('invalid video recode format specified')
if opts.convertsubtitles is not None: if opts.convertsubtitles is not None:
if opts.convertsubtitles not in ['srt', 'vtt', 'ass']:
if opts.convertsubtitles not in ['srt', 'vtt', 'ass', 'lrc']:
parser.error('invalid subtitle format specified') parser.error('invalid subtitle format specified')
if opts.date is not None: if opts.date is not None:
@ -232,18 +238,15 @@ def _real_main(argv=None):
any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
any_printing = opts.print_json any_printing = opts.print_json
download_archive_fn = compat_expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive
download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive
# PostProcessors # PostProcessors
postprocessors = [] postprocessors = []
# Add the metadata pp first, the other pps will copy it
if opts.metafromtitle: if opts.metafromtitle:
postprocessors.append({ postprocessors.append({
'key': 'MetadataFromTitle', 'key': 'MetadataFromTitle',
'titleformat': opts.metafromtitle 'titleformat': opts.metafromtitle
}) })
if opts.addmetadata:
postprocessors.append({'key': 'FFmpegMetadata'})
if opts.extractaudio: if opts.extractaudio:
postprocessors.append({ postprocessors.append({
'key': 'FFmpegExtractAudio', 'key': 'FFmpegExtractAudio',
@ -256,6 +259,16 @@ def _real_main(argv=None):
'key': 'FFmpegVideoConvertor', 'key': 'FFmpegVideoConvertor',
'preferedformat': opts.recodevideo, 'preferedformat': opts.recodevideo,
}) })
# FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and
# FFmpegExtractAudioPP as containers before conversion may not support
# metadata (3gp, webm, etc.)
# And this post-processor should be placed before other metadata
# manipulating post-processors (FFmpegEmbedSubtitle) to prevent loss of
# extra metadata. By default ffmpeg preserves metadata applicable for both
# source and target containers. From this point the container won't change,
# so metadata can be added here.
if opts.addmetadata:
postprocessors.append({'key': 'FFmpegMetadata'})
if opts.convertsubtitles: if opts.convertsubtitles:
postprocessors.append({ postprocessors.append({
'key': 'FFmpegSubtitlesConvertor', 'key': 'FFmpegSubtitlesConvertor',
@ -321,6 +334,7 @@ def _real_main(argv=None):
'listformats': opts.listformats, 'listformats': opts.listformats,
'outtmpl': outtmpl, 'outtmpl': outtmpl,
'autonumber_size': opts.autonumber_size, 'autonumber_size': opts.autonumber_size,
'autonumber_start': opts.autonumber_start,
'restrictfilenames': opts.restrictfilenames, 'restrictfilenames': opts.restrictfilenames,
'ignoreerrors': opts.ignoreerrors, 'ignoreerrors': opts.ignoreerrors,
'force_generic_extractor': opts.force_generic_extractor, 'force_generic_extractor': opts.force_generic_extractor,
@ -329,6 +343,7 @@ def _real_main(argv=None):
'retries': opts.retries, 'retries': opts.retries,
'fragment_retries': opts.fragment_retries, 'fragment_retries': opts.fragment_retries,
'skip_unavailable_fragments': opts.skip_unavailable_fragments, 'skip_unavailable_fragments': opts.skip_unavailable_fragments,
'keep_fragments': opts.keep_fragments,
'buffersize': opts.buffersize, 'buffersize': opts.buffersize,
'noresizebuffer': opts.noresizebuffer, 'noresizebuffer': opts.noresizebuffer,
'continuedl': opts.continue_dl, 'continuedl': opts.continue_dl,
@ -337,6 +352,7 @@ def _real_main(argv=None):
'playliststart': opts.playliststart, 'playliststart': opts.playliststart,
'playlistend': opts.playlistend, 'playlistend': opts.playlistend,
'playlistreverse': opts.playlist_reverse, 'playlistreverse': opts.playlist_reverse,
'playlistrandom': opts.playlist_random,
'noplaylist': opts.noplaylist, 'noplaylist': opts.noplaylist,
'logtostderr': opts.outtmpl == '-', 'logtostderr': opts.outtmpl == '-',
'consoletitle': opts.consoletitle, 'consoletitle': opts.consoletitle,
@ -406,6 +422,11 @@ def _real_main(argv=None):
'cn_verification_proxy': opts.cn_verification_proxy, 'cn_verification_proxy': opts.cn_verification_proxy,
'geo_verification_proxy': opts.geo_verification_proxy, 'geo_verification_proxy': opts.geo_verification_proxy,
'config_location': opts.config_location, 'config_location': opts.config_location,
'geo_bypass': opts.geo_bypass,
'geo_bypass_country': opts.geo_bypass_country,
# just for deprecation check
'autonumber': opts.autonumber if opts.autonumber is True else None,
'usetitle': opts.usetitle if opts.usetitle is True else None,
} }
with YoutubeDL(ydl_opts) as ydl: with YoutubeDL(ydl_opts) as ydl:
@ -429,7 +450,7 @@ def _real_main(argv=None):
try: try:
if opts.load_info_filename is not None: if opts.load_info_filename is not None:
retcode = ydl.download_with_info_file(compat_expanduser(opts.load_info_filename))
retcode = ydl.download_with_info_file(expand_path(opts.load_info_filename))
else: else:
retcode = ydl.download(all_urls) retcode = ydl.download(all_urls)
except MaxDownloadsReached: except MaxDownloadsReached:


+ 28
- 0
youtube_dl/aes.py

@ -60,6 +60,34 @@ def aes_cbc_decrypt(data, key, iv):
return decrypted_data return decrypted_data
def aes_cbc_encrypt(data, key, iv):
"""
Encrypt with aes in CBC mode. Using PKCS#7 padding
@param {int[]} data cleartext
@param {int[]} key 16/24/32-Byte cipher key
@param {int[]} iv 16-Byte IV
@returns {int[]} encrypted data
"""
expanded_key = key_expansion(key)
block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
encrypted_data = []
previous_cipher_block = iv
for i in range(block_count):
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
remaining_length = BLOCK_SIZE_BYTES - len(block)
block += [remaining_length] * remaining_length
mixed_block = xor(block, previous_cipher_block)
encrypted_block = aes_encrypt(mixed_block, expanded_key)
encrypted_data += encrypted_block
previous_cipher_block = encrypted_block
return encrypted_data
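A quick round-trip sanity check for the new helper, pairing it with the existing aes_cbc_decrypt; both operate on lists of byte values, and the key/IV/plaintext below are arbitrary sample data (this assumes the patched youtube_dl package is importable):

    from youtube_dl.aes import aes_cbc_encrypt, aes_cbc_decrypt

    key = list(range(16))                 # 16-byte key as a list of ints
    iv = [0] * 16                         # 16-byte IV
    plaintext = [ord(c) for c in 'youtube-dl test message']

    ciphertext = aes_cbc_encrypt(plaintext, key, iv)
    decrypted = aes_cbc_decrypt(ciphertext, key, iv)

    # aes_cbc_decrypt does not strip the PKCS#7 padding, so trim to the original length
    assert decrypted[:len(plaintext)] == plaintext
    print(bytes(bytearray(decrypted[:len(plaintext)])))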
def key_expansion(data): def key_expansion(data):
""" """
Generate key schedule Generate key schedule


+ 6
- 3
youtube_dl/cache.py

@ -8,8 +8,11 @@ import re
import shutil import shutil
import traceback import traceback
from .compat import compat_expanduser, compat_getenv
from .utils import write_json_file
from .compat import compat_getenv
from .utils import (
expand_path,
write_json_file,
)
class Cache(object): class Cache(object):
@ -21,7 +24,7 @@ class Cache(object):
if res is None: if res is None:
cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache') cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache')
res = os.path.join(cache_root, 'youtube-dl') res = os.path.join(cache_root, 'youtube-dl')
return compat_expanduser(res)
return expand_path(res)
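expand_path (from youtube_dl/utils.py) performs environment-variable and ~ expansion, so the cache root now resolves roughly as follows; the helper below mirrors it for illustration only:

    import os

    def expand_path(s):
        # mirrors youtube_dl.utils.expand_path: expand ~ first, then env vars
        return os.path.expandvars(os.path.expanduser(s))

    cache_root = os.environ.get('XDG_CACHE_HOME', '~/.cache')
    print(expand_path(os.path.join(cache_root, 'youtube-dl')))
    # e.g. /home/user/.cache/youtube-dl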
def _get_cache_fn(self, section, key, dtype): def _get_cache_fn(self, section, key, dtype):
assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \ assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \


+ 86
- 12
youtube_dl/compat.py

@ -3,11 +3,14 @@ from __future__ import unicode_literals
import binascii import binascii
import collections import collections
import ctypes
import email import email
import getpass import getpass
import io import io
import itertools
import optparse import optparse
import os import os
import platform
import re import re
import shlex import shlex
import shutil import shutil
@ -15,7 +18,6 @@ import socket
import struct import struct
import subprocess import subprocess
import sys import sys
import itertools
import xml.etree.ElementTree import xml.etree.ElementTree
@ -2322,6 +2324,19 @@ try:
except ImportError: # Python 2 except ImportError: # Python 2
from HTMLParser import HTMLParser as compat_HTMLParser from HTMLParser import HTMLParser as compat_HTMLParser
try: # Python 2
from HTMLParser import HTMLParseError as compat_HTMLParseError
except ImportError: # Python <3.4
try:
from html.parser import HTMLParseError as compat_HTMLParseError
except ImportError: # Python >3.4
# HTMLParseError has been deprecated in Python 3.3 and removed in
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible
# and uniform cross-version exception handling
class compat_HTMLParseError(Exception):
pass
try: try:
from subprocess import DEVNULL from subprocess import DEVNULL
compat_subprocess_get_DEVNULL = lambda: DEVNULL compat_subprocess_get_DEVNULL = lambda: DEVNULL
@ -2529,6 +2544,24 @@ else:
el.text = el.text.decode('utf-8') el.text = el.text.decode('utf-8')
return doc return doc
if hasattr(etree, 'register_namespace'):
compat_etree_register_namespace = etree.register_namespace
else:
def compat_etree_register_namespace(prefix, uri):
"""Register a namespace prefix.
The registry is global, and any existing mapping for either the
given prefix or the namespace URI will be removed.
*prefix* is the namespace prefix, *uri* is a namespace uri. Tags and
attributes in this namespace will be serialized with prefix if possible.
ValueError is raised if prefix is reserved or is invalid.
"""
if re.match(r"ns\d+$", prefix):
raise ValueError("Prefix format reserved for internal use")
for k, v in list(etree._namespace_map.items()):
if k == uri or v == prefix:
del etree._namespace_map[k]
etree._namespace_map[uri] = prefix
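The shim behaves like the stdlib helper; a short example of what registering a prefix buys during serialization (namespace URI and tag are made up):

    import xml.etree.ElementTree as etree

    # With the shim this call also works on interpreters lacking etree.register_namespace
    etree.register_namespace('media', 'http://example.com/media-ns')

    root = etree.Element('{http://example.com/media-ns}item')
    print(etree.tostring(root).decode('utf-8'))
    # <media:item xmlns:media="http://example.com/media-ns" />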
if sys.version_info < (2, 7): if sys.version_info < (2, 7):
# Here comes the crazy part: In 2.6, if the xpath is a unicode, # Here comes the crazy part: In 2.6, if the xpath is a unicode,
# .//node does not match if a node is a direct child of . ! # .//node does not match if a node is a direct child of . !
@ -2586,14 +2619,22 @@ except ImportError: # Python 2
parsed_result[name] = [value] parsed_result[name] = [value]
return parsed_result return parsed_result
try:
from shlex import quote as compat_shlex_quote
except ImportError: # Python < 3.3
compat_os_name = os._name if os.name == 'java' else os.name
if compat_os_name == 'nt':
def compat_shlex_quote(s): def compat_shlex_quote(s):
if re.match(r'^[-_\w./]+$', s):
return s
else:
return "'" + s.replace("'", "'\"'\"'") + "'"
return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
else:
try:
from shlex import quote as compat_shlex_quote
except ImportError: # Python < 3.3
def compat_shlex_quote(s):
if re.match(r'^[-_\w./]+$', s):
return s
else:
return "'" + s.replace("'", "'\"'\"'") + "'"
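The practical difference between the two branches shows up on arguments containing spaces or quotes; a side-by-side illustration with the quoting helpers written out standalone:

    import re

    def quote_posix(s):
        # POSIX branch: wrap in single quotes, escaping embedded single quotes
        return s if re.match(r'^[-_\w./]+$', s) else "'" + s.replace("'", "'\"'\"'") + "'"

    def quote_windows(s):
        # Windows branch: wrap in double quotes, escaping embedded double quotes
        return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')

    arg = 'file name "42".mp4'
    print(quote_posix(arg))    # 'file name "42".mp4'
    print(quote_windows(arg))  # "file name \"42\".mp4"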
try: try:
@ -2618,9 +2659,6 @@ def compat_ord(c):
return ord(c) return ord(c)
compat_os_name = os._name if os.name == 'java' else os.name
if sys.version_info >= (3, 0): if sys.version_info >= (3, 0):
compat_getenv = os.getenv compat_getenv = os.getenv
compat_expanduser = os.path.expanduser compat_expanduser = os.path.expanduser
@ -2674,7 +2712,7 @@ else:
userhome = pwent.pw_dir userhome = pwent.pw_dir
userhome = userhome.rstrip('/') userhome = userhome.rstrip('/')
return (userhome + path[i:]) or '/' return (userhome + path[i:]) or '/'
elif compat_os_name == 'nt' or compat_os_name == 'ce':
elif compat_os_name in ('nt', 'ce'):
def compat_expanduser(path): def compat_expanduser(path):
"""Expand ~ and ~user constructs. """Expand ~ and ~user constructs.
@ -2742,6 +2780,12 @@ else:
compat_kwargs = lambda kwargs: kwargs compat_kwargs = lambda kwargs: kwargs
try:
compat_numeric_types = (int, float, long, complex)
except NameError: # Python 3
compat_numeric_types = (int, float, complex)
if sys.version_info < (2, 7): if sys.version_info < (2, 7):
def compat_socket_create_connection(address, timeout, source_address=None): def compat_socket_create_connection(address, timeout, source_address=None):
host, port = address host, port = address
@ -2856,15 +2900,43 @@ else:
compat_struct_pack = struct.pack compat_struct_pack = struct.pack
compat_struct_unpack = struct.unpack compat_struct_unpack = struct.unpack
try:
from future_builtins import zip as compat_zip
except ImportError: # not 2.6+ or is 3.x
try:
from itertools import izip as compat_zip # < 2.5 or 3.x
except ImportError:
compat_zip = zip
if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
# PyPy2 prior to version 5.4.0 expects byte strings as Windows function
# names, see the original PyPy issue [1] and the youtube-dl one [2].
# 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name
# 2. https://github.com/rg3/youtube-dl/pull/4392
def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
real = ctypes.WINFUNCTYPE(*args, **kwargs)
def resf(tpl, *args, **kwargs):
funcname, dll = tpl
return real((str(funcname), dll), *args, **kwargs)
return resf
else:
def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
return ctypes.WINFUNCTYPE(*args, **kwargs)
__all__ = [ __all__ = [
'compat_HTMLParseError',
'compat_HTMLParser', 'compat_HTMLParser',
'compat_HTTPError', 'compat_HTTPError',
'compat_basestring', 'compat_basestring',
'compat_chr', 'compat_chr',
'compat_cookiejar', 'compat_cookiejar',
'compat_cookies', 'compat_cookies',
'compat_ctypes_WINFUNCTYPE',
'compat_etree_fromstring', 'compat_etree_fromstring',
'compat_etree_register_namespace',
'compat_expanduser', 'compat_expanduser',
'compat_get_terminal_size', 'compat_get_terminal_size',
'compat_getenv', 'compat_getenv',
@ -2876,6 +2948,7 @@ __all__ = [
'compat_input', 'compat_input',
'compat_itertools_count', 'compat_itertools_count',
'compat_kwargs', 'compat_kwargs',
'compat_numeric_types',
'compat_ord', 'compat_ord',
'compat_os_name', 'compat_os_name',
'compat_parse_qs', 'compat_parse_qs',
@ -2903,5 +2976,6 @@ __all__ = [
'compat_urlretrieve', 'compat_urlretrieve',
'compat_xml_parse_error', 'compat_xml_parse_error',
'compat_xpath', 'compat_xpath',
'compat_zip',
'workaround_optparse_bug9161', 'workaround_optparse_bug9161',
] ]

+ 3
- 0
youtube_dl/downloader/__init__.py

@ -43,6 +43,9 @@ def get_suitable_downloader(info_dict, params={}):
if ed.can_download(info_dict): if ed.can_download(info_dict):
return ed return ed
if protocol.startswith('m3u8') and info_dict.get('is_live'):
return FFmpegFD
if protocol == 'm3u8' and params.get('hls_prefer_native') is True: if protocol == 'm3u8' and params.get('hls_prefer_native') is True:
return HlsFD return HlsFD


+ 28
- 25
youtube_dl/downloader/common.py

@ -8,10 +8,11 @@ import random
from ..compat import compat_os_name from ..compat import compat_os_name
from ..utils import ( from ..utils import (
decodeArgument,
encodeFilename, encodeFilename,
error_to_compat_str, error_to_compat_str,
decodeArgument,
format_bytes, format_bytes,
shell_quote,
timeconvert, timeconvert,
) )
@ -187,6 +188,9 @@ class FileDownloader(object):
return filename[:-len('.part')] return filename[:-len('.part')]
return filename return filename
def ytdl_filename(self, filename):
return filename + '.ytdl'
def try_rename(self, old_filename, new_filename): def try_rename(self, old_filename, new_filename):
try: try:
if old_filename == new_filename: if old_filename == new_filename:
@ -300,11 +304,11 @@ class FileDownloader(object):
"""Report attempt to resume at given byte.""" """Report attempt to resume at given byte."""
self.to_screen('[download] Resuming download at byte %s' % resume_len) self.to_screen('[download] Resuming download at byte %s' % resume_len)
def report_retry(self, count, retries):
def report_retry(self, err, count, retries):
"""Report retry in case of HTTP error 5xx""" """Report retry in case of HTTP error 5xx"""
self.to_screen( self.to_screen(
'[download] Got server HTTP error. Retrying (attempt %d of %s)...'
% (count, self.format_retries(retries)))
'[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...'
% (error_to_compat_str(err), count, self.format_retries(retries)))
def report_file_already_downloaded(self, file_name): def report_file_already_downloaded(self, file_name):
"""Report file has already been fully downloaded.""" """Report file has already been fully downloaded."""
@ -327,27 +331,31 @@ class FileDownloader(object):
os.path.exists(encodeFilename(filename)) os.path.exists(encodeFilename(filename))
) )
continuedl_and_exists = (
self.params.get('continuedl', True) and
os.path.isfile(encodeFilename(filename)) and
not self.params.get('nopart', False)
)
# Check file already present
if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
self.report_file_already_downloaded(filename)
self._hook_progress({
'filename': filename,
'status': 'finished',
'total_bytes': os.path.getsize(encodeFilename(filename)),
})
return True
if not hasattr(filename, 'write'):
continuedl_and_exists = (
self.params.get('continuedl', True) and
os.path.isfile(encodeFilename(filename)) and
not self.params.get('nopart', False)
)
# Check file already present
if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
self.report_file_already_downloaded(filename)
self._hook_progress({
'filename': filename,
'status': 'finished',
'total_bytes': os.path.getsize(encodeFilename(filename)),
})
return True
min_sleep_interval = self.params.get('sleep_interval') min_sleep_interval = self.params.get('sleep_interval')
if min_sleep_interval: if min_sleep_interval:
max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval) max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval) sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
self.to_screen('[download] Sleeping %s seconds...' % sleep_interval)
self.to_screen(
'[download] Sleeping %s seconds...' % (
int(sleep_interval) if sleep_interval.is_integer()
else '%.2f' % sleep_interval))
time.sleep(sleep_interval) time.sleep(sleep_interval)
return self.real_download(filename, info_dict) return self.real_download(filename, info_dict)
@ -374,10 +382,5 @@ class FileDownloader(object):
if exe is None: if exe is None:
exe = os.path.basename(str_args[0]) exe = os.path.basename(str_args[0])
try:
import pipes
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
except ImportError:
shell_quote = repr
self.to_screen('[debug] %s command line: %s' % ( self.to_screen('[debug] %s command line: %s' % (
exe, shell_quote(str_args))) exe, shell_quote(str_args)))

+ 19
- 31
youtube_dl/downloader/dash.py

@ -1,13 +1,8 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import os
from .fragment import FragmentFD from .fragment import FragmentFD
from ..compat import compat_urllib_error from ..compat import compat_urllib_error
from ..utils import (
sanitize_open,
encodeFilename,
)
from ..utils import urljoin
class DashSegmentsFD(FragmentFD): class DashSegmentsFD(FragmentFD):
@ -18,38 +13,39 @@ class DashSegmentsFD(FragmentFD):
FD_NAME = 'dashsegments' FD_NAME = 'dashsegments'
def real_download(self, filename, info_dict): def real_download(self, filename, info_dict):
segments = info_dict['fragments'][:1] if self.params.get(
fragment_base_url = info_dict.get('fragment_base_url')
fragments = info_dict['fragments'][:1] if self.params.get(
'test', False) else info_dict['fragments'] 'test', False) else info_dict['fragments']
ctx = { ctx = {
'filename': filename, 'filename': filename,
'total_frags': len(segments),
'total_frags': len(fragments),
} }
self._prepare_and_start_frag_download(ctx) self._prepare_and_start_frag_download(ctx)
segments_filenames = []
fragment_retries = self.params.get('fragment_retries', 0) fragment_retries = self.params.get('fragment_retries', 0)
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
def process_segment(segment, tmp_filename, num):
segment_url = segment['url']
segment_name = 'Frag%d' % num
target_filename = '%s-%s' % (tmp_filename, segment_name)
frag_index = 0
for i, fragment in enumerate(fragments):
frag_index += 1
if frag_index <= ctx['fragment_index']:
continue
# In DASH, the first segment contains necessary headers to # In DASH, the first segment contains necessary headers to
# generate a valid MP4 file, so always abort for the first segment # generate a valid MP4 file, so always abort for the first segment
fatal = num == 0 or not skip_unavailable_fragments
fatal = i == 0 or not skip_unavailable_fragments
count = 0 count = 0
while count <= fragment_retries: while count <= fragment_retries:
try: try:
success = ctx['dl'].download(target_filename, {'url': segment_url})
fragment_url = fragment.get('url')
if not fragment_url:
assert fragment_base_url
fragment_url = urljoin(fragment_base_url, fragment['path'])
success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
if not success: if not success:
return False return False
down, target_sanitized = sanitize_open(target_filename, 'rb')
ctx['dest_stream'].write(down.read())
down.close()
segments_filenames.append(target_sanitized)
self._append_fragment(ctx, frag_content)
break break
except compat_urllib_error.HTTPError as err: except compat_urllib_error.HTTPError as err:
# YouTube may often return 404 HTTP error for a fragment causing the # YouTube may often return 404 HTTP error for a fragment causing the
@ -60,22 +56,14 @@ class DashSegmentsFD(FragmentFD):
# HTTP error. # HTTP error.
count += 1 count += 1
if count <= fragment_retries: if count <= fragment_retries:
self.report_retry_fragment(err, segment_name, count, fragment_retries)
self.report_retry_fragment(err, frag_index, count, fragment_retries)
if count > fragment_retries: if count > fragment_retries:
if not fatal: if not fatal:
self.report_skip_fragment(segment_name)
return True
self.report_skip_fragment(frag_index)
continue
self.report_error('giving up after %s fragment retries' % fragment_retries) self.report_error('giving up after %s fragment retries' % fragment_retries)
return False return False
return True
for i, segment in enumerate(segments):
if not process_segment(segment, ctx['tmpfilename'], i):
return False
self._finish_frag_download(ctx) self._finish_frag_download(ctx)
for segment_file in segments_filenames:
os.remove(encodeFilename(segment_file))
return True return True
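The retry policy above amounts to: retry each fragment up to --fragment-retries times on HTTP errors, skip a failed fragment unless it is the first one (which carries the MP4 initialization data) or skipping is disabled, and abort otherwise. A schematic, self-contained version with the network call stubbed out:

    # Schematic only; download_fragment is a stub that returns bytes or raises IOError.
    def download_all(fragments, download_fragment, fragment_retries=3,
                     skip_unavailable_fragments=True):
        output = []
        for i, fragment in enumerate(fragments):
            # The first fragment holds the headers needed for a valid MP4, so it is always fatal
            fatal = i == 0 or not skip_unavailable_fragments
            for attempt in range(fragment_retries + 1):
                try:
                    output.append(download_fragment(fragment))
                    break
                except IOError:
                    if attempt == fragment_retries:
                        if not fatal:
                            break  # skip this fragment and continue with the next one
                        raise      # give up entirely
        return b''.join(output)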

+ 37
- 3
youtube_dl/downloader/external.py

@ -6,7 +6,10 @@ import sys
import re import re
from .common import FileDownloader from .common import FileDownloader
from ..compat import compat_setenv
from ..compat import (
compat_setenv,
compat_str,
)
from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
from ..utils import ( from ..utils import (
cli_option, cli_option,
@ -17,6 +20,7 @@ from ..utils import (
encodeArgument, encodeArgument,
handle_youtubedl_headers, handle_youtubedl_headers,
check_executable, check_executable,
is_outdated_version,
) )
@ -25,7 +29,17 @@ class ExternalFD(FileDownloader):
self.report_destination(filename) self.report_destination(filename)
tmpfilename = self.temp_name(filename) tmpfilename = self.temp_name(filename)
retval = self._call_downloader(tmpfilename, info_dict)
try:
retval = self._call_downloader(tmpfilename, info_dict)
except KeyboardInterrupt:
if not info_dict.get('is_live'):
raise
# Live stream downloading cancellation should be considered as
# correct and expected termination thus all postprocessing
# should take place
retval = 0
self.to_screen('[%s] Interrupted by user' % self.get_basename())
if retval == 0: if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename)) fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize)) self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
@ -198,6 +212,20 @@ class FFmpegFD(ExternalFD):
args = [ffpp.executable, '-y'] args = [ffpp.executable, '-y']
for log_level in ('quiet', 'verbose'):
if self.params.get(log_level, False):
args += ['-loglevel', log_level]
break
seekable = info_dict.get('_seekable')
if seekable is not None:
# setting -seekable prevents ffmpeg from guessing if the server
# supports seeking(by adding the header `Range: bytes=0-`), which
# can cause problems in some cases
# https://github.com/rg3/youtube-dl/issues/11800#issuecomment-275037127
# http://trac.ffmpeg.org/ticket/6125#comment:10
args += ['-seekable', '1' if seekable else '0']
args += self._configuration_args() args += self._configuration_args()
# start_time = info_dict.get('start_time') or 0 # start_time = info_dict.get('start_time') or 0
@ -260,11 +288,17 @@ class FFmpegFD(ExternalFD):
args += ['-rtmp_live', 'live'] args += ['-rtmp_live', 'live']
args += ['-i', url, '-c', 'copy'] args += ['-i', url, '-c', 'copy']
if self.params.get('test', False):
args += ['-fs', compat_str(self._TEST_FILE_SIZE)]
if protocol in ('m3u8', 'm3u8_native'): if protocol in ('m3u8', 'm3u8_native'):
if self.params.get('hls_use_mpegts', False) or tmpfilename == '-': if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
args += ['-f', 'mpegts'] args += ['-f', 'mpegts']
else: else:
args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
args += ['-f', 'mp4']
if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2', False)) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
args += ['-bsf:a', 'aac_adtstoasc']
elif protocol == 'rtmp': elif protocol == 'rtmp':
args += ['-f', 'flv'] args += ['-f', 'flv']
else: else:
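
Taken together, the FFmpegFD hunks above add log-level passthrough, an explicit -seekable hint, a -fs cap for test downloads, and restrict the aac_adtstoasc bitstream filter to AAC audio on ffmpeg builds older than 3.2. A simplified sketch of how those branches shape the command line; the version/codec checks only approximate the real is_outdated_version logic and all values are illustrative:

    def build_ffmpeg_args(url, outfile, protocol='m3u8_native', test=False,
                          seekable=None, acodec='mp4a.40.2', ffmpeg_version=(2, 8)):
        args = ['ffmpeg', '-y']
        if seekable is not None:
            # tell ffmpeg up front whether the server honours Range requests
            args += ['-seekable', '1' if seekable else '0']
        args += ['-i', url, '-c', 'copy']
        if test:
            args += ['-fs', '10241']  # roughly the downloader's test-file size
        if protocol in ('m3u8', 'm3u8_native'):
            args += ['-f', 'mp4']
            # older ffmpeg cannot mux ADTS AAC into MP4 without this filter
            if ffmpeg_version < (3, 2) and acodec.split('.')[0] in ('aac', 'mp4a'):
                args += ['-bsf:a', 'aac_adtstoasc']
        return args + [outfile]

    # e.g. pass build_ffmpeg_args('https://example.com/a.m3u8', 'out.mp4')
    # to subprocess.call()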


+ 27 - 31   youtube_dl/downloader/f4m.py

@ -3,7 +3,6 @@ from __future__ import division, unicode_literals
import base64 import base64
import io import io
import itertools import itertools
import os
import time import time
from .fragment import FragmentFD from .fragment import FragmentFD
@ -16,9 +15,7 @@ from ..compat import (
compat_struct_unpack, compat_struct_unpack,
) )
from ..utils import ( from ..utils import (
encodeFilename,
fix_xml_ampersands, fix_xml_ampersands,
sanitize_open,
xpath_text, xpath_text,
) )
@ -246,8 +243,17 @@ def remove_encrypted_media(media):
media)) media))
def _add_ns(prop):
return '{http://ns.adobe.com/f4m/1.0}%s' % prop
def _add_ns(prop, ver=1):
return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop)
def get_base_url(manifest):
base_url = xpath_text(
manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)],
'base URL', default=None)
if base_url:
base_url = base_url.strip()
return base_url
class F4mFD(FragmentFD): class F4mFD(FragmentFD):
@ -333,13 +339,13 @@ class F4mFD(FragmentFD):
rate, media = list(filter( rate, media = list(filter(
lambda f: int(f[0]) == requested_bitrate, formats))[0] lambda f: int(f[0]) == requested_bitrate, formats))[0]
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
# Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
man_base_url = get_base_url(doc) or man_url
base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url'])
bootstrap_node = doc.find(_add_ns('bootstrapInfo')) bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
# From Adobe F4M 3.0 spec:
# The <baseURL> element SHALL be the base URL for all relative
# (HTTP-based) URLs in the manifest. If <baseURL> is not present, said
# URLs should be relative to the location of the containing document.
boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, man_url)
boot_info, bootstrap_url = self._parse_bootstrap_node(
bootstrap_node, man_base_url)
live = boot_info['live'] live = boot_info['live']
metadata_node = media.find(_add_ns('metadata')) metadata_node = media.find(_add_ns('metadata'))
if metadata_node is not None: if metadata_node is not None:
@ -366,17 +372,21 @@ class F4mFD(FragmentFD):
dest_stream = ctx['dest_stream'] dest_stream = ctx['dest_stream']
write_flv_header(dest_stream)
if not live:
write_metadata_tag(dest_stream, metadata)
if ctx['complete_frags_downloaded_bytes'] == 0:
write_flv_header(dest_stream)
if not live:
write_metadata_tag(dest_stream, metadata)
base_url_parsed = compat_urllib_parse_urlparse(base_url) base_url_parsed = compat_urllib_parse_urlparse(base_url)
self._start_frag_download(ctx) self._start_frag_download(ctx)
frags_filenames = []
frag_index = 0
while fragments_list: while fragments_list:
seg_i, frag_i = fragments_list.pop(0) seg_i, frag_i = fragments_list.pop(0)
frag_index += 1
if frag_index <= ctx['fragment_index']:
continue
name = 'Seg%d-Frag%d' % (seg_i, frag_i) name = 'Seg%d-Frag%d' % (seg_i, frag_i)
query = [] query = []
if base_url_parsed.query: if base_url_parsed.query:
@ -386,17 +396,10 @@ class F4mFD(FragmentFD):
if info_dict.get('extra_param_to_segment_url'): if info_dict.get('extra_param_to_segment_url'):
query.append(info_dict['extra_param_to_segment_url']) query.append(info_dict['extra_param_to_segment_url'])
url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query)) url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query))
frag_filename = '%s-%s' % (ctx['tmpfilename'], name)
try: try:
success = ctx['dl'].download(frag_filename, {
'url': url_parsed.geturl(),
'http_headers': info_dict.get('http_headers'),
})
success, down_data = self._download_fragment(ctx, url_parsed.geturl(), info_dict)
if not success: if not success:
return False return False
(down, frag_sanitized) = sanitize_open(frag_filename, 'rb')
down_data = down.read()
down.close()
reader = FlvReader(down_data) reader = FlvReader(down_data)
while True: while True:
try: try:
@ -411,12 +414,8 @@ class F4mFD(FragmentFD):
break break
raise raise
if box_type == b'mdat': if box_type == b'mdat':
dest_stream.write(box_data)
self._append_fragment(ctx, box_data)
break break
if live:
os.remove(encodeFilename(frag_sanitized))
else:
frags_filenames.append(frag_sanitized)
except (compat_urllib_error.HTTPError, ) as err: except (compat_urllib_error.HTTPError, ) as err:
if live and (err.code == 404 or err.code == 410): if live and (err.code == 404 or err.code == 410):
# We didn't keep up with the live window. Continue # We didn't keep up with the live window. Continue
@ -436,7 +435,4 @@ class F4mFD(FragmentFD):
self._finish_frag_download(ctx) self._finish_frag_download(ctx)
for frag_file in frags_filenames:
os.remove(encodeFilename(frag_file))
return True return True
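
The new get_base_url() helper above implements the F4M 3.0 rule that a <baseURL> element, in either the 1.0 or 2.0 namespace, takes precedence over the manifest location when resolving relative media and bootstrap URLs. A small self-contained illustration; the manifest snippet and URLs are invented:

    import xml.etree.ElementTree as etree
    try:
        from urllib.parse import urljoin      # Python 3
    except ImportError:
        from urlparse import urljoin          # Python 2

    MANIFEST = '''<manifest xmlns="http://ns.adobe.com/f4m/1.0">
      <baseURL>http://cdn.example.com/vod/</baseURL>
      <media url="stream_1500.f4m" bitrate="1500"/>
    </manifest>'''

    def _add_ns(prop, ver=1):
        return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop)

    doc = etree.fromstring(MANIFEST)
    man_url = 'http://origin.example.com/path/manifest.f4m'
    base_url = doc.findtext(_add_ns('baseURL')) or doc.findtext(_add_ns('baseURL', 2))
    base_url = base_url.strip() if base_url else man_url
    media = doc.find(_add_ns('media'))
    print(urljoin(base_url, media.attrib['url']))
    # -> http://cdn.example.com/vod/stream_1500.f4m (it would resolve against
    #    the manifest URL instead if <baseURL> were absent)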

+ 124 - 15   youtube_dl/downloader/fragment.py

@ -2,6 +2,7 @@ from __future__ import division, unicode_literals
import os import os
import time import time
import json
from .common import FileDownloader from .common import FileDownloader
from .http import HttpFD from .http import HttpFD
@ -28,15 +29,37 @@ class FragmentFD(FileDownloader):
and hlsnative only) and hlsnative only)
skip_unavailable_fragments: skip_unavailable_fragments:
Skip unavailable fragments (DASH and hlsnative only) Skip unavailable fragments (DASH and hlsnative only)
keep_fragments: Keep downloaded fragments on disk after downloading is
finished
For each incomplete fragment download youtube-dl keeps on disk a special
bookkeeping file with download state and metadata (in future such files will
be used for any incomplete download handled by youtube-dl). This file is
used to properly handle resuming, check download file consistency and detect
potential errors. The file has a .ytdl extension and represents a standard
JSON file of the following format:
extractor:
Dictionary of extractor related data. TBD.
downloader:
Dictionary of downloader related data. May contain following data:
current_fragment:
Dictionary with current (being downloaded) fragment data:
index: 0-based index of current fragment among all fragments
fragment_count:
Total count of fragments
This feature is experimental and file format may change in future.
""" """
def report_retry_fragment(self, err, fragment_name, count, retries):
def report_retry_fragment(self, err, frag_index, count, retries):
self.to_screen( self.to_screen(
'[download] Got server HTTP error: %s. Retrying fragment %s (attempt %d of %s)...'
% (error_to_compat_str(err), fragment_name, count, self.format_retries(retries)))
'[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s)...'
% (error_to_compat_str(err), frag_index, count, self.format_retries(retries)))
def report_skip_fragment(self, fragment_name):
self.to_screen('[download] Skipping fragment %s...' % fragment_name)
def report_skip_fragment(self, frag_index):
self.to_screen('[download] Skipping fragment %d...' % frag_index)
def _prepare_url(self, info_dict, url): def _prepare_url(self, info_dict, url):
headers = info_dict.get('http_headers') headers = info_dict.get('http_headers')
@ -46,12 +69,64 @@ class FragmentFD(FileDownloader):
self._prepare_frag_download(ctx) self._prepare_frag_download(ctx)
self._start_frag_download(ctx) self._start_frag_download(ctx)
@staticmethod
def __do_ytdl_file(ctx):
return not ctx['live'] and not ctx['tmpfilename'] == '-'
def _read_ytdl_file(self, ctx):
stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r')
ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index']
stream.close()
def _write_ytdl_file(self, ctx):
frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w')
downloader = {
'current_fragment': {
'index': ctx['fragment_index'],
},
}
if ctx.get('fragment_count') is not None:
downloader['fragment_count'] = ctx['fragment_count']
frag_index_stream.write(json.dumps({'downloader': downloader}))
frag_index_stream.close()
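
_read_ytdl_file() and _write_ytdl_file() above persist the bookkeeping state described in the class docstring. Roughly what such a .ytdl file looks like on disk and how the resume index comes back out; the file name and numbers are illustrative only:

    import json

    ytdl_path = 'video.mp4.ytdl'   # youtube-dl derives this from the output name
    state = {
        'downloader': {
            'current_fragment': {'index': 42},   # index of the fragment in progress
            'fragment_count': 120,               # optional total, when known
        },
    }
    with open(ytdl_path, 'w') as f:
        json.dump(state, f)

    with open(ytdl_path) as f:
        resume_index = json.load(f)['downloader']['current_fragment']['index']
    print(resume_index)  # -> 42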
def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
success = ctx['dl'].download(fragment_filename, {
'url': frag_url,
'http_headers': headers or info_dict.get('http_headers'),
})
if not success:
return False, None
down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
ctx['fragment_filename_sanitized'] = frag_sanitized
frag_content = down.read()
down.close()
return True, frag_content
def _append_fragment(self, ctx, frag_content):
try:
ctx['dest_stream'].write(frag_content)
ctx['dest_stream'].flush()
finally:
if self.__do_ytdl_file(ctx):
self._write_ytdl_file(ctx)
if not self.params.get('keep_fragments', False):
os.remove(encodeFilename(ctx['fragment_filename_sanitized']))
del ctx['fragment_filename_sanitized']
def _prepare_frag_download(self, ctx): def _prepare_frag_download(self, ctx):
if 'live' not in ctx: if 'live' not in ctx:
ctx['live'] = False ctx['live'] = False
if not ctx['live']:
total_frags_str = '%d' % ctx['total_frags']
ad_frags = ctx.get('ad_frags', 0)
if ad_frags:
total_frags_str += ' (not including %d ad)' % ad_frags
else:
total_frags_str = 'unknown (live)'
self.to_screen( self.to_screen(
'[%s] Total fragments: %s'
% (self.FD_NAME, ctx['total_frags'] if not ctx['live'] else 'unknown (live)'))
'[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
self.report_destination(ctx['filename']) self.report_destination(ctx['filename'])
dl = HttpQuietDownloader( dl = HttpQuietDownloader(
self.ydl, self.ydl,
@ -61,15 +136,46 @@ class FragmentFD(FileDownloader):
'noprogress': True, 'noprogress': True,
'ratelimit': self.params.get('ratelimit'), 'ratelimit': self.params.get('ratelimit'),
'retries': self.params.get('retries', 0), 'retries': self.params.get('retries', 0),
'nopart': self.params.get('nopart', False),
'test': self.params.get('test', False), 'test': self.params.get('test', False),
} }
) )
tmpfilename = self.temp_name(ctx['filename']) tmpfilename = self.temp_name(ctx['filename'])
dest_stream, tmpfilename = sanitize_open(tmpfilename, 'wb')
open_mode = 'wb'
resume_len = 0
# Establish possible resume length
if os.path.isfile(encodeFilename(tmpfilename)):
open_mode = 'ab'
resume_len = os.path.getsize(encodeFilename(tmpfilename))
# Should be initialized before ytdl file check
ctx.update({
'tmpfilename': tmpfilename,
'fragment_index': 0,
})
if self.__do_ytdl_file(ctx):
if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
self._read_ytdl_file(ctx)
if ctx['fragment_index'] > 0 and resume_len == 0:
self.report_warning(
'Inconsistent state of incomplete fragment download. '
'Restarting from the beginning...')
ctx['fragment_index'] = resume_len = 0
self._write_ytdl_file(ctx)
else:
self._write_ytdl_file(ctx)
assert ctx['fragment_index'] == 0
dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode)
ctx.update({ ctx.update({
'dl': dl, 'dl': dl,
'dest_stream': dest_stream, 'dest_stream': dest_stream,
'tmpfilename': tmpfilename, 'tmpfilename': tmpfilename,
# Total complete fragments downloaded so far in bytes
'complete_frags_downloaded_bytes': resume_len,
}) })
def _start_frag_download(self, ctx): def _start_frag_download(self, ctx):
@ -78,9 +184,9 @@ class FragmentFD(FileDownloader):
# hook # hook
state = { state = {
'status': 'downloading', 'status': 'downloading',
'downloaded_bytes': 0,
'frag_index': 0,
'frag_count': total_frags,
'downloaded_bytes': ctx['complete_frags_downloaded_bytes'],
'fragment_index': ctx['fragment_index'],
'fragment_count': total_frags,
'filename': ctx['filename'], 'filename': ctx['filename'],
'tmpfilename': ctx['tmpfilename'], 'tmpfilename': ctx['tmpfilename'],
} }
@ -88,8 +194,6 @@ class FragmentFD(FileDownloader):
start = time.time() start = time.time()
ctx.update({ ctx.update({
'started': start, 'started': start,
# Total complete fragments downloaded so far in bytes
'complete_frags_downloaded_bytes': 0,
# Amount of fragment's bytes downloaded by the time of the previous # Amount of fragment's bytes downloaded by the time of the previous
# frag progress hook invocation # frag progress hook invocation
'prev_frag_downloaded_bytes': 0, 'prev_frag_downloaded_bytes': 0,
@ -105,11 +209,12 @@ class FragmentFD(FileDownloader):
if not ctx['live']: if not ctx['live']:
estimated_size = ( estimated_size = (
(ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) / (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) /
(state['frag_index'] + 1) * total_frags)
(state['fragment_index'] + 1) * total_frags)
state['total_bytes_estimate'] = estimated_size state['total_bytes_estimate'] = estimated_size
if s['status'] == 'finished': if s['status'] == 'finished':
state['frag_index'] += 1
state['fragment_index'] += 1
ctx['fragment_index'] = state['fragment_index']
state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes'] state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes'] ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
ctx['prev_frag_downloaded_bytes'] = 0 ctx['prev_frag_downloaded_bytes'] = 0
@ -131,6 +236,10 @@ class FragmentFD(FileDownloader):
def _finish_frag_download(self, ctx): def _finish_frag_download(self, ctx):
ctx['dest_stream'].close() ctx['dest_stream'].close()
if self.__do_ytdl_file(ctx):
ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
if os.path.isfile(ytdl_filename):
os.remove(ytdl_filename)
elapsed = time.time() - ctx['started'] elapsed = time.time() - ctx['started']
self.try_rename(ctx['tmpfilename'], ctx['filename']) self.try_rename(ctx['tmpfilename'], ctx['filename'])
fsize = os.path.getsize(encodeFilename(ctx['filename'])) fsize = os.path.getsize(encodeFilename(ctx['filename']))
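
The _prepare_frag_download() changes above reopen an existing part file in append mode and trust the .ytdl index only when the two agree; otherwise the download restarts cleanly. A condensed sketch of that decision, with made-up inputs:

    import os

    def resume_state(tmpfilename, ytdl_index):
        """Return (open_mode, fragment_index) for restarting a fragmented download."""
        resume_len = os.path.getsize(tmpfilename) if os.path.isfile(tmpfilename) else 0
        open_mode = 'ab' if resume_len else 'wb'
        if ytdl_index > 0 and resume_len == 0:
            # the .ytdl file claims fragments were downloaded but the part file
            # is empty or missing: inconsistent state, start from scratch
            return 'wb', 0
        return open_mode, ytdl_index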


+ 57 - 30   youtube_dl/downloader/hls.py

@ -1,6 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import os.path
import re import re
import binascii import binascii
try: try:
@ -18,8 +17,6 @@ from ..compat import (
compat_struct_pack, compat_struct_pack,
) )
from ..utils import ( from ..utils import (
encodeFilename,
sanitize_open,
parse_m3u8_attributes, parse_m3u8_attributes,
update_url_query, update_url_query,
) )
@ -34,7 +31,7 @@ class HlsFD(FragmentFD):
def can_download(manifest, info_dict): def can_download(manifest, info_dict):
UNSUPPORTED_FEATURES = ( UNSUPPORTED_FEATURES = (
r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1] r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1]
r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
# r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
# Live streams heuristic does not always work (e.g. geo restricted to Germany # Live streams heuristic does not always work (e.g. geo restricted to Germany
# http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0) # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
@ -52,7 +49,9 @@ class HlsFD(FragmentFD):
# 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5 # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
) )
check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES] check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
check_results.append(can_decrypt_frag or '#EXT-X-KEY:METHOD=AES-128' not in manifest)
is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
check_results.append(can_decrypt_frag or not is_aes128_enc)
check_results.append(not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest))
check_results.append(not info_dict.get('is_live')) check_results.append(not info_dict.get('is_live'))
return all(check_results) return all(check_results)
@ -60,9 +59,9 @@ class HlsFD(FragmentFD):
man_url = info_dict['url'] man_url = info_dict['url']
self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME) self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
manifest = self.ydl.urlopen(self._prepare_url(info_dict, man_url)).read()
s = manifest.decode('utf-8', 'ignore')
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
man_url = urlh.geturl()
s = urlh.read().decode('utf-8', 'ignore')
if not self.can_download(s, info_dict): if not self.can_download(s, info_dict):
if info_dict.get('extra_param_to_segment_url'): if info_dict.get('extra_param_to_segment_url'):
@ -76,15 +75,30 @@ class HlsFD(FragmentFD):
fd.add_progress_hook(ph) fd.add_progress_hook(ph)
return fd.real_download(filename, info_dict) return fd.real_download(filename, info_dict)
total_frags = 0
def anvato_ad(s):
return s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
media_frags = 0
ad_frags = 0
ad_frag_next = False
for line in s.splitlines(): for line in s.splitlines():
line = line.strip() line = line.strip()
if line and not line.startswith('#'):
total_frags += 1
if not line:
continue
if line.startswith('#'):
if anvato_ad(line):
ad_frags += 1
ad_frag_next = True
continue
if ad_frag_next:
ad_frag_next = False
continue
media_frags += 1
ctx = { ctx = {
'filename': filename, 'filename': filename,
'total_frags': total_frags,
'total_frags': media_frags,
'ad_frags': ad_frags,
} }
self._prepare_and_start_frag_download(ctx) self._prepare_and_start_frag_download(ctx)
@ -100,31 +114,35 @@ class HlsFD(FragmentFD):
i = 0 i = 0
media_sequence = 0 media_sequence = 0
decrypt_info = {'METHOD': 'NONE'} decrypt_info = {'METHOD': 'NONE'}
frags_filenames = []
byte_range = {}
frag_index = 0
ad_frag_next = False
for line in s.splitlines(): for line in s.splitlines():
line = line.strip() line = line.strip()
if line: if line:
if not line.startswith('#'): if not line.startswith('#'):
if ad_frag_next:
ad_frag_next = False
continue
frag_index += 1
if frag_index <= ctx['fragment_index']:
continue
frag_url = ( frag_url = (
line line
if re.match(r'^https?://', line) if re.match(r'^https?://', line)
else compat_urlparse.urljoin(man_url, line)) else compat_urlparse.urljoin(man_url, line))
frag_name = 'Frag%d' % i
frag_filename = '%s-%s' % (ctx['tmpfilename'], frag_name)
if extra_query: if extra_query:
frag_url = update_url_query(frag_url, extra_query) frag_url = update_url_query(frag_url, extra_query)
count = 0 count = 0
headers = info_dict.get('http_headers', {})
if byte_range:
headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'])
while count <= fragment_retries: while count <= fragment_retries:
try: try:
success = ctx['dl'].download(frag_filename, {
'url': frag_url,
'http_headers': info_dict.get('http_headers'),
})
success, frag_content = self._download_fragment(
ctx, frag_url, info_dict, headers)
if not success: if not success:
return False return False
down, frag_sanitized = sanitize_open(frag_filename, 'rb')
frag_content = down.read()
down.close()
break break
except compat_urllib_error.HTTPError as err: except compat_urllib_error.HTTPError as err:
# Unavailable (possibly temporary) fragments may be served. # Unavailable (possibly temporary) fragments may be served.
@ -133,28 +151,30 @@ class HlsFD(FragmentFD):
# https://github.com/rg3/youtube-dl/issues/10448). # https://github.com/rg3/youtube-dl/issues/10448).
count += 1 count += 1
if count <= fragment_retries: if count <= fragment_retries:
self.report_retry_fragment(err, frag_name, count, fragment_retries)
self.report_retry_fragment(err, frag_index, count, fragment_retries)
if count > fragment_retries: if count > fragment_retries:
if skip_unavailable_fragments: if skip_unavailable_fragments:
i += 1 i += 1
media_sequence += 1 media_sequence += 1
self.report_skip_fragment(frag_name)
self.report_skip_fragment(frag_index)
continue continue
self.report_error( self.report_error(
'giving up after %s fragment retries' % fragment_retries) 'giving up after %s fragment retries' % fragment_retries)
return False return False
if decrypt_info['METHOD'] == 'AES-128': if decrypt_info['METHOD'] == 'AES-128':
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence) iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
self._prepare_url(info_dict, decrypt_info['URI'])).read()
frag_content = AES.new( frag_content = AES.new(
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content) decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
ctx['dest_stream'].write(frag_content)
frags_filenames.append(frag_sanitized)
self._append_fragment(ctx, frag_content)
# We only download the first fragment during the test # We only download the first fragment during the test
if test: if test:
break break
i += 1 i += 1
media_sequence += 1 media_sequence += 1
elif line.startswith('#EXT-X-KEY'): elif line.startswith('#EXT-X-KEY'):
decrypt_url = decrypt_info.get('URI')
decrypt_info = parse_m3u8_attributes(line[11:]) decrypt_info = parse_m3u8_attributes(line[11:])
if decrypt_info['METHOD'] == 'AES-128': if decrypt_info['METHOD'] == 'AES-128':
if 'IV' in decrypt_info: if 'IV' in decrypt_info:
@ -164,13 +184,20 @@ class HlsFD(FragmentFD):
man_url, decrypt_info['URI']) man_url, decrypt_info['URI'])
if extra_query: if extra_query:
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query) decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read()
if decrypt_url != decrypt_info['URI']:
decrypt_info['KEY'] = None
elif line.startswith('#EXT-X-MEDIA-SEQUENCE'): elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
media_sequence = int(line[22:]) media_sequence = int(line[22:])
elif line.startswith('#EXT-X-BYTERANGE'):
splitted_byte_range = line[17:].split('@')
sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
byte_range = {
'start': sub_range_start,
'end': sub_range_start + int(splitted_byte_range[0]),
}
elif anvato_ad(line):
ad_frag_next = True
self._finish_frag_download(ctx) self._finish_frag_download(ctx)
for frag_file in frags_filenames:
os.remove(encodeFilename(frag_file))
return True return True
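
The hls.py hunks above add #EXT-X-BYTERANGE support (now allowed unless the stream is also AES-128 encrypted), per-fragment Range headers, Anvato ad-segment skipping, and decryption-key caching. A standalone illustration of the byte-range bookkeeping, where the tag carries "<length>[@<offset>]" and an omitted offset continues from the previous range; the playlist lines are invented:

    lines = [
        '#EXT-X-BYTERANGE:75232@0',
        '#EXT-X-BYTERANGE:82112',     # no offset: starts at the previous end
        '#EXT-X-BYTERANGE:69864',
    ]

    byte_range = {}
    for line in lines:
        splitted = line[17:].split('@')
        start = int(splitted[1]) if len(splitted) == 2 else byte_range['end']
        byte_range = {'start': start, 'end': start + int(splitted[0])}
        print('Range: bytes=%d-%d' % (byte_range['start'], byte_range['end']))
    # Range: bytes=0-75232
    # Range: bytes=75232-157344
    # Range: bytes=157344-227208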

+ 195 - 151   youtube_dl/downloader/http.py

@ -22,8 +22,16 @@ from ..utils import (
class HttpFD(FileDownloader): class HttpFD(FileDownloader):
def real_download(self, filename, info_dict): def real_download(self, filename, info_dict):
url = info_dict['url'] url = info_dict['url']
tmpfilename = self.temp_name(filename)
stream = None
class DownloadContext(dict):
__getattr__ = dict.get
__setattr__ = dict.__setitem__
__delattr__ = dict.__delitem__
ctx = DownloadContext()
ctx.filename = filename
ctx.tmpfilename = self.temp_name(filename)
ctx.stream = None
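
DownloadContext above is just a dict whose attribute access aliases item access, so the nested closures defined later can read and mutate shared download state without nonlocal. A minimal demonstration under a hypothetical name:

    class AttrDict(dict):
        __getattr__ = dict.get
        __setattr__ = dict.__setitem__
        __delattr__ = dict.__delitem__

    ctx = AttrDict()
    ctx.resume_len = 0

    def bump():
        ctx.resume_len += 1024   # visible to every closure sharing ctx

    bump()
    print(ctx.resume_len)  # -> 1024, same value via ctx['resume_len']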
# Do not include the Accept-Encoding header # Do not include the Accept-Encoding header
headers = {'Youtubedl-no-compression': 'True'} headers = {'Youtubedl-no-compression': 'True'}
@ -38,46 +46,51 @@ class HttpFD(FileDownloader):
if is_test: if is_test:
request.add_header('Range', 'bytes=0-%s' % str(self._TEST_FILE_SIZE - 1)) request.add_header('Range', 'bytes=0-%s' % str(self._TEST_FILE_SIZE - 1))
# Establish possible resume length
if os.path.isfile(encodeFilename(tmpfilename)):
resume_len = os.path.getsize(encodeFilename(tmpfilename))
else:
resume_len = 0
open_mode = 'wb'
if resume_len != 0:
if self.params.get('continuedl', True):
self.report_resuming_byte(resume_len)
request.add_header('Range', 'bytes=%d-' % resume_len)
open_mode = 'ab'
else:
resume_len = 0
ctx.open_mode = 'wb'
ctx.resume_len = 0
if self.params.get('continuedl', True):
# Establish possible resume length
if os.path.isfile(encodeFilename(ctx.tmpfilename)):
ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
count = 0 count = 0
retries = self.params.get('retries', 0) retries = self.params.get('retries', 0)
while count <= retries:
class SucceedDownload(Exception):
pass
class RetryDownload(Exception):
def __init__(self, source_error):
self.source_error = source_error
def establish_connection():
if ctx.resume_len != 0:
self.report_resuming_byte(ctx.resume_len)
request.add_header('Range', 'bytes=%d-' % ctx.resume_len)
ctx.open_mode = 'ab'
# Establish connection # Establish connection
try: try:
data = self.ydl.urlopen(request)
ctx.data = self.ydl.urlopen(request)
# When trying to resume, Content-Range HTTP header of response has to be checked # When trying to resume, Content-Range HTTP header of response has to be checked
# to match the value of requested Range HTTP header. This is due to a webservers # to match the value of requested Range HTTP header. This is due to a webservers
# that don't support resuming and serve a whole file with no Content-Range # that don't support resuming and serve a whole file with no Content-Range
# set in response despite of requested Range (see # set in response despite of requested Range (see
# https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799) # https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
if resume_len > 0:
content_range = data.headers.get('Content-Range')
if ctx.resume_len > 0:
content_range = ctx.data.headers.get('Content-Range')
if content_range: if content_range:
content_range_m = re.search(r'bytes (\d+)-', content_range) content_range_m = re.search(r'bytes (\d+)-', content_range)
# Content-Range is present and matches requested Range, resume is possible # Content-Range is present and matches requested Range, resume is possible
if content_range_m and resume_len == int(content_range_m.group(1)):
break
if content_range_m and ctx.resume_len == int(content_range_m.group(1)):
return
# Content-Range is either not present or invalid. Assuming remote webserver is # Content-Range is either not present or invalid. Assuming remote webserver is
# trying to send the whole file, resume is not possible, so wiping the local file # trying to send the whole file, resume is not possible, so wiping the local file
# and performing entire redownload # and performing entire redownload
self.report_unable_to_resume() self.report_unable_to_resume()
resume_len = 0
open_mode = 'wb'
break
ctx.resume_len = 0
ctx.open_mode = 'wb'
return
except (compat_urllib_error.HTTPError, ) as err: except (compat_urllib_error.HTTPError, ) as err:
if (err.code < 500 or err.code >= 600) and err.code != 416: if (err.code < 500 or err.code >= 600) and err.code != 416:
# Unexpected HTTP error # Unexpected HTTP error
@ -86,15 +99,15 @@ class HttpFD(FileDownloader):
# Unable to resume (requested range not satisfiable) # Unable to resume (requested range not satisfiable)
try: try:
# Open the connection again without the range header # Open the connection again without the range header
data = self.ydl.urlopen(basic_request)
content_length = data.info()['Content-Length']
ctx.data = self.ydl.urlopen(basic_request)
content_length = ctx.data.info()['Content-Length']
except (compat_urllib_error.HTTPError, ) as err: except (compat_urllib_error.HTTPError, ) as err:
if err.code < 500 or err.code >= 600: if err.code < 500 or err.code >= 600:
raise raise
else: else:
# Examine the reported length # Examine the reported length
if (content_length is not None and if (content_length is not None and
(resume_len - 100 < int(content_length) < resume_len + 100)):
(ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)):
# The file had already been fully downloaded. # The file had already been fully downloaded.
# Explanation to the above condition: in issue #175 it was revealed that # Explanation to the above condition: in issue #175 it was revealed that
# YouTube sometimes adds or removes a few bytes from the end of the file, # YouTube sometimes adds or removes a few bytes from the end of the file,
@ -102,152 +115,183 @@ class HttpFD(FileDownloader):
# I decided to implement a suggested change and consider the file # I decided to implement a suggested change and consider the file
# completely downloaded if the file size differs less than 100 bytes from # completely downloaded if the file size differs less than 100 bytes from
# the one in the hard drive. # the one in the hard drive.
self.report_file_already_downloaded(filename)
self.try_rename(tmpfilename, filename)
self.report_file_already_downloaded(ctx.filename)
self.try_rename(ctx.tmpfilename, ctx.filename)
self._hook_progress({ self._hook_progress({
'filename': filename,
'filename': ctx.filename,
'status': 'finished', 'status': 'finished',
'downloaded_bytes': resume_len,
'total_bytes': resume_len,
'downloaded_bytes': ctx.resume_len,
'total_bytes': ctx.resume_len,
}) })
return True
raise SucceedDownload()
else: else:
# The length does not match, we start the download over # The length does not match, we start the download over
self.report_unable_to_resume() self.report_unable_to_resume()
resume_len = 0
open_mode = 'wb'
break
except socket.error as e:
if e.errno != errno.ECONNRESET:
ctx.resume_len = 0
ctx.open_mode = 'wb'
return
raise RetryDownload(err)
except socket.error as err:
if err.errno != errno.ECONNRESET:
# Connection reset is no problem, just retry # Connection reset is no problem, just retry
raise raise
raise RetryDownload(err)
def download():
data_len = ctx.data.info().get('Content-length', None)
# Range HTTP header may be ignored/unsupported by a webserver
# (e.g. extractor/scivee.py, extractor/bambuser.py).
# However, for a test we still would like to download just a piece of a file.
# To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
# block size when downloading a file.
if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
data_len = self._TEST_FILE_SIZE
if data_len is not None:
data_len = int(data_len) + ctx.resume_len
min_data_len = self.params.get('min_filesize')
max_data_len = self.params.get('max_filesize')
if min_data_len is not None and data_len < min_data_len:
self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
return False
if max_data_len is not None and data_len > max_data_len:
self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
return False
# Retry
count += 1
if count <= retries:
self.report_retry(count, retries)
if count > retries:
self.report_error('giving up after %s retries' % retries)
return False
data_len = data.info().get('Content-length', None)
# Range HTTP header may be ignored/unsupported by a webserver
# (e.g. extractor/scivee.py, extractor/bambuser.py).
# However, for a test we still would like to download just a piece of a file.
# To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
# block size when downloading a file.
if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
data_len = self._TEST_FILE_SIZE
if data_len is not None:
data_len = int(data_len) + resume_len
min_data_len = self.params.get('min_filesize')
max_data_len = self.params.get('max_filesize')
if min_data_len is not None and data_len < min_data_len:
self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
return False
if max_data_len is not None and data_len > max_data_len:
self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
return False
byte_counter = 0 + resume_len
block_size = self.params.get('buffersize', 1024)
start = time.time()
byte_counter = 0 + ctx.resume_len
block_size = self.params.get('buffersize', 1024)
start = time.time()
# measure time over whole while-loop, so slow_down() and best_block_size() work together properly
now = None # needed for slow_down() in the first loop run
before = start # start measuring
while True:
# measure time over whole while-loop, so slow_down() and best_block_size() work together properly
now = None # needed for slow_down() in the first loop run
before = start # start measuring
# Download and write
data_block = data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
byte_counter += len(data_block)
def retry(e):
if ctx.tmpfilename != '-':
ctx.stream.close()
ctx.stream = None
ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
raise RetryDownload(e)
# exit loop when download is finished
if len(data_block) == 0:
break
while True:
try:
# Download and write
data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
# socket.timeout is a subclass of socket.error but may not have
# errno set
except socket.timeout as e:
retry(e)
except socket.error as e:
if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT):
raise
retry(e)
byte_counter += len(data_block)
# exit loop when download is finished
if len(data_block) == 0:
break
# Open destination file just in time
if ctx.stream is None:
try:
ctx.stream, ctx.tmpfilename = sanitize_open(
ctx.tmpfilename, ctx.open_mode)
assert ctx.stream is not None
ctx.filename = self.undo_temp_name(ctx.tmpfilename)
self.report_destination(ctx.filename)
except (OSError, IOError) as err:
self.report_error('unable to open for writing: %s' % str(err))
return False
if self.params.get('xattr_set_filesize', False) and data_len is not None:
try:
write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
except (XAttrUnavailableError, XAttrMetadataError) as err:
self.report_error('unable to set filesize xattr: %s' % str(err))
# Open destination file just in time
if stream is None:
try: try:
(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
assert stream is not None
filename = self.undo_temp_name(tmpfilename)
self.report_destination(filename)
except (OSError, IOError) as err:
self.report_error('unable to open for writing: %s' % str(err))
ctx.stream.write(data_block)
except (IOError, OSError) as err:
self.to_stderr('\n')
self.report_error('unable to write data: %s' % str(err))
return False return False
if self.params.get('xattr_set_filesize', False) and data_len is not None:
try:
write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
except (XAttrUnavailableError, XAttrMetadataError) as err:
self.report_error('unable to set filesize xattr: %s' % str(err))
try:
stream.write(data_block)
except (IOError, OSError) as err:
# Apply rate limit
self.slow_down(start, now, byte_counter - ctx.resume_len)
# end measuring of one loop run
now = time.time()
after = now
# Adjust block size
if not self.params.get('noresizebuffer', False):
block_size = self.best_block_size(after - before, len(data_block))
before = after
# Progress message
speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
if data_len is None:
eta = None
else:
eta = self.calc_eta(start, time.time(), data_len - ctx.resume_len, byte_counter - ctx.resume_len)
self._hook_progress({
'status': 'downloading',
'downloaded_bytes': byte_counter,
'total_bytes': data_len,
'tmpfilename': ctx.tmpfilename,
'filename': ctx.filename,
'eta': eta,
'speed': speed,
'elapsed': now - start,
})
if is_test and byte_counter == data_len:
break
if ctx.stream is None:
self.to_stderr('\n') self.to_stderr('\n')
self.report_error('unable to write data: %s' % str(err))
self.report_error('Did not get any data blocks')
return False return False
if ctx.tmpfilename != '-':
ctx.stream.close()
# Apply rate limit
self.slow_down(start, now, byte_counter - resume_len)
if data_len is not None and byte_counter != data_len:
err = ContentTooShortError(byte_counter, int(data_len))
if count <= retries:
retry(err)
raise err
# end measuring of one loop run
now = time.time()
after = now
self.try_rename(ctx.tmpfilename, ctx.filename)
# Adjust block size
if not self.params.get('noresizebuffer', False):
block_size = self.best_block_size(after - before, len(data_block))
before = after
# Progress message
speed = self.calc_speed(start, now, byte_counter - resume_len)
if data_len is None:
eta = None
else:
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
# Update file modification time
if self.params.get('updatetime', True):
info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None))
self._hook_progress({ self._hook_progress({
'status': 'downloading',
'downloaded_bytes': byte_counter, 'downloaded_bytes': byte_counter,
'total_bytes': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'eta': eta,
'speed': speed,
'elapsed': now - start,
'total_bytes': byte_counter,
'filename': ctx.filename,
'status': 'finished',
'elapsed': time.time() - start,
}) })
if is_test and byte_counter == data_len:
break
if stream is None:
self.to_stderr('\n')
self.report_error('Did not get any data blocks')
return False
if tmpfilename != '-':
stream.close()
if data_len is not None and byte_counter != data_len:
raise ContentTooShortError(byte_counter, int(data_len))
self.try_rename(tmpfilename, filename)
# Update file modification time
if self.params.get('updatetime', True):
info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
self._hook_progress({
'downloaded_bytes': byte_counter,
'total_bytes': byte_counter,
'filename': filename,
'status': 'finished',
'elapsed': time.time() - start,
})
return True
return True
while count <= retries:
try:
establish_connection()
return download()
except RetryDownload as e:
count += 1
if count <= retries:
self.report_retry(e.source_error, count, retries)
continue
except SucceedDownload:
return True
self.report_error('giving up after %s retries' % retries)
return False
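
The rewritten HttpFD above splits connection setup and the read/write loop into establish_connection() and download() closures and drives retries through RetryDownload/SucceedDownload exceptions, so transient socket errors mid-transfer now resume instead of aborting. A skeletal sketch of that outer control flow with placeholder bodies:

    class RetryDownload(Exception):
        def __init__(self, source_error):
            self.source_error = source_error

    class SucceedDownload(Exception):
        pass

    def report(msg):
        print(msg)

    def real_download(establish_connection, download, retries=10):
        count = 0
        while count <= retries:
            try:
                establish_connection()   # may raise RetryDownload on transient errors
                return download()        # may raise RetryDownload or SucceedDownload
            except RetryDownload as e:
                count += 1
                if count <= retries:
                    report('retrying (%d/%d): %s' % (count, retries, e.source_error))
                    continue
            except SucceedDownload:
                return True
        report('giving up after %s retries' % retries)
        return False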

+ 10 - 24   youtube_dl/downloader/ism.py

@ -1,6 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import os
import time import time
import struct import struct
import binascii import binascii
@ -8,10 +7,6 @@ import io
from .fragment import FragmentFD from .fragment import FragmentFD
from ..compat import compat_urllib_error from ..compat import compat_urllib_error
from ..utils import (
sanitize_open,
encodeFilename,
)
u8 = struct.Struct(b'>B') u8 = struct.Struct(b'>B')
@ -103,7 +98,7 @@ def write_piff_header(stream, params):
if is_audio: if is_audio:
smhd_payload = s88.pack(0) # balance smhd_payload = s88.pack(0) # balance
smhd_payload = u16.pack(0) # reserved
smhd_payload += u16.pack(0) # reserved
media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header
else: else:
vmhd_payload = u16.pack(0) # graphics mode vmhd_payload = u16.pack(0) # graphics mode
@ -131,7 +126,6 @@ def write_piff_header(stream, params):
if fourcc == 'AACL': if fourcc == 'AACL':
sample_entry_box = box(b'mp4a', sample_entry_payload) sample_entry_box = box(b'mp4a', sample_entry_payload)
else: else:
sample_entry_payload = sample_entry_payload
sample_entry_payload += u16.pack(0) # pre defined sample_entry_payload += u16.pack(0) # pre defined
sample_entry_payload += u16.pack(0) # reserved sample_entry_payload += u16.pack(0) # reserved
sample_entry_payload += u32.pack(0) * 3 # pre defined sample_entry_payload += u32.pack(0) * 3 # pre defined
@ -225,47 +219,39 @@ class IsmFD(FragmentFD):
self._prepare_and_start_frag_download(ctx) self._prepare_and_start_frag_download(ctx)
segments_filenames = []
fragment_retries = self.params.get('fragment_retries', 0) fragment_retries = self.params.get('fragment_retries', 0)
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
track_written = False track_written = False
frag_index = 0
for i, segment in enumerate(segments): for i, segment in enumerate(segments):
segment_url = segment['url']
segment_name = 'Frag%d' % i
target_filename = '%s-%s' % (ctx['tmpfilename'], segment_name)
frag_index += 1
if frag_index <= ctx['fragment_index']:
continue
count = 0 count = 0
while count <= fragment_retries: while count <= fragment_retries:
try: try:
success = ctx['dl'].download(target_filename, {'url': segment_url})
success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
if not success: if not success:
return False return False
down, target_sanitized = sanitize_open(target_filename, 'rb')
down_data = down.read()
if not track_written: if not track_written:
tfhd_data = extract_box_data(down_data, [b'moof', b'traf', b'tfhd'])
tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd'])
info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0] info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
write_piff_header(ctx['dest_stream'], info_dict['_download_params']) write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
track_written = True track_written = True
ctx['dest_stream'].write(down_data)
down.close()
segments_filenames.append(target_sanitized)
self._append_fragment(ctx, frag_content)
break break
except compat_urllib_error.HTTPError as err: except compat_urllib_error.HTTPError as err:
count += 1 count += 1
if count <= fragment_retries: if count <= fragment_retries:
self.report_retry_fragment(err, segment_name, count, fragment_retries)
self.report_retry_fragment(err, frag_index, count, fragment_retries)
if count > fragment_retries: if count > fragment_retries:
if skip_unavailable_fragments: if skip_unavailable_fragments:
self.report_skip_fragment(segment_name)
self.report_skip_fragment(frag_index)
continue continue
self.report_error('giving up after %s fragment retries' % fragment_retries) self.report_error('giving up after %s fragment retries' % fragment_retries)
return False return False
self._finish_frag_download(ctx) self._finish_frag_download(ctx)
for segment_file in segments_filenames:
os.remove(encodeFilename(segment_file))
return True return True

+ 1 - 1   youtube_dl/downloader/rtmp.py

@ -169,7 +169,7 @@ class RtmpFD(FileDownloader):
self.report_error('[rtmpdump] Could not connect to RTMP server.') self.report_error('[rtmpdump] Could not connect to RTMP server.')
return False return False
while (retval == RD_INCOMPLETE or retval == RD_FAILED) and not test and not live:
while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
prevsize = os.path.getsize(encodeFilename(tmpfilename)) prevsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen('[rtmpdump] %s bytes' % prevsize) self.to_screen('[rtmpdump] %s bytes' % prevsize)
time.sleep(5.0) # This seems to be needed time.sleep(5.0) # This seems to be needed


+ 41 - 9   youtube_dl/extractor/abc.py

@ -1,13 +1,19 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import hashlib
import hmac
import re import re
import time
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
js_to_json, js_to_json,
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
try_get,
update_url_query,
) )
@ -99,21 +105,24 @@ class ABCIE(InfoExtractor):
class ABCIViewIE(InfoExtractor): class ABCIViewIE(InfoExtractor):
IE_NAME = 'abc.net.au:iview' IE_NAME = 'abc.net.au:iview'
_VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P<id>[^/?#]+)' _VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P<id>[^/?#]+)'
_GEO_COUNTRIES = ['AU']
# ABC iview programs are normally available for 14 days only. # ABC iview programs are normally available for 14 days only.
_TESTS = [{ _TESTS = [{
'url': 'http://iview.abc.net.au/programs/diaries-of-a-broken-mind/ZX9735A001S00',
'url': 'http://iview.abc.net.au/programs/call-the-midwife/ZW0898A003S00',
'md5': 'cde42d728b3b7c2b32b1b94b4a548afc', 'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
'info_dict': { 'info_dict': {
'id': 'ZX9735A001S00',
'id': 'ZW0898A003S00',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Diaries Of A Broken Mind',
'description': 'md5:7de3903874b7a1be279fe6b68718fc9e',
'upload_date': '20161010',
'uploader_id': 'abc2',
'timestamp': 1476064920,
'title': 'Series 5 Ep 3',
'description': 'md5:e0ef7d4f92055b86c4f33611f180ed79',
'upload_date': '20171228',
'uploader_id': 'abc1',
'timestamp': 1514499187,
},
'params': {
'skip_download': True,
}, },
'skip': 'Video gone',
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -124,7 +133,30 @@ class ABCIViewIE(InfoExtractor):
title = video_params.get('title') or video_params['seriesTitle'] title = video_params.get('title') or video_params['seriesTitle']
stream = next(s for s in video_params['playlist'] if s.get('type') == 'program') stream = next(s for s in video_params['playlist'] if s.get('type') == 'program')
formats = self._extract_akamai_formats(stream['hds-unmetered'], video_id)
house_number = video_params.get('episodeHouseNumber')
path = '/auth/hls/sign?ts={0}&hn={1}&d=android-mobile'.format(
int(time.time()), house_number)
sig = hmac.new(
'android.content.res.Resources'.encode('utf-8'),
path.encode('utf-8'), hashlib.sha256).hexdigest()
token = self._download_webpage(
'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id)
def tokenize_url(url, token):
return update_url_query(url, {
'hdnea': token,
})
for sd in ('sd', 'sd-low'):
sd_url = try_get(
stream, lambda x: x['streams']['hls'][sd], compat_str)
if not sd_url:
continue
formats = self._extract_m3u8_formats(
tokenize_url(sd_url, token), video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
if formats:
break
self._sort_formats(formats) self._sort_formats(formats)
subtitles = {} subtitles = {}
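
ABC iView above now signs a token request with HMAC-SHA256 and appends the returned token to the HLS URL as hdnea. A worked example of the signing step; the timestamp and house number are placeholders:

    import hashlib
    import hmac
    import time

    house_number = 'ZW0898A003S00'
    path = '/auth/hls/sign?ts={0}&hn={1}&d=android-mobile'.format(
        int(time.time()), house_number)
    sig = hmac.new(
        'android.content.res.Resources'.encode('utf-8'),
        path.encode('utf-8'), hashlib.sha256).hexdigest()
    token_url = 'http://iview.abc.net.au{0}&sig={1}'.format(path, sig)
    # fetching token_url returns the token that is then appended to the
    # stream URL as ?hdnea=<token>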


+ 15 - 5   youtube_dl/extractor/abcnews.py

@ -7,12 +7,21 @@ import time
from .amp import AMPIE from .amp import AMPIE
from .common import InfoExtractor from .common import InfoExtractor
from .youtube import YoutubeIE
from ..compat import compat_urlparse from ..compat import compat_urlparse
class AbcNewsVideoIE(AMPIE): class AbcNewsVideoIE(AMPIE):
IE_NAME = 'abcnews:video' IE_NAME = 'abcnews:video'
_VALID_URL = r'https?://abcnews\.go\.com/[^/]+/video/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
_VALID_URL = r'''(?x)
https?://
abcnews\.go\.com/
(?:
[^/]+/video/(?P<display_id>[0-9a-z-]+)-|
video/embed\?.*?\bid=
)
(?P<id>\d+)
'''
_TESTS = [{ _TESTS = [{
'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932', 'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
@ -29,6 +38,9 @@ class AbcNewsVideoIE(AMPIE):
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
}, {
'url': 'http://abcnews.go.com/video/embed?id=46979033',
'only_matching': True,
}, { }, {
'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478', 'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
'only_matching': True, 'only_matching': True,
@ -97,9 +109,7 @@ class AbcNewsIE(InfoExtractor):
r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL') r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
full_video_url = compat_urlparse.urljoin(url, video_url) full_video_url = compat_urlparse.urljoin(url, video_url)
youtube_url = self._html_search_regex(
r'<iframe[^>]+src="(https://www\.youtube\.com/embed/[^"]+)"',
webpage, 'YouTube URL', default=None)
youtube_url = YoutubeIE._extract_url(webpage)
timestamp = None timestamp = None
date_str = self._html_search_regex( date_str = self._html_search_regex(
@ -129,7 +139,7 @@ class AbcNewsIE(InfoExtractor):
} }
if youtube_url: if youtube_url:
entries = [entry, self.url_result(youtube_url, 'Youtube')]
entries = [entry, self.url_result(youtube_url, ie=YoutubeIE.ie_key())]
return self.playlist_result(entries) return self.playlist_result(entries)
return entry return entry

+ 1 - 1   youtube_dl/extractor/abcotvs.py

@ -22,7 +22,7 @@ class ABCOTVSIE(InfoExtractor):
'display_id': 'east-bay-museum-celebrates-vintage-synthesizers', 'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
'ext': 'mp4', 'ext': 'mp4',
'title': 'East Bay museum celebrates vintage synthesizers', 'title': 'East Bay museum celebrates vintage synthesizers',
'description': 'md5:a4f10fb2f2a02565c1749d4adbab4b10',
'description': 'md5:24ed2bd527096ec2a5c67b9d5a9005f3',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1421123075, 'timestamp': 1421123075,
'upload_date': '20150113', 'upload_date': '20150113',


+ 12 - 11   youtube_dl/extractor/acast.py

@ -8,7 +8,7 @@ from .common import InfoExtractor
from ..compat import compat_str from ..compat import compat_str
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
parse_iso8601,
unified_timestamp,
OnDemandPagedList, OnDemandPagedList,
) )
@ -32,7 +32,7 @@ class ACastIE(InfoExtractor):
}, { }, {
# test with multiple blings # test with multiple blings
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna', 'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
'md5': '55c0097badd7095f494c99a172f86501',
'md5': 'e87d5b8516cd04c0d81b6ee1caca28d0',
'info_dict': { 'info_dict': {
'id': '2a92b283-1a75-4ad8-8396-499c641de0d9', 'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
'ext': 'mp3', 'ext': 'mp3',
@ -40,23 +40,24 @@ class ACastIE(InfoExtractor):
'timestamp': 1477346700, 'timestamp': 1477346700,
'upload_date': '20161024', 'upload_date': '20161024',
'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4', 'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4',
'duration': 2797,
'duration': 2766,
} }
}] }]
def _real_extract(self, url): def _real_extract(self, url):
channel, display_id = re.match(self._VALID_URL, url).groups() channel, display_id = re.match(self._VALID_URL, url).groups()
cast_data = self._download_json( cast_data = self._download_json(
'https://embed.acast.com/api/acasts/%s/%s' % (channel, display_id), display_id)
'https://play-api.acast.com/splash/%s/%s' % (channel, display_id), display_id)
e = cast_data['result']['episode']
return { return {
'id': compat_str(cast_data['id']),
'id': compat_str(e['id']),
'display_id': display_id, 'display_id': display_id,
'url': [b['audio'] for b in cast_data['blings'] if b['type'] == 'BlingAudio'][0],
'title': cast_data['name'],
'description': cast_data.get('description'),
'thumbnail': cast_data.get('image'),
'timestamp': parse_iso8601(cast_data.get('publishingDate')),
'duration': int_or_none(cast_data.get('duration')),
'url': e['mediaUrl'],
'title': e['name'],
'description': e.get('description'),
'thumbnail': e.get('image'),
'timestamp': unified_timestamp(e.get('publishingDate')),
'duration': int_or_none(e.get('duration')),
} }


+ 2 - 1   youtube_dl/extractor/addanime.py

@ -25,7 +25,8 @@ class AddAnimeIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'description': 'One Piece 606', 'description': 'One Piece 606',
'title': 'One Piece 606', 'title': 'One Piece 606',
}
},
'skip': 'Video is gone',
}, { }, {
'url': 'http://add-anime.net/video/MDUGWYKNGBD8/One-Piece-687', 'url': 'http://add-anime.net/video/MDUGWYKNGBD8/One-Piece-687',
'only_matching': True, 'only_matching': True,


+ 150 - 0   youtube_dl/extractor/adn.py

@ -0,0 +1,150 @@
# coding: utf-8
from __future__ import unicode_literals
import base64
import json
import os
from .common import InfoExtractor
from ..aes import aes_cbc_decrypt
from ..compat import compat_ord
from ..utils import (
bytes_to_intlist,
ExtractorError,
float_or_none,
intlist_to_bytes,
srt_subtitles_timecode,
strip_or_none,
urljoin,
)
class ADNIE(InfoExtractor):
IE_DESC = 'Anime Digital Network'
_VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
_TEST = {
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
'md5': 'e497370d847fd79d9d4c74be55575c7a',
'info_dict': {
'id': '7778',
'ext': 'mp4',
'title': 'Blue Exorcist - Kyôto Saga - Épisode 1',
'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
}
}
_BASE_URL = 'http://animedigitalnetwork.fr'
def _get_subtitles(self, sub_path, video_id):
if not sub_path:
return None
enc_subtitles = self._download_webpage(
urljoin(self._BASE_URL, sub_path),
video_id, fatal=False, headers={
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0',
})
if not enc_subtitles:
return None
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
bytes_to_intlist(base64.b64decode(enc_subtitles[24:])),
bytes_to_intlist(b'\x1b\xe0\x29\x61\x38\x94\x24\x00\x12\xbd\xc5\x80\xac\xce\xbe\xb0'),
bytes_to_intlist(base64.b64decode(enc_subtitles[:24]))
))
subtitles_json = self._parse_json(
dec_subtitles[:-compat_ord(dec_subtitles[-1])].decode(),
None, fatal=False)
if not subtitles_json:
return None
subtitles = {}
for sub_lang, sub in subtitles_json.items():
srt = ''
for num, current in enumerate(sub):
start, end, text = (
float_or_none(current.get('startTime')),
float_or_none(current.get('endTime')),
current.get('text'))
if start is None or end is None or text is None:
continue
srt += os.linesep.join(
(
'%d' % num,
'%s --> %s' % (
srt_subtitles_timecode(start),
srt_subtitles_timecode(end)),
text,
os.linesep,
))
if sub_lang == 'vostf':
sub_lang = 'fr'
subtitles.setdefault(sub_lang, []).extend([{
'ext': 'json',
'data': json.dumps(sub),
}, {
'ext': 'srt',
'data': srt,
}])
return subtitles
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
player_config = self._parse_json(self._search_regex(
r'playerConfig\s*=\s*({.+});', webpage, 'player config'), video_id)
video_info = {}
video_info_str = self._search_regex(
r'videoInfo\s*=\s*({.+});', webpage,
'video info', fatal=False)
if video_info_str:
video_info = self._parse_json(
video_info_str, video_id, fatal=False) or {}
options = player_config.get('options') or {}
metas = options.get('metas') or {}
title = metas.get('title') or video_info['title']
links = player_config.get('links') or {}
error = None
if not links:
links_url = player_config['linksurl']
links_data = self._download_json(urljoin(
self._BASE_URL, links_url), video_id)
links = links_data.get('links') or {}
error = links_data.get('error')
formats = []
for format_id, qualities in links.items():
if not isinstance(qualities, dict):
continue
for load_balancer_url in qualities.values():
load_balancer_data = self._download_json(
load_balancer_url, video_id, fatal=False) or {}
m3u8_url = load_balancer_data.get('location')
if not m3u8_url:
continue
m3u8_formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id=format_id, fatal=False)
if format_id == 'vf':
for f in m3u8_formats:
f['language'] = 'fr'
formats.extend(m3u8_formats)
if not error:
error = options.get('error')
if not formats and error:
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'description': strip_or_none(metas.get('summary') or video_info.get('resume')),
'thumbnail': video_info.get('image'),
'formats': formats,
'subtitles': self.extract_subtitles(player_config.get('subtitles'), video_id),
'episode': metas.get('subtitle') or video_info.get('videoTitle'),
'series': video_info.get('playlistTitle'),
}
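The subtitle blob is base64, with the first 24 characters carrying the IV and the rest the AES-CBC ciphertext; after decryption, the last byte tells how many trailing padding bytes to strip before the JSON parse. A minimal standalone sketch of just that un-padding step, on invented plaintext (not part of the change):

# Illustration only: the trailing-byte strip the new extractor applies,
# i.e. dec_subtitles[:-compat_ord(dec_subtitles[-1])], on synthetic data.
import json

decrypted = b'{"vostf": []}' + b'\x03' * 3   # hypothetical plaintext + 3 padding bytes
unpadded = decrypted[:-ord(decrypted[-1:])]  # drop as many bytes as the last byte says
print(json.loads(unpadded.decode()))         # -> {'vostf': []}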

+ 113  - 18    youtube_dl/extractor/adobepass.py

@@ -6,12 +6,16 @@ import time
import xml.etree.ElementTree as etree
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..compat import (
compat_kwargs,
compat_urlparse,
)
from ..utils import (
unescapeHTML,
urlencode_postdata,
unified_timestamp,
ExtractorError,
NO_DEFAULT,
)
@ -21,6 +25,11 @@ MSO_INFO = {
'username_field': 'username', 'username_field': 'username',
'password_field': 'password', 'password_field': 'password',
}, },
'ATTOTT': {
'name': 'DIRECTV NOW',
'username_field': 'email',
'password_field': 'loginpassword',
},
'Rogers': { 'Rogers': {
'name': 'Rogers', 'name': 'Rogers',
'username_field': 'UserName', 'username_field': 'UserName',
@ -31,6 +40,26 @@ MSO_INFO = {
'username_field': 'user', 'username_field': 'user',
'password_field': 'passwd', 'password_field': 'passwd',
}, },
'TWC': {
'name': 'Time Warner Cable | Spectrum',
'username_field': 'Ecom_User_ID',
'password_field': 'Ecom_Password',
},
'Brighthouse': {
'name': 'Bright House Networks | Spectrum',
'username_field': 'j_username',
'password_field': 'j_password',
},
'Charter_Direct': {
'name': 'Charter Spectrum',
'username_field': 'IDToken1',
'password_field': 'IDToken2',
},
'Verizon': {
'name': 'Verizon FiOS',
'username_field': 'IDToken1',
'password_field': 'IDToken2',
},
'thr030': { 'thr030': {
'name': '3 Rivers Communications' 'name': '3 Rivers Communications'
}, },
@ -1293,6 +1322,15 @@ class AdobePassIE(InfoExtractor):
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0' _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
_MVPD_CACHE = 'ap-mvpd' _MVPD_CACHE = 'ap-mvpd'
_DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
def _download_webpage_handle(self, *args, **kwargs):
headers = kwargs.get('headers', {})
headers.update(self.geo_verification_headers())
kwargs['headers'] = headers
return super(AdobePassIE, self)._download_webpage_handle(
*args, **compat_kwargs(kwargs))
@staticmethod @staticmethod
def _get_mvpd_resource(provider_id, title, guid, rating): def _get_mvpd_resource(provider_id, title, guid, rating):
channel = etree.Element('channel') channel = etree.Element('channel')
@ -1335,6 +1373,21 @@ class AdobePassIE(InfoExtractor):
'Use --ap-mso to specify Adobe Pass Multiple-system operator Identifier ' 'Use --ap-mso to specify Adobe Pass Multiple-system operator Identifier '
'and --ap-username and --ap-password or --netrc to provide account credentials.', expected=True) 'and --ap-username and --ap-password or --netrc to provide account credentials.', expected=True)
def extract_redirect_url(html, url=None, fatal=False):
# TODO: eliminate code duplication with generic extractor and move
# redirection code into _download_webpage_handle
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
redirect_url = self._search_regex(
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
html, 'meta refresh redirect',
default=NO_DEFAULT if fatal else None, fatal=fatal)
if not redirect_url:
return None
if url:
redirect_url = compat_urlparse.urljoin(url, unescapeHTML(redirect_url))
return redirect_url
mvpd_headers = { mvpd_headers = {
'ap_42': 'anonymous', 'ap_42': 'anonymous',
'ap_11': 'Linux i686', 'ap_11': 'Linux i686',
@ -1374,42 +1427,82 @@ class AdobePassIE(InfoExtractor):
# Comcast page flow varies by video site and whether you # Comcast page flow varies by video site and whether you
# are on Comcast's network. # are on Comcast's network.
provider_redirect_page, urlh = provider_redirect_page_res provider_redirect_page, urlh = provider_redirect_page_res
# Check for Comcast auto login
if 'automatically signing you in' in provider_redirect_page: if 'automatically signing you in' in provider_redirect_page:
oauth_redirect_url = self._html_search_regex( oauth_redirect_url = self._html_search_regex(
r'window\.location\s*=\s*[\'"]([^\'"]+)', r'window\.location\s*=\s*[\'"]([^\'"]+)',
provider_redirect_page, 'oauth redirect') provider_redirect_page, 'oauth redirect')
# Just need to process the request. No useful data comes back
self._download_webpage( self._download_webpage(
oauth_redirect_url, video_id, 'Confirming auto login') oauth_redirect_url, video_id, 'Confirming auto login')
else: else:
if '<form name="signin"' in provider_redirect_page: if '<form name="signin"' in provider_redirect_page:
# already have the form, just fill it
provider_login_page_res = provider_redirect_page_res provider_login_page_res = provider_redirect_page_res
elif 'http-equiv="refresh"' in provider_redirect_page: elif 'http-equiv="refresh"' in provider_redirect_page:
# redirects to the login page
oauth_redirect_url = self._html_search_regex(
r'content="0;\s*url=([^\'"]+)',
provider_redirect_page, 'meta refresh redirect')
oauth_redirect_url = extract_redirect_url(
provider_redirect_page, fatal=True)
provider_login_page_res = self._download_webpage_handle( provider_login_page_res = self._download_webpage_handle(
oauth_redirect_url,
video_id, 'Downloading Provider Login Page')
oauth_redirect_url, video_id,
self._DOWNLOADING_LOGIN_PAGE)
else: else:
provider_login_page_res = post_form( provider_login_page_res = post_form(
provider_redirect_page_res, 'Downloading Provider Login Page')
provider_redirect_page_res,
self._DOWNLOADING_LOGIN_PAGE)
mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {
mso_info.get('username_field', 'username'): username,
mso_info.get('password_field', 'password'): password,
})
mvpd_confirm_page_res = post_form(
provider_login_page_res, 'Logging in', {
mso_info['username_field']: username,
mso_info['password_field']: password,
})
mvpd_confirm_page, urlh = mvpd_confirm_page_res mvpd_confirm_page, urlh = mvpd_confirm_page_res
if '<button class="submit" value="Resume">Resume</button>' in mvpd_confirm_page: if '<button class="submit" value="Resume">Resume</button>' in mvpd_confirm_page:
post_form(mvpd_confirm_page_res, 'Confirming Login') post_form(mvpd_confirm_page_res, 'Confirming Login')
elif mso_id == 'Verizon':
# In general, if you're connecting from a Verizon-assigned IP,
# you will not actually pass your credentials.
provider_redirect_page, urlh = provider_redirect_page_res
if 'Please wait ...' in provider_redirect_page:
saml_redirect_url = self._html_search_regex(
r'self\.parent\.location=(["\'])(?P<url>.+?)\1',
provider_redirect_page,
'SAML Redirect URL', group='url')
saml_login_page = self._download_webpage(
saml_redirect_url, video_id,
'Downloading SAML Login Page')
else:
saml_login_page_res = post_form(
provider_redirect_page_res, 'Logging in', {
mso_info['username_field']: username,
mso_info['password_field']: password,
})
saml_login_page, urlh = saml_login_page_res
if 'Please try again.' in saml_login_page:
raise ExtractorError(
'We\'re sorry, but either the User ID or Password entered is not correct.')
saml_login_url = self._search_regex(
r'xmlHttp\.open\("POST"\s*,\s*(["\'])(?P<url>.+?)\1',
saml_login_page, 'SAML Login URL', group='url')
saml_response_json = self._download_json(
saml_login_url, video_id, 'Downloading SAML Response',
headers={'Content-Type': 'text/xml'})
self._download_webpage(
saml_response_json['targetValue'], video_id,
'Confirming Login', data=urlencode_postdata({
'SAMLResponse': saml_response_json['SAMLResponse'],
'RelayState': saml_response_json['RelayState']
}), headers={
'Content-Type': 'application/x-www-form-urlencoded'
})
else: else:
# Normal, non-Comcast flow
# Some providers (e.g. DIRECTV NOW) have another meta refresh
# based redirect that should be followed.
provider_redirect_page, urlh = provider_redirect_page_res
provider_refresh_redirect_url = extract_redirect_url(
provider_redirect_page, url=urlh.geturl())
if provider_refresh_redirect_url:
provider_redirect_page_res = self._download_webpage_handle(
provider_refresh_redirect_url, video_id,
'Downloading Provider Redirect Page (meta refresh)')
provider_login_page_res = post_form( provider_login_page_res = post_form(
provider_redirect_page_res, 'Downloading Provider Login Page')
provider_redirect_page_res, self._DOWNLOADING_LOGIN_PAGE)
mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', { mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {
mso_info.get('username_field', 'username'): username, mso_info.get('username_field', 'username'): username,
mso_info.get('password_field', 'password'): password, mso_info.get('password_field', 'password'): password,
@ -1448,6 +1541,8 @@ class AdobePassIE(InfoExtractor):
self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {}) self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
count += 1 count += 1
continue continue
if '<error' in authorize:
raise ExtractorError(xml_text(authorize, 'details'), expected=True)
authz_token = unescapeHTML(xml_text(authorize, 'authzToken')) authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
requestor_info[guid] = authz_token requestor_info[guid] = authz_token
self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info) self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
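The new extract_redirect_url() helper is what lets flows like DIRECTV NOW follow the extra meta-refresh hop. For reference, a rough standalone equivalent with a simplified regex and fabricated HTML (the real pattern above also tolerates attribute reordering):

# Pull the target of a <meta http-equiv="refresh"> tag and resolve it
# against the page URL; sample data only.
import re
try:
    from urllib.parse import urljoin   # Python 3
except ImportError:
    from urlparse import urljoin       # Python 2

def meta_refresh_target(html, base_url=None):
    m = re.search(
        r'(?i)<meta[^>]+http-equiv=["\']refresh["\'][^>]*content=["\']\d*;\s*url=([^"\']+)',
        html)
    if not m:
        return None
    target = m.group(1)
    return urljoin(base_url, target) if base_url else target

html = '<meta http-equiv="refresh" content="0; url=/login?next=player">'  # made-up page
print(meta_refresh_target(html, 'https://idp.example.com/start'))
# -> https://idp.example.com/login?next=player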


+ 104  - 177    youtube_dl/extractor/adultswim.py

@ -5,91 +5,52 @@ import re
from .turner import TurnerBaseIE from .turner import TurnerBaseIE
from ..utils import ( from ..utils import (
ExtractorError,
int_or_none, int_or_none,
strip_or_none,
) )
class AdultSwimIE(TurnerBaseIE): class AdultSwimIE(TurnerBaseIE):
_VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<is_playlist>playlists/)?(?P<show_path>[^/]+)/(?P<episode_path>[^/?#]+)/?'
_VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<show_path>[^/?#]+)(?:/(?P<episode_path>[^/?#]+))?'
_TESTS = [{ _TESTS = [{
'url': 'http://adultswim.com/videos/rick-and-morty/pilot', 'url': 'http://adultswim.com/videos/rick-and-morty/pilot',
'playlist': [
{
'md5': '247572debc75c7652f253c8daa51a14d',
'info_dict': {
'id': 'rQxZvXQ4ROaSOqq-or2Mow-0',
'ext': 'flv',
'title': 'Rick and Morty - Pilot Part 1',
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
},
},
{
'md5': '77b0e037a4b20ec6b98671c4c379f48d',
'info_dict': {
'id': 'rQxZvXQ4ROaSOqq-or2Mow-3',
'ext': 'flv',
'title': 'Rick and Morty - Pilot Part 4',
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
},
},
],
'info_dict': { 'info_dict': {
'id': 'rQxZvXQ4ROaSOqq-or2Mow', 'id': 'rQxZvXQ4ROaSOqq-or2Mow',
'ext': 'mp4',
'title': 'Rick and Morty - Pilot', 'title': 'Rick and Morty - Pilot',
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
'description': 'Rick moves in with his daughter\'s family and establishes himself as a bad influence on his grandson, Morty.',
'timestamp': 1493267400,
'upload_date': '20170427',
}, },
'skip': 'This video is only available for registered users',
}, {
'url': 'http://www.adultswim.com/videos/playlists/american-parenting/putting-francine-out-of-business/',
'playlist': [
{
'md5': '2eb5c06d0f9a1539da3718d897f13ec5',
'info_dict': {
'id': '-t8CamQlQ2aYZ49ItZCFog-0',
'ext': 'flv',
'title': 'American Dad - Putting Francine Out of Business',
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
},
}
],
'info_dict': {
'id': '-t8CamQlQ2aYZ49ItZCFog',
'title': 'American Dad - Putting Francine Out of Business',
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
'params': {
# m3u8 download
'skip_download': True,
}, },
'expected_warnings': ['Unable to download f4m manifest'],
}, { }, {
'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/', 'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
'playlist': [
{
'md5': '3e346a2ab0087d687a05e1e7f3b3e529',
'info_dict': {
'id': 'sY3cMUR_TbuE4YmdjzbIcQ-0',
'ext': 'mp4',
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
},
}
],
'info_dict': { 'info_dict': {
'id': 'sY3cMUR_TbuE4YmdjzbIcQ', 'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
'ext': 'mp4',
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine', 'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.',
'upload_date': '20080124',
'timestamp': 1201150800,
}, },
'params': { 'params': {
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}
},
}, { }, {
# heroMetadata.trailer
'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/', 'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/',
'info_dict': { 'info_dict': {
'id': 'I0LQFQkaSUaFp8PnAWHhoQ', 'id': 'I0LQFQkaSUaFp8PnAWHhoQ',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Decker - Inside Decker: A New Hero', 'title': 'Decker - Inside Decker: A New Hero',
'description': 'md5:c916df071d425d62d70c86d4399d3ee0',
'duration': 249.008,
'description': 'The guys recap the conclusion of the season. They announce a new hero, take a peek into the Victorville Film Archive and welcome back the talented James Dean.',
'timestamp': 1469480460,
'upload_date': '20160725',
}, },
'params': { 'params': {
# m3u8 download # m3u8 download
@ -97,136 +58,102 @@ class AdultSwimIE(TurnerBaseIE):
}, },
'expected_warnings': ['Unable to download f4m manifest'], 'expected_warnings': ['Unable to download f4m manifest'],
}, { }, {
'url': 'http://www.adultswim.com/videos/toonami/friday-october-14th-2016/',
'url': 'http://www.adultswim.com/videos/attack-on-titan',
'info_dict': {
'id': 'b7A69dzfRzuaXIECdxW8XQ',
'title': 'Attack on Titan',
'description': 'md5:6c8e003ea0777b47013e894767f5e114',
},
'playlist_mincount': 12,
}, {
'url': 'http://www.adultswim.com/videos/streams/williams-stream',
'info_dict': { 'info_dict': {
'id': 'eYiLsKVgQ6qTC6agD67Sig',
'title': 'Toonami - Friday, October 14th, 2016',
'description': 'md5:99892c96ffc85e159a428de85c30acde',
'id': 'd8DEBj7QRfetLsRgFnGEyg',
'ext': 'mp4',
'title': r're:^Williams Stream \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'description': 'original programming',
}, },
'playlist': [{
'md5': '',
'info_dict': {
'id': 'eYiLsKVgQ6qTC6agD67Sig',
'ext': 'mp4',
'title': 'Toonami - Friday, October 14th, 2016',
'description': 'md5:99892c96ffc85e159a428de85c30acde',
},
}],
'params': { 'params': {
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
'expected_warnings': ['Unable to download f4m manifest'],
}] }]
@staticmethod
def find_video_info(collection, slug):
for video in collection.get('videos'):
if video.get('slug') == slug:
return video
@staticmethod
def find_collection_by_linkURL(collections, linkURL):
for collection in collections:
if collection.get('linkURL') == linkURL:
return collection
@staticmethod
def find_collection_containing_video(collections, slug):
for collection in collections:
for video in collection.get('videos'):
if video.get('slug') == slug:
return collection, video
return None, None
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
show_path = mobj.group('show_path')
episode_path = mobj.group('episode_path')
is_playlist = True if mobj.group('is_playlist') else False
webpage = self._download_webpage(url, episode_path)
# Extract the value of `bootstrappedData` from the Javascript in the page.
bootstrapped_data = self._parse_json(self._search_regex(
r'var bootstrappedData = ({.*});', webpage, 'bootstraped data'), episode_path)
# Downloading videos from a /videos/playlist/ URL needs to be handled differently.
# NOTE: We are only downloading one video (the current one) not the playlist
if is_playlist:
collections = bootstrapped_data['playlists']['collections']
collection = self.find_collection_by_linkURL(collections, show_path)
video_info = self.find_video_info(collection, episode_path)
show_title = video_info['showTitle']
segment_ids = [video_info['videoPlaybackID']]
show_path, episode_path = re.match(self._VALID_URL, url).groups()
display_id = episode_path or show_path
webpage = self._download_webpage(url, display_id)
initial_data = self._parse_json(self._search_regex(
r'AS_INITIAL_DATA(?:__)?\s*=\s*({.+?});',
webpage, 'initial data'), display_id)
is_stream = show_path == 'streams'
if is_stream:
if not episode_path:
episode_path = 'live-stream'
video_data = next(stream for stream_path, stream in initial_data['streams'].items() if stream_path == episode_path)
video_id = video_data.get('stream')
if not video_id:
entries = []
for episode in video_data.get('archiveEpisodes', []):
episode_url = episode.get('url')
if not episode_url:
continue
entries.append(self.url_result(
episode_url, 'AdultSwim', episode.get('id')))
return self.playlist_result(
entries, video_data.get('id'), video_data.get('title'),
strip_or_none(video_data.get('description')))
else: else:
collections = bootstrapped_data['show']['collections']
collection, video_info = self.find_collection_containing_video(collections, episode_path)
# Video wasn't found in the collections, let's try `slugged_video`.
if video_info is None:
if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
video_info = bootstrapped_data['slugged_video']
if not video_info:
video_info = bootstrapped_data.get(
'heroMetadata', {}).get('trailer', {}).get('video')
if not video_info:
video_info = bootstrapped_data.get('onlineOriginals', [None])[0]
if not video_info:
raise ExtractorError('Unable to find video info')
show = bootstrapped_data['show']
show_title = show['title']
stream = video_info.get('stream')
if stream and stream.get('videoPlaybackID'):
segment_ids = [stream['videoPlaybackID']]
elif video_info.get('clips'):
segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
elif video_info.get('videoPlaybackID'):
segment_ids = [video_info['videoPlaybackID']]
elif video_info.get('id'):
segment_ids = [video_info['id']]
else:
if video_info.get('auth') is True:
raise ExtractorError(
'This video is only available via cable service provider subscription that'
' is not currently supported. You may want to use --cookies.', expected=True)
else:
raise ExtractorError('Unable to find stream or clips')
episode_id = video_info['id']
episode_title = video_info['title']
episode_description = video_info.get('description')
episode_duration = int_or_none(video_info.get('duration'))
view_count = int_or_none(video_info.get('views'))
show_data = initial_data['show']
if not episode_path:
entries = []
for video in show_data.get('videos', []):
slug = video.get('slug')
if not slug:
continue
entries.append(self.url_result(
'http://adultswim.com/videos/%s/%s' % (show_path, slug),
'AdultSwim', video.get('id')))
return self.playlist_result(
entries, show_data.get('id'), show_data.get('title'),
strip_or_none(show_data.get('metadata', {}).get('description')))
video_data = show_data['sluggedVideo']
video_id = video_data['id']
info = self._extract_cvp_info(
'http://www.adultswim.com/videos/api/v0/assets?platform=desktop&id=' + video_id,
video_id, {
'secure': {
'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big',
'tokenizer_src': 'http://www.adultswim.com/astv/mvpd/processors/services/token_ipadAdobe.do',
},
}, {
'url': url,
'site_name': 'AdultSwim',
'auth_required': video_data.get('auth'),
})
entries = []
for part_num, segment_id in enumerate(segment_ids):
segement_info = self._extract_cvp_info(
'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=desktop' % segment_id,
segment_id, {
'secure': {
'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big',
'tokenizer_src': 'http://www.adultswim.com/astv/mvpd/processors/services/token_ipadAdobe.do',
},
})
segment_title = '%s - %s' % (show_title, episode_title)
if len(segment_ids) > 1:
segment_title += ' Part %d' % (part_num + 1)
segement_info.update({
'id': segment_id,
'title': segment_title,
'description': episode_description,
info.update({
'id': video_id,
'display_id': display_id,
'description': info.get('description') or strip_or_none(video_data.get('description')),
})
if not is_stream:
info.update({
'duration': info.get('duration') or int_or_none(video_data.get('duration')),
'timestamp': info.get('timestamp') or int_or_none(video_data.get('launch_date')),
'season_number': info.get('season_number') or int_or_none(video_data.get('season_number')),
'episode': info['title'],
'episode_number': info.get('episode_number') or int_or_none(video_data.get('episode_number')),
}) })
entries.append(segement_info)
return {
'_type': 'playlist',
'id': episode_id,
'display_id': episode_path,
'entries': entries,
'title': '%s - %s' % (show_title, episode_title),
'description': episode_description,
'duration': episode_duration,
'view_count': view_count,
}
info['series'] = video_data.get('collection_title') or info.get('series')
if info['series'] and info['series'] != info['title']:
info['title'] = '%s - %s' % (info['series'], info['title'])
return info
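The rewrite keys everything off the page's AS_INITIAL_DATA object instead of the old bootstrappedData blob. A minimal sketch of that bootstrap step, using the same regex on a fabricated page snippet:

# Grab the AS_INITIAL_DATA JSON object from the page and read show data out
# of it (the HTML below is invented for illustration).
import json
import re

webpage = 'window.AS_INITIAL_DATA = {"show": {"sluggedVideo": {"id": "abc123", "title": "Pilot"}}};'
raw = re.search(r'AS_INITIAL_DATA(?:__)?\s*=\s*({.+?});', webpage).group(1)
initial_data = json.loads(raw)
print(initial_data['show']['sluggedVideo']['id'])   # -> abc123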

+ 37  - 11    youtube_dl/extractor/aenetworks.py

@ -23,7 +23,19 @@ class AENetworksBaseIE(ThePlatformIE):
class AENetworksIE(AENetworksBaseIE): class AENetworksIE(AENetworksBaseIE):
IE_NAME = 'aenetworks' IE_NAME = 'aenetworks'
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network' IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)/full-movie)'
_VALID_URL = r'''(?x)
https?://
(?:www\.)?
(?P<domain>
(?:history|aetv|mylifetime|lifetimemovieclub)\.com|
fyi\.tv
)/
(?:
shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|
movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?|
specials/(?P<special_display_id>[^/]+)/full-special
)
'''
_TESTS = [{ _TESTS = [{
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1', 'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
'md5': 'a97a65f7e823ae10e9244bc5433d5fe6', 'md5': 'a97a65f7e823ae10e9244bc5433d5fe6',
@ -62,17 +74,24 @@ class AENetworksIE(AENetworksBaseIE):
}, { }, {
'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie', 'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
'only_matching': True 'only_matching': True
}, {
'url': 'https://www.lifetimemovieclub.com/movies/a-killer-among-us',
'only_matching': True
}, {
'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
'only_matching': True
}] }]
_DOMAIN_TO_REQUESTOR_ID = {
'history.com': 'HISTORY',
'aetv.com': 'AETV',
'mylifetime.com': 'LIFETIME',
'lifetimemovieclub.com': 'LIFETIMEMOVIECLUB',
'fyi.tv': 'FYI',
}
def _real_extract(self, url): def _real_extract(self, url):
domain, show_path, movie_display_id = re.match(self._VALID_URL, url).groups()
display_id = show_path or movie_display_id
domain, show_path, movie_display_id, special_display_id = re.match(self._VALID_URL, url).groups()
display_id = show_path or movie_display_id or special_display_id
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
if show_path: if show_path:
url_parts = show_path.split('/') url_parts = show_path.split('/')
@ -82,18 +101,22 @@ class AENetworksIE(AENetworksBaseIE):
for season_url_path in re.findall(r'(?s)<li[^>]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage): for season_url_path in re.findall(r'(?s)<li[^>]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage):
entries.append(self.url_result( entries.append(self.url_result(
compat_urlparse.urljoin(url, season_url_path), 'AENetworks')) compat_urlparse.urljoin(url, season_url_path), 'AENetworks'))
return self.playlist_result(
entries, self._html_search_meta('aetn:SeriesId', webpage),
self._html_search_meta('aetn:SeriesTitle', webpage))
elif url_parts_len == 2:
if entries:
return self.playlist_result(
entries, self._html_search_meta('aetn:SeriesId', webpage),
self._html_search_meta('aetn:SeriesTitle', webpage))
else:
# single season
url_parts_len = 2
if url_parts_len == 2:
entries = [] entries = []
for episode_item in re.findall(r'(?s)<div[^>]+class="[^"]*episode-item[^"]*"[^>]*>', webpage):
for episode_item in re.findall(r'(?s)<[^>]+class="[^"]*(?:episode|program)-item[^"]*"[^>]*>', webpage):
episode_attributes = extract_attributes(episode_item) episode_attributes = extract_attributes(episode_item)
episode_url = compat_urlparse.urljoin( episode_url = compat_urlparse.urljoin(
url, episode_attributes['data-canonical']) url, episode_attributes['data-canonical'])
entries.append(self.url_result( entries.append(self.url_result(
episode_url, 'AENetworks', episode_url, 'AENetworks',
episode_attributes['data-videoid']))
episode_attributes.get('data-videoid') or episode_attributes.get('data-video-id')))
return self.playlist_result( return self.playlist_result(
entries, self._html_search_meta('aetn:SeasonId', webpage)) entries, self._html_search_meta('aetn:SeasonId', webpage))
@ -103,9 +126,12 @@ class AENetworksIE(AENetworksBaseIE):
} }
video_id = self._html_search_meta('aetn:VideoID', webpage) video_id = self._html_search_meta('aetn:VideoID', webpage)
media_url = self._search_regex( media_url = self._search_regex(
r"media_url\s*=\s*'([^']+)'", webpage, 'video url')
[r"media_url\s*=\s*'(?P<url>[^']+)'",
r'data-media-url=(?P<url>(?:https?:)?//[^\s>]+)',
r'data-media-url=(["\'])(?P<url>(?:(?!\1).)+?)\1'],
webpage, 'video url', group='url')
theplatform_metadata = self._download_theplatform_metadata(self._search_regex( theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
info = self._parse_theplatform_metadata(theplatform_metadata) info = self._parse_theplatform_metadata(theplatform_metadata)
if theplatform_metadata.get('AETN$isBehindWall'): if theplatform_metadata.get('AETN$isBehindWall'):
requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain] requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain]
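A quick sanity check of the broadened _VALID_URL against the two newly supported page types (the pattern below is copied verbatim from this change; only the check harness is new):

import re

VALID_URL = r'''(?x)
    https?://
        (?:www\.)?
        (?P<domain>
            (?:history|aetv|mylifetime|lifetimemovieclub)\.com|
            fyi\.tv
        )/
        (?:
            shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|
            movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?|
            specials/(?P<special_display_id>[^/]+)/full-special
        )
    '''

for url in (
        'https://www.lifetimemovieclub.com/movies/a-killer-among-us',
        'http://www.history.com/specials/sniper-into-the-kill-zone/full-special'):
    m = re.match(VALID_URL, url)
    print(m.group('domain'), m.group('movie_display_id') or m.group('special_display_id'))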


+ 175  - 38    youtube_dl/extractor/afreecatv.py

@@ -4,20 +4,17 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_urlparse,
compat_urlparse,
)
from ..compat import compat_xpath
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
update_url_query,
xpath_element,
xpath_text,
)
class AfreecaTVIE(InfoExtractor):
IE_NAME = 'afreecatv'
IE_DESC = 'afreecatv.com'
_VALID_URL = r'''(?x)
https?://
@ -42,7 +39,8 @@ class AfreecaTVIE(InfoExtractor):
'uploader': 'dailyapril', 'uploader': 'dailyapril',
'uploader_id': 'dailyapril', 'uploader_id': 'dailyapril',
'upload_date': '20160503', 'upload_date': '20160503',
}
},
'skip': 'Video is gone',
}, { }, {
'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867', 'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867',
'info_dict': { 'info_dict': {
@ -70,6 +68,93 @@ class AfreecaTVIE(InfoExtractor):
'upload_date': '20160502', 'upload_date': '20160502',
}, },
}], }],
'skip': 'Video is gone',
}, {
'url': 'http://vod.afreecatv.com/PLAYER/STATION/18650793',
'info_dict': {
'id': '18650793',
'ext': 'mp4',
'title': '오늘은 다르다! 쏘님의 우월한 위아래~ 댄스리액션!',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': '윈아디',
'uploader_id': 'badkids',
'duration': 107,
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://vod.afreecatv.com/PLAYER/STATION/10481652',
'info_dict': {
'id': '10481652',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'dailyapril',
'uploader_id': 'dailyapril',
'duration': 6492,
},
'playlist_count': 2,
'playlist': [{
'md5': 'd8b7c174568da61d774ef0203159bf97',
'info_dict': {
'id': '20160502_c4c62b9d_174361386_1',
'ext': 'mp4',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 1)",
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'dailyapril',
'uploader_id': 'dailyapril',
'upload_date': '20160502',
'duration': 3601,
},
}, {
'md5': '58f2ce7f6044e34439ab2d50612ab02b',
'info_dict': {
'id': '20160502_39e739bb_174361386_2',
'ext': 'mp4',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 2)",
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'dailyapril',
'uploader_id': 'dailyapril',
'upload_date': '20160502',
'duration': 2891,
},
}],
'params': {
'skip_download': True,
},
}, {
# non standard key
'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605',
'info_dict': {
'id': '20170411_BE689A0E_190960999_1_2_h',
'ext': 'mp4',
'title': '혼자사는여자집',
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': '♥이슬이',
'uploader_id': 'dasl8121',
'upload_date': '20170411',
'duration': 213,
},
'params': {
'skip_download': True,
},
}, {
# adult video
'url': 'http://vod.afreecatv.com/PLAYER/STATION/26542731',
'info_dict': {
'id': '20171001_F1AE1711_196617479_1',
'ext': 'mp4',
'title': '[생]서아 초심 찾기 방송 (part 1)',
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'BJ서아',
'uploader_id': 'bjdyrksu',
'upload_date': '20171001',
'duration': 3600,
'age_limit': 18,
},
'params': {
'skip_download': True,
},
}, { }, {
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652', 'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
'only_matching': True, 'only_matching': True,
@ -84,42 +169,93 @@ class AfreecaTVIE(InfoExtractor):
m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key) m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key)
if m: if m:
video_key['upload_date'] = m.group('upload_date') video_key['upload_date'] = m.group('upload_date')
video_key['part'] = m.group('part')
video_key['part'] = int(m.group('part'))
return video_key return video_key
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
parsed_url = compat_urllib_parse_urlparse(url)
info_url = compat_urlparse.urlunparse(parsed_url._replace(
netloc='afbbs.afreecatv.com:8080',
path='/api/video/get_video_info.php'))
video_xml = self._download_xml( video_xml = self._download_xml(
update_url_query(info_url, {'nTitleNo': video_id}), video_id)
'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
video_id, query={
'nTitleNo': video_id,
'partialView': 'SKIP_ADULT',
})
if xpath_element(video_xml, './track/video/file') is None:
flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
if flag and flag != 'SUCCEED':
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, flag), expected=True)
video_element = video_xml.findall(compat_xpath('./track/video'))[1]
if video_element is None or video_element.text is None:
raise ExtractorError('Specified AfreecaTV video does not exist', raise ExtractorError('Specified AfreecaTV video does not exist',
expected=True) expected=True)
title = xpath_text(video_xml, './track/title', 'title')
video_url = video_element.text.strip()
title = xpath_text(video_xml, './track/title', 'title', fatal=True)
uploader = xpath_text(video_xml, './track/nickname', 'uploader') uploader = xpath_text(video_xml, './track/nickname', 'uploader')
uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id') uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id')
duration = int_or_none(xpath_text(video_xml, './track/duration',
'duration'))
duration = int_or_none(xpath_text(
video_xml, './track/duration', 'duration'))
thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail') thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail')
entries = []
for i, video_file in enumerate(video_xml.findall('./track/video/file')):
video_key = self.parse_video_key(video_file.get('key', ''))
if not video_key:
continue
entries.append({
'id': '%s_%s' % (video_id, video_key.get('part', i + 1)),
'title': title,
'upload_date': video_key.get('upload_date'),
'duration': int_or_none(video_file.get('duration')),
'url': video_file.text,
common_entry = {
'uploader': uploader,
'uploader_id': uploader_id,
'thumbnail': thumbnail,
}
info = common_entry.copy()
info.update({
'id': video_id,
'title': title,
'duration': duration,
})
if not video_url:
entries = []
file_elements = video_element.findall(compat_xpath('./file'))
one = len(file_elements) == 1
for file_num, file_element in enumerate(file_elements, start=1):
file_url = file_element.text
if not file_url:
continue
key = file_element.get('key', '')
upload_date = self._search_regex(
r'^(\d{8})_', key, 'upload date', default=None)
file_duration = int_or_none(file_element.get('duration'))
format_id = key if key else '%s_%s' % (video_id, file_num)
if determine_ext(file_url) == 'm3u8':
formats = self._extract_m3u8_formats(
file_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls',
note='Downloading part %d m3u8 information' % file_num)
else:
formats = [{
'url': file_url,
'format_id': 'http',
}]
if not formats:
continue
self._sort_formats(formats)
file_info = common_entry.copy()
file_info.update({
'id': format_id,
'title': title if one else '%s (part %d)' % (title, file_num),
'upload_date': upload_date,
'duration': file_duration,
'formats': formats,
})
entries.append(file_info)
entries_info = info.copy()
entries_info.update({
'_type': 'multi_video',
'entries': entries,
}) })
return entries_info
info = { info = {
'id': video_id, 'id': video_id,
@ -130,16 +266,17 @@ class AfreecaTVIE(InfoExtractor):
'thumbnail': thumbnail, 'thumbnail': thumbnail,
} }
if len(entries) > 1:
info['_type'] = 'multi_video'
info['entries'] = entries
elif len(entries) == 1:
info['url'] = entries[0]['url']
info['upload_date'] = entries[0].get('upload_date')
if determine_ext(video_url) == 'm3u8':
info['formats'] = self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
else: else:
raise ExtractorError(
'No files found for the specified AfreecaTV video, either'
' the URL is incorrect or the video has been made private.',
expected=True)
app, playpath = video_url.split('mp4:')
info.update({
'url': app,
'ext': 'flv',
'play_path': 'mp4:' + playpath,
'rtmp_live': True, # downloading won't end without this
})
return info return info
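The per-part <file> keys encode the upload date and part index; this mirrors parse_video_key() from the change above, run on one of the sample keys from the new tests:

import re

def parse_video_key(key):
    video_key = {}
    m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key)
    if m:
        video_key['upload_date'] = m.group('upload_date')
        video_key['part'] = int(m.group('part'))
    return video_key

print(parse_video_key('20160502_c4c62b9d_174361386_1'))
# -> {'upload_date': '20160502', 'part': 1}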

+ 10  - 18    youtube_dl/extractor/airmozilla.py

@@ -15,12 +15,12 @@ class AirMozillaIE(InfoExtractor):
_VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
_TEST = {
'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
'md5': '2e3e7486ba5d180e829d453875b9b8bf',
'md5': '8d02f53ee39cf006009180e21df1f3ba',
'info_dict': {
'id': '6x4q2w',
'ext': 'mp4',
'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
'thumbnail': r're:https?://vid\.ly/(?P<id>[0-9a-z-]+)/poster',
'thumbnail': r're:https?://.*/poster\.jpg',
'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
'timestamp': 1422487800,
'upload_date': '20150128',
@ -34,21 +34,13 @@ class AirMozillaIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
video_id = self._html_search_regex(r'//vid.ly/(.*?)/embed', webpage, 'id')
video_id = self._html_search_regex(r'//vid\.ly/(.*?)/embed', webpage, 'id')
embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id) embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id)
jwconfig = self._search_regex(r'\svar jwconfig = (\{.*?\});\s', embed_script, 'metadata')
metadata = self._parse_json(jwconfig, video_id)
formats = [{
'url': source['file'],
'ext': source['type'],
'format_id': self._search_regex(r'&format=(.*)$', source['file'], 'video format'),
'format': source['label'],
'height': int(source['label'].rstrip('p')),
} for source in metadata['playlist'][0]['sources']]
self._sort_formats(formats)
jwconfig = self._parse_json(self._search_regex(
r'initCallback\((.*)\);', embed_script, 'metadata'), video_id)['config']
info_dict = self._parse_jwplayer_data(jwconfig, video_id)
view_count = int_or_none(self._html_search_regex( view_count = int_or_none(self._html_search_regex(
r'Views since archived: ([0-9]+)', r'Views since archived: ([0-9]+)',
webpage, 'view count', fatal=False)) webpage, 'view count', fatal=False))
@ -58,17 +50,17 @@ class AirMozillaIE(InfoExtractor):
r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)', r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
webpage, 'duration', fatal=False)) webpage, 'duration', fatal=False))
return {
info_dict.update({
'id': video_id, 'id': video_id,
'title': self._og_search_title(webpage), 'title': self._og_search_title(webpage),
'formats': formats,
'url': self._og_search_url(webpage), 'url': self._og_search_url(webpage),
'display_id': display_id, 'display_id': display_id,
'thumbnail': metadata['playlist'][0].get('image'),
'description': self._og_search_description(webpage), 'description': self._og_search_description(webpage),
'timestamp': timestamp, 'timestamp': timestamp,
'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None), 'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None),
'duration': duration, 'duration': duration,
'view_count': view_count, 'view_count': view_count,
'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage), 'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage),
}
})
return info_dict
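The vid.ly embed script is now parsed through its initCallback(...) payload and handed to _parse_jwplayer_data(). A stdlib-only sketch of the regex/JSON step (the embed script body is made up):

import json
import re

embed_script = 'initCallback({"config": {"playlist": [{"sources": [{"file": "https://example.invalid/v.mp4"}]}]}});'
jwconfig = json.loads(re.search(r'initCallback\((.*)\);', embed_script).group(1))['config']
print(jwconfig['playlist'][0]['sources'][0]['file'])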

+ 53  - 0    youtube_dl/extractor/aliexpress.py

@ -0,0 +1,53 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
float_or_none,
try_get,
)
class AliExpressLiveIE(InfoExtractor):
_VALID_URL = r'https?://live\.aliexpress\.com/live/(?P<id>\d+)'
_TEST = {
'url': 'https://live.aliexpress.com/live/2800002704436634',
'md5': 'e729e25d47c5e557f2630eaf99b740a5',
'info_dict': {
'id': '2800002704436634',
'ext': 'mp4',
'title': 'CASIMA7.22',
'thumbnail': r're:http://.*\.jpg',
'uploader': 'CASIMA Official Store',
'timestamp': 1500717600,
'upload_date': '20170722',
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
data = self._parse_json(
self._search_regex(
r'(?s)runParams\s*=\s*({.+?})\s*;?\s*var',
webpage, 'runParams'),
video_id)
title = data['title']
formats = self._extract_m3u8_formats(
data['replyStreamUrl'], video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls')
return {
'id': video_id,
'title': title,
'thumbnail': data.get('coverUrl'),
'uploader': try_get(
data, lambda x: x['followBar']['name'], compat_str),
'timestamp': float_or_none(data.get('startTimeLong'), scale=1000),
'formats': formats,
}
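Everything the new AliExpressLive extractor needs comes from the page's runParams object; a minimal parse of the same shape on a fabricated snippet:

import json
import re

webpage = 'var runParams = {"title": "CASIMA7.22", "replyStreamUrl": "https://example.invalid/live.m3u8"} ;var other = 1;'
data = json.loads(re.search(r'(?s)runParams\s*=\s*({.+?})\s*;?\s*var', webpage).group(1))
print(data['title'], data['replyStreamUrl'])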

+ 6  - 3    youtube_dl/extractor/aljazeera.py

@@ -4,9 +4,9 @@ from .common import InfoExtractor
class AlJazeeraIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html'
_VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?:programmes|video)/.*?/(?P<id>[^/]+)\.html'
_TEST = {
_TESTS = [{
'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
'info_dict': {
'id': '3792260579001',
@@ -19,7 +19,10 @@ class AlJazeeraIE(InfoExtractor):
},
'add_ie': ['BrightcoveNew'],
'skip': 'Not accessible from Travis CI server',
}
}, {
'url': 'http://www.aljazeera.com/video/news/2017/05/sierra-leone-709-carat-diamond-auctioned-170511100111930.html',
'only_matching': True,
}]
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s'
def _real_extract(self, url):


+ 33  - 11    youtube_dl/extractor/allocine.py

@@ -2,9 +2,13 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
remove_end,
int_or_none,
qualities,
remove_end,
try_get,
unified_timestamp,
url_basename,
)
@ -22,6 +26,10 @@ class AllocineIE(InfoExtractor):
'title': 'Astérix - Le Domaine des Dieux Teaser VF', 'title': 'Astérix - Le Domaine des Dieux Teaser VF',
'description': 'md5:4a754271d9c6f16c72629a8a993ee884', 'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
'thumbnail': r're:http://.*\.jpg', 'thumbnail': r're:http://.*\.jpg',
'duration': 39,
'timestamp': 1404273600,
'upload_date': '20140702',
'view_count': int,
}, },
}, { }, {
'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html', 'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html',
@ -33,6 +41,10 @@ class AllocineIE(InfoExtractor):
'title': 'Planes 2 Bande-annonce VF', 'title': 'Planes 2 Bande-annonce VF',
'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway', 'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway',
'thumbnail': r're:http://.*\.jpg', 'thumbnail': r're:http://.*\.jpg',
'duration': 69,
'timestamp': 1385659800,
'upload_date': '20131128',
'view_count': int,
}, },
}, { }, {
'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html', 'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html',
@ -44,6 +56,10 @@ class AllocineIE(InfoExtractor):
'title': 'Dragons 2 - Bande annonce finale VF', 'title': 'Dragons 2 - Bande annonce finale VF',
'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a', 'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a',
'thumbnail': r're:http://.*\.jpg', 'thumbnail': r're:http://.*\.jpg',
'duration': 144,
'timestamp': 1397589900,
'upload_date': '20140415',
'view_count': int,
}, },
}, { }, {
'url': 'http://www.allocine.fr/video/video-19550147/', 'url': 'http://www.allocine.fr/video/video-19550147/',
@ -69,34 +85,37 @@ class AllocineIE(InfoExtractor):
r'data-model="([^"]+)"', webpage, 'data model', default=None) r'data-model="([^"]+)"', webpage, 'data model', default=None)
if model: if model:
model_data = self._parse_json(model, display_id) model_data = self._parse_json(model, display_id)
for video_url in model_data['sources'].values():
video = model_data['videos'][0]
title = video['title']
for video_url in video['sources'].values():
video_id, format_id = url_basename(video_url).split('_')[:2] video_id, format_id = url_basename(video_url).split('_')[:2]
formats.append({ formats.append({
'format_id': format_id, 'format_id': format_id,
'quality': quality(format_id), 'quality': quality(format_id),
'url': video_url, 'url': video_url,
}) })
title = model_data['title']
duration = int_or_none(video.get('duration'))
view_count = int_or_none(video.get('view_count'))
timestamp = unified_timestamp(try_get(
video, lambda x: x['added_at']['date'], compat_str))
else: else:
video_id = display_id video_id = display_id
media_data = self._download_json( media_data = self._download_json(
'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id) 'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
title = remove_end(
self._html_search_regex(
r'(?s)<title>(.+?)</title>', webpage, 'title').strip(),
' - AlloCiné')
for key, value in media_data['video'].items(): for key, value in media_data['video'].items():
if not key.endswith('Path'): if not key.endswith('Path'):
continue continue
format_id = key[:-len('Path')] format_id = key[:-len('Path')]
formats.append({ formats.append({
'format_id': format_id, 'format_id': format_id,
'quality': quality(format_id), 'quality': quality(format_id),
'url': value, 'url': value,
}) })
title = remove_end(self._html_search_regex(
r'(?s)<title>(.+?)</title>', webpage, 'title'
).strip(), ' - AlloCiné')
duration, view_count, timestamp = [None] * 3
self._sort_formats(formats) self._sort_formats(formats)
@ -104,7 +123,10 @@ class AllocineIE(InfoExtractor):
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'title': title, 'title': title,
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': self._og_search_thumbnail(webpage),
'duration': duration,
'timestamp': timestamp,
'view_count': view_count,
'formats': formats, 'formats': formats,
'description': self._og_search_description(webpage),
} }
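Duration, timestamp and view count now come from the first entry of the data model's videos list rather than from the top level. A tiny walk over a fabricated data-model payload showing the fields involved (unified_timestamp handles more formats than this; the strptime line only covers the plain 'YYYY-MM-DD HH:MM:SS' case):

import calendar
import json
from datetime import datetime

model = json.loads(
    '{"videos": [{"title": "Planes 2 Bande-annonce VF", "duration": 69, '
    '"view_count": 12345, "added_at": {"date": "2013-11-28 18:30:00"}}]}')
video = model['videos'][0]
timestamp = calendar.timegm(
    datetime.strptime(video['added_at']['date'], '%Y-%m-%d %H:%M:%S').timetuple())
print(video['title'], video['duration'], video['view_count'], timestamp)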

+ 33  - 13    youtube_dl/extractor/amcnetworks.py

@@ -3,14 +3,15 @@ from __future__ import unicode_literals
from .theplatform import ThePlatformIE
from ..utils import (
update_url_query,
parse_age_limit,
int_or_none,
parse_age_limit,
try_get,
update_url_query,
)
class AMCNetworksIE(ThePlatformIE):
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?[^/]+/episode-\d+(?:-(?:[^/]+/)?|/))(?P<id>[^/?#]+)'
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies|shows(?:/[^/]+)+)/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
'md5': '',
@ -44,6 +45,12 @@ class AMCNetworksIE(ThePlatformIE):
}, { }, {
'url': 'http://www.bbcamerica.com/shows/doctor-who/full-episodes/the-power-of-the-daleks/episode-01-episode-1-color-version', 'url': 'http://www.bbcamerica.com/shows/doctor-who/full-episodes/the-power-of-the-daleks/episode-01-episode-1-color-version',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://www.wetv.com/shows/mama-june-from-not-to-hot/full-episode/season-01/thin-tervention',
'only_matching': True,
}, {
'url': 'http://www.wetv.com/shows/la-hair/videos/season-05/episode-09-episode-9-2/episode-9-sneak-peek-3',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -53,20 +60,31 @@ class AMCNetworksIE(ThePlatformIE):
'mbr': 'true', 'mbr': 'true',
'manifest': 'm3u', 'manifest': 'm3u',
} }
media_url = self._search_regex(r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)', webpage, 'media url')
media_url = self._search_regex(
r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)',
webpage, 'media url')
theplatform_metadata = self._download_theplatform_metadata(self._search_regex( theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), display_id)
r'link\.theplatform\.com/s/([^?]+)',
media_url, 'theplatform_path'), display_id)
info = self._parse_theplatform_metadata(theplatform_metadata) info = self._parse_theplatform_metadata(theplatform_metadata)
video_id = theplatform_metadata['pid'] video_id = theplatform_metadata['pid']
title = theplatform_metadata['title'] title = theplatform_metadata['title']
rating = theplatform_metadata['ratings'][0]['rating']
auth_required = self._search_regex(r'window\.authRequired\s*=\s*(true|false);', webpage, 'auth required')
rating = try_get(
theplatform_metadata, lambda x: x['ratings'][0]['rating'])
auth_required = self._search_regex(
r'window\.authRequired\s*=\s*(true|false);',
webpage, 'auth required')
if auth_required == 'true': if auth_required == 'true':
requestor_id = self._search_regex(r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)', webpage, 'requestor id')
resource = self._get_mvpd_resource(requestor_id, title, video_id, rating)
query['auth'] = self._extract_mvpd_auth(url, video_id, requestor_id, resource)
requestor_id = self._search_regex(
r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)',
webpage, 'requestor id')
resource = self._get_mvpd_resource(
requestor_id, title, video_id, rating)
query['auth'] = self._extract_mvpd_auth(
url, video_id, requestor_id, resource)
media_url = update_url_query(media_url, query) media_url = update_url_query(media_url, query)
formats, subtitles = self._extract_theplatform_smil(media_url, video_id)
formats, subtitles = self._extract_theplatform_smil(
media_url, video_id)
self._sort_formats(formats) self._sort_formats(formats)
info.update({ info.update({
'id': video_id, 'id': video_id,
@ -78,9 +96,11 @@ class AMCNetworksIE(ThePlatformIE):
if ns_keys: if ns_keys:
ns = list(ns_keys)[0] ns = list(ns_keys)[0]
series = theplatform_metadata.get(ns + '$show') series = theplatform_metadata.get(ns + '$show')
season_number = int_or_none(theplatform_metadata.get(ns + '$season'))
season_number = int_or_none(
theplatform_metadata.get(ns + '$season'))
episode = theplatform_metadata.get(ns + '$episodeTitle') episode = theplatform_metadata.get(ns + '$episodeTitle')
episode_number = int_or_none(theplatform_metadata.get(ns + '$episode'))
episode_number = int_or_none(
theplatform_metadata.get(ns + '$episode'))
if season_number: if season_number:
title = 'Season %d - %s' % (season_number, title) title = 'Season %d - %s' % (season_number, title)
if series: if series:


+ 85  - 0    youtube_dl/extractor/americastestkitchen.py

@ -0,0 +1,85 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
clean_html,
int_or_none,
try_get,
unified_strdate,
)
class AmericasTestKitchenIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.americastestkitchen.com/episode/548-summer-dinner-party',
'md5': 'b861c3e365ac38ad319cfd509c30577f',
'info_dict': {
'id': '1_5g5zua6e',
'title': 'Summer Dinner Party',
'ext': 'mp4',
'description': 'md5:858d986e73a4826979b6a5d9f8f6a1ec',
'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1497285541,
'upload_date': '20170612',
'uploader_id': 'roger.metcalf@americastestkitchen.com',
'release_date': '20170617',
'series': "America's Test Kitchen",
'season_number': 17,
'episode': 'Summer Dinner Party',
'episode_number': 24,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
partner_id = self._search_regex(
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
webpage, 'kaltura partner id')
video_data = self._parse_json(
self._search_regex(
r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
webpage, 'initial context'),
video_id)
ep_data = try_get(
video_data,
(lambda x: x['episodeDetail']['content']['data'],
lambda x: x['videoDetail']['content']['data']), dict)
ep_meta = ep_data.get('full_video', {})
external_id = ep_data.get('external_id') or ep_meta['external_id']
title = ep_data.get('title') or ep_meta.get('title')
description = clean_html(ep_meta.get('episode_description') or ep_data.get(
'description') or ep_meta.get('description'))
thumbnail = try_get(ep_meta, lambda x: x['photo']['image_url'])
release_date = unified_strdate(ep_data.get('aired_at'))
season_number = int_or_none(ep_meta.get('season_number'))
episode = ep_meta.get('title')
episode_number = int_or_none(ep_meta.get('episode_number'))
return {
'_type': 'url_transparent',
'url': 'kaltura:%s:%s' % (partner_id, external_id),
'ie_key': 'Kaltura',
'title': title,
'description': description,
'thumbnail': thumbnail,
'release_date': release_date,
'series': "America's Test Kitchen",
'season_number': season_number,
'episode': episode,
'episode_number': episode_number,
}
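Playback is delegated to the Kaltura extractor via a url_transparent result; the essential step is building the kaltura:<partner_id>:<entry_id> pointer. A sketch of that step on an invented page snippet (the partner id below is hypothetical; the entry id is the one from the test above):

import json
import re

webpage = ('<script>window.__INITIAL_STATE__ = {"episodeDetail": {"content": '
           '{"data": {"external_id": "1_5g5zua6e", "title": "Summer Dinner Party"}}}} ;</script>')
state = json.loads(re.search(
    r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>', webpage).group(1))
ep_data = state['episodeDetail']['content']['data']
partner_id = '123456'  # in the extractor this comes from the embedded kaltura.com player URL
print('kaltura:%s:%s' % (partner_id, ep_data['external_id']))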

+ 19  - 7    youtube_dl/extractor/amp.py

@@ -7,15 +7,19 @@ from ..utils import (
parse_iso8601,
mimetype2ext,
determine_ext,
ExtractorError,
)
class AMPIE(InfoExtractor):
# parse Akamai Adaptive Media Player feed
def _extract_feed_info(self, url):
item = self._download_json(
feed = self._download_json(
url, None, 'Downloading Akamai AMP feed',
'Unable to download Akamai AMP feed')['channel']['item']
'Unable to download Akamai AMP feed')
item = feed.get('channel', {}).get('item')
if not item:
raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error']))
video_id = item['guid']
@ -30,9 +34,12 @@ class AMPIE(InfoExtractor):
if isinstance(media_thumbnail, dict): if isinstance(media_thumbnail, dict):
media_thumbnail = [media_thumbnail] media_thumbnail = [media_thumbnail]
for thumbnail_data in media_thumbnail: for thumbnail_data in media_thumbnail:
thumbnail = thumbnail_data['@attributes']
thumbnail = thumbnail_data.get('@attributes', {})
thumbnail_url = thumbnail.get('url')
if not thumbnail_url:
continue
thumbnails.append({ thumbnails.append({
'url': self._proto_relative_url(thumbnail['url'], 'http:'),
'url': self._proto_relative_url(thumbnail_url, 'http:'),
'width': int_or_none(thumbnail.get('width')), 'width': int_or_none(thumbnail.get('width')),
'height': int_or_none(thumbnail.get('height')), 'height': int_or_none(thumbnail.get('height')),
}) })
@ -43,9 +50,14 @@ class AMPIE(InfoExtractor):
if isinstance(media_subtitle, dict): if isinstance(media_subtitle, dict):
media_subtitle = [media_subtitle] media_subtitle = [media_subtitle]
for subtitle_data in media_subtitle: for subtitle_data in media_subtitle:
subtitle = subtitle_data['@attributes']
lang = subtitle.get('lang') or 'en'
subtitles[lang] = [{'url': subtitle['href']}]
subtitle = subtitle_data.get('@attributes', {})
subtitle_href = subtitle.get('href')
if not subtitle_href:
continue
subtitles.setdefault(subtitle.get('lang') or 'en', []).append({
'url': subtitle_href,
'ext': mimetype2ext(subtitle.get('type')) or determine_ext(subtitle_href),
})
formats = [] formats = []
media_content = get_media_node('content') media_content = get_media_node('content')
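Editor's note: the subtitle change above switches from one track per language to accumulation, so later tracks no longer overwrite earlier ones. A tiny sketch of the setdefault/append pattern with invented data:

subtitles = {}
for lang, href in [('en', 'a.vtt'), ('en', 'b.srt'), ('de', 'c.vtt')]:
    subtitles.setdefault(lang or 'en', []).append({'url': href})
print(subtitles['en'])  # keeps both English tracks instead of only the last one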


+44 -22 youtube_dl/extractor/animeondemand.py

@@ -3,16 +3,13 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..compat import (
-    compat_urlparse,
-    compat_str,
-)
+from ..compat import compat_str
 from ..utils import (
     determine_ext,
     extract_attributes,
     ExtractorError,
-    sanitized_Request,
     urlencode_postdata,
+    urljoin,
 )
@@ -21,6 +18,8 @@ class AnimeOnDemandIE(InfoExtractor):
     _LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in'
     _APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
     _NETRC_MACHINE = 'animeondemand'
+    # German-speaking countries of Europe
+    _GEO_COUNTRIES = ['AT', 'CH', 'DE', 'LI', 'LU']
     _TESTS = [{
         # jap, OmU
         'url': 'https://www.anime-on-demand.de/anime/161',
@@ -46,6 +45,10 @@ class AnimeOnDemandIE(InfoExtractor):
         # Full length film, non-series, ger/jap, Dub/OmU, account required
         'url': 'https://www.anime-on-demand.de/anime/185',
         'only_matching': True,
+    }, {
+        # Flash videos
+        'url': 'https://www.anime-on-demand.de/anime/12',
+        'only_matching': True,
     }]

     def _login(self):
@@ -72,19 +75,18 @@ class AnimeOnDemandIE(InfoExtractor):
             'post url', default=self._LOGIN_URL, group='url')

         if not post_url.startswith('http'):
-            post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
-
-        request = sanitized_Request(
-            post_url, urlencode_postdata(login_form))
-        request.add_header('Referer', self._LOGIN_URL)
+            post_url = urljoin(self._LOGIN_URL, post_url)

         response = self._download_webpage(
-            request, None, 'Logging in as %s' % username)
+            post_url, None, 'Logging in',
+            data=urlencode_postdata(login_form), headers={
+                'Referer': self._LOGIN_URL,
+            })

         if all(p not in response for p in ('>Logout<', 'href="/users/sign_out"')):
             error = self._search_regex(
-                r'<p class="alert alert-danger">(.+?)</p>',
-                response, 'error', default=None)
+                r'<p[^>]+\bclass=(["\'])(?:(?!\1).)*\balert\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</p>',
+                response, 'error', default=None, group='error')
             if error:
                 raise ExtractorError('Unable to login: %s' % error, expected=True)
             raise ExtractorError('Unable to log in')
@@ -120,10 +122,11 @@ class AnimeOnDemandIE(InfoExtractor):
         formats = []

         for input_ in re.findall(
-                r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', html):
+                r'<input[^>]+class=["\'].*?streamstarter[^>]+>', html):
             attributes = extract_attributes(input_)
+            title = attributes.get('data-dialog-header')
             playlist_urls = []
-            for playlist_key in ('data-playlist', 'data-otherplaylist'):
+            for playlist_key in ('data-playlist', 'data-otherplaylist', 'data-stream'):
                 playlist_url = attributes.get(playlist_key)
                 if isinstance(playlist_url, compat_str) and re.match(
                         r'/?[\da-zA-Z]+', playlist_url):
@@ -147,19 +150,38 @@ class AnimeOnDemandIE(InfoExtractor):
                     format_id_list.append(compat_str(num))
                 format_id = '-'.join(format_id_list)
                 format_note = ', '.join(filter(None, (kind, lang_note)))
-                request = sanitized_Request(
-                    compat_urlparse.urljoin(url, playlist_url),
+                item_id_list = []
+                if format_id:
+                    item_id_list.append(format_id)
+                item_id_list.append('videomaterial')
+                playlist = self._download_json(
+                    urljoin(url, playlist_url), video_id,
+                    'Downloading %s JSON' % ' '.join(item_id_list),
                     headers={
                         'X-Requested-With': 'XMLHttpRequest',
                         'X-CSRF-Token': csrf_token,
                         'Referer': url,
                         'Accept': 'application/json, text/javascript, */*; q=0.01',
-                    })
-                playlist = self._download_json(
-                    request, video_id, 'Downloading %s playlist JSON' % format_id,
-                    fatal=False)
+                    }, fatal=False)
                 if not playlist:
                     continue
+                stream_url = playlist.get('streamurl')
+                if stream_url:
+                    rtmp = re.search(
+                        r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',
+                        stream_url)
+                    if rtmp:
+                        formats.append({
+                            'url': rtmp.group('url'),
+                            'app': rtmp.group('app'),
+                            'play_path': rtmp.group('playpath'),
+                            'page_url': url,
+                            'player_url': 'https://www.anime-on-demand.de/assets/jwplayer.flash-55abfb34080700304d49125ce9ffb4a6.swf',
+                            'rtmp_real_time': True,
+                            'format_id': 'rtmp',
+                            'ext': 'flv',
+                        })
+                        continue
                 start_video = playlist.get('startvideo', 0)
                 playlist = playlist.get('playlist')
                 if not playlist or not isinstance(playlist, list):
@@ -222,7 +244,7 @@ class AnimeOnDemandIE(InfoExtractor):
                         f.update({
                             'id': '%s-%s' % (f['id'], m.group('kind').lower()),
                             'title': m.group('title'),
-                            'url': compat_urlparse.urljoin(url, m.group('href')),
+                            'url': urljoin(url, m.group('href')),
                         })
                         entries.append(f)
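Editor's note: the new RTMP branch above splits playlist['streamurl'] into the pieces the RTMP downloader needs. A quick demonstration of that regex on a made-up URL:

import re

stream_url = 'rtmpe://example.anime-on-demand.de/vod/some/path/mp4:episode1'
rtmp = re.search(
    r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',
    stream_url)
print(rtmp.group('url'))       # rtmpe://example.anime-on-demand.de/vod/some/path/
print(rtmp.group('app'))       # vod/some/path/
print(rtmp.group('playpath'))  # mp4:episode1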


+70 -14 youtube_dl/extractor/anvato.py

@@ -5,6 +5,7 @@ import base64
 import hashlib
 import json
 import random
+import re
 import time

 from .common import InfoExtractor
@@ -16,6 +17,8 @@ from ..utils import (
     intlist_to_bytes,
     int_or_none,
     strip_jsonp,
+    unescapeHTML,
+    unsmuggle_url,
 )
@@ -26,6 +29,8 @@ def md5_text(s):


 class AnvatoIE(InfoExtractor):
+    _VALID_URL = r'anvato:(?P<access_key_or_mcp>[^:]+):(?P<id>\d+)'
+
     # Copied from anvplayer.min.js
     _ANVACK_TABLE = {
         'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ',
@@ -114,6 +119,22 @@ class AnvatoIE(InfoExtractor):
         'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ'
     }

+    _MCP_TO_ACCESS_KEY_TABLE = {
+        'qa': 'anvato_mcpqa_demo_web_stage_18b55e00db5a13faa8d03ae6e41f6f5bcb15b922',
+        'lin': 'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749',
+        'univison': 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa',
+        'uni': 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa',
+        'dev': 'anvato_mcp_fs2go_web_prod_c7b90a93e171469cdca00a931211a2f556370d0a',
+        'sps': 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336',
+        'spsstg': 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336',
+        'anv': 'anvato_mcp_anv_web_prod_791407490f4c1ef2a4bcb21103e0cb1bcb3352b3',
+        'gray': 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900',
+        'hearst': 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99',
+        'cbs': 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe',
+        'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
+    }
+
+    _ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
+
     _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'

     def __init__(self, *args, **kwargs):
@@ -177,17 +198,16 @@ class AnvatoIE(InfoExtractor):
                 'tbr': tbr if tbr != 0 else None,
             }

-            if ext == 'm3u8' or media_format in ('m3u8', 'm3u8-variant'):
-                # Not using _extract_m3u8_formats here as individual media
-                # playlists are also included in published_urls.
-                if tbr is None:
-                    formats.append(self._m3u8_meta_format(video_url, ext='mp4', m3u8_id='hls'))
-                    continue
-                else:
-                    a_format.update({
-                        'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
-                        'ext': 'mp4',
-                    })
+            if media_format == 'm3u8' and tbr is not None:
+                a_format.update({
+                    'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
+                    'ext': 'mp4',
+                })
+            elif media_format == 'm3u8-variant' or ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False))
+                continue
             elif ext == 'mp3' or media_format == 'mp3':
                 a_format['vcodec'] = 'none'
             else:
@@ -222,9 +242,45 @@ class AnvatoIE(InfoExtractor):
             'subtitles': subtitles,
         }

+    @staticmethod
+    def _extract_urls(ie, webpage, video_id):
+        entries = []
+        for mobj in re.finditer(AnvatoIE._ANVP_RE, webpage):
+            anvplayer_data = ie._parse_json(
+                mobj.group('anvp'), video_id, transform_source=unescapeHTML,
+                fatal=False)
+            if not anvplayer_data:
+                continue
+            video = anvplayer_data.get('video')
+            if not isinstance(video, compat_str) or not video.isdigit():
+                continue
+            access_key = anvplayer_data.get('accessKey')
+            if not access_key:
+                mcp = anvplayer_data.get('mcp')
+                if mcp:
+                    access_key = AnvatoIE._MCP_TO_ACCESS_KEY_TABLE.get(
+                        mcp.lower())
+            if not access_key:
+                continue
+            entries.append(ie.url_result(
+                'anvato:%s:%s' % (access_key, video), ie=AnvatoIE.ie_key(),
+                video_id=video))
+        return entries
+
     def _extract_anvato_videos(self, webpage, video_id):
-        anvplayer_data = self._parse_json(self._html_search_regex(
-            r'<script[^>]+data-anvp=\'([^\']+)\'', webpage,
-            'Anvato player data'), video_id)
+        anvplayer_data = self._parse_json(
+            self._html_search_regex(
+                self._ANVP_RE, webpage, 'Anvato player data', group='anvp'),
+            video_id)
         return self._get_anvato_videos(
             anvplayer_data['accessKey'], anvplayer_data['video'])
+
+    def _real_extract(self, url):
+        url, smuggled_data = unsmuggle_url(url, {})
+        self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
+
+        mobj = re.match(self._VALID_URL, url)
+        access_key, video_id = mobj.group('access_key_or_mcp', 'id')
+        if access_key not in self._ANVACK_TABLE:
+            access_key = self._MCP_TO_ACCESS_KEY_TABLE[access_key]
+        return self._get_anvato_videos(access_key, video_id)
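Editor's note: the new anvato:<access key>:<video id> scheme also accepts an MCP alias in place of a full access key, both in _real_extract and in _extract_urls. A rough illustration of that mapping with fabricated player data:

anvplayer_data = {'mcp': 'LIN', 'video': '4465496'}  # fabricated example
mcp_to_access_key = {
    'lin': 'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749',
}
access_key = anvplayer_data.get('accessKey') or mcp_to_access_key[anvplayer_data['mcp'].lower()]
print('anvato:%s:%s' % (access_key, anvplayer_data['video']))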

+30 -19 youtube_dl/extractor/aparat.py

@@ -3,13 +3,13 @@ from __future__ import unicode_literals

 from .common import InfoExtractor
 from ..utils import (
-    ExtractorError,
-    HEADRequest,
+    int_or_none,
+    mimetype2ext,
 )


 class AparatIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'

     _TEST = {
         'url': 'http://www.aparat.com/v/wP8On',
@@ -29,30 +29,41 @@ class AparatIE(InfoExtractor):
         # Note: There is an easier-to-parse configuration at
         # http://www.aparat.com/video/video/config/videohash/%video_id
         # but the URL in there does not work
-        embed_url = 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id
-        webpage = self._download_webpage(embed_url, video_id)
-
-        file_list = self._parse_json(self._search_regex(
-            r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage, 'file list'), video_id)
-        for i, item in enumerate(file_list[0]):
-            video_url = item['file']
-            req = HEADRequest(video_url)
-            res = self._request_webpage(
-                req, video_id, note='Testing video URL %d' % i, errnote=False)
-            if res:
-                break
-        else:
-            raise ExtractorError('No working video URLs found')
+        webpage = self._download_webpage(
+            'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
+            video_id)

         title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')

+        file_list = self._parse_json(
+            self._search_regex(
+                r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage,
+                'file list'),
+            video_id)
+
+        formats = []
+        for item in file_list[0]:
+            file_url = item.get('file')
+            if not file_url:
+                continue
+            ext = mimetype2ext(item.get('type'))
+            label = item.get('label')
+            formats.append({
+                'url': file_url,
+                'ext': ext,
+                'format_id': label or ext,
+                'height': int_or_none(self._search_regex(
+                    r'(\d+)[pP]', label or '', 'height', default=None)),
+            })
+        self._sort_formats(formats)
+
         thumbnail = self._search_regex(
             r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)

         return {
             'id': video_id,
             'title': title,
-            'url': video_url,
-            'ext': 'mp4',
             'thumbnail': thumbnail,
             'age_limit': self._family_friendly_search(webpage),
+            'formats': formats,
         }
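Editor's note: the aparat rewrite derives each format's height from labels such as "720p". A minimal check of that regex with invented labels:

import re

for label in ('480p', '720P', 'unknown'):
    m = re.search(r'(\d+)[pP]', label)
    print(label, int(m.group(1)) if m else None)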

+2 -2 youtube_dl/extractor/appleconnect.py

@@ -12,13 +12,13 @@ class AppleConnectIE(InfoExtractor):
     _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)'
     _TEST = {
         'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
-        'md5': '10d0f2799111df4cb1c924520ca78f98',
+        'md5': 'e7c38568a01ea45402570e6029206723',
         'info_dict': {
             'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
             'ext': 'm4v',
             'title': 'Energy',
             'uploader': 'Drake',
-            'thumbnail': 'http://is5.mzstatic.com/image/thumb/Video5/v4/78/61/c5/7861c5fa-ad6d-294b-1464-cf7605b911d6/source/1920x1080sr.jpg',
+            'thumbnail': r're:^https?://.*\.jpg$',
             'upload_date': '20150710',
             'timestamp': 1436545535,
         },


+5 -4 youtube_dl/extractor/appletrailers.py

@@ -70,7 +70,8 @@ class AppleTrailersIE(InfoExtractor):
     }, {
         'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/',
         'info_dict': {
-            'id': 'blackthorn',
+            'id': '4489',
+            'title': 'Blackthorn',
         },
         'playlist_mincount': 2,
         'expected_warnings': ['Unable to download JSON metadata'],
@@ -116,7 +117,7 @@ class AppleTrailersIE(InfoExtractor):
                     continue
                 formats.append({
                     'format_id': '%s-%s' % (version, size),
-                    'url': re.sub(r'_(\d+p.mov)', r'_h\1', src),
+                    'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src),
                     'width': int_or_none(size_data.get('width')),
                     'height': int_or_none(size_data.get('height')),
                     'language': version[:2],
@@ -178,7 +179,7 @@ class AppleTrailersIE(InfoExtractor):
         formats = []
         for format in settings['metadata']['sizes']:
             # The src is a file pointing to the real video file
-            format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src'])
+            format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src'])
             formats.append({
                 'url': format_url,
                 'format': format['type'],
@@ -261,7 +262,7 @@ class AppleTrailersSectionIE(InfoExtractor):
             'title': 'Most Popular',
             'id': 'mostpopular',
         },
-        'playlist_mincount': 80,
+        'playlist_mincount': 30,
     }, {
         'url': 'http://trailers.apple.com/#section=moviestudios',
         'info_dict': {


+4 -4 youtube_dl/extractor/archiveorg.py

@@ -1,13 +1,13 @@
 from __future__ import unicode_literals

-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
 from ..utils import (
     unified_strdate,
     clean_html,
 )


-class ArchiveOrgIE(JWPlatformBaseIE):
+class ArchiveOrgIE(InfoExtractor):
     IE_NAME = 'archive.org'
     IE_DESC = 'archive.org videos'
     _VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$'
@@ -24,12 +24,12 @@ class ArchiveOrgIE(InfoExtractor):
         }
     }, {
         'url': 'https://archive.org/details/Cops1922',
-        'md5': 'bc73c8ab3838b5a8fc6c6651fa7b58ba',
+        'md5': '0869000b4ce265e8ca62738b336b268a',
         'info_dict': {
             'id': 'Cops1922',
             'ext': 'mp4',
             'title': 'Buster Keaton\'s "Cops" (1922)',
-            'description': 'md5:b4544662605877edd99df22f9620d858',
+            'description': 'md5:89e7c77bf5d965dd5c0372cfb49470f6',
         }
     }, {
         'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',


+16 -7 youtube_dl/extractor/ard.py

@@ -5,6 +5,7 @@ import re

 from .common import InfoExtractor
 from .generic import GenericIE
+from ..compat import compat_str
 from ..utils import (
     determine_ext,
     ExtractorError,
@@ -93,6 +94,7 @@ class ARDMediathekIE(InfoExtractor):

         duration = int_or_none(media_info.get('_duration'))
         thumbnail = media_info.get('_previewImage')
+        is_live = media_info.get('_isLive') is True

         subtitles = {}
         subtitle_url = media_info.get('_subtitleUrl')
@@ -106,6 +108,7 @@ class ARDMediathekIE(InfoExtractor):
             'id': video_id,
             'duration': duration,
             'thumbnail': thumbnail,
+            'is_live': is_live,
             'formats': formats,
             'subtitles': subtitles,
         }
@@ -124,6 +127,8 @@ class ARDMediathekIE(InfoExtractor):
                 quality = stream.get('_quality')
                 server = stream.get('_server')
                 for stream_url in stream_urls:
+                    if not isinstance(stream_url, compat_str) or '//' not in stream_url:
+                        continue
                     ext = determine_ext(stream_url)
                     if quality != 'auto' and ext in ('f4m', 'm3u8'):
                         continue
@@ -144,13 +149,11 @@ class ARDMediathekIE(InfoExtractor):
                             'play_path': stream_url,
                             'format_id': 'a%s-rtmp-%s' % (num, quality),
                         }
-                    elif stream_url.startswith('http'):
+                    else:
                         f = {
                             'url': stream_url,
                             'format_id': 'a%s-%s-%s' % (num, ext, quality)
                         }
-                    else:
-                        continue
                     m = re.search(r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$', stream_url)
                     if m:
                         f.update({
@@ -166,9 +169,11 @@ class ARDMediathekIE(InfoExtractor):
         # determine video id from url
         m = re.match(self._VALID_URL, url)

+        document_id = None
+
         numid = re.search(r'documentId=([0-9]+)', url)
         if numid:
-            video_id = numid.group(1)
+            document_id = video_id = numid.group(1)
         else:
             video_id = m.group('video_id')
@@ -191,7 +196,7 @@ class ARDMediathekIE(InfoExtractor):

         title = self._html_search_regex(
             [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
-             r'<meta name="dcterms.title" content="(.*?)"/>',
+             r'<meta name="dcterms\.title" content="(.*?)"/>',
              r'<h4 class="headline">(.*?)</h4>'],
             webpage, 'title')
         description = self._html_search_meta(
@@ -228,12 +233,16 @@ class ARDMediathekIE(InfoExtractor):
                 'formats': formats,
             }
         else:  # request JSON file
+            if not document_id:
+                video_id = self._search_regex(
+                    r'/play/(?:config|media)/(\d+)', webpage, 'media id')
             info = self._extract_media_info(
-                'http://www.ardmediathek.de/play/media/%s' % video_id, webpage, video_id)
+                'http://www.ardmediathek.de/play/media/%s' % video_id,
+                webpage, video_id)

             info.update({
                 'id': video_id,
-                'title': title,
+                'title': self._live_title(title) if info.get('is_live') else title,
                 'description': description,
                 'thumbnail': thumbnail,
             })
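Editor's note: ARD still reads width and height from file names ending in _WxH.mp4; the sample URL below is made up only to show the pattern:

import re

stream_url = 'http://example.org/some/clip_640x360.mp4'
m = re.search(r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$', stream_url)
if m:
    print(int(m.group('width')), int(m.group('height')))  # 640 360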


+1 -2 youtube_dl/extractor/arkena.py

@@ -93,8 +93,7 @@ class ArkenaIE(InfoExtractor):
             exts = (mimetype2ext(f.get('Type')), determine_ext(f_url, None))
             if kind == 'm3u8' or 'm3u8' in exts:
                 formats.extend(self._extract_m3u8_formats(
-                    f_url, video_id, 'mp4',
-                    entry_protocol='m3u8' if is_live else 'm3u8_native',
+                    f_url, video_id, 'mp4', 'm3u8_native',
                     m3u8_id=kind, fatal=False, live=is_live))
             elif kind == 'flash' or 'f4m' in exts:
                 formats.extend(self._extract_f4m_formats(


+19 -3 youtube_dl/extractor/arte.py

@@ -6,15 +6,18 @@ import re
 from .common import InfoExtractor
 from ..compat import (
     compat_parse_qs,
+    compat_str,
     compat_urllib_parse_urlparse,
 )
 from ..utils import (
+    ExtractorError,
     find_xpath_attr,
-    unified_strdate,
     get_element_by_attribute,
     int_or_none,
     NO_DEFAULT,
     qualities,
+    try_get,
+    unified_strdate,
 )

 # There are different sources of video in arte.tv, the extraction process
@@ -79,6 +82,16 @@ class ArteTVBaseIE(InfoExtractor):
         info = self._download_json(json_url, video_id)
         player_info = info['videoJsonPlayer']

+        vsr = try_get(player_info, lambda x: x['VSR'], dict)
+        if not vsr:
+            error = None
+            if try_get(player_info, lambda x: x['custom_msg']['type']) == 'error':
+                error = try_get(
+                    player_info, lambda x: x['custom_msg']['msg'], compat_str)
+            if not error:
+                error = 'Video %s is not available' % player_info.get('VID') or video_id
+            raise ExtractorError(error, expected=True)
+
         upload_date_str = player_info.get('shootingDate')
         if not upload_date_str:
             upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
@@ -107,7 +120,7 @@ class ArteTVBaseIE(InfoExtractor):
         langcode = LANGS.get(lang, lang)

         formats = []
-        for format_id, format_dict in player_info['VSR'].items():
+        for format_id, format_dict in vsr.items():
             f = dict(format_dict)
             versionCode = f.get('versionCode')
             l = re.escape(langcode)
@@ -180,7 +193,7 @@ class ArteTVBaseIE(InfoExtractor):

 class ArteTVPlus7IE(ArteTVBaseIE):
     IE_NAME = 'arte.tv:+7'
-    _VALID_URL = r'https?://(?:(?:www|sites)\.)?arte\.tv/[^/]+/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:(?:www|sites)\.)?arte\.tv/(?:[^/]+/)?(?P<lang>fr|de|en|es)/(?:videos/)?(?:[^/]+/)*(?P<id>[^/?#&]+)'

     _TESTS = [{
         'url': 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D',
@@ -188,6 +201,9 @@ class ArteTVPlus7IE(ArteTVBaseIE):
     }, {
         'url': 'http://sites.arte.tv/karambolage/de/video/karambolage-22',
         'only_matching': True,
+    }, {
+        'url': 'http://www.arte.tv/de/videos/048696-000-A/der-kluge-bauch-unser-zweites-gehirn',
+        'only_matching': True,
     }]

     @classmethod


+93 -0 youtube_dl/extractor/asiancrush.py

@@ -0,0 +1,93 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .kaltura import KalturaIE
from ..utils import (
extract_attributes,
remove_end,
urlencode_postdata,
)
class AsianCrushIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?asiancrush\.com/video/(?:[^/]+/)?0+(?P<id>\d+)v\b'
_TESTS = [{
'url': 'https://www.asiancrush.com/video/012869v/women-who-flirt/',
'md5': 'c3b740e48d0ba002a42c0b72857beae6',
'info_dict': {
'id': '1_y4tmjm5r',
'ext': 'mp4',
'title': 'Women Who Flirt',
'description': 'md5:3db14e9186197857e7063522cb89a805',
'timestamp': 1496936429,
'upload_date': '20170608',
'uploader_id': 'craig@crifkin.com',
},
}, {
'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
data = self._download_json(
'https://www.asiancrush.com/wp-admin/admin-ajax.php', video_id,
data=urlencode_postdata({
'postid': video_id,
'action': 'get_channel_kaltura_vars',
}))
entry_id = data['entry_id']
return self.url_result(
'kaltura:%s:%s' % (data['partner_id'], entry_id),
ie=KalturaIE.ie_key(), video_id=entry_id,
video_title=data.get('vid_label'))
class AsianCrushPlaylistIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?asiancrush\.com/series/0+(?P<id>\d+)s\b'
_TEST = {
'url': 'https://www.asiancrush.com/series/012481s/scholar-walks-night/',
'info_dict': {
'id': '12481',
'title': 'Scholar Who Walks the Night',
'description': 'md5:7addd7c5132a09fd4741152d96cce886',
},
'playlist_count': 20,
}
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
entries = []
for mobj in re.finditer(
r'<a[^>]+href=(["\'])(?P<url>%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL,
webpage):
attrs = extract_attributes(mobj.group(0))
if attrs.get('class') == 'clearfix':
entries.append(self.url_result(
mobj.group('url'), ie=AsianCrushIE.ie_key()))
title = remove_end(
self._html_search_regex(
r'(?s)<h1\b[^>]\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
'title', default=None) or self._og_search_title(
webpage, default=None) or self._html_search_meta(
'twitter:title', webpage, 'title',
default=None) or self._search_regex(
r'<title>([^<]+)</title>', webpage, 'title', fatal=False),
' | AsianCrush')
description = self._og_search_description(
webpage, default=None) or self._html_search_meta(
'twitter:description', webpage, 'description', fatal=False)
return self.playlist_result(entries, playlist_id, title, description)

+13 -8 youtube_dl/extractor/atresplayer.py

@@ -36,7 +36,7 @@ class AtresPlayerIE(InfoExtractor):
         },
         {
             'url': 'http://www.atresplayer.com/television/especial/videoencuentros/temporada-1/capitulo-112-david-bustamante_2014121600375.html',
-            'md5': '0d0e918533bbd4b263f2de4d197d4aac',
+            'md5': '6e52cbb513c405e403dbacb7aacf8747',
             'info_dict': {
                 'id': 'capitulo-112-david-bustamante',
                 'ext': 'flv',
@@ -87,10 +87,11 @@ class AtresPlayerIE(InfoExtractor):
             self._LOGIN_URL, urlencode_postdata(login_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
         response = self._download_webpage(
-            request, None, 'Logging in as %s' % username)
+            request, None, 'Logging in')

         error = self._html_search_regex(
-            r'(?s)<ul class="list_error">(.+?)</ul>', response, 'error', default=None)
+            r'(?s)<ul[^>]+class="[^"]*\blist_error\b[^"]*">(.+?)</ul>',
+            response, 'error', default=None)
         if error:
             raise ExtractorError(
                 'Unable to login: %s' % error, expected=True)
@@ -155,13 +156,17 @@ class AtresPlayerIE(InfoExtractor):
             if format_id == 'token' or not video_url.startswith('http'):
                 continue
             if 'geodeswowsmpra3player' in video_url:
-                f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
-                f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
+                # f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
+                # f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
                 # this videos are protected by DRM, the f4m downloader doesn't support them
                 continue
-            else:
-                f4m_url = video_url[:-9] + '/manifest.f4m'
-            formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
+            video_url_hd = video_url.replace('free_es', 'es')
+            formats.extend(self._extract_f4m_formats(
+                video_url_hd[:-9] + '/manifest.f4m', video_id, f4m_id='hds',
+                fatal=False))
+            formats.extend(self._extract_mpd_formats(
+                video_url_hd[:-9] + '/manifest.mpd', video_id, mpd_id='dash',
+                fatal=False))
         self._sort_formats(formats)

         path_data = player.get('pathData')


+73 -0 youtube_dl/extractor/atvat.py

@@ -0,0 +1,73 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
unescapeHTML,
)
class ATVAtIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?atv\.at/(?:[^/]+/){2}(?P<id>[dv]\d+)'
_TESTS = [{
'url': 'http://atv.at/aktuell/di-210317-2005-uhr/v1698449/',
'md5': 'c3b6b975fb3150fc628572939df205f2',
'info_dict': {
'id': '1698447',
'ext': 'mp4',
'title': 'DI, 21.03.17 | 20:05 Uhr 1/1',
}
}, {
'url': 'http://atv.at/aktuell/meinrad-knapp/d8416/',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_data = self._parse_json(unescapeHTML(self._search_regex(
r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="([^"]+)"',
webpage, 'player data')), display_id)['config']['initial_video']
video_id = video_data['id']
video_title = video_data['title']
parts = []
for part in video_data.get('parts', []):
part_id = part['id']
part_title = part['title']
formats = []
for source in part.get('sources', []):
source_url = source.get('src')
if not source_url:
continue
ext = determine_ext(source_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
source_url, part_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
else:
formats.append({
'format_id': source.get('delivery'),
'url': source_url,
})
self._sort_formats(formats)
parts.append({
'id': part_id,
'title': part_title,
'thumbnail': part.get('preview_image_url'),
'duration': int_or_none(part.get('duration')),
'is_live': part.get('is_livestream'),
'formats': formats,
})
return {
'_type': 'multi_video',
'id': video_id,
'title': video_title,
'entries': parts,
}

+2 -2 youtube_dl/extractor/audioboom.py

@@ -16,7 +16,7 @@ class AudioBoomIE(InfoExtractor):
             'title': '3/09/2016 Czaban Hour 3',
             'description': 'Guest: Nate Davis - NFL free agency, Guest: Stan Gans',
             'duration': 2245.72,
-            'uploader': 'Steve Czaban',
+            'uploader': 'SB Nation A.M.',
             'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio',
         }
     }, {
@@ -43,7 +43,7 @@ class AudioBoomIE(InfoExtractor):

         def from_clip(field):
             if clip:
-                clip.get(field)
+                return clip.get(field)

         audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property(
             'audio', webpage, 'audio url')


+78 -0 youtube_dl/extractor/aws.py

@@ -0,0 +1,78 @@
# coding: utf-8
from __future__ import unicode_literals
import datetime
import hashlib
import hmac
from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlencode
class AWSIE(InfoExtractor):
_AWS_ALGORITHM = 'AWS4-HMAC-SHA256'
_AWS_REGION = 'us-east-1'
def _aws_execute_api(self, aws_dict, video_id, query=None):
query = query or {}
amz_date = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
date = amz_date[:8]
headers = {
'Accept': 'application/json',
'Host': self._AWS_PROXY_HOST,
'X-Amz-Date': amz_date,
'X-Api-Key': self._AWS_API_KEY
}
session_token = aws_dict.get('session_token')
if session_token:
headers['X-Amz-Security-Token'] = session_token
def aws_hash(s):
return hashlib.sha256(s.encode('utf-8')).hexdigest()
# Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
canonical_querystring = compat_urllib_parse_urlencode(query)
canonical_headers = ''
for header_name, header_value in sorted(headers.items()):
canonical_headers += '%s:%s\n' % (header_name.lower(), header_value)
signed_headers = ';'.join([header.lower() for header in sorted(headers.keys())])
canonical_request = '\n'.join([
'GET',
aws_dict['uri'],
canonical_querystring,
canonical_headers,
signed_headers,
aws_hash('')
])
# Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
credential_scope_list = [date, self._AWS_REGION, 'execute-api', 'aws4_request']
credential_scope = '/'.join(credential_scope_list)
string_to_sign = '\n'.join([self._AWS_ALGORITHM, amz_date, credential_scope, aws_hash(canonical_request)])
# Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
def aws_hmac(key, msg):
return hmac.new(key, msg.encode('utf-8'), hashlib.sha256)
def aws_hmac_digest(key, msg):
return aws_hmac(key, msg).digest()
def aws_hmac_hexdigest(key, msg):
return aws_hmac(key, msg).hexdigest()
k_signing = ('AWS4' + aws_dict['secret_key']).encode('utf-8')
for value in credential_scope_list:
k_signing = aws_hmac_digest(k_signing, value)
signature = aws_hmac_hexdigest(k_signing, string_to_sign)
# Task 4: http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html
headers['Authorization'] = ', '.join([
'%s Credential=%s/%s' % (self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope),
'SignedHeaders=%s' % signed_headers,
'Signature=%s' % signature,
])
return self._download_json(
'https://%s%s%s' % (self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''),
video_id, headers=headers)
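Editor's note: AWSIE is meant to be subclassed. The host, API key, credentials and URL pattern below are placeholders invented for illustration, so this is only a sketch of the expected wiring, not code from the patch:

from youtube_dl.extractor.aws import AWSIE

class ExampleAWSBackedIE(AWSIE):
    _VALID_URL = r'https?://example\.invalid/video/(?P<id>\d+)'
    _AWS_PROXY_HOST = 'api.example-proxy.invalid'
    _AWS_API_KEY = 'placeholder-api-key'

    def _real_extract(self, url):
        video_id = self._match_id(url)
        # The dict carries the signing material _aws_execute_api needs.
        return self._aws_execute_api({
            'uri': '/prod/items/%s' % video_id,
            'access_key': 'AKIA-PLACEHOLDER',
            'secret_key': 'placeholder-secret',
        }, video_id, query={'locale': 'en'})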

+213 -0 youtube_dl/extractor/azmedien.py

@@ -0,0 +1,213 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .kaltura import KalturaIE
from ..utils import (
get_element_by_class,
get_element_by_id,
strip_or_none,
urljoin,
)
class AZMedienBaseIE(InfoExtractor):
def _kaltura_video(self, partner_id, entry_id):
return self.url_result(
'kaltura:%s:%s' % (partner_id, entry_id), ie=KalturaIE.ie_key(),
video_id=entry_id)
class AZMedienIE(AZMedienBaseIE):
IE_DESC = 'AZ Medien videos'
_VALID_URL = r'''(?x)
https?://
(?:www\.)?
(?:
telezueri\.ch|
telebaern\.tv|
telem1\.ch
)/
[0-9]+-show-[^/\#]+
(?:
/[0-9]+-episode-[^/\#]+
(?:
/[0-9]+-segment-(?:[^/\#]+\#)?|
\#
)|
\#
)
(?P<id>[^\#]+)
'''
_TESTS = [{
# URL with 'segment'
'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom',
'info_dict': {
'id': '1_2444peh4',
'ext': 'mp4',
'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom',
'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8',
'uploader_id': 'TeleZ?ri',
'upload_date': '20161218',
'timestamp': 1482084490,
},
'params': {
'skip_download': True,
},
}, {
# URL with 'segment' and fragment:
'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger',
'only_matching': True
}, {
# URL with 'episode' and fragment:
'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz',
'only_matching': True
}, {
# URL with 'show' and fragment:
'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch',
'only_matching': True
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
partner_id = self._search_regex(
r'<script[^>]+src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com(?:/[^/]+)*/(?:p|partner_id)/([0-9]+)',
webpage, 'kaltura partner id')
entry_id = self._html_search_regex(
r'<a[^>]+data-id=(["\'])(?P<id>(?:(?!\1).)+)\1[^>]+data-slug=["\']%s'
% re.escape(video_id), webpage, 'kaltura entry id', group='id')
return self._kaltura_video(partner_id, entry_id)
class AZMedienPlaylistIE(AZMedienBaseIE):
IE_DESC = 'AZ Medien playlists'
_VALID_URL = r'''(?x)
https?://
(?:www\.)?
(?:
telezueri\.ch|
telebaern\.tv|
telem1\.ch
)/
(?P<id>[0-9]+-
(?:
show|
topic|
themen
)-[^/\#]+
(?:
/[0-9]+-episode-[^/\#]+
)?
)$
'''
_TESTS = [{
# URL with 'episode'
'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016',
'info_dict': {
'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016',
'title': 'News - Donnerstag, 15. Dezember 2016',
},
'playlist_count': 9,
}, {
# URL with 'themen'
'url': 'http://www.telem1.ch/258-themen-tele-m1-classics',
'info_dict': {
'id': '258-themen-tele-m1-classics',
'title': 'Tele M1 Classics',
},
'playlist_mincount': 15,
}, {
# URL with 'topic', contains nested playlists
'url': 'http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen',
'only_matching': True,
}, {
# URL with 'show' only
'url': 'http://www.telezueri.ch/86-show-talktaeglich',
'only_matching': True
}]
def _real_extract(self, url):
show_id = self._match_id(url)
webpage = self._download_webpage(url, show_id)
entries = []
partner_id = self._search_regex(
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
webpage, 'kaltura partner id', default=None)
if partner_id:
entries = [
self._kaltura_video(partner_id, m.group('id'))
for m in re.finditer(
r'data-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage)]
if not entries:
entries = [
self.url_result(m.group('url'), ie=AZMedienIE.ie_key())
for m in re.finditer(
r'<a[^>]+data-real=(["\'])(?P<url>http.+?)\1', webpage)]
if not entries:
entries = [
# May contain nested playlists (e.g. [1]) thus no explicit
# ie_key
# 1. http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen)
self.url_result(urljoin(url, m.group('url')))
for m in re.finditer(
r'<a[^>]+name=[^>]+href=(["\'])(?P<url>/.+?)\1', webpage)]
title = self._search_regex(
r'episodeShareTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
webpage, 'title',
default=strip_or_none(get_element_by_id(
'video-title', webpage)), group='title')
return self.playlist_result(entries, show_id, title)
class AZMedienShowPlaylistIE(AZMedienBaseIE):
IE_DESC = 'AZ Medien show playlists'
_VALID_URL = r'''(?x)
https?://
(?:www\.)?
(?:
telezueri\.ch|
telebaern\.tv|
telem1\.ch
)/
(?:
all-episodes|
alle-episoden
)/
(?P<id>[^/?#&]+)
'''
_TEST = {
'url': 'http://www.telezueri.ch/all-episodes/astrotalk',
'info_dict': {
'id': 'astrotalk',
'title': 'TeleZüri: AstroTalk - alle episoden',
'description': 'md5:4c0f7e7d741d906004266e295ceb4a26',
},
'playlist_mincount': 13,
}
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
episodes = get_element_by_class('search-mobile-box', webpage)
entries = [self.url_result(
urljoin(url, m.group('url'))) for m in re.finditer(
r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', episodes)]
title = self._og_search_title(webpage, fatal=False)
description = self._og_search_description(webpage)
return self.playlist_result(entries, playlist_id, title, description)

+0 -140 youtube_dl/extractor/azubu.py

@@ -1,140 +0,0 @@
from __future__ import unicode_literals
import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
float_or_none,
sanitized_Request,
)
class AzubuIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?azubu\.(?:tv|uol.com.br)/[^/]+#!/play/(?P<id>\d+)'
_TESTS = [
{
'url': 'http://www.azubu.tv/GSL#!/play/15575/2014-hot6-cup-last-big-match-ro8-day-1',
'md5': 'a88b42fcf844f29ad6035054bd9ecaf4',
'info_dict': {
'id': '15575',
'ext': 'mp4',
'title': '2014 HOT6 CUP LAST BIG MATCH Ro8 Day 1',
'description': 'md5:d06bdea27b8cc4388a90ad35b5c66c01',
'thumbnail': r're:^https?://.*\.jpe?g',
'timestamp': 1417523507.334,
'upload_date': '20141202',
'duration': 9988.7,
'uploader': 'GSL',
'uploader_id': 414310,
'view_count': int,
},
},
{
'url': 'http://www.azubu.tv/FnaticTV#!/play/9344/-fnatic-at-worlds-2014:-toyz---%22i-love-rekkles,-he-has-amazing-mechanics%22-',
'md5': 'b72a871fe1d9f70bd7673769cdb3b925',
'info_dict': {
'id': '9344',
'ext': 'mp4',
'title': 'Fnatic at Worlds 2014: Toyz - "I love Rekkles, he has amazing mechanics"',
'description': 'md5:4a649737b5f6c8b5c5be543e88dc62af',
'thumbnail': r're:^https?://.*\.jpe?g',
'timestamp': 1410530893.320,
'upload_date': '20140912',
'duration': 172.385,
'uploader': 'FnaticTV',
'uploader_id': 272749,
'view_count': int,
},
'skip': 'Channel offline',
},
]
def _real_extract(self, url):
video_id = self._match_id(url)
data = self._download_json(
'http://www.azubu.tv/api/video/%s' % video_id, video_id)['data']
title = data['title'].strip()
description = data.get('description')
thumbnail = data.get('thumbnail')
view_count = data.get('view_count')
user = data.get('user', {})
uploader = user.get('username')
uploader_id = user.get('id')
stream_params = json.loads(data['stream_params'])
timestamp = float_or_none(stream_params.get('creationDate'), 1000)
duration = float_or_none(stream_params.get('length'), 1000)
renditions = stream_params.get('renditions') or []
video = stream_params.get('FLVFullLength') or stream_params.get('videoFullLength')
if video:
renditions.append(video)
if not renditions and not user.get('channel', {}).get('is_live', True):
raise ExtractorError('%s said: channel is offline.' % self.IE_NAME, expected=True)
formats = [{
'url': fmt['url'],
'width': fmt['frameWidth'],
'height': fmt['frameHeight'],
'vbr': float_or_none(fmt['encodingRate'], 1000),
'filesize': fmt['size'],
'vcodec': fmt['videoCodec'],
'container': fmt['videoContainer'],
} for fmt in renditions if fmt['url']]
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'timestamp': timestamp,
'duration': duration,
'uploader': uploader,
'uploader_id': uploader_id,
'view_count': view_count,
'formats': formats,
}
class AzubuLiveIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?azubu\.(?:tv|uol.com.br)/(?P<id>[^/]+)$'
_TESTS = [{
'url': 'http://www.azubu.tv/MarsTVMDLen',
'only_matching': True,
}, {
'url': 'http://azubu.uol.com.br/adolfz',
'only_matching': True,
}]
def _real_extract(self, url):
user = self._match_id(url)
info = self._download_json(
'http://api.azubu.tv/public/modules/last-video/{0}/info'.format(user),
user)['data']
if info['type'] != 'STREAM':
raise ExtractorError('{0} is not streaming live'.format(user), expected=True)
req = sanitized_Request(
'https://edge-elb.api.brightcove.com/playback/v1/accounts/3361910549001/videos/ref:' + info['reference_id'])
req.add_header('Accept', 'application/json;pk=BCpkADawqM1gvI0oGWg8dxQHlgT8HkdE2LnAlWAZkOlznO39bSZX726u4JqnDsK3MDXcO01JxXK2tZtJbgQChxgaFzEVdHRjaDoxaOu8hHOO8NYhwdxw9BzvgkvLUlpbDNUuDoc4E4wxDToV')
bc_info = self._download_json(req, user)
m3u8_url = next(source['src'] for source in bc_info['sources'] if source['container'] == 'M2TS')
formats = self._extract_m3u8_formats(m3u8_url, user, ext='mp4')
self._sort_formats(formats)
return {
'id': info['id'],
'title': self._live_title(info['title']),
'uploader_id': user,
'formats': formats,
'is_live': True,
'thumbnail': bc_info['poster'],
}

+1 -1 youtube_dl/extractor/bambuser.py

@@ -59,7 +59,7 @@ class BambuserIE(InfoExtractor):
             self._LOGIN_URL, urlencode_postdata(login_form))
         request.add_header('Referer', self._LOGIN_URL)
         response = self._download_webpage(
-            request, None, 'Logging in as %s' % username)
+            request, None, 'Logging in')

         login_error = self._html_search_regex(
             r'(?s)<div class="messages error">(.+?)</div>',


+128 -11 youtube_dl/extractor/bandcamp.py

@@ -14,14 +14,16 @@ from ..utils import (
     ExtractorError,
     float_or_none,
     int_or_none,
+    KNOWN_EXTENSIONS,
     parse_filesize,
     unescapeHTML,
     update_url_query,
+    unified_strdate,
 )


 class BandcampIE(InfoExtractor):
-    _VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>.*)'
+    _VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>[^/?#&]+)'

     _TESTS = [{
         'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
         'md5': 'c557841d5e50261777a6585648adf439',
@@ -34,12 +36,12 @@ class BandcampIE(InfoExtractor):
         '_skip': 'There is a limit of 200 free downloads / month for the test song'
     }, {
         'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
-        'md5': '73d0b3171568232574e45652f8720b5c',
+        'md5': '0369ace6b939f0927e62c67a1a8d9fa7',
         'info_dict': {
             'id': '2650410135',
-            'ext': 'mp3',
-            'title': 'Lanius (Battle)',
-            'uploader': 'Ben Prunty Music',
+            'ext': 'aiff',
+            'title': 'Ben Prunty - Lanius (Battle)',
+            'uploader': 'Ben Prunty',
         },
     }]
@@ -47,6 +49,7 @@ class BandcampIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         title = mobj.group('title')
         webpage = self._download_webpage(url, title)
+        thumbnail = self._html_search_meta('og:image', webpage, default=None)
         m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
         if not m_download:
             m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
@@ -75,6 +78,7 @@ class BandcampIE(InfoExtractor):
             return {
                 'id': track_id,
                 'title': data['title'],
+                'thumbnail': thumbnail,
                 'formats': formats,
                 'duration': float_or_none(data.get('duration')),
             }
@@ -143,7 +147,7 @@ class BandcampIE(InfoExtractor):
         return {
             'id': video_id,
             'title': title,
-            'thumbnail': info.get('thumb_url'),
+            'thumbnail': info.get('thumb_url') or thumbnail,
             'uploader': info.get('artist'),
             'artist': artist,
             'track': track,
@@ -153,7 +157,7 @@ class BandcampIE(InfoExtractor):

 class BandcampAlbumIE(InfoExtractor):
     IE_NAME = 'Bandcamp:album'
-    _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^?#]+)|/?(?:$|[?#]))'
+    _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?'

     _TESTS = [{
         'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
@@ -209,20 +213,44 @@ class BandcampAlbumIE(InfoExtractor):
             'id': 'entropy-ep',
         },
         'playlist_mincount': 3,
+    }, {
+        # not all tracks have songs
+        'url': 'https://insulters.bandcamp.com/album/we-are-the-plague',
+        'info_dict': {
+            'id': 'we-are-the-plague',
+            'title': 'WE ARE THE PLAGUE',
+            'uploader_id': 'insulters',
+        },
+        'playlist_count': 2,
     }]

+    @classmethod
+    def suitable(cls, url):
+        return (False
+                if BandcampWeeklyIE.suitable(url) or BandcampIE.suitable(url)
+                else super(BandcampAlbumIE, cls).suitable(url))
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         uploader_id = mobj.group('subdomain')
         album_id = mobj.group('album_id')
         playlist_id = album_id or uploader_id
         webpage = self._download_webpage(url, playlist_id)
-        tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
-        if not tracks_paths:
+        track_elements = re.findall(
+            r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', webpage)
+        if not track_elements:
             raise ExtractorError('The page doesn\'t contain any tracks')
+        # Only tracks with duration info have songs
         entries = [
-            self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
-            for t_path in tracks_paths]
+            self.url_result(
+                compat_urlparse.urljoin(url, t_path),
+                ie=BandcampIE.ie_key(),
+                video_title=self._search_regex(
+                    r'<span\b[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)',
+                    elem_content, 'track title', fatal=False))
+            for elem_content, t_path in track_elements
+            if self._html_search_meta('duration', elem_content, default=None)]
         title = self._html_search_regex(
             r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
             webpage, 'title', fatal=False)
@@ -235,3 +263,92 @@ class BandcampAlbumIE(InfoExtractor):
             'title': title,
             'entries': entries,
         }
class BandcampWeeklyIE(InfoExtractor):
IE_NAME = 'Bandcamp:weekly'
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
_TESTS = [{
'url': 'https://bandcamp.com/?show=224',
'md5': 'b00df799c733cf7e0c567ed187dea0fd',
'info_dict': {
'id': '224',
'ext': 'opus',
'title': 'BC Weekly April 4th 2017 - Magic Moments',
'description': 'md5:5d48150916e8e02d030623a48512c874',
'duration': 5829.77,
'release_date': '20170404',
'series': 'Bandcamp Weekly',
'episode': 'Magic Moments',
'episode_number': 208,
'episode_id': '224',
}
}, {
'url': 'https://bandcamp.com/?blah/blah@&show=228',
'only_matching': True
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
blob = self._parse_json(
self._search_regex(
r'data-blob=(["\'])(?P<blob>{.+?})\1', webpage,
'blob', group='blob'),
video_id, transform_source=unescapeHTML)
show = blob['bcw_show']
# This is desired because any invalid show id redirects to `bandcamp.com`
# which happens to expose the latest Bandcamp Weekly episode.
show_id = int_or_none(show.get('show_id')) or int_or_none(video_id)
formats = []
for format_id, format_url in show['audio_stream'].items():
if not isinstance(format_url, compat_str):
continue
for known_ext in KNOWN_EXTENSIONS:
if known_ext in format_id:
ext = known_ext
break
else:
ext = None
formats.append({
'format_id': format_id,
'url': format_url,
'ext': ext,
'vcodec': 'none',
})
self._sort_formats(formats)
title = show.get('audio_title') or 'Bandcamp Weekly'
subtitle = show.get('subtitle')
if subtitle:
title += ' - %s' % subtitle
episode_number = None
seq = blob.get('bcw_seq')
if seq and isinstance(seq, list):
try:
episode_number = next(
int_or_none(e.get('episode_number'))
for e in seq
if isinstance(e, dict) and int_or_none(e.get('id')) == show_id)
except StopIteration:
pass
return {
'id': video_id,
'title': title,
'description': show.get('desc') or show.get('short_desc'),
'duration': float_or_none(show.get('audio_duration')),
'is_live': False,
'release_date': unified_strdate(show.get('published_date')),
'series': 'Bandcamp Weekly',
'episode': show.get('subtitle'),
'episode_number': episode_number,
'episode_id': compat_str(video_id),
'formats': formats
}
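Editor's note: BandcampWeeklyIE guesses the container from each stream's format id. The same lookup against KNOWN_EXTENSIONS, shown on invented ids:

from youtube_dl.utils import KNOWN_EXTENSIONS

for format_id in ('opus-lo', 'mp3-128', 'mystery'):
    ext = next((k for k in KNOWN_EXTENSIONS if k in format_id), None)
    print(format_id, ext)  # opus, mp3, None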

+74 -9 youtube_dl/extractor/bbc.py

@@ -6,14 +6,18 @@ import itertools

 from .common import InfoExtractor
 from ..utils import (
+    clean_html,
     dict_get,
     ExtractorError,
     float_or_none,
+    get_element_by_class,
     int_or_none,
     parse_duration,
     parse_iso8601,
     try_get,
     unescapeHTML,
+    urlencode_postdata,
+    urljoin,
 )
 from ..compat import (
     compat_etree_fromstring,
@@ -25,19 +29,23 @@ from ..compat import (
 class BBCCoUkIE(InfoExtractor):
     IE_NAME = 'bbc.co.uk'
     IE_DESC = 'BBC iPlayer'
-    _ID_REGEX = r'[pb][\da-z]{7}'
+    _ID_REGEX = r'[pbw][\da-z]{7}'
     _VALID_URL = r'''(?x)
                     https?://
                         (?:www\.)?bbc\.co\.uk/
                         (?:
                             programmes/(?!articles/)|
                             iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
-                            music/clips[/#]|
-                            radio/player/
+                            music/(?:clips|audiovideo/popular)[/#]|
+                            radio/player/|
+                            events/[^/]+/play/[^/]+/
                         )
                         (?P<id>%s)(?!/(?:episodes|broadcasts|clips))
                     ''' % _ID_REGEX

+    _LOGIN_URL = 'https://account.bbc.com/signin'
+    _NETRC_MACHINE = 'bbc'
+
     _MEDIASELECTOR_URLS = [
         # Provides HQ HLS streams with even better quality that pc mediaset but fails
         # with geolocation in some cases when it's even not geo restricted at all (e.g.
@@ -222,8 +230,48 @@ class BBCCoUkIE(InfoExtractor):
     }, {
         'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
         'only_matching': True,
-    }
-    ]
+    }, {
+        'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
+        'only_matching': True,
+    }]
+
+    _USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
+
+    def _login(self):
+        username, password = self._get_login_info()
+        if username is None:
+            return
+
+        login_page = self._download_webpage(
+            self._LOGIN_URL, None, 'Downloading signin page')
+
+        login_form = self._hidden_inputs(login_page)
+
+        login_form.update({
+            'username': username,
+            'password': password,
+        })
+
+        post_url = urljoin(self._LOGIN_URL, self._search_regex(
+            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
+            'post url', default=self._LOGIN_URL, group='url'))
+
+        response, urlh = self._download_webpage_handle(
+            post_url, None, 'Logging in', data=urlencode_postdata(login_form),
+            headers={'Referer': self._LOGIN_URL})
+
+        if self._LOGIN_URL in urlh.geturl():
+            error = clean_html(get_element_by_class('form-message', response))
+            if error:
+                raise ExtractorError(
+                    'Unable to login: %s' % error, expected=True)
+            raise ExtractorError('Unable to log in')
+
+    def _real_initialize(self):
+        self._login()

     class MediaSelectionError(Exception):
         def __init__(self, id):
@@ -336,6 +384,15 @@ class BBCCoUkIE(InfoExtractor):
                         formats.extend(self._extract_m3u8_formats(
                             href, programme_id, ext='mp4', entry_protocol='m3u8_native',
                             m3u8_id=format_id, fatal=False))
+                        if re.search(self._USP_RE, href):
+                            usp_formats = self._extract_m3u8_formats(
+                                re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href),
+                                programme_id, ext='mp4', entry_protocol='m3u8_native',
+                                m3u8_id=format_id, fatal=False)
+                            for f in usp_formats:
+                                if f.get('height') and f['height'] > 720:
+                                    continue
+                                formats.append(f)
                     elif transfer_format == 'hds':
                         formats.extend(self._extract_f4m_formats(
                             href, programme_id, f4m_id=format_id, fatal=False))
@@ -350,7 +407,7 @@ class BBCCoUkIE(InfoExtractor):
                             fmt.update({
                                 'width': width,
                                 'height': height,
-                                'vbr': bitrate,
+                                'tbr': bitrate,
                                 'vcodec': encoding,
                             })
                         else:
@@ -359,7 +416,7 @@ class BBCCoUkIE(InfoExtractor):
                                 'acodec': encoding,
                                 'vcodec': 'none',
                             })
-                        if protocol == 'http':
+                        if protocol in ('http', 'https'):
                             # Direct link
                             fmt.update({
                                 'url': href,
@@ -378,6 +435,8 @@ class BBCCoUkIE(InfoExtractor):
                                 'rtmp_live': False,
                                 'ext': 'flv',
                             })
+                        else:
+                            continue
                         formats.append(fmt)
             elif kind == 'captions':
                 subtitles = self.extract_subtitles(media, programme_id)
@@ -396,7 +455,7 @@ class BBCCoUkIE(InfoExtractor):
         description = smp_config['summary']

         for item in smp_config['items']:
             kind = item['kind']
-            if kind != 'programme' and kind != 'radioProgramme':
+            if kind not in ('programme', 'radioProgramme'):
                 continue
             programme_id = item.get('vpid')
duration = int_or_none(item.get('duration')) duration = int_or_none(item.get('duration'))
@ -437,7 +496,7 @@ class BBCCoUkIE(InfoExtractor):
for item in self._extract_items(playlist): for item in self._extract_items(playlist):
kind = item.get('kind') kind = item.get('kind')
if kind != 'programme' and kind != 'radioProgramme':
if kind not in ('programme', 'radioProgramme'):
continue continue
title = playlist.find('./{%s}title' % self._EMP_PLAYLIST_NS).text title = playlist.find('./{%s}title' % self._EMP_PLAYLIST_NS).text
description_el = playlist.find('./{%s}summary' % self._EMP_PLAYLIST_NS) description_el = playlist.find('./{%s}summary' % self._EMP_PLAYLIST_NS)
@ -470,6 +529,12 @@ class BBCCoUkIE(InfoExtractor):
webpage = self._download_webpage(url, group_id, 'Downloading video page') webpage = self._download_webpage(url, group_id, 'Downloading video page')
error = self._search_regex(
r'<div\b[^>]+\bclass=["\']smp__message delta["\'][^>]*>([^<]+)<',
webpage, 'error', default=None)
if error:
raise ExtractorError(error, expected=True)
programme_id = None programme_id = None
duration = None duration = None
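
The new _USP_RE branch rewrites Unified Streaming HLS URLs into their canonical /<name>.ism/<name>.m3u8 form before requesting the extra variant playlist (and then drops anything above 720p). A quick sketch of just the URL rewrite, using a made-up href:

    import re

    USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
    href = 'https://vod.example.invalid/iplayer/b09w74g5.ism.hlsv2.ism/master.m3u8'
    if re.search(USP_RE, href):
        print(re.sub(USP_RE, r'/\1.ism/\1.m3u8', href))
    # https://vod.example.invalid/iplayer/b09w74g5.ism/b09w74g5.m3u8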


+ 188
- 0
youtube_dl/extractor/beampro.py

@ -0,0 +1,188 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
compat_str,
float_or_none,
int_or_none,
parse_iso8601,
try_get,
urljoin,
)
class BeamProBaseIE(InfoExtractor):
_API_BASE = 'https://mixer.com/api/v1'
_RATINGS = {'family': 0, 'teen': 13, '18+': 18}
def _extract_channel_info(self, chan):
user_id = chan.get('userId') or try_get(chan, lambda x: x['user']['id'])
return {
'uploader': chan.get('token') or try_get(
chan, lambda x: x['user']['username'], compat_str),
'uploader_id': compat_str(user_id) if user_id else None,
'age_limit': self._RATINGS.get(chan.get('audience')),
}
class BeamProLiveIE(BeamProBaseIE):
IE_NAME = 'Mixer:live'
_VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'http://mixer.com/niterhayven',
'info_dict': {
'id': '261562',
'ext': 'mp4',
'title': 'Introducing The Witcher 3 // The Grind Starts Now!',
'description': 'md5:0b161ac080f15fe05d18a07adb44a74d',
'thumbnail': r're:https://.*\.jpg$',
'timestamp': 1483477281,
'upload_date': '20170103',
'uploader': 'niterhayven',
'uploader_id': '373396',
'age_limit': 18,
'is_live': True,
'view_count': int,
},
'skip': 'niterhayven is offline',
'params': {
'skip_download': True,
},
}
_MANIFEST_URL_TEMPLATE = '%s/channels/%%s/manifest.%%s' % BeamProBaseIE._API_BASE
@classmethod
def suitable(cls, url):
return False if BeamProVodIE.suitable(url) else super(BeamProLiveIE, cls).suitable(url)
def _real_extract(self, url):
channel_name = self._match_id(url)
chan = self._download_json(
'%s/channels/%s' % (self._API_BASE, channel_name), channel_name)
if chan.get('online') is False:
raise ExtractorError(
'{0} is offline'.format(channel_name), expected=True)
channel_id = chan['id']
def manifest_url(kind):
return self._MANIFEST_URL_TEMPLATE % (channel_id, kind)
formats = self._extract_m3u8_formats(
manifest_url('m3u8'), channel_name, ext='mp4', m3u8_id='hls',
fatal=False)
formats.extend(self._extract_smil_formats(
manifest_url('smil'), channel_name, fatal=False))
self._sort_formats(formats)
info = {
'id': compat_str(chan.get('id') or channel_name),
'title': self._live_title(chan.get('name') or channel_name),
'description': clean_html(chan.get('description')),
'thumbnail': try_get(
chan, lambda x: x['thumbnail']['url'], compat_str),
'timestamp': parse_iso8601(chan.get('updatedAt')),
'is_live': True,
'view_count': int_or_none(chan.get('viewersTotal')),
'formats': formats,
}
info.update(self._extract_channel_info(chan))
return info
class BeamProVodIE(BeamProBaseIE):
IE_NAME = 'Mixer:vod'
_VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>\d+)'
_TEST = {
'url': 'https://mixer.com/willow8714?vod=2259830',
'md5': 'b2431e6e8347dc92ebafb565d368b76b',
'info_dict': {
'id': '2259830',
'ext': 'mp4',
'title': 'willow8714\'s Channel',
'duration': 6828.15,
'thumbnail': r're:https://.*source\.png$',
'timestamp': 1494046474,
'upload_date': '20170506',
'uploader': 'willow8714',
'uploader_id': '6085379',
'age_limit': 13,
'view_count': int,
},
'params': {
'skip_download': True,
},
}
@staticmethod
def _extract_format(vod, vod_type):
if not vod.get('baseUrl'):
return []
if vod_type == 'hls':
filename, protocol = 'manifest.m3u8', 'm3u8_native'
elif vod_type == 'raw':
filename, protocol = 'source.mp4', 'https'
else:
assert False
data = vod.get('data') if isinstance(vod.get('data'), dict) else {}
format_id = [vod_type]
if isinstance(data.get('Height'), compat_str):
format_id.append('%sp' % data['Height'])
return [{
'url': urljoin(vod['baseUrl'], filename),
'format_id': '-'.join(format_id),
'ext': 'mp4',
'protocol': protocol,
'width': int_or_none(data.get('Width')),
'height': int_or_none(data.get('Height')),
'fps': int_or_none(data.get('Fps')),
'tbr': int_or_none(data.get('Bitrate'), 1000),
}]
def _real_extract(self, url):
vod_id = self._match_id(url)
vod_info = self._download_json(
'%s/recordings/%s' % (self._API_BASE, vod_id), vod_id)
state = vod_info.get('state')
if state != 'AVAILABLE':
raise ExtractorError(
'VOD %s is not available (state: %s)' % (vod_id, state),
expected=True)
formats = []
thumbnail_url = None
for vod in vod_info['vods']:
vod_type = vod.get('format')
if vod_type in ('hls', 'raw'):
formats.extend(self._extract_format(vod, vod_type))
elif vod_type == 'thumbnail':
thumbnail_url = urljoin(vod.get('baseUrl'), 'source.png')
self._sort_formats(formats)
info = {
'id': vod_id,
'title': vod_info.get('name') or vod_id,
'duration': float_or_none(vod_info.get('duration')),
'thumbnail': thumbnail_url,
'timestamp': parse_iso8601(vod_info.get('createdAt')),
'view_count': int_or_none(vod_info.get('viewsTotal')),
'formats': formats,
}
info.update(self._extract_channel_info(vod_info.get('channel') or {}))
return info
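
BeamProLiveIE matches any mixer.com/<channel> URL, so it explicitly defers to BeamProVodIE via suitable() whenever a vod query parameter is present. Checking the two _VALID_URL patterns from above against the test URLs:

    import re

    LIVE_RE = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/(?P<id>[^/?#&]+)'
    VOD_RE = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>\d+)'

    for url in ('http://mixer.com/niterhayven', 'https://mixer.com/willow8714?vod=2259830'):
        print(url, 'vod' if re.match(VOD_RE, url) else 'live')
    # the first is handled as a live stream, the second as VOD 2259830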

+ 13
- 6
youtube_dl/extractor/beeg.py

@ -9,6 +9,7 @@ from ..compat import (
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
urljoin,
) )
@ -16,7 +17,7 @@ class BeegIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'
_TEST = { _TEST = {
'url': 'http://beeg.com/5416503', 'url': 'http://beeg.com/5416503',
'md5': '46c384def73b33dbc581262e5ee67cef',
'md5': 'a1a1b1a8bc70a89e49ccfd113aed0820',
'info_dict': { 'info_dict': {
'id': '5416503', 'id': '5416503',
'ext': 'mp4', 'ext': 'mp4',
@ -36,9 +37,11 @@ class BeegIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
cpl_url = self._search_regex( cpl_url = self._search_regex(
r'<script[^>]+src=(["\'])(?P<url>(?:https?:)?//static\.beeg\.com/cpl/\d+\.js.*?)\1',
r'<script[^>]+src=(["\'])(?P<url>(?:/static|(?:https?:)?//static\.beeg\.com)/cpl/\d+\.js.*?)\1',
webpage, 'cpl', default=None, group='url') webpage, 'cpl', default=None, group='url')
cpl_url = urljoin(url, cpl_url)
beeg_version, beeg_salt = [None] * 2 beeg_version, beeg_salt = [None] * 2
if cpl_url: if cpl_url:
@ -54,12 +57,16 @@ class BeegIE(InfoExtractor):
r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg salt', r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg salt',
default=None, group='beeg_salt') default=None, group='beeg_salt')
beeg_version = beeg_version or '2000'
beeg_version = beeg_version or '2185'
beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H' beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H'
video = self._download_json(
'https://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id),
video_id)
for api_path in ('', 'api.'):
video = self._download_json(
'https://%sbeeg.com/api/v6/%s/video/%s'
% (api_path, beeg_version, video_id), video_id,
fatal=api_path == 'api.')
if video:
break
def split(o, e): def split(o, e):
def cut(s, x): def cut(s, x):
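
The download loop above is a small fallback pattern: every candidate except the last is fetched with fatal=False (so failures come back falsy instead of raising), and the loop stops at the first usable response. The same idea in isolation, with fetch standing in for _download_json (the names here are illustrative only):

    def first_working(candidates, fetch):
        for i, candidate in enumerate(candidates):
            fatal = i == len(candidates) - 1
            try:
                result = fetch(candidate)
            except Exception:
                if fatal:
                    raise
                continue
            if result:
                return result

    # first_working(('', 'api.'), lambda prefix: {'api_host': prefix + 'beeg.com'})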


+ 10
- 2
youtube_dl/extractor/bellmedia.py

@ -21,10 +21,11 @@ class BellMediaIE(InfoExtractor):
animalplanet| animalplanet|
bravo| bravo|
mtv| mtv|
space
space|
etalk
)\.ca| )\.ca|
much\.com much\.com
)/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6})'''
)/.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
_TESTS = [{ _TESTS = [{
'url': 'http://www.ctv.ca/video/player?vid=706966', 'url': 'http://www.ctv.ca/video/player?vid=706966',
'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0', 'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
@ -55,6 +56,12 @@ class BellMediaIE(InfoExtractor):
}, { }, {
'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6', 'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430',
'only_matching': True,
}, {
'url': 'http://www.etalk.ca/video?videoid=663455',
'only_matching': True,
}] }]
_DOMAINS = { _DOMAINS = {
'thecomedynetwork': 'comedy', 'thecomedynetwork': 'comedy',
@ -62,6 +69,7 @@ class BellMediaIE(InfoExtractor):
'sciencechannel': 'discsci', 'sciencechannel': 'discsci',
'investigationdiscovery': 'invdisc', 'investigationdiscovery': 'invdisc',
'animalplanet': 'aniplan', 'animalplanet': 'aniplan',
'etalk': 'ctv',
} }
def _real_extract(self, url): def _real_extract(self, url):
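
The widened _VALID_URL now also accepts videoid= query parameters (for etalk.ca) and ids longer than six digits. A check of the id-matching tail against the two newly added test URLs (the domain alternation is abbreviated here; the real pattern enumerates every Bell Media site):

    import re

    VALID_URL = r'''(?x)https?://(?:www\.)?
        (?:(?:ctv|space|etalk)\.ca|much\.com)
        /.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''

    for url in ('http://www.etalk.ca/video?videoid=663455',
                'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430'):
        print(re.match(VALID_URL, url).group('id'))
    # 663455
    # 1051430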


+ 149
- 13
youtube_dl/extractor/bilibili.py

@ -5,19 +5,27 @@ import hashlib
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_parse_qs
from ..compat import (
compat_parse_qs,
compat_urlparse,
)
from ..utils import ( from ..utils import (
ExtractorError,
int_or_none, int_or_none,
float_or_none, float_or_none,
parse_iso8601,
smuggle_url,
strip_jsonp,
unified_timestamp, unified_timestamp,
unsmuggle_url,
urlencode_postdata, urlencode_postdata,
) )
class BiliBiliIE(InfoExtractor): class BiliBiliIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/v/)(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/(?P<anime_id>\d+)/play#)(?P<id>\d+)'
_TEST = {
_TESTS = [{
'url': 'http://www.bilibili.tv/video/av1074402/', 'url': 'http://www.bilibili.tv/video/av1074402/',
'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e', 'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e',
'info_dict': { 'info_dict': {
@ -32,25 +40,77 @@ class BiliBiliIE(InfoExtractor):
'uploader': '菊子桑', 'uploader': '菊子桑',
'uploader_id': '156160', 'uploader_id': '156160',
}, },
}
}, {
# Tested in BiliBiliBangumiIE
'url': 'http://bangumi.bilibili.com/anime/1869/play#40062',
'only_matching': True,
}, {
'url': 'http://bangumi.bilibili.com/anime/5802/play#100643',
'md5': '3f721ad1e75030cc06faf73587cfec57',
'info_dict': {
'id': '100643',
'ext': 'mp4',
'title': 'CHAOS;CHILD',
'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
},
'skip': 'Geo-restricted to China',
}, {
# Title with double quotes
'url': 'http://www.bilibili.com/video/av8903802/',
'info_dict': {
'id': '8903802',
'ext': 'mp4',
'title': '阿滴英文|英文歌分享#6 "Closer',
'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
'uploader': '阿滴英文',
'uploader_id': '65880958',
'timestamp': 1488382620,
'upload_date': '20170301',
},
'params': {
'skip_download': True, # Test metadata only
},
}]
_APP_KEY = '6f90a59ac58a4123'
_BILIBILI_KEY = '0bfd84cc3940035173f35e6777508326'
_APP_KEY = '84956560bc028eb7'
_BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e'
def _report_error(self, result):
if 'message' in result:
raise ExtractorError('%s said: %s' % (self.IE_NAME, result['message']), expected=True)
elif 'code' in result:
raise ExtractorError('%s returns error %d' % (self.IE_NAME, result['code']), expected=True)
else:
raise ExtractorError('Can\'t extract Bangumi episode ID')
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url)
url, smuggled_data = unsmuggle_url(url, {})
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
anime_id = mobj.group('anime_id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
if 'anime/v' not in url:
if 'anime/' not in url:
cid = compat_parse_qs(self._search_regex( cid = compat_parse_qs(self._search_regex(
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)', [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'], r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
webpage, 'player parameters'))['cid'][0] webpage, 'player parameters'))['cid'][0]
else: else:
if 'no_bangumi_tip' not in smuggled_data:
self.to_screen('Downloading episode %s. To download all videos in anime %s, re-run youtube-dl with %s' % (
video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id)))
headers = {
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
}
headers.update(self.geo_verification_headers())
js = self._download_json( js = self._download_json(
'http://bangumi.bilibili.com/web_api/get_source', video_id, 'http://bangumi.bilibili.com/web_api/get_source', video_id,
data=urlencode_postdata({'episode_id': video_id}), data=urlencode_postdata({'episode_id': video_id}),
headers={'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'})
headers=headers)
if 'result' not in js:
self._report_error(js)
cid = js['result']['cid'] cid = js['result']['cid']
payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid) payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid)
@ -58,7 +118,11 @@ class BiliBiliIE(InfoExtractor):
video_info = self._download_json( video_info = self._download_json(
'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign), 'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign),
video_id, note='Downloading video info page')
video_id, note='Downloading video info page',
headers=self.geo_verification_headers())
if 'durl' not in video_info:
self._report_error(video_info)
entries = [] entries = []
@ -74,6 +138,11 @@ class BiliBiliIE(InfoExtractor):
'preference': -2 if 'hd.mp4' in backup_url else -3, 'preference': -2 if 'hd.mp4' in backup_url else -3,
}) })
for a_format in formats:
a_format.setdefault('http_headers', {}).update({
'Referer': url,
})
self._sort_formats(formats) self._sort_formats(formats)
entries.append({ entries.append({
@ -82,10 +151,10 @@ class BiliBiliIE(InfoExtractor):
'formats': formats, 'formats': formats,
}) })
title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
title = self._html_search_regex('<h1[^>]*>([^<]+)</h1>', webpage, 'title')
description = self._html_search_meta('description', webpage) description = self._html_search_meta('description', webpage)
timestamp = unified_timestamp(self._html_search_regex( timestamp = unified_timestamp(self._html_search_regex(
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False))
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', default=None))
thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage) thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)
# TODO 'view_count' requires deobfuscating Javascript # TODO 'view_count' requires deobfuscating Javascript
@ -99,7 +168,7 @@ class BiliBiliIE(InfoExtractor):
} }
uploader_mobj = re.search( uploader_mobj = re.search(
r'<a[^>]+href="https?://space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"',
r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"',
webpage) webpage)
if uploader_mobj: if uploader_mobj:
info.update({ info.update({
@ -123,3 +192,70 @@ class BiliBiliIE(InfoExtractor):
'description': description, 'description': description,
'entries': entries, 'entries': entries,
} }
class BiliBiliBangumiIE(InfoExtractor):
_VALID_URL = r'https?://bangumi\.bilibili\.com/anime/(?P<id>\d+)'
IE_NAME = 'bangumi.bilibili.com'
IE_DESC = 'BiliBili番剧'
_TESTS = [{
'url': 'http://bangumi.bilibili.com/anime/1869',
'info_dict': {
'id': '1869',
'title': '混沌武士',
'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
},
'playlist_count': 26,
}, {
'url': 'http://bangumi.bilibili.com/anime/1869',
'info_dict': {
'id': '1869',
'title': '混沌武士',
'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
},
'playlist': [{
'md5': '91da8621454dd58316851c27c68b0c13',
'info_dict': {
'id': '40062',
'ext': 'mp4',
'title': '混沌武士',
'description': '故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子...',
'timestamp': 1414538739,
'upload_date': '20141028',
'episode': '疾风怒涛 Tempestuous Temperaments',
'episode_number': 1,
},
}],
'params': {
'playlist_items': '1',
},
}]
@classmethod
def suitable(cls, url):
return False if BiliBiliIE.suitable(url) else super(BiliBiliBangumiIE, cls).suitable(url)
def _real_extract(self, url):
bangumi_id = self._match_id(url)
# Sometimes this API returns a JSONP response
season_info = self._download_json(
'http://bangumi.bilibili.com/jsonp/seasoninfo/%s.ver' % bangumi_id,
bangumi_id, transform_source=strip_jsonp)['result']
entries = [{
'_type': 'url_transparent',
'url': smuggle_url(episode['webplay_url'], {'no_bangumi_tip': 1}),
'ie_key': BiliBiliIE.ie_key(),
'timestamp': parse_iso8601(episode.get('update_time'), delimiter=' '),
'episode': episode.get('index_title'),
'episode_number': int_or_none(episode.get('index')),
} for episode in season_info['episodes']]
entries = sorted(entries, key=lambda entry: entry.get('episode_number'))
return self.playlist_result(
entries, bangumi_id,
season_info.get('bangumi_title'), season_info.get('evaluate'))
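
The playurl request above is signed: the extractor builds the appkey/cid payload shown in the diff and appends sign, which (in the unchanged part of the extractor) is the MD5 of the payload concatenated with the secret key. A sketch under that assumption, with a made-up cid:

    import hashlib

    APP_KEY = '84956560bc028eb7'
    BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e'
    cid = '15849159'  # hypothetical

    payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (APP_KEY, cid)
    sign = hashlib.md5((payload + BILIBILI_KEY).encode('utf-8')).hexdigest()
    url = 'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign)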

+ 3
- 7
youtube_dl/extractor/bleacherreport.py

@ -35,7 +35,7 @@ class BleacherReportIE(InfoExtractor):
'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo', 'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo',
'timestamp': 1446839961, 'timestamp': 1446839961,
'uploader': 'Sean Fay', 'uploader': 'Sean Fay',
'description': 'md5:825e94e0f3521df52fa83b2ed198fa20',
'description': 'md5:b1601e2314c4d8eec23b6eafe086a757',
'uploader_id': 6466954, 'uploader_id': 6466954,
'upload_date': '20151011', 'upload_date': '20151011',
}, },
@ -90,17 +90,13 @@ class BleacherReportCMSIE(AMPIE):
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})' _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})'
_TESTS = [{ _TESTS = [{
'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1', 'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
'md5': '8c2c12e3af7805152675446c905d159b',
'md5': '2e4b0a997f9228ffa31fada5c53d1ed1',
'info_dict': { 'info_dict': {
'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1', 'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
'ext': 'mp4',
'ext': 'flv',
'title': 'Cena vs. Rollins Would Expose the Heavyweight Division', 'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e', 'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
}, },
'params': {
# m3u8 download
'skip_download': True,
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):


+ 8
- 3
youtube_dl/extractor/bloomberg.py

@ -33,6 +33,10 @@ class BloombergIE(InfoExtractor):
'params': { 'params': {
'format': 'best[format_id^=hds]', 'format': 'best[format_id^=hds]',
}, },
}, {
# data-bmmrid=
'url': 'https://www.bloomberg.com/politics/articles/2017-02-08/le-pen-aide-briefed-french-central-banker-on-plan-to-print-money',
'only_matching': True,
}, { }, {
'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets', 'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
'only_matching': True, 'only_matching': True,
@ -45,9 +49,10 @@ class BloombergIE(InfoExtractor):
name = self._match_id(url) name = self._match_id(url)
webpage = self._download_webpage(url, name) webpage = self._download_webpage(url, name)
video_id = self._search_regex( video_id = self._search_regex(
(r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
r'videoId\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
webpage, 'id', group='url', default=None)
(r'["\']bmmrId["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
r'videoId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
r'data-bmmrid=(["\'])(?P<id>(?:(?!\1).)+)\1'),
webpage, 'id', group='id', default=None)
if not video_id: if not video_id:
bplayer_data = self._parse_json(self._search_regex( bplayer_data = self._parse_json(self._search_regex(
r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name) r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name)


+ 72
- 0
youtube_dl/extractor/bostonglobe.py

@ -0,0 +1,72 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
extract_attributes,
)
class BostonGlobeIE(InfoExtractor):
_VALID_URL = r'(?i)https?://(?:www\.)?bostonglobe\.com/.*/(?P<id>[^/]+)/\w+(?:\.html)?'
_TESTS = [
{
'url': 'http://www.bostonglobe.com/metro/2017/02/11/tree-finally-succumbs-disease-leaving-hole-neighborhood/h1b4lviqzMTIn9sVy8F3gP/story.html',
'md5': '0a62181079c85c2d2b618c9a738aedaf',
'info_dict': {
'title': 'A tree finally succumbs to disease, leaving a hole in a neighborhood',
'id': '5320421710001',
'ext': 'mp4',
'description': 'It arrived as a sapling when the Back Bay was in its infancy, a spindly American elm tamped down into a square of dirt cut into the brick sidewalk of 1880s Marlborough Street, no higher than the first bay window of the new brownstone behind it.',
'timestamp': 1486877593,
'upload_date': '20170212',
'uploader_id': '245991542',
},
},
{
# Embedded youtube video; we hand it off to the Generic extractor.
'url': 'https://www.bostonglobe.com/lifestyle/names/2017/02/17/does-ben-affleck-play-matt-damon-favorite-version-batman/ruqkc9VxKBYmh5txn1XhSI/story.html',
'md5': '582b40327089d5c0c949b3c54b13c24b',
'info_dict': {
'title': "Who Is Matt Damon's Favorite Batman?",
'id': 'ZW1QCnlA6Qc',
'ext': 'mp4',
'upload_date': '20170217',
'description': 'md5:3b3dccb9375867e0b4d527ed87d307cb',
'uploader': 'The Late Late Show with James Corden',
'uploader_id': 'TheLateLateShow',
},
'expected_warnings': ['404'],
},
]
def _real_extract(self, url):
page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id)
page_title = self._og_search_title(webpage, default=None)
# <video data-brightcove-video-id="5320421710001" data-account="245991542" data-player="SJWAiyYWg" data-embed="default" class="video-js" controls itemscope itemtype="http://schema.org/VideoObject">
entries = []
for video in re.findall(r'(?i)(<video[^>]+>)', webpage):
attrs = extract_attributes(video)
video_id = attrs.get('data-brightcove-video-id')
account_id = attrs.get('data-account')
player_id = attrs.get('data-player')
embed = attrs.get('data-embed')
if video_id and account_id and player_id and embed:
entries.append(
'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
% (account_id, player_id, embed, video_id))
if len(entries) == 0:
return self.url_result(url, 'Generic')
elif len(entries) == 1:
return self.url_result(entries[0], 'BrightcoveNew')
else:
return self.playlist_from_matches(entries, page_id, page_title, ie='BrightcoveNew')
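
Each Brightcove-backed <video> tag is turned into a players.brightcove.net URL from its data attributes. A stripped-down version of that step, run on the example markup quoted in the comment above (attribute parsing reduced to a simple regex instead of extract_attributes):

    import re

    tag = ('<video data-brightcove-video-id="5320421710001" data-account="245991542" '
           'data-player="SJWAiyYWg" data-embed="default" class="video-js" controls>')
    attrs = dict(re.findall(r'(data-[\w-]+)="([^"]*)"', tag))
    print('http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' % (
        attrs['data-account'], attrs['data-player'], attrs['data-embed'],
        attrs['data-brightcove-video-id']))
    # http://players.brightcove.net/245991542/SJWAiyYWg_default/index.html?videoId=5320421710001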

+ 9
- 4
youtube_dl/extractor/bpb.py

@ -33,13 +33,18 @@ class BpbIE(InfoExtractor):
title = self._html_search_regex( title = self._html_search_regex(
r'<h2 class="white">(.*?)</h2>', webpage, 'title') r'<h2 class="white">(.*?)</h2>', webpage, 'title')
video_info_dicts = re.findall( video_info_dicts = re.findall(
r"({\s*src:\s*'http://film\.bpb\.de/[^}]+})", webpage)
r"({\s*src\s*:\s*'https?://film\.bpb\.de/[^}]+})", webpage)
formats = [] formats = []
for video_info in video_info_dicts: for video_info in video_info_dicts:
video_info = self._parse_json(video_info, video_id, transform_source=js_to_json)
quality = video_info['quality']
video_url = video_info['src']
video_info = self._parse_json(
video_info, video_id, transform_source=js_to_json, fatal=False)
if not video_info:
continue
video_url = video_info.get('src')
if not video_url:
continue
quality = 'high' if '_high' in video_url else 'low'
formats.append({ formats.append({
'url': video_url, 'url': video_url,
'preference': 10 if quality == 'high' else 0, 'preference': 10 if quality == 'high' else 0,


+ 144
- 4
youtube_dl/extractor/br.py

@ -1,20 +1,23 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
parse_duration, parse_duration,
parse_iso8601,
xpath_element, xpath_element,
xpath_text, xpath_text,
) )
class BRIE(InfoExtractor): class BRIE(InfoExtractor):
IE_DESC = 'Bayerischer Rundfunk Mediathek'
IE_DESC = 'Bayerischer Rundfunk'
_VALID_URL = r'(?P<base_url>https?://(?:www\.)?br(?:-klassik)?\.de)/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html' _VALID_URL = r'(?P<base_url>https?://(?:www\.)?br(?:-klassik)?\.de)/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html'
_TESTS = [ _TESTS = [
@ -77,7 +80,7 @@ class BRIE(InfoExtractor):
'description': 'md5:bb659990e9e59905c3d41e369db1fbe3', 'description': 'md5:bb659990e9e59905c3d41e369db1fbe3',
'duration': 893, 'duration': 893,
'uploader': 'Eva Maria Steimle', 'uploader': 'Eva Maria Steimle',
'upload_date': '20140117',
'upload_date': '20170208',
} }
}, },
] ]
@ -123,10 +126,10 @@ class BRIE(InfoExtractor):
for asset in assets.findall('asset'): for asset in assets.findall('asset'):
format_url = xpath_text(asset, ['downloadUrl', 'url']) format_url = xpath_text(asset, ['downloadUrl', 'url'])
asset_type = asset.get('type') asset_type = asset.get('type')
if asset_type == 'HDS':
if asset_type.startswith('HDS'):
formats.extend(self._extract_f4m_formats( formats.extend(self._extract_f4m_formats(
format_url + '?hdcore=3.2.0', media_id, f4m_id='hds', fatal=False)) format_url + '?hdcore=3.2.0', media_id, f4m_id='hds', fatal=False))
elif asset_type == 'HLS':
elif asset_type.startswith('HLS'):
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
format_url, media_id, 'mp4', 'm3u8_native', m3u8_id='hds', fatal=False)) format_url, media_id, 'mp4', 'm3u8_native', m3u8_id='hds', fatal=False))
else: else:
@ -169,3 +172,140 @@ class BRIE(InfoExtractor):
} for variant in variants.findall('variant') if xpath_text(variant, 'url')] } for variant in variants.findall('variant') if xpath_text(variant, 'url')]
thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True) thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True)
return thumbnails return thumbnails
class BRMediathekIE(InfoExtractor):
IE_DESC = 'Bayerischer Rundfunk Mediathek'
_VALID_URL = r'https?://(?:www\.)?br\.de/mediathek/video/[^/?&#]*?-(?P<id>av:[0-9a-f]{24})'
_TESTS = [{
'url': 'https://www.br.de/mediathek/video/gesundheit-die-sendung-vom-28112017-av:5a1e6a6e8fce6d001871cc8e',
'md5': 'fdc3d485835966d1622587d08ba632ec',
'info_dict': {
'id': 'av:5a1e6a6e8fce6d001871cc8e',
'ext': 'mp4',
'title': 'Die Sendung vom 28.11.2017',
'description': 'md5:6000cdca5912ab2277e5b7339f201ccc',
'timestamp': 1511942766,
'upload_date': '20171129',
}
}]
def _real_extract(self, url):
clip_id = self._match_id(url)
clip = self._download_json(
'https://proxy-base.master.mango.express/graphql',
clip_id, data=json.dumps({
"query": """{
viewer {
clip(id: "%s") {
title
description
duration
createdAt
ageRestriction
videoFiles {
edges {
node {
publicLocation
fileSize
videoProfile {
width
height
bitrate
encoding
}
}
}
}
captionFiles {
edges {
node {
publicLocation
}
}
}
teaserImages {
edges {
node {
imageFiles {
edges {
node {
publicLocation
width
height
}
}
}
}
}
}
}
}
}""" % clip_id}).encode(), headers={
'Content-Type': 'application/json',
})['data']['viewer']['clip']
title = clip['title']
formats = []
for edge in clip.get('videoFiles', {}).get('edges', []):
node = edge.get('node', {})
n_url = node.get('publicLocation')
if not n_url:
continue
ext = determine_ext(n_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
n_url, clip_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
else:
video_profile = node.get('videoProfile', {})
tbr = int_or_none(video_profile.get('bitrate'))
format_id = 'http'
if tbr:
format_id += '-%d' % tbr
formats.append({
'format_id': format_id,
'url': n_url,
'width': int_or_none(video_profile.get('width')),
'height': int_or_none(video_profile.get('height')),
'tbr': tbr,
'filesize': int_or_none(node.get('fileSize')),
})
self._sort_formats(formats)
subtitles = {}
for edge in clip.get('captionFiles', {}).get('edges', []):
node = edge.get('node', {})
n_url = node.get('publicLocation')
if not n_url:
continue
subtitles.setdefault('de', []).append({
'url': n_url,
})
thumbnails = []
for edge in clip.get('teaserImages', {}).get('edges', []):
for image_edge in edge.get('node', {}).get('imageFiles', {}).get('edges', []):
node = image_edge.get('node', {})
n_url = node.get('publicLocation')
if not n_url:
continue
thumbnails.append({
'url': n_url,
'width': int_or_none(node.get('width')),
'height': int_or_none(node.get('height')),
})
return {
'id': clip_id,
'title': title,
'description': clip.get('description'),
'duration': int_or_none(clip.get('duration')),
'timestamp': parse_iso8601(clip.get('createdAt')),
'age_limit': int_or_none(clip.get('ageRestriction')),
'formats': formats,
'subtitles': subtitles,
'thumbnails': thumbnails,
}
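
BRMediathekIE fetches everything through one GraphQL POST (JSON body, Content-Type application/json). Outside the extractor the same request looks roughly like this; the query is cut down to two fields and the response shape is assumed to match the full query above:

    import json
    from urllib.request import Request, urlopen

    clip_id = 'av:5a1e6a6e8fce6d001871cc8e'  # id from the test case above
    query = '{ viewer { clip(id: "%s") { title duration } } }' % clip_id
    req = Request(
        'https://proxy-base.master.mango.express/graphql',
        data=json.dumps({'query': query}).encode(),
        headers={'Content-Type': 'application/json'})
    clip = json.loads(urlopen(req).read().decode())['data']['viewer']['clip']
    print(clip['title'], clip['duration'])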

+ 138
- 62
youtube_dl/extractor/brightcove.py

@ -5,6 +5,7 @@ import re
import json import json
from .common import InfoExtractor from .common import InfoExtractor
from .adobepass import AdobePassIE
from ..compat import ( from ..compat import (
compat_etree_fromstring, compat_etree_fromstring,
compat_parse_qs, compat_parse_qs,
@ -17,6 +18,7 @@ from ..compat import (
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
ExtractorError, ExtractorError,
extract_attributes,
find_xpath_attr, find_xpath_attr,
fix_xml_ampersands, fix_xml_ampersands,
float_or_none, float_or_none,
@ -109,6 +111,7 @@ class BrightcoveLegacyIE(InfoExtractor):
'upload_date': '20140827', 'upload_date': '20140827',
'uploader_id': '710858724001', 'uploader_id': '710858724001',
}, },
'skip': 'Video gone',
}, },
{ {
# playlist with 'videoList' # playlist with 'videoList'
@ -129,6 +132,12 @@ class BrightcoveLegacyIE(InfoExtractor):
}, },
'playlist_mincount': 10, 'playlist_mincount': 10,
}, },
{
# playerID inferred from bcpid
# from http://www.un.org/chinese/News/story.asp?NewsID=27724
'url': 'https://link.brightcove.com/services/player/bcpid1722935254001/?bctid=5360463607001&autoStart=false&secureConnections=true&width=650&height=350',
'only_matching': True, # Tested in GenericIE
}
] ]
FLV_VCODECS = { FLV_VCODECS = {
1: 'SORENSON', 1: 'SORENSON',
@ -179,7 +188,7 @@ class BrightcoveLegacyIE(InfoExtractor):
params = {} params = {}
playerID = find_param('playerID')
playerID = find_param('playerID') or find_param('playerId')
if playerID is None: if playerID is None:
raise ExtractorError('Cannot find player ID') raise ExtractorError('Cannot find player ID')
params['playerID'] = playerID params['playerID'] = playerID
@ -191,6 +200,16 @@ class BrightcoveLegacyIE(InfoExtractor):
# These fields hold the id of the video # These fields hold the id of the video
videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList') videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList')
if videoPlayer is not None: if videoPlayer is not None:
if isinstance(videoPlayer, list):
videoPlayer = videoPlayer[0]
videoPlayer = videoPlayer.strip()
# UUID is also possible for videoPlayer (e.g.
# http://www.popcornflix.com/hoodies-vs-hooligans/7f2d2b87-bbf2-4623-acfb-ea942b4f01dd
# or http://www8.hp.com/cn/zh/home.html)
if not (re.match(
r'^(?:\d+|[\da-fA-F]{8}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{12})$',
videoPlayer) or videoPlayer.startswith('ref:')):
return None
params['@videoPlayer'] = videoPlayer params['@videoPlayer'] = videoPlayer
linkBase = find_param('linkBaseURL') linkBase = find_param('linkBaseURL')
if linkBase is not None: if linkBase is not None:
@ -204,7 +223,7 @@ class BrightcoveLegacyIE(InfoExtractor):
# // build Brightcove <object /> XML # // build Brightcove <object /> XML
# } # }
m = re.search( m = re.search(
r'''(?x)customBC.\createVideo\(
r'''(?x)customBC\.createVideo\(
.*? # skipping width and height .*? # skipping width and height
["\'](?P<playerID>\d+)["\']\s*,\s* # playerID ["\'](?P<playerID>\d+)["\']\s*,\s* # playerID
["\'](?P<playerKey>AQ[^"\']{48})[^"\']*["\']\s*,\s* # playerKey begins with AQ and is 50 characters ["\'](?P<playerKey>AQ[^"\']{48})[^"\']*["\']\s*,\s* # playerKey begins with AQ and is 50 characters
@ -254,9 +273,13 @@ class BrightcoveLegacyIE(InfoExtractor):
if matches: if matches:
return list(filter(None, [cls._build_brighcove_url(m) for m in matches])) return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
return list(filter(None, [
cls._build_brighcove_url_from_js(custom_bc)
for custom_bc in re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)]))
matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)
if matches:
return list(filter(None, [
cls._build_brighcove_url_from_js(custom_bc)
for custom_bc in matches]))
return [src for _, src in re.findall(
r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)]
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {}) url, smuggled_data = unsmuggle_url(url, {})
@ -273,6 +296,10 @@ class BrightcoveLegacyIE(InfoExtractor):
if videoPlayer: if videoPlayer:
# We set the original url as the default 'Referer' header # We set the original url as the default 'Referer' header
referer = smuggled_data.get('Referer', url) referer = smuggled_data.get('Referer', url)
if 'playerID' not in query:
mobj = re.search(r'/bcpid(\d+)', url)
if mobj is not None:
query['playerID'] = [mobj.group(1)]
return self._get_video_info( return self._get_video_info(
videoPlayer[0], query, referer=referer) videoPlayer[0], query, referer=referer)
elif 'playerKey' in query: elif 'playerKey' in query:
@ -422,7 +449,7 @@ class BrightcoveLegacyIE(InfoExtractor):
return info return info
class BrightcoveNewIE(InfoExtractor):
class BrightcoveNewIE(AdobePassIE):
IE_NAME = 'brightcove:new' IE_NAME = 'brightcove:new'
_VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>\d+|ref:[^&]+)' _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>\d+|ref:[^&]+)'
_TESTS = [{ _TESTS = [{
@ -437,7 +464,7 @@ class BrightcoveNewIE(InfoExtractor):
'timestamp': 1441391203, 'timestamp': 1441391203,
'upload_date': '20150904', 'upload_date': '20150904',
'uploader_id': '929656772001', 'uploader_id': '929656772001',
'formats': 'mincount:22',
'formats': 'mincount:20',
}, },
}, { }, {
# with rtmp streams # with rtmp streams
@ -451,7 +478,7 @@ class BrightcoveNewIE(InfoExtractor):
'timestamp': 1433556729, 'timestamp': 1433556729,
'upload_date': '20150606', 'upload_date': '20150606',
'uploader_id': '4036320279001', 'uploader_id': '4036320279001',
'formats': 'mincount:41',
'formats': 'mincount:39',
}, },
'params': { 'params': {
# m3u8 download # m3u8 download
@ -472,17 +499,18 @@ class BrightcoveNewIE(InfoExtractor):
}] }]
@staticmethod @staticmethod
def _extract_url(webpage):
urls = BrightcoveNewIE._extract_urls(webpage)
def _extract_url(ie, webpage):
urls = BrightcoveNewIE._extract_urls(ie, webpage)
return urls[0] if urls else None return urls[0] if urls else None
@staticmethod @staticmethod
def _extract_urls(webpage):
def _extract_urls(ie, webpage):
# Reference: # Reference:
# 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe # 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe
# 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript
# 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/embed-in-page.html
# 4. https://support.brightcove.com/en/video-cloud/docs/dynamically-assigning-videos-player
# 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#tag
# 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript
# 4. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/in-page-embed-player-implementation.html
# 5. https://support.brightcove.com/en/video-cloud/docs/dynamically-assigning-videos-player
entries = [] entries = []
@ -491,59 +519,52 @@ class BrightcoveNewIE(InfoExtractor):
r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage): r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
entries.append(url if url.startswith('http') else 'http:' + url) entries.append(url if url.startswith('http') else 'http:' + url)
# Look for embed_in_page embeds [2]
for video_id, account_id, player_id, embed in re.findall(
# According to examples from [3] it's unclear whether video id
# may be optional and what to do when it is
# According to [4] data-video-id may be prefixed with ref:
r'''(?sx)
<video[^>]+
data-video-id=["\'](\d+|ref:[^"\']+)["\'][^>]*>.*?
</video>.*?
<script[^>]+
src=["\'](?:https?:)?//players\.brightcove\.net/
(\d+)/([^/]+)_([^/]+)/index(?:\.min)?\.js
# Look for <video> tags [2] and embed_in_page embeds [3]
# [2] looks like:
for video, script_tag, account_id, player_id, embed in re.findall(
r'''(?isx)
(<video\s+[^>]*\bdata-video-id\s*=\s*['"]?[^>]+>)
(?:.*?
(<script[^>]+
src=["\'](?:https?:)?//players\.brightcove\.net/
(\d+)/([^/]+)_([^/]+)/index(?:\.min)?\.js
)
)?
''', webpage): ''', webpage):
entries.append(
'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
% (account_id, player_id, embed, video_id))
return entries
attrs = extract_attributes(video)
def _real_extract(self, url):
account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(
'http://players.brightcove.net/%s/%s_%s/index.min.js'
% (account_id, player_id, embed), video_id)
# According to examples from [4] it's unclear whether video id
# may be optional and what to do when it is
video_id = attrs.get('data-video-id')
if not video_id:
continue
policy_key = None
account_id = account_id or attrs.get('data-account')
if not account_id:
continue
catalog = self._search_regex(
r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
if catalog:
catalog = self._parse_json(
js_to_json(catalog), video_id, fatal=False)
if catalog:
policy_key = catalog.get('policyKey')
player_id = player_id or attrs.get('data-player') or 'default'
embed = embed or attrs.get('data-embed') or 'default'
bc_url = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' % (
account_id, player_id, embed, video_id)
# Some brightcove videos may be embedded with video tag only and
# without script tag or any mentioning of brightcove at all. Such
# embeds are considered ambiguous since they are matched based only
# on data-video-id and data-account attributes and in the wild may
# not be brightcove embeds at all. Let's check reconstructed
# brightcove URLs in case of such embeds and only process valid
# ones. By this we ensure there is indeed a brightcove embed.
if not script_tag and not ie._is_valid_url(
bc_url, video_id, 'possible brightcove video'):
continue
if not policy_key:
policy_key = self._search_regex(
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
webpage, 'policy key', group='pk')
entries.append(bc_url)
api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
try:
json_data = self._download_json(api_url, video_id, headers={
'Accept': 'application/json;pk=%s' % policy_key
})
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
raise ExtractorError(
json_data.get('message') or json_data['error_code'], expected=True)
raise
return entries
def _parse_brightcove_metadata(self, json_data, video_id):
title = json_data['name'].strip() title = json_data['name'].strip()
formats = [] formats = []
@ -626,7 +647,7 @@ class BrightcoveNewIE(InfoExtractor):
is_live = False is_live = False
duration = float_or_none(json_data.get('duration'), 1000) duration = float_or_none(json_data.get('duration'), 1000)
if duration and duration < 0:
if duration is not None and duration <= 0:
is_live = True is_live = True
return { return {
@ -636,9 +657,64 @@ class BrightcoveNewIE(InfoExtractor):
'thumbnail': json_data.get('thumbnail') or json_data.get('poster'), 'thumbnail': json_data.get('thumbnail') or json_data.get('poster'),
'duration': duration, 'duration': duration,
'timestamp': parse_iso8601(json_data.get('published_at')), 'timestamp': parse_iso8601(json_data.get('published_at')),
'uploader_id': account_id,
'uploader_id': json_data.get('account_id'),
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'tags': json_data.get('tags', []), 'tags': json_data.get('tags', []),
'is_live': is_live, 'is_live': is_live,
} }
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(
'http://players.brightcove.net/%s/%s_%s/index.min.js'
% (account_id, player_id, embed), video_id)
policy_key = None
catalog = self._search_regex(
r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
if catalog:
catalog = self._parse_json(
js_to_json(catalog), video_id, fatal=False)
if catalog:
policy_key = catalog.get('policyKey')
if not policy_key:
policy_key = self._search_regex(
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
webpage, 'policy key', group='pk')
api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
try:
json_data = self._download_json(api_url, video_id, headers={
'Accept': 'application/json;pk=%s' % policy_key
})
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
message = json_data.get('message') or json_data['error_code']
if json_data.get('error_subcode') == 'CLIENT_GEO':
self.raise_geo_restricted(msg=message)
raise ExtractorError(message, expected=True)
raise
errors = json_data.get('errors')
if errors and errors[0].get('error_subcode') == 'TVE_AUTH':
custom_fields = json_data['custom_fields']
tve_token = self._extract_mvpd_auth(
smuggled_data['source_url'], video_id,
custom_fields['bcadobepassrequestorid'],
custom_fields['bcadobepassresourceid'])
json_data = self._download_json(
api_url, video_id, headers={
'Accept': 'application/json;pk=%s' % policy_key
}, query={
'tveToken': tve_token,
})
return self._parse_brightcove_metadata(json_data, video_id)
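
After the policy key has been scraped from the player's index.min.js (via the catalog(...)/policyKey regexes above), the actual metadata comes from the Playback API with the key passed in the Accept header. Standalone, with the account id from the test above, an illustrative video id, and a placeholder key:

    import json
    from urllib.request import Request, urlopen

    account_id, video_id = '929656772001', '4463358922001'
    policy_key = 'BCpk-...'  # placeholder; the real key is scraped from index.min.js
    req = Request(
        'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s'
        % (account_id, video_id),
        headers={'Accept': 'application/json;pk=%s' % policy_key})
    json_data = json.loads(urlopen(req).read().decode())
    print(json_data['name'])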

+ 4
- 3
youtube_dl/extractor/buzzfeed.py

@ -84,9 +84,10 @@ class BuzzFeedIE(InfoExtractor):
continue continue
entries.append(self.url_result(video['url'])) entries.append(self.url_result(video['url']))
facebook_url = FacebookIE._extract_url(webpage)
if facebook_url:
entries.append(self.url_result(facebook_url))
facebook_urls = FacebookIE._extract_urls(webpage)
entries.extend([
self.url_result(facebook_url)
for facebook_url in facebook_urls])
return { return {
'_type': 'playlist', '_type': 'playlist',


+ 18
- 53
youtube_dl/extractor/byutv.py

@ -3,20 +3,19 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError
class BYUtvIE(InfoExtractor): class BYUtvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?'
_VALID_URL = r'https?://(?:www\.)?byutv\.org/(?:watch|player)/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?'
_TESTS = [{ _TESTS = [{
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5', 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
'info_dict': { 'info_dict': {
'id': '6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
'id': 'ZvanRocTpW-G5_yZFeltTAMv6jxOU9KH',
'display_id': 'studio-c-season-5-episode-5', 'display_id': 'studio-c-season-5-episode-5',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Season 5 Episode 5', 'title': 'Season 5 Episode 5',
'description': 'md5:e07269172baff037f8e8bf9956bc9747',
'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:1d31dc18ef4f075b28f6a65937d22c65',
'thumbnail': r're:^https?://.*',
'duration': 1486.486, 'duration': 1486.486,
}, },
'params': { 'params': {
@ -26,6 +25,9 @@ class BYUtvIE(InfoExtractor):
}, { }, {
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d', 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.byutv.org/player/27741493-dc83-40b0-8420-e7ae38a2ae98/byu-football-toledo-vs-byu-93016?listid=4fe0fee5-0d3c-4a29-b725-e4948627f472&listindex=0&q=toledo',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -33,16 +35,16 @@ class BYUtvIE(InfoExtractor):
video_id = mobj.group('id') video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id display_id = mobj.group('display_id') or video_id
webpage = self._download_webpage(url, display_id)
episode_code = self._search_regex(
r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information')
ep = self._parse_json(
episode_code, display_id, transform_source=lambda s:
re.sub(r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', s))
if ep['providerType'] != 'Ooyala':
raise ExtractorError('Unsupported provider %s' % ep['provider'])
ep = self._download_json(
'https://api.byutv.org/api3/catalog/getvideosforcontent', video_id,
query={
'contentid': video_id,
'channel': 'byutv',
'x-byutv-context': 'web$US',
}, headers={
'x-byutv-context': 'web$US',
'x-byutv-platformkey': 'xsaaw9c7y5',
})['ooyalaVOD']
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
@ -50,44 +52,7 @@ class BYUtvIE(InfoExtractor):
'url': 'ooyala:%s' % ep['providerId'], 'url': 'ooyala:%s' % ep['providerId'],
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'title': ep['title'],
'title': ep.get('title'),
'description': ep.get('description'), 'description': ep.get('description'),
'thumbnail': ep.get('imageThumbnail'), 'thumbnail': ep.get('imageThumbnail'),
} }
class BYUtvEventIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/event/(?P<id>[0-9a-f-]+)'
_TEST = {
'url': 'http://www.byutv.org/watch/event/29941b9b-8bf6-48d2-aebf-7a87add9e34b',
'info_dict': {
'id': '29941b9b-8bf6-48d2-aebf-7a87add9e34b',
'ext': 'mp4',
'title': 'Toledo vs. BYU (9/30/16)',
},
'params': {
'skip_download': True,
},
'add_ie': ['Ooyala'],
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
ooyala_id = self._search_regex(
r'providerId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
webpage, 'ooyala id', group='id')
title = self._search_regex(
r'class=["\']description["\'][^>]*>\s*<h1>([^<]+)</h1>', webpage,
'title').strip()
return {
'_type': 'url_transparent',
'ie_key': 'Ooyala',
'url': 'ooyala:%s' % ooyala_id,
'id': video_id,
'title': title,
}
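
The rewritten BYUtv extractor no longer parses the watch page; it asks the catalog API for the Ooyala provider id directly, with the context and platform key sent as query values and headers as shown above. The same request issued standalone (video id from the Studio C test):

    import json
    from urllib.parse import urlencode
    from urllib.request import Request, urlopen

    video_id = '6587b9a3-89d2-42a6-a7f7-fd2f81840a7d'
    qs = urlencode({
        'contentid': video_id,
        'channel': 'byutv',
        'x-byutv-context': 'web$US',
    })
    req = Request(
        'https://api.byutv.org/api3/catalog/getvideosforcontent?' + qs,
        headers={'x-byutv-context': 'web$US', 'x-byutv-platformkey': 'xsaaw9c7y5'})
    ep = json.loads(urlopen(req).read().decode())['ooyalaVOD']
    print('ooyala:%s' % ep['providerId'])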

+ 1
- 4
youtube_dl/extractor/canalc2.py

@ -16,13 +16,10 @@ class Canalc2IE(InfoExtractor):
'md5': '060158428b650f896c542dfbb3d6487f', 'md5': '060158428b650f896c542dfbb3d6487f',
'info_dict': { 'info_dict': {
'id': '12163', 'id': '12163',
'ext': 'flv',
'ext': 'mp4',
'title': 'Terrasses du Numérique', 'title': 'Terrasses du Numérique',
'duration': 122, 'duration': 122,
}, },
'params': {
'skip_download': True, # Requires rtmpdump
}
}, { }, {
'url': 'http://archives-canalc2.u-strasbg.fr/video.asp?idVideo=11427&voir=oui', 'url': 'http://archives-canalc2.u-strasbg.fr/video.asp?idVideo=11427&voir=oui',
'only_matching': True, 'only_matching': True,


+ 40
- 20
youtube_dl/extractor/canalplus.py

@ -7,8 +7,8 @@ from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlparse from ..compat import compat_urllib_parse_urlparse
from ..utils import ( from ..utils import (
dict_get, dict_get,
ExtractorError,
HEADRequest,
# ExtractorError,
# HEADRequest,
int_or_none, int_or_none,
qualities, qualities,
remove_end, remove_end,
@ -27,6 +27,7 @@ class CanalplusIE(InfoExtractor):
(?:www\.)?d8\.tv| (?:www\.)?d8\.tv|
(?:www\.)?c8\.fr| (?:www\.)?c8\.fr|
(?:www\.)?d17\.tv| (?:www\.)?d17\.tv|
(?:(?:football|www)\.)?cstar\.fr|
(?:www\.)?itele\.fr (?:www\.)?itele\.fr
)/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?| )/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?|
player\.canalplus\.fr/#/(?P<id>\d+) player\.canalplus\.fr/#/(?P<id>\d+)
@ -40,9 +41,13 @@ class CanalplusIE(InfoExtractor):
'd8': 'd8', 'd8': 'd8',
'c8': 'd8', 'c8': 'd8',
'd17': 'd17', 'd17': 'd17',
'cstar': 'd17',
'itele': 'itele', 'itele': 'itele',
} }
# Only works for direct mp4 URLs
_GEO_COUNTRIES = ['FR']
_TESTS = [{ _TESTS = [{
'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814', 'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814',
'info_dict': { 'info_dict': {
@ -54,6 +59,7 @@ class CanalplusIE(InfoExtractor):
'upload_date': '20160702', 'upload_date': '20160702',
}, },
}, { }, {
# geo restricted, bypassed
'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190', 'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',
'info_dict': { 'info_dict': {
'id': '1108190', 'id': '1108190',
@ -63,19 +69,20 @@ class CanalplusIE(InfoExtractor):
'description': 'md5:4cea7a37153be42c1ba2c1d3064376ff', 'description': 'md5:4cea7a37153be42c1ba2c1d3064376ff',
'upload_date': '20140724', 'upload_date': '20140724',
}, },
'skip': 'Only works from France',
'expected_warnings': ['HTTP Error 403: Forbidden'],
}, { }, {
'url': 'http://www.c8.fr/c8-divertissement/ms-touche-pas-a-mon-poste/pid6318-videos-integrales.html',
'md5': '4b47b12b4ee43002626b97fad8fb1de5',
# geo restricted, bypassed
'url': 'http://www.c8.fr/c8-divertissement/ms-touche-pas-a-mon-poste/pid6318-videos-integrales.html?vid=1443684',
'md5': 'bb6f9f343296ab7ebd88c97b660ecf8d',
'info_dict': { 'info_dict': {
'id': '1420213',
'id': '1443684',
'display_id': 'pid6318-videos-integrales', 'display_id': 'pid6318-videos-integrales',
'ext': 'mp4', 'ext': 'mp4',
'title': 'TPMP ! Même le matin - Les 35H de Baba - 14/10/2016',
'description': 'md5:f96736c1b0ffaa96fd5b9e60ad871799',
'upload_date': '20161014',
'title': 'Guess my iep ! - TPMP - 07/04/2017',
'description': 'md5:6f005933f6e06760a9236d9b3b5f17fa',
'upload_date': '20170407',
}, },
'skip': 'Only works from France',
'expected_warnings': ['HTTP Error 403: Forbidden'],
}, { }, {
'url': 'http://www.itele.fr/chroniques/invite-michael-darmon/rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510', 'url': 'http://www.itele.fr/chroniques/invite-michael-darmon/rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',
'info_dict': { 'info_dict': {
@ -86,6 +93,19 @@ class CanalplusIE(InfoExtractor):
'description': 'Chaque matin du lundi au vendredi, Michaël Darmon reçoit un invité politique à 8h25.', 'description': 'Chaque matin du lundi au vendredi, Michaël Darmon reçoit un invité politique à 8h25.',
'upload_date': '20161014', 'upload_date': '20161014',
}, },
}, {
'url': 'http://football.cstar.fr/cstar-minisite-foot/pid7566-feminines-videos.html?vid=1416769',
'info_dict': {
'id': '1416769',
'display_id': 'pid7566-feminines-videos',
'ext': 'mp4',
'title': 'France - Albanie : les temps forts de la soirée - 20/09/2016',
'description': 'md5:c3f30f2aaac294c1c969b3294de6904e',
'upload_date': '20160921',
},
'params': {
'skip_download': True,
},
}, { }, {
'url': 'http://m.canalplus.fr/?vid=1398231', 'url': 'http://m.canalplus.fr/?vid=1398231',
'only_matching': True, 'only_matching': True,
@ -107,7 +127,7 @@ class CanalplusIE(InfoExtractor):
[r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)', [r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)',
r'id=["\']canal_video_player(?P<id>\d+)', r'id=["\']canal_video_player(?P<id>\d+)',
r'data-video=["\'](?P<id>\d+)'], r'data-video=["\'](?P<id>\d+)'],
webpage, 'video id', group='id')
webpage, 'video id', default=mobj.group('vid'), group='id')
info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id) info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id)
video_data = self._download_json(info_url, video_id, 'Downloading video JSON') video_data = self._download_json(info_url, video_id, 'Downloading video JSON')
@ -119,15 +139,15 @@ class CanalplusIE(InfoExtractor):
preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD']) preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD'])
fmt_url = next(iter(media.get('VIDEOS')))
if '/geo' in fmt_url.lower():
response = self._request_webpage(
HEADRequest(fmt_url), video_id,
'Checking if the video is georestricted')
if '/blocage' in response.geturl():
raise ExtractorError(
'The video is not available in your country',
expected=True)
# _, fmt_url = next(iter(media['VIDEOS'].items()))
# if '/geo' in fmt_url.lower():
# response = self._request_webpage(
# HEADRequest(fmt_url), video_id,
# 'Checking if the video is georestricted')
# if '/blocage' in response.geturl():
# raise ExtractorError(
# 'The video is not available in your country',
# expected=True)
formats = [] formats = []
for format_id, format_url in media['VIDEOS'].items(): for format_id, format_url in media['VIDEOS'].items():


Some files were not shown because too many files changed in this diff
