Browse Source

[washingtonpost] Add support for embeds (closes #12699)

master-ytdl-org
John Hawkinson 8 years ago
committed by Sergey M․
parent
commit
557194591a
No known key found for this signature in database GPG Key ID: 2C393E0F18A9236D
2 changed files with 27 additions and 0 deletions
  1. +21
    -0
      youtube_dl/extractor/generic.py
  2. +6
    -0
      youtube_dl/extractor/washingtonpost.py

+ 21
- 0
youtube_dl/extractor/generic.py View File

@ -87,6 +87,7 @@ from .videopress import VideoPressIE
from .rutube import RutubeIE from .rutube import RutubeIE
from .limelight import LimelightBaseIE from .limelight import LimelightBaseIE
from .anvato import AnvatoIE from .anvato import AnvatoIE
from .washingtonpost import WashingtonPostIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -1687,6 +1688,20 @@ class GenericIE(InfoExtractor):
}, },
'playlist_mincount': 4, 'playlist_mincount': 4,
}, },
{
# WashingtonPost embed
'url': 'http://www.vanityfair.com/hollywood/2017/04/donald-trump-tv-pitches',
'info_dict': {
'id': '8caf6e88-d0ec-11e5-90d3-34c2c42653ac',
'ext': 'mp4',
'title': "No one has seen the drama series based on Trump's life \u2014 until now",
'description': 'Donald Trump wanted a weekly TV drama based on his life. It never aired. But The Washington Post recently obtained a scene from the pilot script — and enlisted actors.',
'timestamp': 1455216756,
'uploader': 'The Washington Post',
'upload_date': '20160211',
},
'add_ie': [WashingtonPostIE.ie_key()],
},
# { # {
# # TODO: find another test # # TODO: find another test
# # http://schema.org/VideoObject # # http://schema.org/VideoObject
@ -2670,6 +2685,12 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches( return self.playlist_from_matches(
rutube_urls, ie=RutubeIE.ie_key()) rutube_urls, ie=RutubeIE.ie_key())
# Look for WashingtonPost embeds
wapo_urls = WashingtonPostIE._extract_urls(webpage)
if wapo_urls:
return self.playlist_from_matches(
wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
# Looking for http://schema.org/VideoObject # Looking for http://schema.org/VideoObject
json_ld = self._search_json_ld( json_ld = self._search_json_ld(
webpage, video_id, default={}, expected_type='VideoObject') webpage, video_id, default={}, expected_type='VideoObject')


+ 6
- 0
youtube_dl/extractor/washingtonpost.py View File

@ -13,6 +13,7 @@ from ..utils import (
class WashingtonPostIE(InfoExtractor): class WashingtonPostIE(InfoExtractor):
IE_NAME = 'washingtonpost' IE_NAME = 'washingtonpost'
_VALID_URL = r'(?:washingtonpost:|https?://(?:www\.)?washingtonpost\.com/video/(?:[^/]+/)*)(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' _VALID_URL = r'(?:washingtonpost:|https?://(?:www\.)?washingtonpost\.com/video/(?:[^/]+/)*)(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
_EMBED_URL = 'https?://(?:www\.)?washingtonpost\.com/video/c/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
_TEST = { _TEST = {
'url': 'https://www.washingtonpost.com/video/c/video/480ba4ee-1ec7-11e6-82c2-a7dcb313287d', 'url': 'https://www.washingtonpost.com/video/c/video/480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
'md5': '6f537e1334b714eb15f9563bd4b9cdfa', 'md5': '6f537e1334b714eb15f9563bd4b9cdfa',
@ -27,6 +28,11 @@ class WashingtonPostIE(InfoExtractor):
}, },
} }
@classmethod
def _extract_urls(cls, webpage):
return re.findall(
r'<iframe[^>]+\bsrc=["\'](%s)' % cls._EMBED_URL, webpage)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
video_data = self._download_json( video_data = self._download_json(


Loading…
Cancel
Save