Browse Source

[cnn] Add an extractor for blogs (closes #2361)

totalwebcasting
Jaime Marquínez Ferrándiz 11 years ago
parent
commit
0ae6b01937
2 changed files with 30 additions and 1 deletion
  1. +4
    -1
      youtube_dl/extractor/__init__.py
  2. +26
    -0
      youtube_dl/extractor/cnn.py

+ 4
- 1
youtube_dl/extractor/__init__.py View File

@ -32,7 +32,10 @@ from .clipfish import ClipfishIE
from .cliphunter import CliphunterIE from .cliphunter import CliphunterIE
from .clipsyndicate import ClipsyndicateIE from .clipsyndicate import ClipsyndicateIE
from .cmt import CMTIE from .cmt import CMTIE
from .cnn import CNNIE
from .cnn import (
CNNIE,
CNNBlogsIE,
)
from .collegehumor import CollegeHumorIE from .collegehumor import CollegeHumorIE
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
from .condenast import CondeNastIE from .condenast import CondeNastIE


+ 26
- 0
youtube_dl/extractor/cnn.py View File

@ -6,6 +6,7 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
parse_duration, parse_duration,
url_basename,
) )
@ -98,3 +99,28 @@ class CNNIE(InfoExtractor):
'duration': duration, 'duration': duration,
'upload_date': upload_date, 'upload_date': upload_date,
} }
class CNNBlogsIE(InfoExtractor):
    """Extractor for pages hosted under ``<name>.blogs.cnn.com``.

    The blog page is downloaded and searched for a ``data-url="..."``
    attribute. The captured value is returned as a ``url``-type result
    tagged with ``CNNIE``'s extractor key.
    """

    # Any sub-domain of blogs.cnn.com followed by a non-empty path.
    _VALID_URL = r'https?://[^\.]+\.blogs\.cnn\.com/.+'

    _TEST = {
        'url': 'http://reliablesources.blogs.cnn.com/2014/02/09/criminalizing-journalism/',
        'md5': '3e56f97b0b6ffb4b79f4ea0749551084',
        'info_dict': {
            'id': 'bestoftv/2014/02/09/criminalizing-journalism.cnn',
            'ext': 'mp4',
            'title': 'Criminalizing journalism?',
            'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.',
            'upload_date': '20140209',
        },
        'add_ie': ['CNN'],
    }

    def _real_extract(self, url):
        """Return a ``url``-type result for the ``data-url`` found on the page.

        The value of ``url_basename(url)`` is passed as the second argument
        to ``_download_webpage`` when fetching the page.
        """
        page_id = url_basename(url)
        page_html = self._download_webpage(url, page_id)

        # Only the first capture group (the attribute value) is kept.
        target_url = self._html_search_regex(
            r'data-url="(.+?)"', page_html, 'cnn url')

        result = {'_type': 'url'}
        result['url'] = target_url
        result['ie_key'] = CNNIE.ie_key()
        return result

Loading…
Cancel
Save