Browse Source

[yahoo] Add an extractor for yahoo news (closes #1849)

totalwebcasting
Jaime Marquínez Ferrándiz 11 years ago
parent
commit
befd88b786
2 changed files with 38 additions and 2 deletions
  1. +5
    -1
      youtube_dl/extractor/__init__.py
  2. +33
    -1
      youtube_dl/extractor/yahoo.py

+ 5
- 1
youtube_dl/extractor/__init__.py View File

@ -172,7 +172,11 @@ from .xhamster import XHamsterIE
from .xnxx import XNXXIE from .xnxx import XNXXIE
from .xvideos import XVideosIE from .xvideos import XVideosIE
from .xtube import XTubeIE from .xtube import XTubeIE
from .yahoo import YahooIE, YahooSearchIE
from .yahoo import (
YahooIE,
YahooNewsIE,
YahooSearchIE,
)
from .youjizz import YouJizzIE from .youjizz import YouJizzIE
from .youku import YoukuIE from .youku import YoukuIE
from .youporn import YouPornIE from .youporn import YouPornIE


+ 33
- 1
youtube_dl/extractor/yahoo.py View File

@ -53,8 +53,11 @@ class YahooIE(InfoExtractor):
# The 'meta' field is not always in the video webpage, we request it # The 'meta' field is not always in the video webpage, we request it
# from another page # from another page
long_id = info['id'] long_id = info['id']
return self._get_info(info['id'], video_id)
def _get_info(self, long_id, video_id):
query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"' query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"'
' AND plrs="86Gj0vCaSzV_Iuf6hNylf2"' % long_id)
' AND plrs="86Gj0vCaSzV_Iuf6hNylf2" AND region="US"' % long_id)
data = compat_urllib_parse.urlencode({ data = compat_urllib_parse.urlencode({
'q': query, 'q': query,
'env': 'prod', 'env': 'prod',
@ -100,6 +103,35 @@ class YahooIE(InfoExtractor):
} }
class YahooNewsIE(YahooIE):
    """Extractor for video pages under http://news.yahoo.com/video/.

    The page URL only carries a short numeric id.  The long content id is
    scraped from the page markup and both ids are then handed to the
    ``_get_info`` helper inherited from ``YahooIE``.
    """
    IE_NAME = 'yahoo:news'
    # The id must contain at least one digit: a pattern that also accepts
    # zero digits would let URLs such as ".../video/foo-.html" through with
    # an empty video id.
    _VALID_URL = r'http://news\.yahoo\.com/video/.*?-(?P<id>\d+)\.html'
    _TEST = {
        u'url': u'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
        u'info_dict': {
            u'id': u'104538833',
            u'ext': u'flv',
            u'title': u'China Moses Is Crazy About the Blues',
            u'description': u'md5:9900ab8cd5808175c7b3fe55b979bed0',
        },
        u'params': {
            # Requires rtmpdump
            u'skip_download': True,
        },
    }

    # Overwrite YahooIE properties we don't want
    _TESTS = []

    def _real_extract(self, url):
        """Return the info for the news video at *url*.

        The short id comes from the URL; the long id is read from the
        ``contentId`` entry found in the downloaded page.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        long_id = self._search_regex(r'contentId: \'(.+?)\',', webpage, u'long id')
        return self._get_info(long_id, video_id)
class YahooSearchIE(SearchInfoExtractor): class YahooSearchIE(SearchInfoExtractor):
IE_DESC = u'Yahoo screen search' IE_DESC = u'Yahoo screen search'
_MAX_RESULTS = 1000 _MAX_RESULTS = 1000


Loading…
Cancel
Save