Browse Source

[nba] extract all video formats and extract more info

totalwebcasting
remitamine 9 years ago
parent
commit
8fc226ef99
2 changed files with 74 additions and 33 deletions
  1. +4
    -1
      youtube_dl/extractor/__init__.py
  2. +70
    -32
      youtube_dl/extractor/nba.py

+ 4
- 1
youtube_dl/extractor/__init__.py View File

@ -351,7 +351,10 @@ from .myvideo import MyVideoIE
from .myvidster import MyVidsterIE from .myvidster import MyVidsterIE
from .nationalgeographic import NationalGeographicIE from .nationalgeographic import NationalGeographicIE
from .naver import NaverIE from .naver import NaverIE
from .nba import NBAIE
from .nba import (
NBAIE,
NBAWatchIE,
)
from .nbc import ( from .nbc import (
NBCIE, NBCIE,
NBCNewsIE, NBCNewsIE,


+ 70
- 32
youtube_dl/extractor/nba.py View File

@ -2,62 +2,100 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
remove_end,
parse_duration, parse_duration,
parse_iso8601,
int_or_none,
) )
class NBAIE(InfoExtractor):
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)/?(?:/index\.html)?(?:\?.*)?$'
class NBABaseIE(InfoExtractor):
    """Shared extraction flow for the NBA extractors.

    ``_real_extract`` calls ``self._extract_metadata(webpage, video_id)``,
    which this class does not define, and then adds the ``id`` and
    ``formats`` entries to the dict that call returns.
    """

    def _get_formats(self, video_id):
        """Build the list of MP4 format dicts for *video_id*.

        Every URL is the fixed CDN prefix, followed by *video_id*, followed
        by a per-rendition suffix. The URLs are only constructed here; this
        method performs no request.
        """
        prefix = 'http://nba.cdn.turner.com/nba/big%s' % video_id
        # (URL suffix, width, height), listed in the order they are returned.
        renditions = (
            ('_nba_android_high.mp4', 480, 320),
            ('_640x360_664b.mp4', 640, 360),
            ('_768x432_1404.mp4', 768, 432),
            ('_1280x720.mp4', 1280, 720),
        )
        return [{
            'url': prefix + suffix,
            'width': width,
            'height': height,
            'format_id': '%dp' % height,
        } for suffix, width, height in renditions]

    def _real_extract(self, url):
        """Download the page at *url* and assemble the info dict."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        info = self._extract_metadata(webpage, video_id)
        info.update({
            # Only the last '/'-separated component is reported as the id;
            # the full matched value is still used to build the format URLs.
            'id': video_id.rsplit('/', 1)[-1],
            'formats': self._get_formats(video_id),
        })
        return info
class NBAIE(NBABaseIE):
    """Extractor for video pages on nba.com (with or without ``www.``)."""

    IE_NAME = 'nba'
    _VALID_URL = r'https?://(?:www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)/?(?:/index\.html)?(?:\?.*)?$'
    _TESTS = [{
        'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
        'md5': '9d902940d2a127af3f7f9d2f3dc79c96',
        'info_dict': {
            'id': '0021200253-okc-bkn-recap.nba',
            'ext': 'mp4',
            'title': 'Thunder vs. Nets',
            'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
            'duration': 181,
            'timestamp': 1354680189,
            'upload_date': '20121205',
        },
    }, {
        'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
        'only_matching': True,
    }]

    def _extract_metadata(self, webpage, video_id):
        """Collect the metadata fields for a page via ``_html_search_meta``.

        *video_id* is accepted for signature parity but is not used here.
        Returns a dict with ``title``, ``description``, ``duration``,
        ``thumbnail`` and ``timestamp`` keys.
        """
        def meta(name):
            # Every field is looked up by name in the same downloaded page.
            return self._html_search_meta(name, webpage)

        return {
            'title': meta('name'),
            'description': meta('description'),
            'duration': parse_duration(meta('duration')),
            'thumbnail': meta('thumbnailUrl'),
            'timestamp': parse_iso8601(meta('uploadDate')),
        }
class NBAWatchIE(NBABaseIE):
    """Extractor for video pages on watch.nba.com.

    Metadata is not read from the page markup: the page only supplies a
    numeric ``programId``, which is then used to query a JSON endpoint.
    """

    IE_NAME = 'nba:watch'
    # The dot after "watch" is escaped so that only the literal host
    # "watch.nba.com" matches, not any character in that position.
    _VALID_URL = r'https?://watch\.nba\.com/(?:nba/)?video(?P<id>/[^?]*?)/?(?:/index\.html)?(?:\?.*)?$'
    _TESTS = [{
        'url': 'http://watch.nba.com/nba/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
        'md5': 'b2b39b81cf28615ae0c3360a3f9668c4',
        'info_dict': {
            'id': '0041400301-cle-atl-recap.nba',
            'ext': 'mp4',
            'title': 'Hawks vs. Cavaliers Game 1',
            'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',
            'duration': 228,
            'timestamp': 1432094400,
            'upload_date': '20150520',
        }
    }]

    def _extract_metadata(self, webpage, video_id):
        """Look up the video's metadata through the JSON endpoint.

        The ``programId`` JavaScript variable is pulled out of *webpage* and
        appended to the query URL; the first entry of ``match.docs`` in the
        response is used. ``name`` is required in that entry, the remaining
        fields are read with ``.get`` and so may be absent.

        Returns a dict with ``title``, ``description``, ``duration``,
        ``thumbnail`` and ``timestamp`` keys.
        """
        program_id = self._search_regex(
            r'var\s+programId\s*=\s*(\d+);', webpage, 'program id')
        metadata = self._download_json(
            'http://smbsolr.cdnak.neulion.com/solr_nbav6/nba/nba/mlt/?wt=json&fl=name,description,image,runtime,releaseDate&q=sequence%3A' + program_id,
            video_id)['match']['docs'][0]
        return {
            'title': metadata['name'],
            'description': metadata.get('description'),
            'duration': int_or_none(metadata.get('runtime')),
            'thumbnail': metadata.get('image'),
            'timestamp': parse_iso8601(metadata.get('releaseDate')),
        }

Loading…
Cancel
Save