Browse Source

[aftonbladet] add extractor for aftonbladet.se

totalwebcasting
Mattias Harrysson 11 years ago
parent
commit
27865b2169
2 changed files with 67 additions and 0 deletions
  1. +1
    -0
      youtube_dl/extractor/__init__.py
  2. +66
    -0
      youtube_dl/extractor/aftonbladet.py

+ 1
- 0
youtube_dl/extractor/__init__.py View File

@ -1,5 +1,6 @@
from .academicearth import AcademicEarthCourseIE from .academicearth import AcademicEarthCourseIE
from .addanime import AddAnimeIE from .addanime import AddAnimeIE
from .aftonbladet import AftonbladetIE
from .anitube import AnitubeIE from .anitube import AnitubeIE
from .aparat import AparatIE from .aparat import AparatIE
from .appletrailers import AppleTrailersIE from .appletrailers import AppleTrailersIE


+ 66
- 0
youtube_dl/extractor/aftonbladet.py View File

@ -0,0 +1,66 @@
# encoding: utf-8
from __future__ import unicode_literals
import datetime
import re
from .common import InfoExtractor
class AftonbladetIE(InfoExtractor):
    """Extractor for videos published on tv.aftonbladet.se.

    The article page embeds an internal id (``data-aptomaId``). That id is
    used to fetch a JSON metadata document, whose ``videoId`` is in turn used
    to fetch a second JSON document listing the available MP4 renditions.
    """

    _VALID_URL = r'^http://tv\.aftonbladet\.se/webbtv.+(?P<video_id>article\d+)\.ab$'
    _TEST = {
        'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab',
        'info_dict': {
            'id': 'article36015',
            'ext': 'mp4',
            'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna',
            'description': 'Jupiters måne mest aktiv av alla himlakroppar',
            'upload_date': '20140306',
        },
    }

    def _real_extract(self, url):
        """Return a one-element list holding the info dict for ``url``.

        ``url`` must match ``_VALID_URL``. ``title``, ``videoId`` and the
        format list are required and raise ``KeyError`` when absent; the
        remaining metadata fields are optional and come back as ``None``
        when the metadata document does not carry them.
        """
        # _VALID_URL is anchored with '^', so match() is equivalent to the
        # search() it replaces.
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('video_id')
        webpage = self._download_webpage(url, video_id)

        # find internal video meta data
        META_URL = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json'
        internal_meta_id = self._html_search_regex(
            r'data-aptomaId="([\w\d]+)"', webpage, 'internal_meta_id')
        internal_meta_url = META_URL % internal_meta_id
        internal_meta_json = self._download_json(
            internal_meta_url, video_id, 'Downloading video meta data')

        # find internal video formats
        FORMATS_URL = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s'
        internal_video_id = internal_meta_json['videoId']
        internal_formats_url = FORMATS_URL % internal_video_id
        internal_formats_json = self._download_json(
            internal_formats_url, video_id, 'Downloading video formats')

        self.report_extraction(video_id)

        # The renditions are appended in the reverse of the order the service
        # lists them.
        # NOTE(review): presumably the service lists best quality first and
        # the caller expects worst-to-best -- confirm.
        formats = []
        for fmt in reversed(internal_formats_json['formats']['http']['pseudostreaming']['mp4']):
            # Only the first advertised path of each rendition is used.
            p = fmt['paths'][0]
            formats.append({
                'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']),
                'ext': 'mp4',
                'width': fmt['width'],
                'height': fmt['height'],
                'tbr': fmt['bitrate'],
                'protocol': 'http',
            })

        # Convert in UTC rather than local time so the resulting date does
        # not depend on the timezone of the machine running the extractor.
        # NOTE(review): assumes timePublished is a UNIX timestamp in seconds.
        upload_date = None
        time_published = internal_meta_json.get('timePublished')
        if time_published is not None:
            timestamp = datetime.datetime.utcfromtimestamp(time_published)
            upload_date = timestamp.strftime('%Y%m%d')

        # Optional fields are read with .get() so that a missing key in the
        # metadata document does not abort an otherwise successful extraction.
        return [{
            'id': video_id,
            'title': internal_meta_json['title'],
            'formats': formats,
            'thumbnail': internal_meta_json.get('imageUrl'),
            'description': internal_meta_json.get('shortPreamble'),
            'upload_date': upload_date,
            'duration': internal_meta_json.get('duration'),
            'view_count': internal_meta_json.get('views'),
        }]

Loading…
Cancel
Save