Browse Source

[20min.ch] Add new extractor (closes #5977)

totalwebcasting
pingtux 9 years ago
committed by Sergey M․
parent
commit
e54c44eeab
2 changed files with 41 additions and 0 deletions
  1. +1
    -0
      youtube_dl/extractor/__init__.py
  2. +40
    -0
      youtube_dl/extractor/min20.py

+ 1
- 0
youtube_dl/extractor/__init__.py View File

@ -364,6 +364,7 @@ from .mdr import MDRIE
from .metacafe import MetacafeIE from .metacafe import MetacafeIE
from .metacritic import MetacriticIE from .metacritic import MetacriticIE
from .mgoon import MgoonIE from .mgoon import MgoonIE
from .min20 import Min20IE
from .minhateca import MinhatecaIE from .minhateca import MinhatecaIE
from .ministrygrid import MinistryGridIE from .ministrygrid import MinistryGridIE
from .miomio import MioMioIE from .miomio import MioMioIE


+ 40
- 0
youtube_dl/extractor/min20.py View File

@ -0,0 +1,40 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class Min20IE(InfoExtractor):
_VALID_URL = r'http://www\.20min\.ch/.+?-(?P<id>[0-9]+)$'
_TEST = {
'url': 'http://www.20min.ch/schweiz/news/story/-Wir-muessen-mutig-nach-vorne-schauen--22050469',
'md5': 'cd4cbb99b94130cff423e967cd275e5e',
'info_dict': {
'id': '22050469',
'ext': 'flv',
'title': '«Wir müssen mutig nach vorne schauen»',
'description': 'Kein Land sei innovativer als die Schweiz, sagte Johann Schneider-Ammann in seiner Neujahrsansprache. Das Land müsse aber seine Hausaufgaben machen.',
'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg'
}
}
# location of the flv videos, can't be extracted from the web page
_BASE_URL = "http://flv-rr.20min-tv.ch/videos/"
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<h1><span>(.+?)</span></h1>', webpage, 'title')
flash_id = self._search_regex(r"so\.addVariable\(\"file1\",\"([0-9]+)\"\)", webpage, 'flash_id')
description = self._html_search_regex(r'<meta name="description" content="(.+?)" />', webpage, 'description')
thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.+?)" />', webpage, 'thumbnail')
url = self._BASE_URL + flash_id + "m.flv"
return {
'id': video_id,
'url': url,
'title': title,
'description': description,
'thumbnail': thumbnail
}

Loading…
Cancel
Save