Browse Source

[vrak] Add extractor

master-ytdl-org
Olivier Bilodeau 8 years ago
committed by Sergey M․
parent
commit
cbb127568a
No known key found for this signature in database GPG Key ID: 2C393E0F18A9236D
2 changed files with 69 additions and 0 deletions
  1. +1
    -0
      youtube_dl/extractor/extractors.py
  2. +68
    -0
      youtube_dl/extractor/vrak.py

+ 1
- 0
youtube_dl/extractor/extractors.py View File

@ -1165,6 +1165,7 @@ from .voicerepublic import VoiceRepublicIE
from .voxmedia import VoxMediaIE
from .vporn import VpornIE
from .vrt import VRTIE
from .vrak import VrakIE
from .vube import VubeIE
from .vuclip import VuClipIE
from .vvvvid import VVVVIDIE


+ 68
- 0
youtube_dl/extractor/vrak.py View File

@ -0,0 +1,68 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .brightcove import BrightcoveNewIE
class VrakIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?vrak\.tv/videos\?.*?target=(?P<id>[0-9\.]+).*'
_TEST = {
'url': 'http://www.vrak.tv/videos?target=1.2240923&filtre=emission&id=1.1806721',
'md5': 'c5d5ce237bca3b1e990ce1b48d1f0948',
'info_dict': {
'id': '5231040869001',
'ext': 'mp4',
'title': 'Référendums américains, animés japonais et hooligans russes',
'upload_date': '20161201',
'description': 'This video file has been uploaded automatically using Oprah. It should be updated with real description soon.',
'timestamp': 1480628425,
'uploader_id': '2890187628001',
}
}
def _real_extract(self, url):
url_id = self._match_id(url)
webpage = self._download_webpage(url, url_id)
result = {}
result['title'] = self._html_search_regex(
r'<h3 class="videoTitle">(.+?)</h3>', webpage, 'title')
# Inspired from BrightcoveNewIE._extract_url()
entries = []
for account_id, player_id, _, video_id in re.findall(
# account_id, player_id and embed from:
# <div class="video-player [...]
# data-publisher-id="2890187628001"
# data-player-id="VkSnGw3cx"
# video id is extracted from weird CMS Java/Javascript notation:
# RW java.lang.String value = '5231040869001';
# Need to use backtrack to pin to a ref since video is in grid
# layout with others
r'''(?sx)
<div[^>]+
data-publisher-id=["\'](\d+)["\']
[^>]*
data-player-id=["\']([^"\']+)["\']
[^>]*
refId&quot;:&quot;([^&]+)&quot;
[^>]*
>.*?
</div>.*?
RW\ java\.lang\.String\ value\ =\ \'brightcove\.article\.\d+\.\3\'
[^>]*
RW\ java\.lang\.String\ value\ =\ \'(\d+)\'
''', webpage):
entries.append(
'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
% (account_id, player_id, 'default', video_id))
if entries:
result = self.url_result(entries[0], BrightcoveNewIE.ie_key())
return result

Loading…
Cancel
Save