Browse Source

[pluralsight] Add support for subtitles (Closes #9681)

totalwebcasting
Sergey M․ 8 years ago
parent
commit
8c3e35dd44
No known key found for this signature in database GPG Key ID: 2C393E0F18A9236D
1 changed files with 62 additions and 10 deletions
  1. +62
    -10
      youtube_dl/extractor/pluralsight.py

+ 62
- 10
youtube_dl/extractor/pluralsight.py View File

@ -1,9 +1,10 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
import collections
import json import json
import os
import random import random
import collections
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
@ -12,10 +13,12 @@ from ..compat import (
) )
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
float_or_none,
int_or_none, int_or_none,
parse_duration, parse_duration,
qualities, qualities,
sanitized_Request, sanitized_Request,
srt_subtitles_timecode,
urlencode_postdata, urlencode_postdata,
) )
@ -91,6 +94,51 @@ class PluralsightIE(PluralsightBaseIE):
if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')): if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')):
raise ExtractorError('Unable to log in') raise ExtractorError('Unable to log in')
def _get_subtitles(self, author, clip_id, lang, name, duration, video_id):
captions_post = {
'a': author,
'cn': clip_id,
'lc': lang,
'm': name,
}
captions = self._download_json(
'%s/training/Player/Captions' % self._API_BASE, video_id,
'Downloading captions JSON', 'Unable to download captions JSON',
fatal=False, data=json.dumps(captions_post).encode('utf-8'),
headers={'Content-Type': 'application/json;charset=utf-8'})
if captions:
return {
lang: [{
'ext': 'json',
'data': json.dumps(captions),
}, {
'ext': 'srt',
'data': self._convert_subtitles(duration, captions),
}]
}
@staticmethod
def _convert_subtitles(duration, subs):
srt = ''
for num, current in enumerate(subs):
current = subs[num]
start, text = float_or_none(
current.get('DisplayTimeOffset')), current.get('Text')
if start is None or text is None:
continue
end = duration if num == len(subs) - 1 else float_or_none(
subs[num + 1].get('DisplayTimeOffset'))
srt += os.linesep.join(
(
'%d' % num,
'%s --> %s' % (
srt_subtitles_timecode(start),
srt_subtitles_timecode(end)),
text,
os.linesep,
))
return srt
def _real_extract(self, url): def _real_extract(self, url):
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
@ -138,6 +186,8 @@ class PluralsightIE(PluralsightBaseIE):
if not clip: if not clip:
raise ExtractorError('Unable to resolve clip') raise ExtractorError('Unable to resolve clip')
title = '%s - %s' % (module['title'], clip['title'])
QUALITIES = { QUALITIES = {
'low': {'width': 640, 'height': 480}, 'low': {'width': 640, 'height': 480},
'medium': {'width': 848, 'height': 640}, 'medium': {'width': 848, 'height': 640},
@ -225,18 +275,20 @@ class PluralsightIE(PluralsightBaseIE):
formats.append(f) formats.append(f)
self._sort_formats(formats) self._sort_formats(formats)
# TODO: captions
# http://www.pluralsight.com/training/Player/ViewClip + cap = true
# or
# http://www.pluralsight.com/training/Player/Captions
# { a = author, cn = clip_id, lc = end, m = name }
duration = int_or_none(
clip.get('duration')) or parse_duration(clip.get('formattedDuration'))
# TODO: other languages?
subtitles = self.extract_subtitles(
author, clip_id, 'en', name, duration, display_id)
return { return {
'id': clip.get('clipName') or clip['name'], 'id': clip.get('clipName') or clip['name'],
'title': '%s - %s' % (module['title'], clip['title']),
'duration': int_or_none(clip.get('duration')) or parse_duration(clip.get('formattedDuration')),
'title': title,
'duration': duration,
'creator': author, 'creator': author,
'formats': formats
'formats': formats,
'subtitles': subtitles,
} }


Loading…
Cancel
Save