Browse Source

[flipagram] Add extractor

totalwebcasting
Déstin Reed 8 years ago
committed by Sergey M․
parent
commit
3fee7f636c
No known key found for this signature in database GPG Key ID: 2C393E0F18A9236D
3 changed files with 111 additions and 0 deletions
  1. +7
    -0
      youtube_dl/extractor/common.py
  2. +1
    -0
      youtube_dl/extractor/extractors.py
  3. +103
    -0
      youtube_dl/extractor/flipagram.py

+ 7
- 0
youtube_dl/extractor/common.py View File

@ -837,6 +837,13 @@ class InfoExtractor(object):
'title': unescapeHTML(json_ld.get('headline')),
'description': unescapeHTML(json_ld.get('articleBody')),
})
elif item_type == 'VideoObject':
info.update({
'title': unescapeHTML(json_ld.get('name')),
'description': unescapeHTML(json_ld.get('description')),
'upload_date': unified_strdate(json_ld.get('upload_date')),
'url': unescapeHTML(json_ld.get('contentUrl')),
})
return dict((k, v) for k, v in info.items() if v is not None)
@staticmethod


+ 1
- 0
youtube_dl/extractor/extractors.py View File

@ -256,6 +256,7 @@ from .fivemin import FiveMinIE
from .fivetv import FiveTVIE
from .fktv import FKTVIE
from .flickr import FlickrIE
from .flipagram import FlipagramIE
from .folketinget import FolketingetIE
from .footyroom import FootyRoomIE
from .formula1 import Formula1IE


+ 103
- 0
youtube_dl/extractor/flipagram.py View File

@ -0,0 +1,103 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_iso8601,
unified_strdate,
unified_timestamp,
)
class FlipagramIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?flipagram\.com/f/(?P<id>[^/?_]+)'
_TESTS = [{
'url': 'https://flipagram.com/f/myrWjW9RJw',
'md5': '541988fb6c4c7c375215ea22a4a21841',
'info_dict': {
'id': 'myrWjW9RJw',
'title': 'Flipagram by crystaldolce featuring King and Lionheart by Of Monsters and Men',
'description': 'Herbie\'s first bannana🍌🐢🍌. #animals #pets #reptile #tortoise #sulcata #tort #justatreat #snacktime #bannanas #rescuepets #ofmonstersandmen @animals',
'ext': 'mp4',
'uploader': 'Crystal Dolce',
'creator': 'Crystal Dolce',
'uploader_id': 'crystaldolce',
}
}, {
'url': 'https://flipagram.com/f/nyvTSJMKId',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
self.report_extraction(video_id)
user_data = self._parse_json(self._search_regex(r'window.reactH2O\s*=\s*({.+});', webpage, 'user data'), video_id)
content_data = self._search_json_ld(webpage, video_id)
flipagram = user_data.get('flipagram', {})
counts = flipagram.get('counts', {})
user = flipagram.get('user', {})
video = flipagram.get('video', {})
thumbnails = []
for cover in flipagram.get('covers', []):
if not cover.get('url'):
continue
thumbnails.append({
'url': self._proto_relative_url(cover.get('url')),
'width': int_or_none(cover.get('width')),
'height': int_or_none(cover.get('height')),
})
# Note that this only retrieves comments that are initally loaded.
# For videos with large amounts of comments, most won't be retrieved.
comments = []
for comment in user_data.get('comments', {}).get(video_id, {}).get('items', []):
text = comment.get('comment', [])
comments.append({
'author': comment.get('user', {}).get('name'),
'author_id': comment.get('user', {}).get('username'),
'id': comment.get('id'),
'text': text[0] if text else '',
'timestamp': unified_timestamp(comment.get('created', '')),
})
tags = [tag for item in flipagram['story'][1:] for tag in item]
formats = []
if flipagram.get('music', {}).get('track', {}).get('previewUrl', {}):
formats.append({
'url': flipagram.get('music').get('track').get('previewUrl'),
'ext': 'm4a',
'vcodec': 'none',
})
formats.append({
'url': video.get('url'),
'ext': 'mp4',
'width': int_or_none(video.get('width')),
'height': int_or_none(video.get('height')),
'filesize': int_or_none(video.get('size')),
})
return {
'id': video_id,
'title': content_data['title'],
'formats': formats,
'thumbnails': thumbnails,
'description': content_data.get('description'),
'uploader': user.get('name'),
'creator': user.get('name'),
'timestamp': parse_iso8601(flipagram.get('iso801Created')),
'upload_date': unified_strdate(flipagram.get('created')),
'uploader_id': user.get('username'),
'view_count': int_or_none(counts.get('plays')),
'repost_count': int_or_none(counts.get('reflips')),
'comment_count': int_or_none(counts.get('comments')),
'comments': comments,
'tags': tags,
}

Loading…
Cancel
Save