Browse Source

Merge branch 'pyx-huajiao'

totalwebcasting
Yen Chi Hsuan 8 years ago
parent
commit
4102e64051
No known key found for this signature in database GPG Key ID: 3FDDD575826C5C30
3 changed files with 58 additions and 0 deletions
  1. +1
    -0
      ChangeLog
  2. +1
    -0
      youtube_dl/extractor/extractors.py
  3. +56
    -0
      youtube_dl/extractor/huajiao.py

+ 1
- 0
ChangeLog View File

@ -1,6 +1,7 @@
version <unreleased> version <unreleased>
Extractors Extractors
+ [huajiao] New extractor (#10917)
* [cmt] Fix mgid extraction (#10813) * [cmt] Fix mgid extraction (#10813)
* [chirbit] Fix extraction of user profile pages * [chirbit] Fix extraction of user profile pages
* [charambatv] Fix extraction * [charambatv] Fix extraction


+ 1
- 0
youtube_dl/extractor/extractors.py View File

@ -372,6 +372,7 @@ from .hrti import (
HRTiIE, HRTiIE,
HRTiPlaylistIE, HRTiPlaylistIE,
) )
from .huajiao import HuajiaoIE
from .huffpost import HuffPostIE from .huffpost import HuffPostIE
from .hypem import HypemIE from .hypem import HypemIE
from .iconosquare import IconosquareIE from .iconosquare import IconosquareIE


+ 56
- 0
youtube_dl/extractor/huajiao.py View File

@ -0,0 +1,56 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
parse_duration,
parse_iso8601,
)
class HuajiaoIE(InfoExtractor):
IE_DESC = '花椒直播'
_VALID_URL = r'https?://(?:www\.)?huajiao\.com/l/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.huajiao.com/l/38941232',
'md5': 'd08bf9ac98787d24d1e4c0283f2d372d',
'info_dict': {
'id': '38941232',
'ext': 'mp4',
'title': '#新人求关注#',
'description': 're:.*',
'duration': 2424.0,
'thumbnail': 're:^https?://.*\.jpg$',
'timestamp': 1475866459,
'upload_date': '20161007',
'uploader': 'Penny_余姿昀',
'uploader_id': '75206005',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
feed_json = self._search_regex(
r'var\s*feed\s*=\s*({.*})', webpage, 'feed json str')
feed = self._parse_json(feed_json, video_id)
description = self._html_search_meta(
'description', webpage, 'description', fatal=False)
def get(section, field):
return feed.get(section, {}).get(field)
return {
'id': video_id,
'title': feed['feed']['formated_title'],
'description': description,
'duration': parse_duration(get('feed', 'duration')),
'thumbnail': get('feed', 'image'),
'timestamp': parse_iso8601(feed.get('creatime'), ' '),
'uploader': get('author', 'nickname'),
'uploader_id': get('author', 'uid'),
'formats': self._extract_m3u8_formats(
feed['feed']['m3u8'], video_id, 'mp4', 'm3u8_native'),
}

Loading…
Cancel
Save