Browse Source

[bilibili] Fix extraction of videos with double quotes in titles

Closes #13387
master-ytdl-org
Yen Chi Hsuan 8 years ago
parent
commit
ca27037171
No known key found for this signature in database. GPG Key ID: 7F902A182457CA23
2 changed files with 23 additions and 1 deletions
  1. +6
    -0
      ChangeLog
  2. +17
    -1
      youtube_dl/extractor/bilibili.py

+ 6
- 0
ChangeLog View File

@@ -1,3 +1,9 @@
version <unreleased>
Extractors
* [bilibili] Fix extraction of videos with double quotes in titles (#13387)
version 2017.06.12 version 2017.06.12
Core Core


+ 17
- 1
youtube_dl/extractor/bilibili.py View File

@@ -54,6 +54,22 @@ class BiliBiliIE(InfoExtractor):
'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...', 'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
}, },
'skip': 'Geo-restricted to China', 'skip': 'Geo-restricted to China',
}, {
# Title with double quotes
'url': 'http://www.bilibili.com/video/av8903802/',
'info_dict': {
'id': '8903802',
'ext': 'mp4',
'title': '阿滴英文|英文歌分享#6 "Closer',
'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
'uploader': '阿滴英文',
'uploader_id': '65880958',
'timestamp': 1488382620,
'upload_date': '20170301',
},
'params': {
'skip_download': True, # Test metadata only
},
}] }]
_APP_KEY = '84956560bc028eb7' _APP_KEY = '84956560bc028eb7'
@@ -135,7 +151,7 @@ class BiliBiliIE(InfoExtractor):
'formats': formats, 'formats': formats,
}) })
- title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
+ title = self._html_search_regex('<h1[^>]*>([^<]+)</h1>', webpage, 'title')
description = self._html_search_meta('description', webpage) description = self._html_search_meta('description', webpage)
timestamp = unified_timestamp(self._html_search_regex( timestamp = unified_timestamp(self._html_search_regex(
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', default=None)) r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', default=None))


Loading…
Cancel
Save