Browse Source

[thisav] Improve title extraction (closes #10682)

I didn't add a test case as the one in #10682 looks like a copyrighted
product.
totalwebcasting
Yen Chi Hsuan 8 years ago
parent
commit
d8dbf8707d
No known key found for this signature in database GPG Key ID: 3FDDD575826C5C30
2 changed files with 10 additions and 1 deletion
  1. +6
    -0
      ChangeLog
  2. +4
    -1
      youtube_dl/extractor/thisav.py

+ 6
- 0
ChangeLog View File

@@ -1,3 +1,9 @@
version <unreleased>
Extractors
* [thisav] Improve title extraction (#10682)
version 2016.09.18 version 2016.09.18
Core Core


+ 4
- 1
youtube_dl/extractor/thisav.py View File

@@ -4,6 +4,7 @@ from __future__ import unicode_literals
import re import re
from .jwplatform import JWPlatformBaseIE from .jwplatform import JWPlatformBaseIE
from ..utils import remove_end
class ThisAVIE(JWPlatformBaseIE): class ThisAVIE(JWPlatformBaseIE):
@@ -35,7 +36,9 @@ class ThisAVIE(JWPlatformBaseIE):
video_id = mobj.group('id') video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, 'title')
title = remove_end(self._html_search_regex(
r'<title>([^<]+)</title>', webpage, 'title'),
' - 視頻 - ThisAV.com-世界第一中文成人娛樂網站')
video_url = self._html_search_regex( video_url = self._html_search_regex(
r"addVariable\('file','([^']+)'\);", webpage, 'video url', default=None) r"addVariable\('file','([^']+)'\);", webpage, 'video url', default=None)
if video_url: if video_url:


Loading…
Cancel
Save