From f9c68787146e6278df0f29d0d4e2f0d4199f49b0 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 14 Sep 2011 22:26:53 +0200 Subject: [PATCH] Support for The Escapist --- youtube-dl | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/youtube-dl b/youtube-dl index 5aff9c08c..719edeb9b 100755 --- a/youtube-dl +++ b/youtube-dl @@ -23,6 +23,7 @@ import cookielib import datetime import gzip import htmlentitydefs +import HTMLParser import httplib import locale import math @@ -3189,6 +3190,93 @@ class ComedyCentralIE(InfoExtractor): continue +class EscapistIE(InfoExtractor): + """Information extractor for The Escapist """ + + _VALID_URL = r'^(https?://)?(www\.)escapistmagazine.com/videos/view/(?P[^/]+)/(?P[^/?]+)[/?].*$' + + @staticmethod + def suitable(url): + return (re.match(EscapistIE._VALID_URL, url) is not None) + + def report_extraction(self, showName): + self._downloader.to_screen(u'[escapist] %s: Extracting information' % showName) + + def report_config_download(self, showName): + self._downloader.to_screen(u'[escapist] %s: Downloading configuration' % showName) + + def _simplify_title(self, title): + res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) + res = res.strip(ur'_') + return res + + def _real_extract(self, url): + htmlParser = HTMLParser.HTMLParser() + + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + showName = mobj.group('showname') + videoId = mobj.group('episode') + + self.report_extraction(showName) + try: + webPage = urllib2.urlopen(url).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download webpage: ' + unicode(err)) + return + + descMatch = re.search('