From 138b11f36ee5e8018c29621d39c324d98d8291cc Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Fri, 19 Nov 2010 19:31:26 +0100 Subject: [PATCH] Rework upload date mechanism after detecting problems in several tests --- youtube-dl | 49 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/youtube-dl b/youtube-dl index e164d5c8c..3d20a9d6d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -5,7 +5,6 @@ # Author: Benjamin Johnson # License: Public domain code import cookielib -import datetime import htmlentitydefs import httplib import locale @@ -37,6 +36,21 @@ std_headers = { simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii') +month_name_to_number = { + 'January': '01', + 'February': '02', + 'March': '03', + 'April': '04', + 'May': '05', + 'June': '06', + 'July': '07', + 'August': '08', + 'September': '09', + 'October': '10', + 'November': '11', + 'December': '12', +} + def preferredencoding(): """Get preferred encoding. @@ -899,13 +913,18 @@ class YoutubeIE(InfoExtractor): upload_date = u'NA' mobj = re.search(r'id="eow-date".*?>(.*?)', video_webpage, re.DOTALL) if mobj is not None: - upload_date = mobj.group(1).split() - format_expressions = ['%d %B %Y', '%B %d, %Y'] - for expression in format_expressions: - try: - upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d') - except: - pass + try: + if ',' in mobj.group(1): + # Month Day, Year + m, d, y = mobj.group(1).replace(',', '').split() + else: + # Day Month Year, we'll suppose + d, m, y = mobj.group(1).split() + m = month_name_to_number[m] + d = '%02d' % (long(d)) + upload_date = '%s%s%s' % (y, m, d) + except: + upload_date = u'NA' # description video_description = 'No description available.' @@ -961,7 +980,7 @@ class YoutubeIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_real_url.decode('utf-8'), 'uploader': video_uploader.decode('utf-8'), - 'uploaddate': upload_date, + 'upload_date': upload_date, 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), @@ -1108,7 +1127,7 @@ class MetacafeIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': video_uploader.decode('utf-8'), - 'uploaddate': u'NA', + 'upload_date': u'NA', 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), @@ -1197,7 +1216,7 @@ class DailymotionIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': video_uploader.decode('utf-8'), - 'uploaddate': u'NA', + 'upload_date': u'NA', 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), @@ -1307,7 +1326,7 @@ class GoogleIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': u'NA', - 'uploaddate': u'NA', + 'upload_date': u'NA', 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), @@ -1389,7 +1408,7 @@ class PhotobucketIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': video_uploader, - 'uploaddate': u'NA', + 'upload_date': u'NA', 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), @@ -1544,7 +1563,7 @@ class YahooIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_url, 'uploader': video_uploader, - 'uploaddate': u'NA', + 'upload_date': u'NA', 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'), @@ -1647,7 +1666,7 @@ class GenericIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': video_uploader, - 'uploaddate': u'NA', + 'upload_date': u'NA', 'title': video_title, 'stitle': simple_title, 'ext': video_extension.decode('utf-8'),