You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

91 lines
3.1 KiB

10 years ago
10 years ago
  1. from __future__ import unicode_literals
  2. import re
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. parse_iso8601,
  6. str_to_int,
  7. )
  8. class CrackedIE(InfoExtractor):
  9. _VALID_URL = r'https?://(?:www\.)?cracked\.com/video_(?P<id>\d+)_[\da-z-]+\.html'
  10. _TESTS = [{
  11. 'url': 'http://www.cracked.com/video_19070_if-animal-actors-got-e21-true-hollywood-stories.html',
  12. 'md5': '89b90b9824e3806ca95072c4d78f13f7',
  13. 'info_dict': {
  14. 'id': '19070',
  15. 'ext': 'mp4',
  16. 'title': 'If Animal Actors Got E! True Hollywood Stories',
  17. 'timestamp': 1404954000,
  18. 'upload_date': '20140710',
  19. }
  20. }, {
  21. # youtube embed
  22. 'url': 'http://www.cracked.com/video_19006_4-plot-holes-you-didnt-notice-in-your-favorite-movies.html',
  23. 'md5': 'ccd52866b50bde63a6ef3b35016ba8c7',
  24. 'info_dict': {
  25. 'id': 'EjI00A3rZD0',
  26. 'ext': 'mp4',
  27. 'title': "4 Plot Holes You Didn't Notice in Your Favorite Movies - The Spit Take",
  28. 'description': 'md5:c603708c718b796fe6079e2b3351ffc7',
  29. 'upload_date': '20140725',
  30. 'uploader_id': 'Cracked',
  31. 'uploader': 'Cracked',
  32. }
  33. }]
  34. def _real_extract(self, url):
  35. video_id = self._match_id(url)
  36. webpage = self._download_webpage(url, video_id)
  37. youtube_url = self._search_regex(
  38. r'<iframe[^>]+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"',
  39. webpage, 'youtube url', default=None)
  40. if youtube_url:
  41. return self.url_result(youtube_url, 'Youtube')
  42. video_url = self._html_search_regex(
  43. [r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'],
  44. webpage, 'video URL')
  45. title = self._search_regex(
  46. [r'property="?og:title"?\s+content="([^"]+)"', r'class="?title"?>([^<]+)'],
  47. webpage, 'title')
  48. description = self._search_regex(
  49. r'name="?(?:og:)?description"?\s+content="([^"]+)"',
  50. webpage, 'description', default=None)
  51. timestamp = self._html_search_regex(
  52. r'"date"\s*:\s*"([^"]+)"', webpage, 'upload date', fatal=False)
  53. if timestamp:
  54. timestamp = parse_iso8601(timestamp[:-6])
  55. view_count = str_to_int(self._html_search_regex(
  56. r'<span\s+class="?views"? id="?viewCounts"?>([\d,\.]+) Views</span>',
  57. webpage, 'view count', fatal=False))
  58. comment_count = str_to_int(self._html_search_regex(
  59. r'<span\s+id="?commentCounts"?>([\d,\.]+)</span>',
  60. webpage, 'comment count', fatal=False))
  61. m = re.search(r'_(?P<width>\d+)X(?P<height>\d+)\.mp4$', video_url)
  62. if m:
  63. width = int(m.group('width'))
  64. height = int(m.group('height'))
  65. else:
  66. width = height = None
  67. return {
  68. 'id': video_id,
  69. 'url': video_url,
  70. 'title': title,
  71. 'description': description,
  72. 'timestamp': timestamp,
  73. 'view_count': view_count,
  74. 'comment_count': comment_count,
  75. 'height': height,
  76. 'width': width,
  77. }