You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

71 lines
2.5 KiB

10 years ago
  1. from __future__ import unicode_literals
  2. import re
  3. from .common import InfoExtractor
  4. from ..utils import (
  5. parse_duration,
  6. )
  class NuvidIE(InfoExtractor):
      """Information extractor for video pages on nuvid.com.

      Both the ``www`` and the mobile (``m``) host names are accepted, but
      the page is always fetched from the mobile site.
      """

      _VALID_URL = r'https?://(?:www|m)\.nuvid\.com/video/(?P<id>[0-9]+)'
      _TEST = {
          'url': 'http://m.nuvid.com/video/1310741/',
          'md5': 'eab207b7ac4fccfb4e23c86201f11277',
          'info_dict': {
              'id': '1310741',
              'ext': 'mp4',
              'title': 'Horny babes show their awesome bodeis and',
              'duration': 129,
              'age_limit': 18,
          }
      }

      def _real_extract(self, url):
          """Build the info dict for the video addressed by ``url``.

          The mobile page is downloaded twice: once as-is and once with the
          ``dwnld_speed`` cookie set, so that both media URLs the site may
          serve can be collected as formats.

          Returns a dict with the keys ``id``, ``title``, ``thumbnails``,
          ``thumbnail``, ``duration``, ``age_limit`` and ``formats``.

          The media URL and title lookups are not marked non-fatal; only the
          duration lookup passes ``fatal=False``.
          """
          video_id = self._match_id(url)
          page_url = 'http://m.nuvid.com/video/%s' % video_id

          webpage = self._download_webpage(
              page_url, video_id, 'Downloading video page')

          # When dwnld_speed exists and has a value larger than the MP4 file's
          # bitrate, Nuvid returns the MP4 URL.
          # Its unit is 100bytes/millisecond, see mobile-nuvid-min.js for the
          # algorithm.
          self._set_cookie('nuvid.com', 'dwnld_speed', '10.0')
          mp4_webpage = self._download_webpage(
              page_url, video_id, 'Downloading video page for MP4 format')

          # Matches the src attribute of a <video>/<audio> tag or of a
          # <source> element nested inside it.
          html5_video_re = r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']'
          # The third argument is the field name used when reporting a failed
          # extraction, so pass a descriptive label rather than the video id.
          video_url = self._html_search_regex(
              html5_video_re, webpage, 'video URL')
          mp4_video_url = self._html_search_regex(
              html5_video_re, mp4_webpage, 'MP4 video URL')

          formats = [{
              'url': video_url,
          }]
          # Only list the second URL when the cookie actually changed the
          # media URL the page serves.
          if mp4_video_url != video_url:
              formats.append({
                  'url': mp4_video_url,
              })

          title = self._html_search_regex(
              [r'<span title="([^"]+)">',
               r'<div class="thumb-holder video">\s*<h5[^>]*>([^<]+)</h5>',
               r'<span[^>]+class="title_thumb">([^<]+)</span>'],
              webpage, 'title').strip()

          thumbnails = [
              {
                  'url': thumb_url,
              } for thumb_url in re.findall(
                  r'<img src="([^"]+)" alt="" />', webpage)
          ]
          # The first matching image doubles as the single default thumbnail.
          thumbnail = thumbnails[0]['url'] if thumbnails else None

          # fatal=False: a page without a recognizable duration should not
          # abort the extraction.
          duration = parse_duration(self._html_search_regex(
              [r'<i class="fa fa-clock-o"></i>\s*(\d{2}:\d{2})',
               r'<span[^>]+class="view_time">([^<]+)</span>'],
              webpage, 'duration', fatal=False))

          return {
              'id': video_id,
              'title': title,
              'thumbnails': thumbnails,
              'thumbnail': thumbnail,
              'duration': duration,
              'age_limit': 18,
              'formats': formats,
          }