You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

75 lines
2.7 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. # encoding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. compat_urllib_parse,
  7. ExtractorError,
  8. )
  9. class NaverIE(InfoExtractor):
  10. _VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P<id>\d+)'
  11. _TEST = {
  12. 'url': 'http://tvcast.naver.com/v/81652',
  13. 'info_dict': {
  14. 'id': '81652',
  15. 'ext': 'mp4',
  16. 'title': '[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
  17. 'description': '합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
  18. 'upload_date': '20130903',
  19. },
  20. }
  21. def _real_extract(self, url):
  22. mobj = re.match(self._VALID_URL, url)
  23. video_id = mobj.group(1)
  24. webpage = self._download_webpage(url, video_id)
  25. m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
  26. webpage)
  27. if m_id is None:
  28. raise ExtractorError('couldn\'t extract vid and key')
  29. vid = m_id.group(1)
  30. key = m_id.group(2)
  31. query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key,})
  32. query_urls = compat_urllib_parse.urlencode({
  33. 'masterVid': vid,
  34. 'protocol': 'p2p',
  35. 'inKey': key,
  36. })
  37. info = self._download_xml(
  38. 'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
  39. video_id, 'Downloading video info')
  40. urls = self._download_xml(
  41. 'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
  42. video_id, 'Downloading video formats info')
  43. formats = []
  44. for format_el in urls.findall('EncodingOptions/EncodingOption'):
  45. domain = format_el.find('Domain').text
  46. f = {
  47. 'url': domain + format_el.find('uri').text,
  48. 'ext': 'mp4',
  49. 'width': int(format_el.find('width').text),
  50. 'height': int(format_el.find('height').text),
  51. }
  52. if domain.startswith('rtmp'):
  53. f.update({
  54. 'ext': 'flv',
  55. 'rtmp_protocol': '1', # rtmpt
  56. })
  57. formats.append(f)
  58. self._sort_formats(formats)
  59. return {
  60. 'id': video_id,
  61. 'title': info.find('Subject').text,
  62. 'formats': formats,
  63. 'description': self._og_search_description(webpage),
  64. 'thumbnail': self._og_search_thumbnail(webpage),
  65. 'upload_date': info.find('WriteDate').text.replace('.', ''),
  66. 'view_count': int(info.find('PlayCount').text),
  67. }