You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

73 lines
2.3 KiB

  1. import json
  2. import re
  3. import xml.etree.ElementTree
  4. from .common import InfoExtractor
  5. class TriluliluIE(InfoExtractor):
  6. _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?trilulilu\.ro/video-(?P<category>[^/]+)/(?P<video_id>[^/]+)'
  7. _TEST = {
  8. u"url": u"http://www.trilulilu.ro/video-animatie/big-buck-bunny-1",
  9. u'file': u"big-buck-bunny-1.mp4",
  10. u'info_dict': {
  11. u"title": u"Big Buck Bunny",
  12. u"description": u":) pentru copilul din noi",
  13. },
  14. # Server ignores Range headers (--test)
  15. u"params": {
  16. u"skip_download": True
  17. }
  18. }
  19. def _real_extract(self, url):
  20. mobj = re.match(self._VALID_URL, url)
  21. video_id = mobj.group('video_id')
  22. webpage = self._download_webpage(url, video_id)
  23. title = self._og_search_title(webpage)
  24. thumbnail = self._og_search_thumbnail(webpage)
  25. description = self._og_search_description(webpage)
  26. log_str = self._search_regex(
  27. r'block_flash_vars[ ]=[ ]({[^}]+})', webpage, u'log info')
  28. log = json.loads(log_str)
  29. format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
  30. u'video-formats2' % log)
  31. format_str = self._download_webpage(
  32. format_url, video_id,
  33. note=u'Downloading formats',
  34. errnote=u'Error while downloading formats')
  35. format_doc = xml.etree.ElementTree.fromstring(format_str)
  36. video_url_template = (
  37. u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
  38. u'&source=site&hash=%(hash)s&username=%(userid)s&'
  39. u'key=ministhebest&format=%%s&sig=&exp=' %
  40. log)
  41. formats = [
  42. {
  43. 'format': fnode.text,
  44. 'url': video_url_template % fnode.text,
  45. }
  46. for fnode in format_doc.findall('./formats/format')
  47. ]
  48. info = {
  49. '_type': 'video',
  50. 'id': video_id,
  51. 'formats': formats,
  52. 'title': title,
  53. 'description': description,
  54. 'thumbnail': thumbnail,
  55. }
  56. # TODO: Remove when #980 has been merged
  57. info['url'] = formats[-1]['url']
  58. info['ext'] = formats[-1]['format'].partition('-')[0]
  59. return info