[utils] add extract_attributes for extracting HTML tag attributes

9 years ago · 689fb748ee
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -248,6 +248,14 @@ def get_element_by_attribute(attribute, value, html):
    return unescapeHTML(res)


 def extract_attributes(attributes_str, attributes_regex=r'(?s)\s*([^\s=]+)\s*=\s*["\']([^"\']+)["\']'):
    """Collect name/value pairs found in a string into a dict

    attributes_str is scanned with re.findall using attributes_regex, which
    is expected to yield (name, value) pairs, i.e. to define two capturing
    groups.  Each pair becomes one dict entry; when a name occurs more than
    once the last occurrence wins.  An empty dict is returned when nothing
    matches.

    With the default pattern only quoted values are recognised, and a value
    must be non-empty and must not itself contain a single or double quote
    character.
    """
    name_value_pairs = re.findall(attributes_regex, attributes_str)
    # dict() consumes the (name, value) tuples directly and yields {} for an
    # empty match list, so no separate emptiness check is required.
    return dict(name_value_pairs)


 def clean_html(html):
    """Clean an HTML snippet into a readable string"""