|
|
@ -248,6 +248,14 @@ def get_element_by_attribute(attribute, value, html): |
|
|
|
return unescapeHTML(res) |
|
|
|
|
|
|
|
|
|
|
|
def extract_attributes(attributes_str, attributes_regex=r'(?s)\s*([^\s=]+)\s*=\s*(?:"([^"]*)"|\'([^\']*)\')'):
    """Parse quoted ``name=value`` pairs from a string into a dict.

    The default pattern accepts values wrapped in either double or single
    quotes and requires the closing quote to be the same character as the
    opening one, so a value may contain the other quote character
    (``title="it's"``) and may be empty (``alt=""``).

    Args:
        attributes_str: Text to scan, e.g. the inside of an HTML start tag.
        attributes_regex: Pattern handed to ``re.findall``. Its first group
            must capture the attribute name; every remaining group is
            treated as a candidate for the value.

    Returns:
        A dict mapping each attribute name to its value, exactly as matched
        (no further decoding is applied). If a name occurs more than once
        the last occurrence wins. An empty dict is returned when nothing
        matches.
    """
    attributes_dict = {}
    for match in re.findall(attributes_regex, attributes_str):
        # findall() reports groups that did not take part in the match as
        # empty strings, so joining the value groups yields whichever
        # alternative matched. A caller-supplied two-group pattern has a
        # single value group and is therefore unaffected.
        attributes_dict[match[0]] = ''.join(match[1:])
    return attributes_dict
|
|
|
|
|
|
|
|
|
|
|
def clean_html(html): |
|
|
|
"""Clean an HTML snippet into a readable string""" |
|
|
|
|
|
|
|