From 2fa7e823771bf9b1ce00438ebf3fa32255407104 Mon Sep 17 00:00:00 2001 From: Intel1 Date: Fri, 7 Sep 2018 11:15:51 -0500 Subject: [PATCH] Eliminado mechanize Ya no se usa en los canales --- plugin.video.alfa/lib/mechanize/__init__.py | 211 -- plugin.video.alfa/lib/mechanize/_auth.py | 68 - .../lib/mechanize/_beautifulsoup.py | 1077 ------ .../lib/mechanize/_clientcookie.py | 1725 --------- plugin.video.alfa/lib/mechanize/_debug.py | 28 - .../lib/mechanize/_firefox3cookiejar.py | 248 -- plugin.video.alfa/lib/mechanize/_form.py | 3280 ----------------- plugin.video.alfa/lib/mechanize/_gzip.py | 105 - .../lib/mechanize/_headersutil.py | 241 -- plugin.video.alfa/lib/mechanize/_html.py | 629 ---- plugin.video.alfa/lib/mechanize/_http.py | 447 --- .../lib/mechanize/_lwpcookiejar.py | 185 - .../lib/mechanize/_markupbase.py | 393 -- plugin.video.alfa/lib/mechanize/_mechanize.py | 669 ---- .../lib/mechanize/_mozillacookiejar.py | 161 - .../lib/mechanize/_msiecookiejar.py | 388 -- plugin.video.alfa/lib/mechanize/_opener.py | 442 --- .../lib/mechanize/_pullparser.py | 391 -- plugin.video.alfa/lib/mechanize/_request.py | 40 - plugin.video.alfa/lib/mechanize/_response.py | 525 --- plugin.video.alfa/lib/mechanize/_rfc3986.py | 245 -- .../lib/mechanize/_sgmllib_copy.py | 559 --- .../lib/mechanize/_sockettimeout.py | 6 - plugin.video.alfa/lib/mechanize/_testcase.py | 162 - plugin.video.alfa/lib/mechanize/_urllib2.py | 50 - .../lib/mechanize/_urllib2_fork.py | 1414 ------- plugin.video.alfa/lib/mechanize/_useragent.py | 367 -- plugin.video.alfa/lib/mechanize/_util.py | 305 -- plugin.video.alfa/lib/mechanize/_version.py | 2 - 29 files changed, 14363 deletions(-) delete mode 100755 plugin.video.alfa/lib/mechanize/__init__.py delete mode 100755 plugin.video.alfa/lib/mechanize/_auth.py delete mode 100755 plugin.video.alfa/lib/mechanize/_beautifulsoup.py delete mode 100755 plugin.video.alfa/lib/mechanize/_clientcookie.py delete mode 100755 plugin.video.alfa/lib/mechanize/_debug.py delete mode 100755 plugin.video.alfa/lib/mechanize/_firefox3cookiejar.py delete mode 100755 plugin.video.alfa/lib/mechanize/_form.py delete mode 100755 plugin.video.alfa/lib/mechanize/_gzip.py delete mode 100755 plugin.video.alfa/lib/mechanize/_headersutil.py delete mode 100755 plugin.video.alfa/lib/mechanize/_html.py delete mode 100755 plugin.video.alfa/lib/mechanize/_http.py delete mode 100755 plugin.video.alfa/lib/mechanize/_lwpcookiejar.py delete mode 100755 plugin.video.alfa/lib/mechanize/_markupbase.py delete mode 100755 plugin.video.alfa/lib/mechanize/_mechanize.py delete mode 100755 plugin.video.alfa/lib/mechanize/_mozillacookiejar.py delete mode 100755 plugin.video.alfa/lib/mechanize/_msiecookiejar.py delete mode 100755 plugin.video.alfa/lib/mechanize/_opener.py delete mode 100755 plugin.video.alfa/lib/mechanize/_pullparser.py delete mode 100755 plugin.video.alfa/lib/mechanize/_request.py delete mode 100755 plugin.video.alfa/lib/mechanize/_response.py delete mode 100755 plugin.video.alfa/lib/mechanize/_rfc3986.py delete mode 100755 plugin.video.alfa/lib/mechanize/_sgmllib_copy.py delete mode 100755 plugin.video.alfa/lib/mechanize/_sockettimeout.py delete mode 100755 plugin.video.alfa/lib/mechanize/_testcase.py delete mode 100755 plugin.video.alfa/lib/mechanize/_urllib2.py delete mode 100755 plugin.video.alfa/lib/mechanize/_urllib2_fork.py delete mode 100755 plugin.video.alfa/lib/mechanize/_useragent.py delete mode 100755 plugin.video.alfa/lib/mechanize/_util.py delete mode 100755 plugin.video.alfa/lib/mechanize/_version.py diff --git a/plugin.video.alfa/lib/mechanize/__init__.py b/plugin.video.alfa/lib/mechanize/__init__.py deleted file mode 100755 index 43a3324a..00000000 --- a/plugin.video.alfa/lib/mechanize/__init__.py +++ /dev/null @@ -1,211 +0,0 @@ -__all__ = [ - 'AbstractBasicAuthHandler', - 'AbstractDigestAuthHandler', - 'BaseHandler', - 'Browser', - 'BrowserStateError', - 'CacheFTPHandler', - 'ContentTooShortError', - 'Cookie', - 'CookieJar', - 'CookiePolicy', - 'DefaultCookiePolicy', - 'DefaultFactory', - 'FTPHandler', - 'Factory', - 'FileCookieJar', - 'FileHandler', - 'FormNotFoundError', - 'FormsFactory', - 'HTTPBasicAuthHandler', - 'HTTPCookieProcessor', - 'HTTPDefaultErrorHandler', - 'HTTPDigestAuthHandler', - 'HTTPEquivProcessor', - 'HTTPError', - 'HTTPErrorProcessor', - 'HTTPHandler', - 'HTTPPasswordMgr', - 'HTTPPasswordMgrWithDefaultRealm', - 'HTTPProxyPasswordMgr', - 'HTTPRedirectDebugProcessor', - 'HTTPRedirectHandler', - 'HTTPRefererProcessor', - 'HTTPRefreshProcessor', - 'HTTPResponseDebugProcessor', - 'HTTPRobotRulesProcessor', - 'HTTPSClientCertMgr', - 'HeadParser', - 'History', - 'LWPCookieJar', - 'Link', - 'LinkNotFoundError', - 'LinksFactory', - 'LoadError', - 'MSIECookieJar', - 'MozillaCookieJar', - 'OpenerDirector', - 'OpenerFactory', - 'ParseError', - 'ProxyBasicAuthHandler', - 'ProxyDigestAuthHandler', - 'ProxyHandler', - 'Request', - 'RobotExclusionError', - 'RobustFactory', - 'RobustFormsFactory', - 'RobustLinksFactory', - 'RobustTitleFactory', - 'SeekableResponseOpener', - 'TitleFactory', - 'URLError', - 'USE_BARE_EXCEPT', - 'UnknownHandler', - 'UserAgent', - 'UserAgentBase', - 'XHTMLCompatibleHeadParser', - '__version__', - 'build_opener', - 'install_opener', - 'lwp_cookie_str', - 'make_response', - 'request_host', - 'response_seek_wrapper', # XXX deprecate in public interface? - 'seek_wrapped_response', # XXX should probably use this internally in place of response_seek_wrapper() - 'str2time', - 'urlopen', - 'urlretrieve', - 'urljoin', - - # ClientForm API - 'AmbiguityError', - 'ControlNotFoundError', - 'FormParser', - 'ItemCountError', - 'ItemNotFoundError', - 'LocateError', - 'Missing', - 'ParseFile', - 'ParseFileEx', - 'ParseResponse', - 'ParseResponseEx', - 'ParseString', - 'XHTMLCompatibleFormParser', - # deprecated - 'CheckboxControl', - 'Control', - 'FileControl', - 'HTMLForm', - 'HiddenControl', - 'IgnoreControl', - 'ImageControl', - 'IsindexControl', - 'Item', - 'Label', - 'ListControl', - 'PasswordControl', - 'RadioControl', - 'ScalarControl', - 'SelectControl', - 'SubmitButtonControl', - 'SubmitControl', - 'TextControl', - 'TextareaControl', - ] - -import logging -import sys - -from _version import __version__ - -# high-level stateful browser-style interface -from _mechanize import \ - Browser, History, \ - BrowserStateError, LinkNotFoundError, FormNotFoundError - -# configurable URL-opener interface -from _useragent import UserAgentBase, UserAgent -from _html import \ - Link, \ - Factory, DefaultFactory, RobustFactory, \ - FormsFactory, LinksFactory, TitleFactory, \ - RobustFormsFactory, RobustLinksFactory, RobustTitleFactory - -# urllib2 work-alike interface. This is a superset of the urllib2 interface. -from _urllib2 import * -import _urllib2 -if hasattr(_urllib2, "HTTPSHandler"): - __all__.append("HTTPSHandler") -del _urllib2 - -# misc -from _http import HeadParser -from _http import XHTMLCompatibleHeadParser -from _opener import ContentTooShortError, OpenerFactory, urlretrieve -from _response import \ - response_seek_wrapper, seek_wrapped_response, make_response -from _rfc3986 import urljoin -from _util import http2time as str2time - -# cookies -from _clientcookie import Cookie, CookiePolicy, DefaultCookiePolicy, \ - CookieJar, FileCookieJar, LoadError, request_host_lc as request_host, \ - effective_request_host -from _lwpcookiejar import LWPCookieJar, lwp_cookie_str -# 2.4 raises SyntaxError due to generator / try/finally use -if sys.version_info[:2] > (2,4): - try: - import sqlite3 - except ImportError: - pass - else: - from _firefox3cookiejar import Firefox3CookieJar -from _mozillacookiejar import MozillaCookieJar -from _msiecookiejar import MSIECookieJar - -# forms -from _form import ( - AmbiguityError, - ControlNotFoundError, - FormParser, - ItemCountError, - ItemNotFoundError, - LocateError, - Missing, - ParseError, - ParseFile, - ParseFileEx, - ParseResponse, - ParseResponseEx, - ParseString, - XHTMLCompatibleFormParser, - # deprecated - CheckboxControl, - Control, - FileControl, - HTMLForm, - HiddenControl, - IgnoreControl, - ImageControl, - IsindexControl, - Item, - Label, - ListControl, - PasswordControl, - RadioControl, - ScalarControl, - SelectControl, - SubmitButtonControl, - SubmitControl, - TextControl, - TextareaControl, - ) - -# If you hate the idea of turning bugs into warnings, do: -# import mechanize; mechanize.USE_BARE_EXCEPT = False -USE_BARE_EXCEPT = True - -logger = logging.getLogger("mechanize") -if logger.level is logging.NOTSET: - logger.setLevel(logging.CRITICAL) -del logger diff --git a/plugin.video.alfa/lib/mechanize/_auth.py b/plugin.video.alfa/lib/mechanize/_auth.py deleted file mode 100755 index 9fa7e8e3..00000000 --- a/plugin.video.alfa/lib/mechanize/_auth.py +++ /dev/null @@ -1,68 +0,0 @@ -"""HTTP Authentication and Proxy support. - - -Copyright 2006 John J. Lee - -This code is free software; you can redistribute it and/or modify it under -the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt -included with the distribution). - -""" - -from _urllib2_fork import HTTPPasswordMgr - - -# TODO: stop deriving from HTTPPasswordMgr -class HTTPProxyPasswordMgr(HTTPPasswordMgr): - # has default realm and host/port - def add_password(self, realm, uri, user, passwd): - # uri could be a single URI or a sequence - if uri is None or isinstance(uri, basestring): - uris = [uri] - else: - uris = uri - passwd_by_domain = self.passwd.setdefault(realm, {}) - for uri in uris: - for default_port in True, False: - reduced_uri = self.reduce_uri(uri, default_port) - passwd_by_domain[reduced_uri] = (user, passwd) - - def find_user_password(self, realm, authuri): - attempts = [(realm, authuri), (None, authuri)] - # bleh, want default realm to take precedence over default - # URI/authority, hence this outer loop - for default_uri in False, True: - for realm, authuri in attempts: - authinfo_by_domain = self.passwd.get(realm, {}) - for default_port in True, False: - reduced_authuri = self.reduce_uri(authuri, default_port) - for uri, authinfo in authinfo_by_domain.iteritems(): - if uri is None and not default_uri: - continue - if self.is_suburi(uri, reduced_authuri): - return authinfo - user, password = None, None - - if user is not None: - break - return user, password - - def reduce_uri(self, uri, default_port=True): - if uri is None: - return None - return HTTPPasswordMgr.reduce_uri(self, uri, default_port) - - def is_suburi(self, base, test): - if base is None: - # default to the proxy's host/port - hostport, path = test - base = (hostport, "/") - return HTTPPasswordMgr.is_suburi(self, base, test) - - -class HTTPSClientCertMgr(HTTPPasswordMgr): - # implementation inheritance: this is not a proper subclass - def add_key_cert(self, uri, key_file, cert_file): - self.add_password(None, uri, key_file, cert_file) - def find_key_cert(self, authuri): - return HTTPPasswordMgr.find_user_password(self, None, authuri) diff --git a/plugin.video.alfa/lib/mechanize/_beautifulsoup.py b/plugin.video.alfa/lib/mechanize/_beautifulsoup.py deleted file mode 100755 index 5ec6755a..00000000 --- a/plugin.video.alfa/lib/mechanize/_beautifulsoup.py +++ /dev/null @@ -1,1077 +0,0 @@ -"""Beautiful Soup -Elixir and Tonic -"The Screen-Scraper's Friend" -v2.1.1 -http://www.crummy.com/software/BeautifulSoup/ - -Beautiful Soup parses arbitrarily invalid XML- or HTML-like substance -into a tree representation. It provides methods and Pythonic idioms -that make it easy to search and modify the tree. - -A well-formed XML/HTML document will yield a well-formed data -structure. An ill-formed XML/HTML document will yield a -correspondingly ill-formed data structure. If your document is only -locally well-formed, you can use this library to find and process the -well-formed part of it. The BeautifulSoup class has heuristics for -obtaining a sensible parse tree in the face of common HTML errors. - -Beautiful Soup has no external dependencies. It works with Python 2.2 -and up. - -Beautiful Soup defines classes for four different parsing strategies: - - * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific - language that kind of looks like XML. - - * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid - or invalid. - - * ICantBelieveItsBeautifulSoup, for parsing valid but bizarre HTML - that trips up BeautifulSoup. - - * BeautifulSOAP, for making it easier to parse XML documents that use - lots of subelements containing a single string, where you'd prefer - they put that string into an attribute (such as SOAP messages). - -You can subclass BeautifulStoneSoup or BeautifulSoup to create a -parsing strategy specific to an XML schema or a particular bizarre -HTML document. Typically your subclass would just override -SELF_CLOSING_TAGS and/or NESTABLE_TAGS. -""" #" -from __future__ import generators - -__author__ = "Leonard Richardson (leonardr@segfault.org)" -__version__ = "2.1.1" -__date__ = "$Date: 2004/10/18 00:14:20 $" -__copyright__ = "Copyright (c) 2004-2005 Leonard Richardson" -__license__ = "PSF" - -from _sgmllib_copy import SGMLParser, SGMLParseError -import types -import re -import _sgmllib_copy as sgmllib - -class NullType(object): - - """Similar to NoneType with a corresponding singleton instance - 'Null' that, unlike None, accepts any message and returns itself. - - Examples: - >>> Null("send", "a", "message")("and one more", - ... "and what you get still") is Null - True - """ - - def __new__(cls): return Null - def __call__(self, *args, **kwargs): return Null -## def __getstate__(self, *args): return Null - def __getattr__(self, attr): return Null - def __getitem__(self, item): return Null - def __setattr__(self, attr, value): pass - def __setitem__(self, item, value): pass - def __len__(self): return 0 - # FIXME: is this a python bug? otherwise ``for x in Null: pass`` - # never terminates... - def __iter__(self): return iter([]) - def __contains__(self, item): return False - def __repr__(self): return "Null" -Null = object.__new__(NullType) - -class PageElement: - """Contains the navigational information for some part of the page - (either a tag or a piece of text)""" - - def setup(self, parent=Null, previous=Null): - """Sets up the initial relations between this element and - other elements.""" - self.parent = parent - self.previous = previous - self.next = Null - self.previousSibling = Null - self.nextSibling = Null - if self.parent and self.parent.contents: - self.previousSibling = self.parent.contents[-1] - self.previousSibling.nextSibling = self - - def findNext(self, name=None, attrs={}, text=None): - """Returns the first item that matches the given criteria and - appears after this Tag in the document.""" - return self._first(self.fetchNext, name, attrs, text) - firstNext = findNext - - def fetchNext(self, name=None, attrs={}, text=None, limit=None): - """Returns all items that match the given criteria and appear - before after Tag in the document.""" - return self._fetch(name, attrs, text, limit, self.nextGenerator) - - def findNextSibling(self, name=None, attrs={}, text=None): - """Returns the closest sibling to this Tag that matches the - given criteria and appears after this Tag in the document.""" - return self._first(self.fetchNextSiblings, name, attrs, text) - firstNextSibling = findNextSibling - - def fetchNextSiblings(self, name=None, attrs={}, text=None, limit=None): - """Returns the siblings of this Tag that match the given - criteria and appear after this Tag in the document.""" - return self._fetch(name, attrs, text, limit, self.nextSiblingGenerator) - - def findPrevious(self, name=None, attrs={}, text=None): - """Returns the first item that matches the given criteria and - appears before this Tag in the document.""" - return self._first(self.fetchPrevious, name, attrs, text) - - def fetchPrevious(self, name=None, attrs={}, text=None, limit=None): - """Returns all items that match the given criteria and appear - before this Tag in the document.""" - return self._fetch(name, attrs, text, limit, self.previousGenerator) - firstPrevious = findPrevious - - def findPreviousSibling(self, name=None, attrs={}, text=None): - """Returns the closest sibling to this Tag that matches the - given criteria and appears before this Tag in the document.""" - return self._first(self.fetchPreviousSiblings, name, attrs, text) - firstPreviousSibling = findPreviousSibling - - def fetchPreviousSiblings(self, name=None, attrs={}, text=None, - limit=None): - """Returns the siblings of this Tag that match the given - criteria and appear before this Tag in the document.""" - return self._fetch(name, attrs, text, limit, - self.previousSiblingGenerator) - - def findParent(self, name=None, attrs={}): - """Returns the closest parent of this Tag that matches the given - criteria.""" - r = Null - l = self.fetchParents(name, attrs, 1) - if l: - r = l[0] - return r - firstParent = findParent - - def fetchParents(self, name=None, attrs={}, limit=None): - """Returns the parents of this Tag that match the given - criteria.""" - return self._fetch(name, attrs, None, limit, self.parentGenerator) - - #These methods do the real heavy lifting. - - def _first(self, method, name, attrs, text): - r = Null - l = method(name, attrs, text, 1) - if l: - r = l[0] - return r - - def _fetch(self, name, attrs, text, limit, generator): - "Iterates over a generator looking for things that match." - if not hasattr(attrs, 'items'): - attrs = {'class' : attrs} - - results = [] - g = generator() - while True: - try: - i = g.next() - except StopIteration: - break - found = None - if isinstance(i, Tag): - if not text: - if not name or self._matches(i, name): - match = True - for attr, matchAgainst in attrs.items(): - check = i.get(attr) - if not self._matches(check, matchAgainst): - match = False - break - if match: - found = i - elif text: - if self._matches(i, text): - found = i - if found: - results.append(found) - if limit and len(results) >= limit: - break - return results - - #Generators that can be used to navigate starting from both - #NavigableTexts and Tags. - def nextGenerator(self): - i = self - while i: - i = i.next - yield i - - def nextSiblingGenerator(self): - i = self - while i: - i = i.nextSibling - yield i - - def previousGenerator(self): - i = self - while i: - i = i.previous - yield i - - def previousSiblingGenerator(self): - i = self - while i: - i = i.previousSibling - yield i - - def parentGenerator(self): - i = self - while i: - i = i.parent - yield i - - def _matches(self, chunk, howToMatch): - #print 'looking for %s in %s' % (howToMatch, chunk) - # - # If given a list of items, return true if the list contains a - # text element that matches. - if isList(chunk) and not isinstance(chunk, Tag): - for tag in chunk: - if isinstance(tag, NavigableText) and self._matches(tag, howToMatch): - return True - return False - if callable(howToMatch): - return howToMatch(chunk) - if isinstance(chunk, Tag): - #Custom match methods take the tag as an argument, but all other - #ways of matching match the tag name as a string - chunk = chunk.name - #Now we know that chunk is a string - if not isinstance(chunk, basestring): - chunk = str(chunk) - if hasattr(howToMatch, 'match'): - # It's a regexp object. - return howToMatch.search(chunk) - if isList(howToMatch): - return chunk in howToMatch - if hasattr(howToMatch, 'items'): - return howToMatch.has_key(chunk) - #It's just a string - return str(howToMatch) == chunk - -class NavigableText(PageElement): - - def __getattr__(self, attr): - "For backwards compatibility, text.string gives you text" - if attr == 'string': - return self - else: - raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) - -class NavigableString(str, NavigableText): - pass - -class NavigableUnicodeString(unicode, NavigableText): - pass - -class Tag(PageElement): - - """Represents a found HTML tag with its attributes and contents.""" - - def __init__(self, name, attrs=None, parent=Null, previous=Null): - "Basic constructor." - self.name = name - if attrs == None: - attrs = [] - self.attrs = attrs - self.contents = [] - self.setup(parent, previous) - self.hidden = False - - def get(self, key, default=None): - """Returns the value of the 'key' attribute for the tag, or - the value given for 'default' if it doesn't have that - attribute.""" - return self._getAttrMap().get(key, default) - - def __getitem__(self, key): - """tag[key] returns the value of the 'key' attribute for the tag, - and throws an exception if it's not there.""" - return self._getAttrMap()[key] - - def __iter__(self): - "Iterating over a tag iterates over its contents." - return iter(self.contents) - - def __len__(self): - "The length of a tag is the length of its list of contents." - return len(self.contents) - - def __contains__(self, x): - return x in self.contents - - def __nonzero__(self): - "A tag is non-None even if it has no contents." - return True - - def __setitem__(self, key, value): - """Setting tag[key] sets the value of the 'key' attribute for the - tag.""" - self._getAttrMap() - self.attrMap[key] = value - found = False - for i in range(0, len(self.attrs)): - if self.attrs[i][0] == key: - self.attrs[i] = (key, value) - found = True - if not found: - self.attrs.append((key, value)) - self._getAttrMap()[key] = value - - def __delitem__(self, key): - "Deleting tag[key] deletes all 'key' attributes for the tag." - for item in self.attrs: - if item[0] == key: - self.attrs.remove(item) - #We don't break because bad HTML can define the same - #attribute multiple times. - self._getAttrMap() - if self.attrMap.has_key(key): - del self.attrMap[key] - - def __call__(self, *args, **kwargs): - """Calling a tag like a function is the same as calling its - fetch() method. Eg. tag('a') returns a list of all the A tags - found within this tag.""" - return apply(self.fetch, args, kwargs) - - def __getattr__(self, tag): - if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3: - return self.first(tag[:-3]) - elif tag.find('__') != 0: - return self.first(tag) - - def __eq__(self, other): - """Returns true iff this tag has the same name, the same attributes, - and the same contents (recursively) as the given tag. - - NOTE: right now this will return false if two tags have the - same attributes in a different order. Should this be fixed?""" - if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): - return False - for i in range(0, len(self.contents)): - if self.contents[i] != other.contents[i]: - return False - return True - - def __ne__(self, other): - """Returns true iff this tag is not identical to the other tag, - as defined in __eq__.""" - return not self == other - - def __repr__(self): - """Renders this tag as a string.""" - return str(self) - - def __unicode__(self): - return self.__str__(1) - - def __str__(self, needUnicode=None, showStructureIndent=None): - """Returns a string or Unicode representation of this tag and - its contents. - - NOTE: since Python's HTML parser consumes whitespace, this - method is not certain to reproduce the whitespace present in - the original string.""" - - attrs = [] - if self.attrs: - for key, val in self.attrs: - attrs.append('%s="%s"' % (key, val)) - close = '' - closeTag = '' - if self.isSelfClosing(): - close = ' /' - else: - closeTag = '' % self.name - indentIncrement = None - if showStructureIndent != None: - indentIncrement = showStructureIndent - if not self.hidden: - indentIncrement += 1 - contents = self.renderContents(indentIncrement, needUnicode=needUnicode) - if showStructureIndent: - space = '\n%s' % (' ' * showStructureIndent) - if self.hidden: - s = contents - else: - s = [] - attributeString = '' - if attrs: - attributeString = ' ' + ' '.join(attrs) - if showStructureIndent: - s.append(space) - s.append('<%s%s%s>' % (self.name, attributeString, close)) - s.append(contents) - if closeTag and showStructureIndent != None: - s.append(space) - s.append(closeTag) - s = ''.join(s) - isUnicode = type(s) == types.UnicodeType - if needUnicode and not isUnicode: - s = unicode(s) - elif isUnicode and needUnicode==False: - s = str(s) - return s - - def prettify(self, needUnicode=None): - return self.__str__(needUnicode, showStructureIndent=True) - - def renderContents(self, showStructureIndent=None, needUnicode=None): - """Renders the contents of this tag as a (possibly Unicode) - string.""" - s=[] - for c in self: - text = None - if isinstance(c, NavigableUnicodeString) or type(c) == types.UnicodeType: - text = unicode(c) - elif isinstance(c, Tag): - s.append(c.__str__(needUnicode, showStructureIndent)) - elif needUnicode: - text = unicode(c) - else: - text = str(c) - if text: - if showStructureIndent != None: - if text[-1] == '\n': - text = text[:-1] - s.append(text) - return ''.join(s) - - #Soup methods - - def firstText(self, text, recursive=True): - """Convenience method to retrieve the first piece of text matching the - given criteria. 'text' can be a string, a regular expression object, - a callable that takes a string and returns whether or not the - string 'matches', etc.""" - return self.first(recursive=recursive, text=text) - - def fetchText(self, text, recursive=True, limit=None): - """Convenience method to retrieve all pieces of text matching the - given criteria. 'text' can be a string, a regular expression object, - a callable that takes a string and returns whether or not the - string 'matches', etc.""" - return self.fetch(recursive=recursive, text=text, limit=limit) - - def first(self, name=None, attrs={}, recursive=True, text=None): - """Return only the first child of this - Tag matching the given criteria.""" - r = Null - l = self.fetch(name, attrs, recursive, text, 1) - if l: - r = l[0] - return r - findChild = first - - def fetch(self, name=None, attrs={}, recursive=True, text=None, - limit=None): - """Extracts a list of Tag objects that match the given - criteria. You can specify the name of the Tag and any - attributes you want the Tag to have. - - The value of a key-value pair in the 'attrs' map can be a - string, a list of strings, a regular expression object, or a - callable that takes a string and returns whether or not the - string matches for some custom definition of 'matches'. The - same is true of the tag name.""" - generator = self.recursiveChildGenerator - if not recursive: - generator = self.childGenerator - return self._fetch(name, attrs, text, limit, generator) - fetchChildren = fetch - - #Utility methods - - def isSelfClosing(self): - """Returns true iff this is a self-closing tag as defined in the HTML - standard. - - TODO: This is specific to BeautifulSoup and its subclasses, but it's - used by __str__""" - return self.name in BeautifulSoup.SELF_CLOSING_TAGS - - def append(self, tag): - """Appends the given tag to the contents of this tag.""" - self.contents.append(tag) - - #Private methods - - def _getAttrMap(self): - """Initializes a map representation of this tag's attributes, - if not already initialized.""" - if not getattr(self, 'attrMap'): - self.attrMap = {} - for (key, value) in self.attrs: - self.attrMap[key] = value - return self.attrMap - - #Generator methods - def childGenerator(self): - for i in range(0, len(self.contents)): - yield self.contents[i] - raise StopIteration - - def recursiveChildGenerator(self): - stack = [(self, 0)] - while stack: - tag, start = stack.pop() - if isinstance(tag, Tag): - for i in range(start, len(tag.contents)): - a = tag.contents[i] - yield a - if isinstance(a, Tag) and tag.contents: - if i < len(tag.contents) - 1: - stack.append((tag, i+1)) - stack.append((a, 0)) - break - raise StopIteration - - -def isList(l): - """Convenience method that works with all 2.x versions of Python - to determine whether or not something is listlike.""" - return hasattr(l, '__iter__') \ - or (type(l) in (types.ListType, types.TupleType)) - -def buildTagMap(default, *args): - """Turns a list of maps, lists, or scalars into a single map. - Used to build the SELF_CLOSING_TAGS and NESTABLE_TAGS maps out - of lists and partial maps.""" - built = {} - for portion in args: - if hasattr(portion, 'items'): - #It's a map. Merge it. - for k,v in portion.items(): - built[k] = v - elif isList(portion): - #It's a list. Map each item to the default. - for k in portion: - built[k] = default - else: - #It's a scalar. Map it to the default. - built[portion] = default - return built - -class BeautifulStoneSoup(Tag, SGMLParser): - - """This class contains the basic parser and fetch code. It defines - a parser that knows nothing about tag behavior except for the - following: - - You can't close a tag without closing all the tags it encloses. - That is, "" actually means - "". - - [Another possible explanation is "", but since - this class defines no SELF_CLOSING_TAGS, it will never use that - explanation.] - - This class is useful for parsing XML or made-up markup languages, - or when BeautifulSoup makes an assumption counter to what you were - expecting.""" - - SELF_CLOSING_TAGS = {} - NESTABLE_TAGS = {} - RESET_NESTING_TAGS = {} - QUOTE_TAGS = {} - - #As a public service we will by default silently replace MS smart quotes - #and similar characters with their HTML or ASCII equivalents. - MS_CHARS = { '\x80' : '€', - '\x81' : ' ', - '\x82' : '‚', - '\x83' : 'ƒ', - '\x84' : '„', - '\x85' : '…', - '\x86' : '†', - '\x87' : '‡', - '\x88' : '⁁', - '\x89' : '%', - '\x8A' : 'Š', - '\x8B' : '<', - '\x8C' : 'Œ', - '\x8D' : '?', - '\x8E' : 'Z', - '\x8F' : '?', - '\x90' : '?', - '\x91' : '‘', - '\x92' : '’', - '\x93' : '“', - '\x94' : '”', - '\x95' : '•', - '\x96' : '–', - '\x97' : '—', - '\x98' : '˜', - '\x99' : '™', - '\x9a' : 'š', - '\x9b' : '>', - '\x9c' : 'œ', - '\x9d' : '?', - '\x9e' : 'z', - '\x9f' : 'Ÿ',} - - PARSER_MASSAGE = [(re.compile('(<[^<>]*)/>'), - lambda(x):x.group(1) + ' />'), - (re.compile(']*)>'), - lambda(x):''), - (re.compile("([\x80-\x9f])"), - lambda(x): BeautifulStoneSoup.MS_CHARS.get(x.group(1))) - ] - - ROOT_TAG_NAME = '[document]' - - def __init__(self, text=None, avoidParserProblems=True, - initialTextIsEverything=True): - """Initialize this as the 'root tag' and feed in any text to - the parser. - - NOTE about avoidParserProblems: sgmllib will process most bad - HTML, and BeautifulSoup has tricks for dealing with some HTML - that kills sgmllib, but Beautiful Soup can nonetheless choke - or lose data if your data uses self-closing tags or - declarations incorrectly. By default, Beautiful Soup sanitizes - its input to avoid the vast majority of these problems. The - problems are relatively rare, even in bad HTML, so feel free - to pass in False to avoidParserProblems if they don't apply to - you, and you'll get better performance. The only reason I have - this turned on by default is so I don't get so many tech - support questions. - - The two most common instances of invalid HTML that will choke - sgmllib are fixed by the default parser massage techniques: - -
(No space between name of closing tag and tag close) - (Extraneous whitespace in declaration) - - You can pass in a custom list of (RE object, replace method) - tuples to get Beautiful Soup to scrub your input the way you - want.""" - Tag.__init__(self, self.ROOT_TAG_NAME) - if avoidParserProblems \ - and not isList(avoidParserProblems): - avoidParserProblems = self.PARSER_MASSAGE - self.avoidParserProblems = avoidParserProblems - SGMLParser.__init__(self) - self.quoteStack = [] - self.hidden = 1 - self.reset() - if hasattr(text, 'read'): - #It's a file-type object. - text = text.read() - if text: - self.feed(text) - if initialTextIsEverything: - self.done() - - def __getattr__(self, methodName): - """This method routes method call requests to either the SGMLParser - superclass or the Tag superclass, depending on the method name.""" - if methodName.find('start_') == 0 or methodName.find('end_') == 0 \ - or methodName.find('do_') == 0: - return SGMLParser.__getattr__(self, methodName) - elif methodName.find('__') != 0: - return Tag.__getattr__(self, methodName) - else: - raise AttributeError - - def feed(self, text): - if self.avoidParserProblems: - for fix, m in self.avoidParserProblems: - text = fix.sub(m, text) - SGMLParser.feed(self, text) - - def done(self): - """Called when you're done parsing, so that the unclosed tags can be - correctly processed.""" - self.endData() #NEW - while self.currentTag.name != self.ROOT_TAG_NAME: - self.popTag() - - def reset(self): - SGMLParser.reset(self) - self.currentData = [] - self.currentTag = None - self.tagStack = [] - self.pushTag(self) - - def popTag(self): - tag = self.tagStack.pop() - # Tags with just one string-owning child get the child as a - # 'string' property, so that soup.tag.string is shorthand for - # soup.tag.contents[0] - if len(self.currentTag.contents) == 1 and \ - isinstance(self.currentTag.contents[0], NavigableText): - self.currentTag.string = self.currentTag.contents[0] - - #print "Pop", tag.name - if self.tagStack: - self.currentTag = self.tagStack[-1] - return self.currentTag - - def pushTag(self, tag): - #print "Push", tag.name - if self.currentTag: - self.currentTag.append(tag) - self.tagStack.append(tag) - self.currentTag = self.tagStack[-1] - - def endData(self): - currentData = ''.join(self.currentData) - if currentData: - if not currentData.strip(): - if '\n' in currentData: - currentData = '\n' - else: - currentData = ' ' - c = NavigableString - if type(currentData) == types.UnicodeType: - c = NavigableUnicodeString - o = c(currentData) - o.setup(self.currentTag, self.previous) - if self.previous: - self.previous.next = o - self.previous = o - self.currentTag.contents.append(o) - self.currentData = [] - - def _popToTag(self, name, inclusivePop=True): - """Pops the tag stack up to and including the most recent - instance of the given tag. If inclusivePop is false, pops the tag - stack up to but *not* including the most recent instqance of - the given tag.""" - if name == self.ROOT_TAG_NAME: - return - - numPops = 0 - mostRecentTag = None - for i in range(len(self.tagStack)-1, 0, -1): - if name == self.tagStack[i].name: - numPops = len(self.tagStack)-i - break - if not inclusivePop: - numPops = numPops - 1 - - for i in range(0, numPops): - mostRecentTag = self.popTag() - return mostRecentTag - - def _smartPop(self, name): - - """We need to pop up to the previous tag of this type, unless - one of this tag's nesting reset triggers comes between this - tag and the previous tag of this type, OR unless this tag is a - generic nesting trigger and another generic nesting trigger - comes between this tag and the previous tag of this type. - - Examples: -

FooBar

should pop to 'p', not 'b'. -

FooBar

should pop to 'table', not 'p'. -

Foo

Bar

should pop to 'tr', not 'p'. -

FooBar

should pop to 'p', not 'b'. - -

    • *
    • * should pop to 'ul', not the first 'li'. -
  • ** should pop to 'table', not the first 'tr' - tag should - implicitly close the previous tag within the same
    ** should pop to 'tr', not the first 'td' - """ - - nestingResetTriggers = self.NESTABLE_TAGS.get(name) - isNestable = nestingResetTriggers != None - isResetNesting = self.RESET_NESTING_TAGS.has_key(name) - popTo = None - inclusive = True - for i in range(len(self.tagStack)-1, 0, -1): - p = self.tagStack[i] - if (not p or p.name == name) and not isNestable: - #Non-nestable tags get popped to the top or to their - #last occurance. - popTo = name - break - if (nestingResetTriggers != None - and p.name in nestingResetTriggers) \ - or (nestingResetTriggers == None and isResetNesting - and self.RESET_NESTING_TAGS.has_key(p.name)): - - #If we encounter one of the nesting reset triggers - #peculiar to this tag, or we encounter another tag - #that causes nesting to reset, pop up to but not - #including that tag. - - popTo = p.name - inclusive = False - break - p = p.parent - if popTo: - self._popToTag(popTo, inclusive) - - def unknown_starttag(self, name, attrs, selfClosing=0): - #print "Start tag %s" % name - if self.quoteStack: - #This is not a real tag. - #print "<%s> is not real!" % name - attrs = ''.join(map(lambda(x, y): ' %s="%s"' % (x, y), attrs)) - self.handle_data('<%s%s>' % (name, attrs)) - return - self.endData() - if not name in self.SELF_CLOSING_TAGS and not selfClosing: - self._smartPop(name) - tag = Tag(name, attrs, self.currentTag, self.previous) - if self.previous: - self.previous.next = tag - self.previous = tag - self.pushTag(tag) - if selfClosing or name in self.SELF_CLOSING_TAGS: - self.popTag() - if name in self.QUOTE_TAGS: - #print "Beginning quote (%s)" % name - self.quoteStack.append(name) - self.literal = 1 - - def unknown_endtag(self, name): - if self.quoteStack and self.quoteStack[-1] != name: - #This is not a real end tag. - #print " is not real!" % name - self.handle_data('' % name) - return - self.endData() - self._popToTag(name) - if self.quoteStack and self.quoteStack[-1] == name: - self.quoteStack.pop() - self.literal = (len(self.quoteStack) > 0) - - def handle_data(self, data): - self.currentData.append(data) - - def handle_pi(self, text): - "Propagate processing instructions right through." - self.handle_data("" % text) - - def handle_comment(self, text): - "Propagate comments right through." - self.handle_data("" % text) - - def handle_charref(self, ref): - "Propagate char refs right through." - self.handle_data('&#%s;' % ref) - - def handle_entityref(self, ref): - "Propagate entity refs right through." - self.handle_data('&%s;' % ref) - - def handle_decl(self, data): - "Propagate DOCTYPEs and the like right through." - self.handle_data('' % data) - - def parse_declaration(self, i): - """Treat a bogus SGML declaration as raw data. Treat a CDATA - declaration as regular data.""" - j = None - if self.rawdata[i:i+9] == '', i) - if k == -1: - k = len(self.rawdata) - self.handle_data(self.rawdata[i+9:k]) - j = k+3 - else: - try: - j = SGMLParser.parse_declaration(self, i) - except SGMLParseError: - toHandle = self.rawdata[i:] - self.handle_data(toHandle) - j = i + len(toHandle) - return j - -class BeautifulSoup(BeautifulStoneSoup): - - """This parser knows the following facts about HTML: - - * Some tags have no closing tag and should be interpreted as being - closed as soon as they are encountered. - - * The text inside some tags (ie. 'script') may contain tags which - are not really part of the document and which should be parsed - as text, not tags. If you want to parse the text as tags, you can - always fetch it and parse it explicitly. - - * Tag nesting rules: - - Most tags can't be nested at all. For instance, the occurance of - a

    tag should implicitly close the previous

    tag. - -

    Para1

    Para2 - should be transformed into: -

    Para1

    Para2 - - Some tags can be nested arbitrarily. For instance, the occurance - of a

    tag should _not_ implicitly close the previous -
    tag. - - Alice said:
    Bob said:
    Blah - should NOT be transformed into: - Alice said:
    Bob said:
    Blah - - Some tags can be nested, but the nesting is reset by the - interposition of other tags. For instance, a
    , - but not close a tag in another table. - -
    BlahBlah - should be transformed into: -
    BlahBlah - but, - Blah
    Blah - should NOT be transformed into - Blah
    Blah - - Differing assumptions about tag nesting rules are a major source - of problems with the BeautifulSoup class. If BeautifulSoup is not - treating as nestable a tag your page author treats as nestable, - try ICantBelieveItsBeautifulSoup before writing your own - subclass.""" - - SELF_CLOSING_TAGS = buildTagMap(None, ['br' , 'hr', 'input', 'img', 'meta', - 'spacer', 'link', 'frame', 'base']) - - QUOTE_TAGS = {'script': None} - - #According to the HTML standard, each of these inline tags can - #contain another tag of the same type. Furthermore, it's common - #to actually use these tags this way. - NESTABLE_INLINE_TAGS = ['span', 'font', 'q', 'object', 'bdo', 'sub', 'sup', - 'center'] - - #According to the HTML standard, these block tags can contain - #another tag of the same type. Furthermore, it's common - #to actually use these tags this way. - NESTABLE_BLOCK_TAGS = ['blockquote', 'div', 'fieldset', 'ins', 'del'] - - #Lists can contain other lists, but there are restrictions. - NESTABLE_LIST_TAGS = { 'ol' : [], - 'ul' : [], - 'li' : ['ul', 'ol'], - 'dl' : [], - 'dd' : ['dl'], - 'dt' : ['dl'] } - - #Tables can contain other tables, but there are restrictions. - NESTABLE_TABLE_TAGS = {'table' : [], - 'tr' : ['table', 'tbody', 'tfoot', 'thead'], - 'td' : ['tr'], - 'th' : ['tr'], - } - - NON_NESTABLE_BLOCK_TAGS = ['address', 'form', 'p', 'pre'] - - #If one of these tags is encountered, all tags up to the next tag of - #this type are popped. - RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript', - NON_NESTABLE_BLOCK_TAGS, - NESTABLE_LIST_TAGS, - NESTABLE_TABLE_TAGS) - - NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS, - NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS) - -class ICantBelieveItsBeautifulSoup(BeautifulSoup): - - """The BeautifulSoup class is oriented towards skipping over - common HTML errors like unclosed tags. However, sometimes it makes - errors of its own. For instance, consider this fragment: - - FooBar - - This is perfectly valid (if bizarre) HTML. However, the - BeautifulSoup class will implicitly close the first b tag when it - encounters the second 'b'. It will think the author wrote - "FooBar", and didn't close the first 'b' tag, because - there's no real-world reason to bold something that's already - bold. When it encounters '' it will close two more 'b' - tags, for a grand total of three tags closed instead of two. This - can throw off the rest of your document structure. The same is - true of a number of other tags, listed below. - - It's much more common for someone to forget to close (eg.) a 'b' - tag than to actually use nested 'b' tags, and the BeautifulSoup - class handles the common case. This class handles the - not-co-common case: where you can't believe someone wrote what - they did, but it's valid HTML and BeautifulSoup screwed up by - assuming it wouldn't be. - - If this doesn't do what you need, try subclassing this class or - BeautifulSoup, and providing your own list of NESTABLE_TAGS.""" - - I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \ - ['em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', - 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', - 'big'] - - I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ['noscript'] - - NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS, - I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS, - I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS) - -class BeautifulSOAP(BeautifulStoneSoup): - """This class will push a tag with only a single string child into - the tag's parent as an attribute. The attribute's name is the tag - name, and the value is the string child. An example should give - the flavor of the change: - - baz - => - baz - - You can then access fooTag['bar'] instead of fooTag.barTag.string. - - This is, of course, useful for scraping structures that tend to - use subelements instead of attributes, such as SOAP messages. Note - that it modifies its input, so don't print the modified version - out. - - I'm not sure how many people really want to use this class; let me - know if you do. Mainly I like the name.""" - - def popTag(self): - if len(self.tagStack) > 1: - tag = self.tagStack[-1] - parent = self.tagStack[-2] - parent._getAttrMap() - if (isinstance(tag, Tag) and len(tag.contents) == 1 and - isinstance(tag.contents[0], NavigableText) and - not parent.attrMap.has_key(tag.name)): - parent[tag.name] = tag.contents[0] - BeautifulStoneSoup.popTag(self) - -#Enterprise class names! It has come to our attention that some people -#think the names of the Beautiful Soup parser classes are too silly -#and "unprofessional" for use in enterprise screen-scraping. We feel -#your pain! For such-minded folk, the Beautiful Soup Consortium And -#All-Night Kosher Bakery recommends renaming this file to -#"RobustParser.py" (or, in cases of extreme enterprisitude, -#"RobustParserBeanInterface.class") and using the following -#enterprise-friendly class aliases: -class RobustXMLParser(BeautifulStoneSoup): - pass -class RobustHTMLParser(BeautifulSoup): - pass -class RobustWackAssHTMLParser(ICantBelieveItsBeautifulSoup): - pass -class SimplifyingSOAPParser(BeautifulSOAP): - pass - -### - - -#By default, act as an HTML pretty-printer. -if __name__ == '__main__': - import sys - soup = BeautifulStoneSoup(sys.stdin.read()) - print soup.prettify() diff --git a/plugin.video.alfa/lib/mechanize/_clientcookie.py b/plugin.video.alfa/lib/mechanize/_clientcookie.py deleted file mode 100755 index d29feaae..00000000 --- a/plugin.video.alfa/lib/mechanize/_clientcookie.py +++ /dev/null @@ -1,1725 +0,0 @@ -"""HTTP cookie handling for web clients. - -This module originally developed from my port of Gisle Aas' Perl module -HTTP::Cookies, from the libwww-perl library. - -Docstrings, comments and debug strings in this code refer to the -attributes of the HTTP cookie system as cookie-attributes, to distinguish -them clearly from Python attributes. - - CookieJar____ - / \ \ - FileCookieJar \ \ - / | \ \ \ - MozillaCookieJar | LWPCookieJar \ \ - | | \ - | ---MSIEBase | \ - | / | | \ - | / MSIEDBCookieJar BSDDBCookieJar - |/ - MSIECookieJar - -Comments to John J Lee . - - -Copyright 2002-2006 John J Lee -Copyright 1997-1999 Gisle Aas (original libwww-perl code) -Copyright 2002-2003 Johnny Lee (original MSIE Perl code) - -This code is free software; you can redistribute it and/or modify it -under the terms of the BSD or ZPL 2.1 licenses (see the file -COPYING.txt included with the distribution). - -""" - -import sys, re, copy, time, urllib, types, logging -try: - import threading - _threading = threading; del threading -except ImportError: - import dummy_threading - _threading = dummy_threading; del dummy_threading - -MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar " - "instance initialised with one)") -DEFAULT_HTTP_PORT = "80" - -from _headersutil import split_header_words, parse_ns_headers -from _util import isstringlike -import _rfc3986 - -debug = logging.getLogger("mechanize.cookies").debug - - -def reraise_unmasked_exceptions(unmasked=()): - # There are a few catch-all except: statements in this module, for - # catching input that's bad in unexpected ways. - # This function re-raises some exceptions we don't want to trap. - import mechanize, warnings - if not mechanize.USE_BARE_EXCEPT: - raise - unmasked = unmasked + (KeyboardInterrupt, SystemExit, MemoryError) - etype = sys.exc_info()[0] - if issubclass(etype, unmasked): - raise - # swallowed an exception - import traceback, StringIO - f = StringIO.StringIO() - traceback.print_exc(None, f) - msg = f.getvalue() - warnings.warn("mechanize bug!\n%s" % msg, stacklevel=2) - - -IPV4_RE = re.compile(r"\.\d+$") -def is_HDN(text): - """Return True if text is a host domain name.""" - # XXX - # This may well be wrong. Which RFC is HDN defined in, if any (for - # the purposes of RFC 2965)? - # For the current implementation, what about IPv6? Remember to look - # at other uses of IPV4_RE also, if change this. - return not (IPV4_RE.search(text) or - text == "" or - text[0] == "." or text[-1] == ".") - -def domain_match(A, B): - """Return True if domain A domain-matches domain B, according to RFC 2965. - - A and B may be host domain names or IP addresses. - - RFC 2965, section 1: - - Host names can be specified either as an IP address or a HDN string. - Sometimes we compare one host name with another. (Such comparisons SHALL - be case-insensitive.) Host A's name domain-matches host B's if - - * their host name strings string-compare equal; or - - * A is a HDN string and has the form NB, where N is a non-empty - name string, B has the form .B', and B' is a HDN string. (So, - x.y.com domain-matches .Y.com but not Y.com.) - - Note that domain-match is not a commutative operation: a.b.c.com - domain-matches .c.com, but not the reverse. - - """ - # Note that, if A or B are IP addresses, the only relevant part of the - # definition of the domain-match algorithm is the direct string-compare. - A = A.lower() - B = B.lower() - if A == B: - return True - if not is_HDN(A): - return False - i = A.rfind(B) - has_form_nb = not (i == -1 or i == 0) - return ( - has_form_nb and - B.startswith(".") and - is_HDN(B[1:]) - ) - -def liberal_is_HDN(text): - """Return True if text is a sort-of-like a host domain name. - - For accepting/blocking domains. - - """ - return not IPV4_RE.search(text) - -def user_domain_match(A, B): - """For blocking/accepting domains. - - A and B may be host domain names or IP addresses. - - """ - A = A.lower() - B = B.lower() - if not (liberal_is_HDN(A) and liberal_is_HDN(B)): - if A == B: - # equal IP addresses - return True - return False - initial_dot = B.startswith(".") - if initial_dot and A.endswith(B): - return True - if not initial_dot and A == B: - return True - return False - -cut_port_re = re.compile(r":\d+$") -def request_host(request): - """Return request-host, as defined by RFC 2965. - - Variation from RFC: returned value is lowercased, for convenient - comparison. - - """ - url = request.get_full_url() - host = _rfc3986.urlsplit(url)[1] - if host is None: - host = request.get_header("Host", "") - # remove port, if present - return cut_port_re.sub("", host, 1) - -def request_host_lc(request): - return request_host(request).lower() - -def eff_request_host(request): - """Return a tuple (request-host, effective request-host name).""" - erhn = req_host = request_host(request) - if req_host.find(".") == -1 and not IPV4_RE.search(req_host): - erhn = req_host + ".local" - return req_host, erhn - -def eff_request_host_lc(request): - req_host, erhn = eff_request_host(request) - return req_host.lower(), erhn.lower() - -def effective_request_host(request): - """Return the effective request-host, as defined by RFC 2965.""" - return eff_request_host(request)[1] - -def request_path(request): - """Return path component of request-URI, as defined by RFC 2965.""" - url = request.get_full_url() - path = escape_path(_rfc3986.urlsplit(url)[2]) - if not path.startswith("/"): - path = "/" + path - return path - -def request_port(request): - host = request.get_host() - i = host.find(':') - if i >= 0: - port = host[i+1:] - try: - int(port) - except ValueError: - debug("nonnumeric port: '%s'", port) - return None - else: - port = DEFAULT_HTTP_PORT - return port - -def request_is_unverifiable(request): - try: - return request.is_unverifiable() - except AttributeError: - if hasattr(request, "unverifiable"): - return request.unverifiable - else: - raise - -# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't -# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738). -HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()" -ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])") -def uppercase_escaped_char(match): - return "%%%s" % match.group(1).upper() -def escape_path(path): - """Escape any invalid characters in HTTP URL, and uppercase all escapes.""" - # There's no knowing what character encoding was used to create URLs - # containing %-escapes, but since we have to pick one to escape invalid - # path characters, we pick UTF-8, as recommended in the HTML 4.0 - # specification: - # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1 - # And here, kind of: draft-fielding-uri-rfc2396bis-03 - # (And in draft IRI specification: draft-duerst-iri-05) - # (And here, for new URI schemes: RFC 2718) - if isinstance(path, types.UnicodeType): - path = path.encode("utf-8") - path = urllib.quote(path, HTTP_PATH_SAFE) - path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path) - return path - -def reach(h): - """Return reach of host h, as defined by RFC 2965, section 1. - - The reach R of a host name H is defined as follows: - - * If - - - H is the host domain name of a host; and, - - - H has the form A.B; and - - - A has no embedded (that is, interior) dots; and - - - B has at least one embedded dot, or B is the string "local". - then the reach of H is .B. - - * Otherwise, the reach of H is H. - - >>> reach("www.acme.com") - '.acme.com' - >>> reach("acme.com") - 'acme.com' - >>> reach("acme.local") - '.local' - - """ - i = h.find(".") - if i >= 0: - #a = h[:i] # this line is only here to show what a is - b = h[i+1:] - i = b.find(".") - if is_HDN(h) and (i >= 0 or b == "local"): - return "."+b - return h - -def is_third_party(request): - """ - - RFC 2965, section 3.3.6: - - An unverifiable transaction is to a third-party host if its request- - host U does not domain-match the reach R of the request-host O in the - origin transaction. - - """ - req_host = request_host_lc(request) - # the origin request's request-host was stuffed into request by - # _urllib2_support.AbstractHTTPHandler - return not domain_match(req_host, reach(request.origin_req_host)) - - -try: - all -except NameError: - # python 2.4 - def all(iterable): - for x in iterable: - if not x: - return False - return True - - -class Cookie: - """HTTP Cookie. - - This class represents both Netscape and RFC 2965 cookies. - - This is deliberately a very simple class. It just holds attributes. It's - possible to construct Cookie instances that don't comply with the cookie - standards. CookieJar.make_cookies is the factory function for Cookie - objects -- it deals with cookie parsing, supplying defaults, and - normalising to the representation used in this class. CookiePolicy is - responsible for checking them to see whether they should be accepted from - and returned to the server. - - version: integer; - name: string; - value: string (may be None); - port: string; None indicates no attribute was supplied (e.g. "Port", rather - than eg. "Port=80"); otherwise, a port string (eg. "80") or a port list - string (e.g. "80,8080") - port_specified: boolean; true if a value was supplied with the Port - cookie-attribute - domain: string; - domain_specified: boolean; true if Domain was explicitly set - domain_initial_dot: boolean; true if Domain as set in HTTP header by server - started with a dot (yes, this really is necessary!) - path: string; - path_specified: boolean; true if Path was explicitly set - secure: boolean; true if should only be returned over secure connection - expires: integer; seconds since epoch (RFC 2965 cookies should calculate - this value from the Max-Age attribute) - discard: boolean, true if this is a session cookie; (if no expires value, - this should be true) - comment: string; - comment_url: string; - rfc2109: boolean; true if cookie arrived in a Set-Cookie: (not - Set-Cookie2:) header, but had a version cookie-attribute of 1 - rest: mapping of other cookie-attributes - - Note that the port may be present in the headers, but unspecified ("Port" - rather than"Port=80", for example); if this is the case, port is None. - - """ - - - _attrs = ("version", "name", "value", - "port", "port_specified", - "domain", "domain_specified", "domain_initial_dot", - "path", "path_specified", - "secure", "expires", "discard", "comment", "comment_url", - "rfc2109", "_rest") - - def __init__(self, version, name, value, - port, port_specified, - domain, domain_specified, domain_initial_dot, - path, path_specified, - secure, - expires, - discard, - comment, - comment_url, - rest, - rfc2109=False, - ): - - if version is not None: version = int(version) - if expires is not None: expires = int(expires) - if port is None and port_specified is True: - raise ValueError("if port is None, port_specified must be false") - - self.version = version - self.name = name - self.value = value - self.port = port - self.port_specified = port_specified - # normalise case, as per RFC 2965 section 3.3.3 - self.domain = domain.lower() - self.domain_specified = domain_specified - # Sigh. We need to know whether the domain given in the - # cookie-attribute had an initial dot, in order to follow RFC 2965 - # (as clarified in draft errata). Needed for the returned $Domain - # value. - self.domain_initial_dot = domain_initial_dot - self.path = path - self.path_specified = path_specified - self.secure = secure - self.expires = expires - self.discard = discard - self.comment = comment - self.comment_url = comment_url - self.rfc2109 = rfc2109 - - self._rest = copy.copy(rest) - - def has_nonstandard_attr(self, name): - return self._rest.has_key(name) - def get_nonstandard_attr(self, name, default=None): - return self._rest.get(name, default) - def set_nonstandard_attr(self, name, value): - self._rest[name] = value - def nonstandard_attr_keys(self): - return self._rest.keys() - - def is_expired(self, now=None): - if now is None: now = time.time() - return (self.expires is not None) and (self.expires <= now) - - def __eq__(self, other): - return all(getattr(self, a) == getattr(other, a) for a in self._attrs) - - def __ne__(self, other): - return not (self == other) - - def __str__(self): - if self.port is None: p = "" - else: p = ":"+self.port - limit = self.domain + p + self.path - if self.value is not None: - namevalue = "%s=%s" % (self.name, self.value) - else: - namevalue = self.name - return "" % (namevalue, limit) - - def __repr__(self): - args = [] - for name in ["version", "name", "value", - "port", "port_specified", - "domain", "domain_specified", "domain_initial_dot", - "path", "path_specified", - "secure", "expires", "discard", "comment", "comment_url", - ]: - attr = getattr(self, name) - args.append("%s=%s" % (name, repr(attr))) - args.append("rest=%s" % repr(self._rest)) - args.append("rfc2109=%s" % repr(self.rfc2109)) - return "Cookie(%s)" % ", ".join(args) - - -class CookiePolicy: - """Defines which cookies get accepted from and returned to server. - - May also modify cookies. - - The subclass DefaultCookiePolicy defines the standard rules for Netscape - and RFC 2965 cookies -- override that if you want a customised policy. - - As well as implementing set_ok and return_ok, implementations of this - interface must also supply the following attributes, indicating which - protocols should be used, and how. These can be read and set at any time, - though whether that makes complete sense from the protocol point of view is - doubtful. - - Public attributes: - - netscape: implement netscape protocol - rfc2965: implement RFC 2965 protocol - rfc2109_as_netscape: - WARNING: This argument will change or go away if is not accepted into - the Python standard library in this form! - If true, treat RFC 2109 cookies as though they were Netscape cookies. The - default is for this attribute to be None, which means treat 2109 cookies - as RFC 2965 cookies unless RFC 2965 handling is switched off (which it is, - by default), and as Netscape cookies otherwise. - hide_cookie2: don't add Cookie2 header to requests (the presence of - this header indicates to the server that we understand RFC 2965 - cookies) - - """ - def set_ok(self, cookie, request): - """Return true if (and only if) cookie should be accepted from server. - - Currently, pre-expired cookies never get this far -- the CookieJar - class deletes such cookies itself. - - cookie: mechanize.Cookie object - request: object implementing the interface defined by - CookieJar.extract_cookies.__doc__ - - """ - raise NotImplementedError() - - def return_ok(self, cookie, request): - """Return true if (and only if) cookie should be returned to server. - - cookie: mechanize.Cookie object - request: object implementing the interface defined by - CookieJar.add_cookie_header.__doc__ - - """ - raise NotImplementedError() - - def domain_return_ok(self, domain, request): - """Return false if cookies should not be returned, given cookie domain. - - This is here as an optimization, to remove the need for checking every - cookie with a particular domain (which may involve reading many files). - The default implementations of domain_return_ok and path_return_ok - (return True) leave all the work to return_ok. - - If domain_return_ok returns true for the cookie domain, path_return_ok - is called for the cookie path. Otherwise, path_return_ok and return_ok - are never called for that cookie domain. If path_return_ok returns - true, return_ok is called with the Cookie object itself for a full - check. Otherwise, return_ok is never called for that cookie path. - - Note that domain_return_ok is called for every *cookie* domain, not - just for the *request* domain. For example, the function might be - called with both ".acme.com" and "www.acme.com" if the request domain - is "www.acme.com". The same goes for path_return_ok. - - For argument documentation, see the docstring for return_ok. - - """ - return True - - def path_return_ok(self, path, request): - """Return false if cookies should not be returned, given cookie path. - - See the docstring for domain_return_ok. - - """ - return True - - -class DefaultCookiePolicy(CookiePolicy): - """Implements the standard rules for accepting and returning cookies. - - Both RFC 2965 and Netscape cookies are covered. RFC 2965 handling is - switched off by default. - - The easiest way to provide your own policy is to override this class and - call its methods in your overriden implementations before adding your own - additional checks. - - import mechanize - class MyCookiePolicy(mechanize.DefaultCookiePolicy): - def set_ok(self, cookie, request): - if not mechanize.DefaultCookiePolicy.set_ok( - self, cookie, request): - return False - if i_dont_want_to_store_this_cookie(): - return False - return True - - In addition to the features required to implement the CookiePolicy - interface, this class allows you to block and allow domains from setting - and receiving cookies. There are also some strictness switches that allow - you to tighten up the rather loose Netscape protocol rules a little bit (at - the cost of blocking some benign cookies). - - A domain blacklist and whitelist is provided (both off by default). Only - domains not in the blacklist and present in the whitelist (if the whitelist - is active) participate in cookie setting and returning. Use the - blocked_domains constructor argument, and blocked_domains and - set_blocked_domains methods (and the corresponding argument and methods for - allowed_domains). If you set a whitelist, you can turn it off again by - setting it to None. - - Domains in block or allow lists that do not start with a dot must - string-compare equal. For example, "acme.com" matches a blacklist entry of - "acme.com", but "www.acme.com" does not. Domains that do start with a dot - are matched by more specific domains too. For example, both "www.acme.com" - and "www.munitions.acme.com" match ".acme.com" (but "acme.com" itself does - not). IP addresses are an exception, and must match exactly. For example, - if blocked_domains contains "192.168.1.2" and ".168.1.2" 192.168.1.2 is - blocked, but 193.168.1.2 is not. - - Additional Public Attributes: - - General strictness switches - - strict_domain: don't allow sites to set two-component domains with - country-code top-level domains like .co.uk, .gov.uk, .co.nz. etc. - This is far from perfect and isn't guaranteed to work! - - RFC 2965 protocol strictness switches - - strict_rfc2965_unverifiable: follow RFC 2965 rules on unverifiable - transactions (usually, an unverifiable transaction is one resulting from - a redirect or an image hosted on another site); if this is false, cookies - are NEVER blocked on the basis of verifiability - - Netscape protocol strictness switches - - strict_ns_unverifiable: apply RFC 2965 rules on unverifiable transactions - even to Netscape cookies - strict_ns_domain: flags indicating how strict to be with domain-matching - rules for Netscape cookies: - DomainStrictNoDots: when setting cookies, host prefix must not contain a - dot (e.g. www.foo.bar.com can't set a cookie for .bar.com, because - www.foo contains a dot) - DomainStrictNonDomain: cookies that did not explicitly specify a Domain - cookie-attribute can only be returned to a domain that string-compares - equal to the domain that set the cookie (e.g. rockets.acme.com won't - be returned cookies from acme.com that had no Domain cookie-attribute) - DomainRFC2965Match: when setting cookies, require a full RFC 2965 - domain-match - DomainLiberal and DomainStrict are the most useful combinations of the - above flags, for convenience - strict_ns_set_initial_dollar: ignore cookies in Set-Cookie: headers that - have names starting with '$' - strict_ns_set_path: don't allow setting cookies whose path doesn't - path-match request URI - - """ - - DomainStrictNoDots = 1 - DomainStrictNonDomain = 2 - DomainRFC2965Match = 4 - - DomainLiberal = 0 - DomainStrict = DomainStrictNoDots|DomainStrictNonDomain - - def __init__(self, - blocked_domains=None, allowed_domains=None, - netscape=True, rfc2965=False, - # WARNING: this argument will change or go away if is not - # accepted into the Python standard library in this form! - # default, ie. treat 2109 as netscape iff not rfc2965 - rfc2109_as_netscape=None, - hide_cookie2=False, - strict_domain=False, - strict_rfc2965_unverifiable=True, - strict_ns_unverifiable=False, - strict_ns_domain=DomainLiberal, - strict_ns_set_initial_dollar=False, - strict_ns_set_path=False, - ): - """ - Constructor arguments should be used as keyword arguments only. - - blocked_domains: sequence of domain names that we never accept cookies - from, nor return cookies to - allowed_domains: if not None, this is a sequence of the only domains - for which we accept and return cookies - - For other arguments, see CookiePolicy.__doc__ and - DefaultCookiePolicy.__doc__.. - - """ - self.netscape = netscape - self.rfc2965 = rfc2965 - self.rfc2109_as_netscape = rfc2109_as_netscape - self.hide_cookie2 = hide_cookie2 - self.strict_domain = strict_domain - self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable - self.strict_ns_unverifiable = strict_ns_unverifiable - self.strict_ns_domain = strict_ns_domain - self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar - self.strict_ns_set_path = strict_ns_set_path - - if blocked_domains is not None: - self._blocked_domains = tuple(blocked_domains) - else: - self._blocked_domains = () - - if allowed_domains is not None: - allowed_domains = tuple(allowed_domains) - self._allowed_domains = allowed_domains - - def blocked_domains(self): - """Return the sequence of blocked domains (as a tuple).""" - return self._blocked_domains - def set_blocked_domains(self, blocked_domains): - """Set the sequence of blocked domains.""" - self._blocked_domains = tuple(blocked_domains) - - def is_blocked(self, domain): - for blocked_domain in self._blocked_domains: - if user_domain_match(domain, blocked_domain): - return True - return False - - def allowed_domains(self): - """Return None, or the sequence of allowed domains (as a tuple).""" - return self._allowed_domains - def set_allowed_domains(self, allowed_domains): - """Set the sequence of allowed domains, or None.""" - if allowed_domains is not None: - allowed_domains = tuple(allowed_domains) - self._allowed_domains = allowed_domains - - def is_not_allowed(self, domain): - if self._allowed_domains is None: - return False - for allowed_domain in self._allowed_domains: - if user_domain_match(domain, allowed_domain): - return False - return True - - def set_ok(self, cookie, request): - """ - If you override set_ok, be sure to call this method. If it returns - false, so should your subclass (assuming your subclass wants to be more - strict about which cookies to accept). - - """ - debug(" - checking cookie %s", cookie) - - assert cookie.name is not None - - for n in "version", "verifiability", "name", "path", "domain", "port": - fn_name = "set_ok_"+n - fn = getattr(self, fn_name) - if not fn(cookie, request): - return False - - return True - - def set_ok_version(self, cookie, request): - if cookie.version is None: - # Version is always set to 0 by parse_ns_headers if it's a Netscape - # cookie, so this must be an invalid RFC 2965 cookie. - debug(" Set-Cookie2 without version attribute (%s)", cookie) - return False - if cookie.version > 0 and not self.rfc2965: - debug(" RFC 2965 cookies are switched off") - return False - elif cookie.version == 0 and not self.netscape: - debug(" Netscape cookies are switched off") - return False - return True - - def set_ok_verifiability(self, cookie, request): - if request_is_unverifiable(request) and is_third_party(request): - if cookie.version > 0 and self.strict_rfc2965_unverifiable: - debug(" third-party RFC 2965 cookie during " - "unverifiable transaction") - return False - elif cookie.version == 0 and self.strict_ns_unverifiable: - debug(" third-party Netscape cookie during " - "unverifiable transaction") - return False - return True - - def set_ok_name(self, cookie, request): - # Try and stop servers setting V0 cookies designed to hack other - # servers that know both V0 and V1 protocols. - if (cookie.version == 0 and self.strict_ns_set_initial_dollar and - cookie.name.startswith("$")): - debug(" illegal name (starts with '$'): '%s'", cookie.name) - return False - return True - - def set_ok_path(self, cookie, request): - if cookie.path_specified: - req_path = request_path(request) - if ((cookie.version > 0 or - (cookie.version == 0 and self.strict_ns_set_path)) and - not req_path.startswith(cookie.path)): - debug(" path attribute %s is not a prefix of request " - "path %s", cookie.path, req_path) - return False - return True - - def set_ok_countrycode_domain(self, cookie, request): - """Return False if explicit cookie domain is not acceptable. - - Called by set_ok_domain, for convenience of overriding by - subclasses. - - """ - if cookie.domain_specified and self.strict_domain: - domain = cookie.domain - # since domain was specified, we know that: - assert domain.startswith(".") - if domain.count(".") == 2: - # domain like .foo.bar - i = domain.rfind(".") - tld = domain[i+1:] - sld = domain[1:i] - if (sld.lower() in [ - "co", "ac", - "com", "edu", "org", "net", "gov", "mil", "int", - "aero", "biz", "cat", "coop", "info", "jobs", "mobi", - "museum", "name", "pro", "travel", - ] and - len(tld) == 2): - # domain like .co.uk - return False - return True - - def set_ok_domain(self, cookie, request): - if self.is_blocked(cookie.domain): - debug(" domain %s is in user block-list", cookie.domain) - return False - if self.is_not_allowed(cookie.domain): - debug(" domain %s is not in user allow-list", cookie.domain) - return False - if not self.set_ok_countrycode_domain(cookie, request): - debug(" country-code second level domain %s", cookie.domain) - return False - if cookie.domain_specified: - req_host, erhn = eff_request_host_lc(request) - domain = cookie.domain - if domain.startswith("."): - undotted_domain = domain[1:] - else: - undotted_domain = domain - embedded_dots = (undotted_domain.find(".") >= 0) - if not embedded_dots and domain != ".local": - debug(" non-local domain %s contains no embedded dot", - domain) - return False - if cookie.version == 0: - if (not erhn.endswith(domain) and - (not erhn.startswith(".") and - not ("."+erhn).endswith(domain))): - debug(" effective request-host %s (even with added " - "initial dot) does not end end with %s", - erhn, domain) - return False - if (cookie.version > 0 or - (self.strict_ns_domain & self.DomainRFC2965Match)): - if not domain_match(erhn, domain): - debug(" effective request-host %s does not domain-match " - "%s", erhn, domain) - return False - if (cookie.version > 0 or - (self.strict_ns_domain & self.DomainStrictNoDots)): - host_prefix = req_host[:-len(domain)] - if (host_prefix.find(".") >= 0 and - not IPV4_RE.search(req_host)): - debug(" host prefix %s for domain %s contains a dot", - host_prefix, domain) - return False - return True - - def set_ok_port(self, cookie, request): - if cookie.port_specified: - req_port = request_port(request) - if req_port is None: - req_port = "80" - else: - req_port = str(req_port) - for p in cookie.port.split(","): - try: - int(p) - except ValueError: - debug(" bad port %s (not numeric)", p) - return False - if p == req_port: - break - else: - debug(" request port (%s) not found in %s", - req_port, cookie.port) - return False - return True - - def return_ok(self, cookie, request): - """ - If you override return_ok, be sure to call this method. If it returns - false, so should your subclass (assuming your subclass wants to be more - strict about which cookies to return). - - """ - # Path has already been checked by path_return_ok, and domain blocking - # done by domain_return_ok. - debug(" - checking cookie %s", cookie) - - for n in ("version", "verifiability", "secure", "expires", "port", - "domain"): - fn_name = "return_ok_"+n - fn = getattr(self, fn_name) - if not fn(cookie, request): - return False - return True - - def return_ok_version(self, cookie, request): - if cookie.version > 0 and not self.rfc2965: - debug(" RFC 2965 cookies are switched off") - return False - elif cookie.version == 0 and not self.netscape: - debug(" Netscape cookies are switched off") - return False - return True - - def return_ok_verifiability(self, cookie, request): - if request_is_unverifiable(request) and is_third_party(request): - if cookie.version > 0 and self.strict_rfc2965_unverifiable: - debug(" third-party RFC 2965 cookie during unverifiable " - "transaction") - return False - elif cookie.version == 0 and self.strict_ns_unverifiable: - debug(" third-party Netscape cookie during unverifiable " - "transaction") - return False - return True - - def return_ok_secure(self, cookie, request): - if cookie.secure and request.get_type() != "https": - debug(" secure cookie with non-secure request") - return False - return True - - def return_ok_expires(self, cookie, request): - if cookie.is_expired(self._now): - debug(" cookie expired") - return False - return True - - def return_ok_port(self, cookie, request): - if cookie.port: - req_port = request_port(request) - if req_port is None: - req_port = "80" - for p in cookie.port.split(","): - if p == req_port: - break - else: - debug(" request port %s does not match cookie port %s", - req_port, cookie.port) - return False - return True - - def return_ok_domain(self, cookie, request): - req_host, erhn = eff_request_host_lc(request) - domain = cookie.domain - - # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't - if (cookie.version == 0 and - (self.strict_ns_domain & self.DomainStrictNonDomain) and - not cookie.domain_specified and domain != erhn): - debug(" cookie with unspecified domain does not string-compare " - "equal to request domain") - return False - - if cookie.version > 0 and not domain_match(erhn, domain): - debug(" effective request-host name %s does not domain-match " - "RFC 2965 cookie domain %s", erhn, domain) - return False - if cookie.version == 0 and not ("."+erhn).endswith(domain): - debug(" request-host %s does not match Netscape cookie domain " - "%s", req_host, domain) - return False - return True - - def domain_return_ok(self, domain, request): - # Liberal check of domain. This is here as an optimization to avoid - # having to load lots of MSIE cookie files unless necessary. - - # Munge req_host and erhn to always start with a dot, so as to err on - # the side of letting cookies through. - dotted_req_host, dotted_erhn = eff_request_host_lc(request) - if not dotted_req_host.startswith("."): - dotted_req_host = "."+dotted_req_host - if not dotted_erhn.startswith("."): - dotted_erhn = "."+dotted_erhn - if not (dotted_req_host.endswith(domain) or - dotted_erhn.endswith(domain)): - #debug(" request domain %s does not match cookie domain %s", - # req_host, domain) - return False - - if self.is_blocked(domain): - debug(" domain %s is in user block-list", domain) - return False - if self.is_not_allowed(domain): - debug(" domain %s is not in user allow-list", domain) - return False - - return True - - def path_return_ok(self, path, request): - debug("- checking cookie path=%s", path) - req_path = request_path(request) - if not req_path.startswith(path): - debug(" %s does not path-match %s", req_path, path) - return False - return True - - -def vals_sorted_by_key(adict): - keys = adict.keys() - keys.sort() - return map(adict.get, keys) - -class MappingIterator: - """Iterates over nested mapping, depth-first, in sorted order by key.""" - def __init__(self, mapping): - self._s = [(vals_sorted_by_key(mapping), 0, None)] # LIFO stack - - def __iter__(self): return self - - def next(self): - # this is hairy because of lack of generators - while 1: - try: - vals, i, prev_item = self._s.pop() - except IndexError: - raise StopIteration() - if i < len(vals): - item = vals[i] - i = i + 1 - self._s.append((vals, i, prev_item)) - try: - item.items - except AttributeError: - # non-mapping - break - else: - # mapping - self._s.append((vals_sorted_by_key(item), 0, item)) - continue - return item - - -# Used as second parameter to dict.get method, to distinguish absent -# dict key from one with a None value. -class Absent: pass - -class CookieJar: - """Collection of HTTP cookies. - - You may not need to know about this class: try mechanize.urlopen(). - - The major methods are extract_cookies and add_cookie_header; these are all - you are likely to need. - - CookieJar supports the iterator protocol: - - for cookie in cookiejar: - # do something with cookie - - Methods: - - add_cookie_header(request) - extract_cookies(response, request) - get_policy() - set_policy(policy) - cookies_for_request(request) - make_cookies(response, request) - set_cookie_if_ok(cookie, request) - set_cookie(cookie) - clear_session_cookies() - clear_expired_cookies() - clear(domain=None, path=None, name=None) - - Public attributes - - policy: CookiePolicy object - - """ - - non_word_re = re.compile(r"\W") - quote_re = re.compile(r"([\"\\])") - strict_domain_re = re.compile(r"\.?[^.]*") - domain_re = re.compile(r"[^.]*") - dots_re = re.compile(r"^\.+") - - def __init__(self, policy=None): - """ - See CookieJar.__doc__ for argument documentation. - - """ - if policy is None: - policy = DefaultCookiePolicy() - self._policy = policy - - self._cookies = {} - - # for __getitem__ iteration in pre-2.2 Pythons - self._prev_getitem_index = 0 - - def get_policy(self): - return self._policy - - def set_policy(self, policy): - self._policy = policy - - def _cookies_for_domain(self, domain, request): - cookies = [] - if not self._policy.domain_return_ok(domain, request): - return [] - debug("Checking %s for cookies to return", domain) - cookies_by_path = self._cookies[domain] - for path in cookies_by_path.keys(): - if not self._policy.path_return_ok(path, request): - continue - cookies_by_name = cookies_by_path[path] - for cookie in cookies_by_name.values(): - if not self._policy.return_ok(cookie, request): - debug(" not returning cookie") - continue - debug(" it's a match") - cookies.append(cookie) - return cookies - - def cookies_for_request(self, request): - """Return a list of cookies to be returned to server. - - The returned list of cookie instances is sorted in the order they - should appear in the Cookie: header for return to the server. - - See add_cookie_header.__doc__ for the interface required of the - request argument. - - New in version 0.1.10 - - """ - self._policy._now = self._now = int(time.time()) - cookies = self._cookies_for_request(request) - # add cookies in order of most specific (i.e. longest) path first - def decreasing_size(a, b): return cmp(len(b.path), len(a.path)) - cookies.sort(decreasing_size) - return cookies - - def _cookies_for_request(self, request): - """Return a list of cookies to be returned to server.""" - # this method still exists (alongside cookies_for_request) because it - # is part of an implied protected interface for subclasses of cookiejar - # XXX document that implied interface, or provide another way of - # implementing cookiejars than subclassing - cookies = [] - for domain in self._cookies.keys(): - cookies.extend(self._cookies_for_domain(domain, request)) - return cookies - - def _cookie_attrs(self, cookies): - """Return a list of cookie-attributes to be returned to server. - - The $Version attribute is also added when appropriate (currently only - once per request). - - >>> jar = CookieJar() - >>> ns_cookie = Cookie(0, "foo", '"bar"', None, False, - ... "example.com", False, False, - ... "/", False, False, None, True, - ... None, None, {}) - >>> jar._cookie_attrs([ns_cookie]) - ['foo="bar"'] - >>> rfc2965_cookie = Cookie(1, "foo", "bar", None, False, - ... ".example.com", True, False, - ... "/", False, False, None, True, - ... None, None, {}) - >>> jar._cookie_attrs([rfc2965_cookie]) - ['$Version=1', 'foo=bar', '$Domain="example.com"'] - - """ - version_set = False - - attrs = [] - for cookie in cookies: - # set version of Cookie header - # XXX - # What should it be if multiple matching Set-Cookie headers have - # different versions themselves? - # Answer: there is no answer; was supposed to be settled by - # RFC 2965 errata, but that may never appear... - version = cookie.version - if not version_set: - version_set = True - if version > 0: - attrs.append("$Version=%s" % version) - - # quote cookie value if necessary - # (not for Netscape protocol, which already has any quotes - # intact, due to the poorly-specified Netscape Cookie: syntax) - if ((cookie.value is not None) and - self.non_word_re.search(cookie.value) and version > 0): - value = self.quote_re.sub(r"\\\1", cookie.value) - else: - value = cookie.value - - # add cookie-attributes to be returned in Cookie header - if cookie.value is None: - attrs.append(cookie.name) - else: - attrs.append("%s=%s" % (cookie.name, value)) - if version > 0: - if cookie.path_specified: - attrs.append('$Path="%s"' % cookie.path) - if cookie.domain.startswith("."): - domain = cookie.domain - if (not cookie.domain_initial_dot and - domain.startswith(".")): - domain = domain[1:] - attrs.append('$Domain="%s"' % domain) - if cookie.port is not None: - p = "$Port" - if cookie.port_specified: - p = p + ('="%s"' % cookie.port) - attrs.append(p) - - return attrs - - def add_cookie_header(self, request): - """Add correct Cookie: header to request (mechanize.Request object). - - The Cookie2 header is also added unless policy.hide_cookie2 is true. - - The request object (usually a mechanize.Request instance) must support - the methods get_full_url, get_host, is_unverifiable, get_type, - has_header, get_header, header_items and add_unredirected_header, as - documented by urllib2. - """ - debug("add_cookie_header") - cookies = self.cookies_for_request(request) - - attrs = self._cookie_attrs(cookies) - if attrs: - if not request.has_header("Cookie"): - request.add_unredirected_header("Cookie", "; ".join(attrs)) - - # if necessary, advertise that we know RFC 2965 - if self._policy.rfc2965 and not self._policy.hide_cookie2: - for cookie in cookies: - if cookie.version != 1 and not request.has_header("Cookie2"): - request.add_unredirected_header("Cookie2", '$Version="1"') - break - - self.clear_expired_cookies() - - def _normalized_cookie_tuples(self, attrs_set): - """Return list of tuples containing normalised cookie information. - - attrs_set is the list of lists of key,value pairs extracted from - the Set-Cookie or Set-Cookie2 headers. - - Tuples are name, value, standard, rest, where name and value are the - cookie name and value, standard is a dictionary containing the standard - cookie-attributes (discard, secure, version, expires or max-age, - domain, path and port) and rest is a dictionary containing the rest of - the cookie-attributes. - - """ - cookie_tuples = [] - - boolean_attrs = "discard", "secure" - value_attrs = ("version", - "expires", "max-age", - "domain", "path", "port", - "comment", "commenturl") - - for cookie_attrs in attrs_set: - name, value = cookie_attrs[0] - - # Build dictionary of standard cookie-attributes (standard) and - # dictionary of other cookie-attributes (rest). - - # Note: expiry time is normalised to seconds since epoch. V0 - # cookies should have the Expires cookie-attribute, and V1 cookies - # should have Max-Age, but since V1 includes RFC 2109 cookies (and - # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we - # accept either (but prefer Max-Age). - max_age_set = False - - bad_cookie = False - - standard = {} - rest = {} - for k, v in cookie_attrs[1:]: - lc = k.lower() - # don't lose case distinction for unknown fields - if lc in value_attrs or lc in boolean_attrs: - k = lc - if k in boolean_attrs and v is None: - # boolean cookie-attribute is present, but has no value - # (like "discard", rather than "port=80") - v = True - if standard.has_key(k): - # only first value is significant - continue - if k == "domain": - if v is None: - debug(" missing value for domain attribute") - bad_cookie = True - break - # RFC 2965 section 3.3.3 - v = v.lower() - if k == "expires": - if max_age_set: - # Prefer max-age to expires (like Mozilla) - continue - if v is None: - debug(" missing or invalid value for expires " - "attribute: treating as session cookie") - continue - if k == "max-age": - max_age_set = True - if v is None: - debug(" missing value for max-age attribute") - bad_cookie = True - break - try: - v = int(v) - except ValueError: - debug(" missing or invalid (non-numeric) value for " - "max-age attribute") - bad_cookie = True - break - # convert RFC 2965 Max-Age to seconds since epoch - # XXX Strictly you're supposed to follow RFC 2616 - # age-calculation rules. Remember that zero Max-Age is a - # is a request to discard (old and new) cookie, though. - k = "expires" - v = self._now + v - if (k in value_attrs) or (k in boolean_attrs): - if (v is None and - k not in ["port", "comment", "commenturl"]): - debug(" missing value for %s attribute" % k) - bad_cookie = True - break - standard[k] = v - else: - rest[k] = v - - if bad_cookie: - continue - - cookie_tuples.append((name, value, standard, rest)) - - return cookie_tuples - - def _cookie_from_cookie_tuple(self, tup, request): - # standard is dict of standard cookie-attributes, rest is dict of the - # rest of them - name, value, standard, rest = tup - - domain = standard.get("domain", Absent) - path = standard.get("path", Absent) - port = standard.get("port", Absent) - expires = standard.get("expires", Absent) - - # set the easy defaults - version = standard.get("version", None) - if version is not None: - try: - version = int(version) - except ValueError: - return None # invalid version, ignore cookie - secure = standard.get("secure", False) - # (discard is also set if expires is Absent) - discard = standard.get("discard", False) - comment = standard.get("comment", None) - comment_url = standard.get("commenturl", None) - - # set default path - if path is not Absent and path != "": - path_specified = True - path = escape_path(path) - else: - path_specified = False - path = request_path(request) - i = path.rfind("/") - if i != -1: - if version == 0: - # Netscape spec parts company from reality here - path = path[:i] - else: - path = path[:i+1] - if len(path) == 0: path = "/" - - # set default domain - domain_specified = domain is not Absent - # but first we have to remember whether it starts with a dot - domain_initial_dot = False - if domain_specified: - domain_initial_dot = bool(domain.startswith(".")) - if domain is Absent: - req_host, erhn = eff_request_host_lc(request) - domain = erhn - elif not domain.startswith("."): - domain = "."+domain - - # set default port - port_specified = False - if port is not Absent: - if port is None: - # Port attr present, but has no value: default to request port. - # Cookie should then only be sent back on that port. - port = request_port(request) - else: - port_specified = True - port = re.sub(r"\s+", "", port) - else: - # No port attr present. Cookie can be sent back on any port. - port = None - - # set default expires and discard - if expires is Absent: - expires = None - discard = True - - return Cookie(version, - name, value, - port, port_specified, - domain, domain_specified, domain_initial_dot, - path, path_specified, - secure, - expires, - discard, - comment, - comment_url, - rest) - - def _cookies_from_attrs_set(self, attrs_set, request): - cookie_tuples = self._normalized_cookie_tuples(attrs_set) - - cookies = [] - for tup in cookie_tuples: - cookie = self._cookie_from_cookie_tuple(tup, request) - if cookie: cookies.append(cookie) - return cookies - - def _process_rfc2109_cookies(self, cookies): - if self._policy.rfc2109_as_netscape is None: - rfc2109_as_netscape = not self._policy.rfc2965 - else: - rfc2109_as_netscape = self._policy.rfc2109_as_netscape - for cookie in cookies: - if cookie.version == 1: - cookie.rfc2109 = True - if rfc2109_as_netscape: - # treat 2109 cookies as Netscape cookies rather than - # as RFC2965 cookies - cookie.version = 0 - - def _make_cookies(self, response, request): - # get cookie-attributes for RFC 2965 and Netscape protocols - headers = response.info() - rfc2965_hdrs = headers.getheaders("Set-Cookie2") - ns_hdrs = headers.getheaders("Set-Cookie") - - rfc2965 = self._policy.rfc2965 - netscape = self._policy.netscape - - if ((not rfc2965_hdrs and not ns_hdrs) or - (not ns_hdrs and not rfc2965) or - (not rfc2965_hdrs and not netscape) or - (not netscape and not rfc2965)): - return [] # no relevant cookie headers: quick exit - - try: - cookies = self._cookies_from_attrs_set( - split_header_words(rfc2965_hdrs), request) - except: - reraise_unmasked_exceptions() - cookies = [] - - if ns_hdrs and netscape: - try: - # RFC 2109 and Netscape cookies - ns_cookies = self._cookies_from_attrs_set( - parse_ns_headers(ns_hdrs), request) - except: - reraise_unmasked_exceptions() - ns_cookies = [] - self._process_rfc2109_cookies(ns_cookies) - - # Look for Netscape cookies (from Set-Cookie headers) that match - # corresponding RFC 2965 cookies (from Set-Cookie2 headers). - # For each match, keep the RFC 2965 cookie and ignore the Netscape - # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are - # bundled in with the Netscape cookies for this purpose, which is - # reasonable behaviour. - if rfc2965: - lookup = {} - for cookie in cookies: - lookup[(cookie.domain, cookie.path, cookie.name)] = None - - def no_matching_rfc2965(ns_cookie, lookup=lookup): - key = ns_cookie.domain, ns_cookie.path, ns_cookie.name - return not lookup.has_key(key) - ns_cookies = filter(no_matching_rfc2965, ns_cookies) - - if ns_cookies: - cookies.extend(ns_cookies) - - return cookies - - def make_cookies(self, response, request): - """Return sequence of Cookie objects extracted from response object. - - See extract_cookies.__doc__ for the interface required of the - response and request arguments. - - """ - self._policy._now = self._now = int(time.time()) - return [cookie for cookie in self._make_cookies(response, request) - if cookie.expires is None or not cookie.expires <= self._now] - - def set_cookie_if_ok(self, cookie, request): - """Set a cookie if policy says it's OK to do so. - - cookie: mechanize.Cookie instance - request: see extract_cookies.__doc__ for the required interface - - """ - self._policy._now = self._now = int(time.time()) - - if self._policy.set_ok(cookie, request): - self.set_cookie(cookie) - - def set_cookie(self, cookie): - """Set a cookie, without checking whether or not it should be set. - - cookie: mechanize.Cookie instance - """ - c = self._cookies - if not c.has_key(cookie.domain): c[cookie.domain] = {} - c2 = c[cookie.domain] - if not c2.has_key(cookie.path): c2[cookie.path] = {} - c3 = c2[cookie.path] - c3[cookie.name] = cookie - - def extract_cookies(self, response, request): - """Extract cookies from response, where allowable given the request. - - Look for allowable Set-Cookie: and Set-Cookie2: headers in the response - object passed as argument. Any of these headers that are found are - used to update the state of the object (subject to the policy.set_ok - method's approval). - - The response object (usually be the result of a call to - mechanize.urlopen, or similar) should support an info method, which - returns a mimetools.Message object (in fact, the 'mimetools.Message - object' may be any object that provides a getheaders method). - - The request object (usually a mechanize.Request instance) must support - the methods get_full_url, get_type, get_host, and is_unverifiable, as - documented by mechanize, and the port attribute (the port number). The - request is used to set default values for cookie-attributes as well as - for checking that the cookie is OK to be set. - - """ - debug("extract_cookies: %s", response.info()) - self._policy._now = self._now = int(time.time()) - - for cookie in self._make_cookies(response, request): - if cookie.expires is not None and cookie.expires <= self._now: - # Expiry date in past is request to delete cookie. This can't be - # in DefaultCookiePolicy, because can't delete cookies there. - try: - self.clear(cookie.domain, cookie.path, cookie.name) - except KeyError: - pass - debug("Expiring cookie, domain='%s', path='%s', name='%s'", - cookie.domain, cookie.path, cookie.name) - elif self._policy.set_ok(cookie, request): - debug(" setting cookie: %s", cookie) - self.set_cookie(cookie) - - def clear(self, domain=None, path=None, name=None): - """Clear some cookies. - - Invoking this method without arguments will clear all cookies. If - given a single argument, only cookies belonging to that domain will be - removed. If given two arguments, cookies belonging to the specified - path within that domain are removed. If given three arguments, then - the cookie with the specified name, path and domain is removed. - - Raises KeyError if no matching cookie exists. - - """ - if name is not None: - if (domain is None) or (path is None): - raise ValueError( - "domain and path must be given to remove a cookie by name") - del self._cookies[domain][path][name] - elif path is not None: - if domain is None: - raise ValueError( - "domain must be given to remove cookies by path") - del self._cookies[domain][path] - elif domain is not None: - del self._cookies[domain] - else: - self._cookies = {} - - def clear_session_cookies(self): - """Discard all session cookies. - - Discards all cookies held by object which had either no Max-Age or - Expires cookie-attribute or an explicit Discard cookie-attribute, or - which otherwise have ended up with a true discard attribute. For - interactive browsers, the end of a session usually corresponds to - closing the browser window. - - Note that the save method won't save session cookies anyway, unless you - ask otherwise by passing a true ignore_discard argument. - - """ - for cookie in self: - if cookie.discard: - self.clear(cookie.domain, cookie.path, cookie.name) - - def clear_expired_cookies(self): - """Discard all expired cookies. - - You probably don't need to call this method: expired cookies are never - sent back to the server (provided you're using DefaultCookiePolicy), - this method is called by CookieJar itself every so often, and the save - method won't save expired cookies anyway (unless you ask otherwise by - passing a true ignore_expires argument). - - """ - now = time.time() - for cookie in self: - if cookie.is_expired(now): - self.clear(cookie.domain, cookie.path, cookie.name) - - def __getitem__(self, i): - if i == 0: - self._getitem_iterator = self.__iter__() - elif self._prev_getitem_index != i-1: raise IndexError( - "CookieJar.__getitem__ only supports sequential iteration") - self._prev_getitem_index = i - try: - return self._getitem_iterator.next() - except StopIteration: - raise IndexError() - - def __iter__(self): - return MappingIterator(self._cookies) - - def __len__(self): - """Return number of contained cookies.""" - i = 0 - for cookie in self: i = i + 1 - return i - - def __repr__(self): - r = [] - for cookie in self: r.append(repr(cookie)) - return "<%s[%s]>" % (self.__class__, ", ".join(r)) - - def __str__(self): - r = [] - for cookie in self: r.append(str(cookie)) - return "<%s[%s]>" % (self.__class__, ", ".join(r)) - - -class LoadError(Exception): pass - -class FileCookieJar(CookieJar): - """CookieJar that can be loaded from and saved to a file. - - Additional methods - - save(filename=None, ignore_discard=False, ignore_expires=False) - load(filename=None, ignore_discard=False, ignore_expires=False) - revert(filename=None, ignore_discard=False, ignore_expires=False) - - Additional public attributes - - filename: filename for loading and saving cookies - - Additional public readable attributes - - delayload: request that cookies are lazily loaded from disk; this is only - a hint since this only affects performance, not behaviour (unless the - cookies on disk are changing); a CookieJar object may ignore it (in fact, - only MSIECookieJar lazily loads cookies at the moment) - - """ - - def __init__(self, filename=None, delayload=False, policy=None): - """ - See FileCookieJar.__doc__ for argument documentation. - - Cookies are NOT loaded from the named file until either the load or - revert method is called. - - """ - CookieJar.__init__(self, policy) - if filename is not None and not isstringlike(filename): - raise ValueError("filename must be string-like") - self.filename = filename - self.delayload = bool(delayload) - - def save(self, filename=None, ignore_discard=False, ignore_expires=False): - """Save cookies to a file. - - filename: name of file in which to save cookies - ignore_discard: save even cookies set to be discarded - ignore_expires: save even cookies that have expired - - The file is overwritten if it already exists, thus wiping all its - cookies. Saved cookies can be restored later using the load or revert - methods. If filename is not specified, self.filename is used; if - self.filename is None, ValueError is raised. - - """ - raise NotImplementedError() - - def load(self, filename=None, ignore_discard=False, ignore_expires=False): - """Load cookies from a file. - - Old cookies are kept unless overwritten by newly loaded ones. - - Arguments are as for .save(). - - If filename is not specified, self.filename is used; if self.filename - is None, ValueError is raised. The named file must be in the format - understood by the class, or LoadError will be raised. This format will - be identical to that written by the save method, unless the load format - is not sufficiently well understood (as is the case for MSIECookieJar). - - """ - if filename is None: - if self.filename is not None: filename = self.filename - else: raise ValueError(MISSING_FILENAME_TEXT) - - f = open(filename) - try: - self._really_load(f, filename, ignore_discard, ignore_expires) - finally: - f.close() - - def revert(self, filename=None, - ignore_discard=False, ignore_expires=False): - """Clear all cookies and reload cookies from a saved file. - - Raises LoadError (or IOError) if reversion is not successful; the - object's state will not be altered if this happens. - - """ - if filename is None: - if self.filename is not None: filename = self.filename - else: raise ValueError(MISSING_FILENAME_TEXT) - - old_state = copy.deepcopy(self._cookies) - self._cookies = {} - try: - self.load(filename, ignore_discard, ignore_expires) - except (LoadError, IOError): - self._cookies = old_state - raise diff --git a/plugin.video.alfa/lib/mechanize/_debug.py b/plugin.video.alfa/lib/mechanize/_debug.py deleted file mode 100755 index c17a06ce..00000000 --- a/plugin.video.alfa/lib/mechanize/_debug.py +++ /dev/null @@ -1,28 +0,0 @@ -import logging - -from _response import response_seek_wrapper -from _urllib2_fork import BaseHandler - - -class HTTPResponseDebugProcessor(BaseHandler): - handler_order = 900 # before redirections, after everything else - - def http_response(self, request, response): - if not hasattr(response, "seek"): - response = response_seek_wrapper(response) - info = logging.getLogger("mechanize.http_responses").info - try: - info(response.read()) - finally: - response.seek(0) - info("*****************************************************") - return response - - https_response = http_response - -class HTTPRedirectDebugProcessor(BaseHandler): - def http_request(self, request): - if hasattr(request, "redirect_dict"): - info = logging.getLogger("mechanize.http_redirects").info - info("redirecting to %s", request.get_full_url()) - return request diff --git a/plugin.video.alfa/lib/mechanize/_firefox3cookiejar.py b/plugin.video.alfa/lib/mechanize/_firefox3cookiejar.py deleted file mode 100755 index 83fcd21a..00000000 --- a/plugin.video.alfa/lib/mechanize/_firefox3cookiejar.py +++ /dev/null @@ -1,248 +0,0 @@ -"""Firefox 3 "cookies.sqlite" cookie persistence. - -Copyright 2008 John J Lee - -This code is free software; you can redistribute it and/or modify it -under the terms of the BSD or ZPL 2.1 licenses (see the file -COPYING.txt included with the distribution). - -""" - -import logging -import time - -from _clientcookie import CookieJar, Cookie, MappingIterator -from _util import isstringlike, experimental -debug = logging.getLogger("mechanize.cookies").debug - - -class Firefox3CookieJar(CookieJar): - - """Firefox 3 cookie jar. - - The cookies are stored in Firefox 3's "cookies.sqlite" format. - - Constructor arguments: - - filename: filename of cookies.sqlite (typically found at the top level - of a firefox profile directory) - autoconnect: as a convenience, connect to the SQLite cookies database at - Firefox3CookieJar construction time (default True) - policy: an object satisfying the mechanize.CookiePolicy interface - - Note that this is NOT a FileCookieJar, and there are no .load(), - .save() or .restore() methods. The database is in sync with the - cookiejar object's state after each public method call. - - Following Firefox's own behaviour, session cookies are never saved to - the database. - - The file is created, and an sqlite database written to it, if it does - not already exist. The moz_cookies database table is created if it does - not already exist. - """ - - # XXX - # handle DatabaseError exceptions - # add a FileCookieJar (explicit .save() / .revert() / .load() methods) - - def __init__(self, filename, autoconnect=True, policy=None): - experimental("Firefox3CookieJar is experimental code") - CookieJar.__init__(self, policy) - if filename is not None and not isstringlike(filename): - raise ValueError("filename must be string-like") - self.filename = filename - self._conn = None - if autoconnect: - self.connect() - - def connect(self): - import sqlite3 # not available in Python 2.4 stdlib - self._conn = sqlite3.connect(self.filename) - self._conn.isolation_level = "DEFERRED" - self._create_table_if_necessary() - - def close(self): - self._conn.close() - - def _transaction(self, func): - try: - cur = self._conn.cursor() - try: - result = func(cur) - finally: - cur.close() - except: - self._conn.rollback() - raise - else: - self._conn.commit() - return result - - def _execute(self, query, params=()): - return self._transaction(lambda cur: cur.execute(query, params)) - - def _query(self, query, params=()): - # XXX should we bother with a transaction? - cur = self._conn.cursor() - try: - cur.execute(query, params) - return cur.fetchall() - finally: - cur.close() - - def _create_table_if_necessary(self): - self._execute("""\ -CREATE TABLE IF NOT EXISTS moz_cookies (id INTEGER PRIMARY KEY, name TEXT, - value TEXT, host TEXT, path TEXT,expiry INTEGER, - lastAccessed INTEGER, isSecure INTEGER, isHttpOnly INTEGER)""") - - def _cookie_from_row(self, row): - (pk, name, value, domain, path, expires, - last_accessed, secure, http_only) = row - - version = 0 - domain = domain.encode("ascii", "ignore") - path = path.encode("ascii", "ignore") - name = name.encode("ascii", "ignore") - value = value.encode("ascii", "ignore") - secure = bool(secure) - - # last_accessed isn't a cookie attribute, so isn't added to rest - rest = {} - if http_only: - rest["HttpOnly"] = None - - if name == "": - name = value - value = None - - initial_dot = domain.startswith(".") - domain_specified = initial_dot - - discard = False - if expires == "": - expires = None - discard = True - - return Cookie(version, name, value, - None, False, - domain, domain_specified, initial_dot, - path, False, - secure, - expires, - discard, - None, - None, - rest) - - def clear(self, domain=None, path=None, name=None): - CookieJar.clear(self, domain, path, name) - where_parts = [] - sql_params = [] - if domain is not None: - where_parts.append("host = ?") - sql_params.append(domain) - if path is not None: - where_parts.append("path = ?") - sql_params.append(path) - if name is not None: - where_parts.append("name = ?") - sql_params.append(name) - where = " AND ".join(where_parts) - if where: - where = " WHERE " + where - def clear(cur): - cur.execute("DELETE FROM moz_cookies%s" % where, - tuple(sql_params)) - self._transaction(clear) - - def _row_from_cookie(self, cookie, cur): - expires = cookie.expires - if cookie.discard: - expires = "" - - domain = unicode(cookie.domain) - path = unicode(cookie.path) - name = unicode(cookie.name) - value = unicode(cookie.value) - secure = bool(int(cookie.secure)) - - if value is None: - value = name - name = "" - - last_accessed = int(time.time()) - http_only = cookie.has_nonstandard_attr("HttpOnly") - - query = cur.execute("""SELECT MAX(id) + 1 from moz_cookies""") - pk = query.fetchone()[0] - if pk is None: - pk = 1 - - return (pk, name, value, domain, path, expires, - last_accessed, secure, http_only) - - def set_cookie(self, cookie): - if cookie.discard: - CookieJar.set_cookie(self, cookie) - return - - def set_cookie(cur): - # XXX - # is this RFC 2965-correct? - # could this do an UPDATE instead? - row = self._row_from_cookie(cookie, cur) - name, unused, domain, path = row[1:5] - cur.execute("""\ -DELETE FROM moz_cookies WHERE host = ? AND path = ? AND name = ?""", - (domain, path, name)) - cur.execute("""\ -INSERT INTO moz_cookies VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) -""", row) - self._transaction(set_cookie) - - def __iter__(self): - # session (non-persistent) cookies - for cookie in MappingIterator(self._cookies): - yield cookie - # persistent cookies - for row in self._query("""\ -SELECT * FROM moz_cookies ORDER BY name, path, host"""): - yield self._cookie_from_row(row) - - def _cookies_for_request(self, request): - session_cookies = CookieJar._cookies_for_request(self, request) - def get_cookies(cur): - query = cur.execute("SELECT host from moz_cookies") - domains = [row[0] for row in query.fetchall()] - cookies = [] - for domain in domains: - cookies += self._persistent_cookies_for_domain(domain, - request, cur) - return cookies - persistent_coookies = self._transaction(get_cookies) - return session_cookies + persistent_coookies - - def _persistent_cookies_for_domain(self, domain, request, cur): - cookies = [] - if not self._policy.domain_return_ok(domain, request): - return [] - debug("Checking %s for cookies to return", domain) - query = cur.execute("""\ -SELECT * from moz_cookies WHERE host = ? ORDER BY path""", - (domain,)) - cookies = [self._cookie_from_row(row) for row in query.fetchall()] - last_path = None - r = [] - for cookie in cookies: - if (cookie.path != last_path and - not self._policy.path_return_ok(cookie.path, request)): - last_path = cookie.path - continue - if not self._policy.return_ok(cookie, request): - debug(" not returning cookie") - continue - debug(" it's a match") - r.append(cookie) - return r diff --git a/plugin.video.alfa/lib/mechanize/_form.py b/plugin.video.alfa/lib/mechanize/_form.py deleted file mode 100755 index ed2b13b4..00000000 --- a/plugin.video.alfa/lib/mechanize/_form.py +++ /dev/null @@ -1,3280 +0,0 @@ -"""HTML form handling for web clients. - -HTML form handling for web clients: useful for parsing HTML forms, filling them -in and returning the completed forms to the server. This code developed from a -port of Gisle Aas' Perl module HTML::Form, from the libwww-perl library, but -the interface is not the same. - -The most useful docstring is the one for HTMLForm. - -RFC 1866: HTML 2.0 -RFC 1867: Form-based File Upload in HTML -RFC 2388: Returning Values from Forms: multipart/form-data -HTML 3.2 Specification, W3C Recommendation 14 January 1997 (for ISINDEX) -HTML 4.01 Specification, W3C Recommendation 24 December 1999 - - -Copyright 2002-2007 John J. Lee -Copyright 2005 Gary Poster -Copyright 2005 Zope Corporation -Copyright 1998-2000 Gisle Aas. - -This code is free software; you can redistribute it and/or modify it -under the terms of the BSD or ZPL 2.1 licenses (see the file -COPYING.txt included with the distribution). - -""" - -# TODO: -# Clean up post the merge into mechanize -# * Remove code that was duplicated in ClientForm and mechanize -# * Remove weird import stuff -# * Remove pre-Python 2.4 compatibility cruft -# * Clean up tests -# * Later release: Remove the ClientForm 0.1 backwards-compatibility switch -# Remove parser testing hack -# Clean action URI -# Switch to unicode throughout -# See Wichert Akkerman's 2004-01-22 message to c.l.py. -# Apply recommendations from google code project CURLIES -# Apply recommendations from HTML 5 spec -# Add charset parameter to Content-type headers? How to find value?? -# Functional tests to add: -# Single and multiple file upload -# File upload with missing name (check standards) -# mailto: submission & enctype text/plain?? - -# Replace by_label etc. with moniker / selector concept. Allows, e.g., a -# choice between selection by value / id / label / element contents. Or -# choice between matching labels exactly or by substring. etc. - - -__all__ = ['AmbiguityError', 'CheckboxControl', 'Control', - 'ControlNotFoundError', 'FileControl', 'FormParser', 'HTMLForm', - 'HiddenControl', 'IgnoreControl', 'ImageControl', 'IsindexControl', - 'Item', 'ItemCountError', 'ItemNotFoundError', 'Label', - 'ListControl', 'LocateError', 'Missing', 'ParseError', 'ParseFile', - 'ParseFileEx', 'ParseResponse', 'ParseResponseEx','PasswordControl', - 'RadioControl', 'ScalarControl', 'SelectControl', - 'SubmitButtonControl', 'SubmitControl', 'TextControl', - 'TextareaControl', 'XHTMLCompatibleFormParser'] - -import HTMLParser -from cStringIO import StringIO -import inspect -import logging -import random -import re -import sys -import urllib -import urlparse -import warnings - -import _beautifulsoup -import _request - -# from Python itself, for backwards compatibility of raised exceptions -import sgmllib -# bundled copy of sgmllib -import _sgmllib_copy - - -VERSION = "0.2.11" - -CHUNK = 1024 # size of chunks fed to parser, in bytes - -DEFAULT_ENCODING = "latin-1" - -_logger = logging.getLogger("mechanize.forms") -OPTIMIZATION_HACK = True - -def debug(msg, *args, **kwds): - if OPTIMIZATION_HACK: - return - - caller_name = inspect.stack()[1][3] - extended_msg = '%%s %s' % msg - extended_args = (caller_name,)+args - _logger.debug(extended_msg, *extended_args, **kwds) - -def _show_debug_messages(): - global OPTIMIZATION_HACK - OPTIMIZATION_HACK = False - _logger.setLevel(logging.DEBUG) - handler = logging.StreamHandler(sys.stdout) - handler.setLevel(logging.DEBUG) - _logger.addHandler(handler) - - -def deprecation(message, stack_offset=0): - warnings.warn(message, DeprecationWarning, stacklevel=3+stack_offset) - - -class Missing: pass - -_compress_re = re.compile(r"\s+") -def compress_text(text): return _compress_re.sub(" ", text.strip()) - -def normalize_line_endings(text): - return re.sub(r"(?:(? - w = MimeWriter(f) - ...call w.addheader(key, value) 0 or more times... - - followed by either: - - f = w.startbody(content_type) - ...call f.write(data) for body data... - - or: - - w.startmultipartbody(subtype) - for each part: - subwriter = w.nextpart() - ...use the subwriter's methods to create the subpart... - w.lastpart() - - The subwriter is another MimeWriter instance, and should be - treated in the same way as the toplevel MimeWriter. This way, - writing recursive body parts is easy. - - Warning: don't forget to call lastpart()! - - XXX There should be more state so calls made in the wrong order - are detected. - - Some special cases: - - - startbody() just returns the file passed to the constructor; - but don't use this knowledge, as it may be changed. - - - startmultipartbody() actually returns a file as well; - this can be used to write the initial 'if you can read this your - mailer is not MIME-aware' message. - - - If you call flushheaders(), the headers accumulated so far are - written out (and forgotten); this is useful if you don't need a - body part at all, e.g. for a subpart of type message/rfc822 - that's (mis)used to store some header-like information. - - - Passing a keyword argument 'prefix=' to addheader(), - start*body() affects where the header is inserted; 0 means - append at the end, 1 means insert at the start; default is - append for addheader(), but insert for start*body(), which use - it to determine where the Content-type header goes. - - """ - - def __init__(self, fp, http_hdrs=None): - self._http_hdrs = http_hdrs - self._fp = fp - self._headers = [] - self._boundary = [] - self._first_part = True - - def addheader(self, key, value, prefix=0, - add_to_http_hdrs=0): - """ - prefix is ignored if add_to_http_hdrs is true. - """ - lines = value.split("\r\n") - while lines and not lines[-1]: del lines[-1] - while lines and not lines[0]: del lines[0] - if add_to_http_hdrs: - value = "".join(lines) - # 2.2 urllib2 doesn't normalize header case - self._http_hdrs.append((key.capitalize(), value)) - else: - for i in range(1, len(lines)): - lines[i] = " " + lines[i].strip() - value = "\r\n".join(lines) + "\r\n" - line = key.title() + ": " + value - if prefix: - self._headers.insert(0, line) - else: - self._headers.append(line) - - def flushheaders(self): - self._fp.writelines(self._headers) - self._headers = [] - - def startbody(self, ctype=None, plist=[], prefix=1, - add_to_http_hdrs=0, content_type=1): - """ - prefix is ignored if add_to_http_hdrs is true. - """ - if content_type and ctype: - for name, value in plist: - ctype = ctype + ';\r\n %s=%s' % (name, value) - self.addheader("Content-Type", ctype, prefix=prefix, - add_to_http_hdrs=add_to_http_hdrs) - self.flushheaders() - if not add_to_http_hdrs: self._fp.write("\r\n") - self._first_part = True - return self._fp - - def startmultipartbody(self, subtype, boundary=None, plist=[], prefix=1, - add_to_http_hdrs=0, content_type=1): - boundary = boundary or choose_boundary() - self._boundary.append(boundary) - return self.startbody("multipart/" + subtype, - [("boundary", boundary)] + plist, - prefix=prefix, - add_to_http_hdrs=add_to_http_hdrs, - content_type=content_type) - - def nextpart(self): - boundary = self._boundary[-1] - if self._first_part: - self._first_part = False - else: - self._fp.write("\r\n") - self._fp.write("--" + boundary + "\r\n") - return self.__class__(self._fp) - - def lastpart(self): - if self._first_part: - self.nextpart() - boundary = self._boundary.pop() - self._fp.write("\r\n--" + boundary + "--\r\n") - - -class LocateError(ValueError): pass -class AmbiguityError(LocateError): pass -class ControlNotFoundError(LocateError): pass -class ItemNotFoundError(LocateError): pass - -class ItemCountError(ValueError): pass - -# for backwards compatibility, ParseError derives from exceptions that were -# raised by versions of ClientForm <= 0.2.5 -# TODO: move to _html -class ParseError(sgmllib.SGMLParseError, - HTMLParser.HTMLParseError): - - def __init__(self, *args, **kwds): - Exception.__init__(self, *args, **kwds) - - def __str__(self): - return Exception.__str__(self) - - -class _AbstractFormParser: - """forms attribute contains HTMLForm instances on completion.""" - # thanks to Moshe Zadka for an example of sgmllib/htmllib usage - def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING): - if entitydefs is None: - entitydefs = get_entitydefs() - self._entitydefs = entitydefs - self._encoding = encoding - - self.base = None - self.forms = [] - self.labels = [] - self._current_label = None - self._current_form = None - self._select = None - self._optgroup = None - self._option = None - self._textarea = None - - # forms[0] will contain all controls that are outside of any form - # self._global_form is an alias for self.forms[0] - self._global_form = None - self.start_form([]) - self.end_form() - self._current_form = self._global_form = self.forms[0] - - def do_base(self, attrs): - debug("%s", attrs) - for key, value in attrs: - if key == "href": - self.base = self.unescape_attr_if_required(value) - - def end_body(self): - debug("") - if self._current_label is not None: - self.end_label() - if self._current_form is not self._global_form: - self.end_form() - - def start_form(self, attrs): - debug("%s", attrs) - if self._current_form is not self._global_form: - raise ParseError("nested FORMs") - name = None - action = None - enctype = "application/x-www-form-urlencoded" - method = "GET" - d = {} - for key, value in attrs: - if key == "name": - name = self.unescape_attr_if_required(value) - elif key == "action": - action = self.unescape_attr_if_required(value) - elif key == "method": - method = self.unescape_attr_if_required(value.upper()) - elif key == "enctype": - enctype = self.unescape_attr_if_required(value.lower()) - d[key] = self.unescape_attr_if_required(value) - controls = [] - self._current_form = (name, action, method, enctype), d, controls - - def end_form(self): - debug("") - if self._current_label is not None: - self.end_label() - if self._current_form is self._global_form: - raise ParseError("end of FORM before start") - self.forms.append(self._current_form) - self._current_form = self._global_form - - def start_select(self, attrs): - debug("%s", attrs) - if self._select is not None: - raise ParseError("nested SELECTs") - if self._textarea is not None: - raise ParseError("SELECT inside TEXTAREA") - d = {} - for key, val in attrs: - d[key] = self.unescape_attr_if_required(val) - - self._select = d - self._add_label(d) - - self._append_select_control({"__select": d}) - - def end_select(self): - debug("") - if self._select is None: - raise ParseError("end of SELECT before start") - - if self._option is not None: - self._end_option() - - self._select = None - - def start_optgroup(self, attrs): - debug("%s", attrs) - if self._select is None: - raise ParseError("OPTGROUP outside of SELECT") - d = {} - for key, val in attrs: - d[key] = self.unescape_attr_if_required(val) - - self._optgroup = d - - def end_optgroup(self): - debug("") - if self._optgroup is None: - raise ParseError("end of OPTGROUP before start") - self._optgroup = None - - def _start_option(self, attrs): - debug("%s", attrs) - if self._select is None: - raise ParseError("OPTION outside of SELECT") - if self._option is not None: - self._end_option() - - d = {} - for key, val in attrs: - d[key] = self.unescape_attr_if_required(val) - - self._option = {} - self._option.update(d) - if (self._optgroup and self._optgroup.has_key("disabled") and - not self._option.has_key("disabled")): - self._option["disabled"] = None - - def _end_option(self): - debug("") - if self._option is None: - raise ParseError("end of OPTION before start") - - contents = self._option.get("contents", "").strip() - self._option["contents"] = contents - if not self._option.has_key("value"): - self._option["value"] = contents - if not self._option.has_key("label"): - self._option["label"] = contents - # stuff dict of SELECT HTML attrs into a special private key - # (gets deleted again later) - self._option["__select"] = self._select - self._append_select_control(self._option) - self._option = None - - def _append_select_control(self, attrs): - debug("%s", attrs) - controls = self._current_form[2] - name = self._select.get("name") - controls.append(("select", name, attrs)) - - def start_textarea(self, attrs): - debug("%s", attrs) - if self._textarea is not None: - raise ParseError("nested TEXTAREAs") - if self._select is not None: - raise ParseError("TEXTAREA inside SELECT") - d = {} - for key, val in attrs: - d[key] = self.unescape_attr_if_required(val) - self._add_label(d) - - self._textarea = d - - def end_textarea(self): - debug("") - if self._textarea is None: - raise ParseError("end of TEXTAREA before start") - controls = self._current_form[2] - name = self._textarea.get("name") - controls.append(("textarea", name, self._textarea)) - self._textarea = None - - def start_label(self, attrs): - debug("%s", attrs) - if self._current_label: - self.end_label() - d = {} - for key, val in attrs: - d[key] = self.unescape_attr_if_required(val) - taken = bool(d.get("for")) # empty id is invalid - d["__text"] = "" - d["__taken"] = taken - if taken: - self.labels.append(d) - self._current_label = d - - def end_label(self): - debug("") - label = self._current_label - if label is None: - # something is ugly in the HTML, but we're ignoring it - return - self._current_label = None - # if it is staying around, it is True in all cases - del label["__taken"] - - def _add_label(self, d): - #debug("%s", d) - if self._current_label is not None: - if not self._current_label["__taken"]: - self._current_label["__taken"] = True - d["__label"] = self._current_label - - def handle_data(self, data): - debug("%s", data) - - if self._option is not None: - # self._option is a dictionary of the OPTION element's HTML - # attributes, but it has two special keys, one of which is the - # special "contents" key contains text between OPTION tags (the - # other is the "__select" key: see the end_option method) - map = self._option - key = "contents" - elif self._textarea is not None: - map = self._textarea - key = "value" - data = normalize_line_endings(data) - # not if within option or textarea - elif self._current_label is not None: - map = self._current_label - key = "__text" - else: - return - - if data and not map.has_key(key): - # according to - # http://www.w3.org/TR/html4/appendix/notes.html#h-B.3.1 line break - # immediately after start tags or immediately before end tags must - # be ignored, but real browsers only ignore a line break after a - # start tag, so we'll do that. - if data[0:2] == "\r\n": - data = data[2:] - elif data[0:1] in ["\n", "\r"]: - data = data[1:] - map[key] = data - else: - map[key] = map[key] + data - - def do_button(self, attrs): - debug("%s", attrs) - d = {} - d["type"] = "submit" # default - for key, val in attrs: - d[key] = self.unescape_attr_if_required(val) - controls = self._current_form[2] - - type = d["type"] - name = d.get("name") - # we don't want to lose information, so use a type string that - # doesn't clash with INPUT TYPE={SUBMIT,RESET,BUTTON} - # e.g. type for BUTTON/RESET is "resetbutton" - # (type for INPUT/RESET is "reset") - type = type+"button" - self._add_label(d) - controls.append((type, name, d)) - - def do_input(self, attrs): - debug("%s", attrs) - d = {} - d["type"] = "text" # default - for key, val in attrs: - d[key] = self.unescape_attr_if_required(val) - controls = self._current_form[2] - - type = d["type"] - name = d.get("name") - self._add_label(d) - controls.append((type, name, d)) - - def do_isindex(self, attrs): - debug("%s", attrs) - d = {} - for key, val in attrs: - d[key] = self.unescape_attr_if_required(val) - controls = self._current_form[2] - - self._add_label(d) - # isindex doesn't have type or name HTML attributes - controls.append(("isindex", None, d)) - - def handle_entityref(self, name): - #debug("%s", name) - self.handle_data(unescape( - '&%s;' % name, self._entitydefs, self._encoding)) - - def handle_charref(self, name): - #debug("%s", name) - self.handle_data(unescape_charref(name, self._encoding)) - - def unescape_attr(self, name): - #debug("%s", name) - return unescape(name, self._entitydefs, self._encoding) - - def unescape_attrs(self, attrs): - #debug("%s", attrs) - escaped_attrs = {} - for key, val in attrs.items(): - try: - val.items - except AttributeError: - escaped_attrs[key] = self.unescape_attr(val) - else: - # e.g. "__select" -- yuck! - escaped_attrs[key] = self.unescape_attrs(val) - return escaped_attrs - - def unknown_entityref(self, ref): self.handle_data("&%s;" % ref) - def unknown_charref(self, ref): self.handle_data("&#%s;" % ref) - - -class XHTMLCompatibleFormParser(_AbstractFormParser, HTMLParser.HTMLParser): - """Good for XHTML, bad for tolerance of incorrect HTML.""" - # thanks to Michael Howitz for this! - def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING): - HTMLParser.HTMLParser.__init__(self) - _AbstractFormParser.__init__(self, entitydefs, encoding) - - def feed(self, data): - try: - HTMLParser.HTMLParser.feed(self, data) - except HTMLParser.HTMLParseError, exc: - raise ParseError(exc) - - def start_option(self, attrs): - _AbstractFormParser._start_option(self, attrs) - - def end_option(self): - _AbstractFormParser._end_option(self) - - def handle_starttag(self, tag, attrs): - try: - method = getattr(self, "start_" + tag) - except AttributeError: - try: - method = getattr(self, "do_" + tag) - except AttributeError: - pass # unknown tag - else: - method(attrs) - else: - method(attrs) - - def handle_endtag(self, tag): - try: - method = getattr(self, "end_" + tag) - except AttributeError: - pass # unknown tag - else: - method() - - def unescape(self, name): - # Use the entitydefs passed into constructor, not - # HTMLParser.HTMLParser's entitydefs. - return self.unescape_attr(name) - - def unescape_attr_if_required(self, name): - return name # HTMLParser.HTMLParser already did it - def unescape_attrs_if_required(self, attrs): - return attrs # ditto - - def close(self): - HTMLParser.HTMLParser.close(self) - self.end_body() - - -class _AbstractSgmllibParser(_AbstractFormParser): - - def do_option(self, attrs): - _AbstractFormParser._start_option(self, attrs) - - # we override this attr to decode hex charrefs - entity_or_charref = re.compile( - '&(?:([a-zA-Z][-.a-zA-Z0-9]*)|#(x?[0-9a-fA-F]+))(;?)') - def convert_entityref(self, name): - return unescape("&%s;" % name, self._entitydefs, self._encoding) - def convert_charref(self, name): - return unescape_charref("%s" % name, self._encoding) - def unescape_attr_if_required(self, name): - return name # sgmllib already did it - def unescape_attrs_if_required(self, attrs): - return attrs # ditto - - -class FormParser(_AbstractSgmllibParser, _sgmllib_copy.SGMLParser): - """Good for tolerance of incorrect HTML, bad for XHTML.""" - def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING): - _sgmllib_copy.SGMLParser.__init__(self) - _AbstractFormParser.__init__(self, entitydefs, encoding) - - def feed(self, data): - try: - _sgmllib_copy.SGMLParser.feed(self, data) - except _sgmllib_copy.SGMLParseError, exc: - raise ParseError(exc) - - def close(self): - _sgmllib_copy.SGMLParser.close(self) - self.end_body() - - -class _AbstractBSFormParser(_AbstractSgmllibParser): - - bs_base_class = None - - def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING): - _AbstractFormParser.__init__(self, entitydefs, encoding) - self.bs_base_class.__init__(self) - - def handle_data(self, data): - _AbstractFormParser.handle_data(self, data) - self.bs_base_class.handle_data(self, data) - - def feed(self, data): - try: - self.bs_base_class.feed(self, data) - except _sgmllib_copy.SGMLParseError, exc: - raise ParseError(exc) - - def close(self): - self.bs_base_class.close(self) - self.end_body() - - -class RobustFormParser(_AbstractBSFormParser, _beautifulsoup.BeautifulSoup): - - """Tries to be highly tolerant of incorrect HTML.""" - - bs_base_class = _beautifulsoup.BeautifulSoup - - -class NestingRobustFormParser(_AbstractBSFormParser, - _beautifulsoup.ICantBelieveItsBeautifulSoup): - - """Tries to be highly tolerant of incorrect HTML. - - Different from RobustFormParser in that it more often guesses nesting - above missing end tags (see BeautifulSoup docs). - """ - - bs_base_class = _beautifulsoup.ICantBelieveItsBeautifulSoup - - -#FormParser = XHTMLCompatibleFormParser # testing hack -#FormParser = RobustFormParser # testing hack - - -def ParseResponseEx(response, - select_default=False, - form_parser_class=FormParser, - request_class=_request.Request, - entitydefs=None, - encoding=DEFAULT_ENCODING, - - # private - _urljoin=urlparse.urljoin, - _urlparse=urlparse.urlparse, - _urlunparse=urlparse.urlunparse, - ): - """Identical to ParseResponse, except that: - - 1. The returned list contains an extra item. The first form in the list - contains all controls not contained in any FORM element. - - 2. The arguments ignore_errors and backwards_compat have been removed. - - 3. Backwards-compatibility mode (backwards_compat=True) is not available. - """ - return _ParseFileEx(response, response.geturl(), - select_default, - False, - form_parser_class, - request_class, - entitydefs, - False, - encoding, - _urljoin=_urljoin, - _urlparse=_urlparse, - _urlunparse=_urlunparse, - ) - -def ParseFileEx(file, base_uri, - select_default=False, - form_parser_class=FormParser, - request_class=_request.Request, - entitydefs=None, - encoding=DEFAULT_ENCODING, - - # private - _urljoin=urlparse.urljoin, - _urlparse=urlparse.urlparse, - _urlunparse=urlparse.urlunparse, - ): - """Identical to ParseFile, except that: - - 1. The returned list contains an extra item. The first form in the list - contains all controls not contained in any FORM element. - - 2. The arguments ignore_errors and backwards_compat have been removed. - - 3. Backwards-compatibility mode (backwards_compat=True) is not available. - """ - return _ParseFileEx(file, base_uri, - select_default, - False, - form_parser_class, - request_class, - entitydefs, - False, - encoding, - _urljoin=_urljoin, - _urlparse=_urlparse, - _urlunparse=_urlunparse, - ) - -def ParseString(text, base_uri, *args, **kwds): - fh = StringIO(text) - return ParseFileEx(fh, base_uri, *args, **kwds) - -def ParseResponse(response, *args, **kwds): - """Parse HTTP response and return a list of HTMLForm instances. - - The return value of mechanize.urlopen can be conveniently passed to this - function as the response parameter. - - mechanize.ParseError is raised on parse errors. - - response: file-like object (supporting read() method) with a method - geturl(), returning the URI of the HTTP response - select_default: for multiple-selection SELECT controls and RADIO controls, - pick the first item as the default if none are selected in the HTML - form_parser_class: class to instantiate and use to pass - request_class: class to return from .click() method (default is - mechanize.Request) - entitydefs: mapping like {"&": "&", ...} containing HTML entity - definitions (a sensible default is used) - encoding: character encoding used for encoding numeric character references - when matching link text. mechanize does not attempt to find the encoding - in a META HTTP-EQUIV attribute in the document itself (mechanize, for - example, does do that and will pass the correct value to mechanize using - this parameter). - - backwards_compat: boolean that determines whether the returned HTMLForm - objects are backwards-compatible with old code. If backwards_compat is - true: - - - ClientForm 0.1 code will continue to work as before. - - - Label searches that do not specify a nr (number or count) will always - get the first match, even if other controls match. If - backwards_compat is False, label searches that have ambiguous results - will raise an AmbiguityError. - - - Item label matching is done by strict string comparison rather than - substring matching. - - - De-selecting individual list items is allowed even if the Item is - disabled. - - The backwards_compat argument will be removed in a future release. - - Pass a true value for select_default if you want the behaviour specified by - RFC 1866 (the HTML 2.0 standard), which is to select the first item in a - RADIO or multiple-selection SELECT control if none were selected in the - HTML. Most browsers (including Microsoft Internet Explorer (IE) and - Netscape Navigator) instead leave all items unselected in these cases. The - W3C HTML 4.0 standard leaves this behaviour undefined in the case of - multiple-selection SELECT controls, but insists that at least one RADIO - button should be checked at all times, in contradiction to browser - behaviour. - - There is a choice of parsers. mechanize.XHTMLCompatibleFormParser (uses - HTMLParser.HTMLParser) works best for XHTML, mechanize.FormParser (uses - bundled copy of sgmllib.SGMLParser) (the default) works better for ordinary - grubby HTML. Note that HTMLParser is only available in Python 2.2 and - later. You can pass your own class in here as a hack to work around bad - HTML, but at your own risk: there is no well-defined interface. - - """ - return _ParseFileEx(response, response.geturl(), *args, **kwds)[1:] - -def ParseFile(file, base_uri, *args, **kwds): - """Parse HTML and return a list of HTMLForm instances. - - mechanize.ParseError is raised on parse errors. - - file: file-like object (supporting read() method) containing HTML with zero - or more forms to be parsed - base_uri: the URI of the document (note that the base URI used to submit - the form will be that given in the BASE element if present, not that of - the document) - - For the other arguments and further details, see ParseResponse.__doc__. - - """ - return _ParseFileEx(file, base_uri, *args, **kwds)[1:] - -def _ParseFileEx(file, base_uri, - select_default=False, - ignore_errors=False, - form_parser_class=FormParser, - request_class=_request.Request, - entitydefs=None, - backwards_compat=True, - encoding=DEFAULT_ENCODING, - _urljoin=urlparse.urljoin, - _urlparse=urlparse.urlparse, - _urlunparse=urlparse.urlunparse, - ): - if backwards_compat: - deprecation("operating in backwards-compatibility mode", 1) - fp = form_parser_class(entitydefs, encoding) - while 1: - data = file.read(CHUNK) - try: - fp.feed(data) - except ParseError, e: - e.base_uri = base_uri - raise - if len(data) != CHUNK: break - fp.close() - if fp.base is not None: - # HTML BASE element takes precedence over document URI - base_uri = fp.base - labels = [] # Label(label) for label in fp.labels] - id_to_labels = {} - for l in fp.labels: - label = Label(l) - labels.append(label) - for_id = l["for"] - coll = id_to_labels.get(for_id) - if coll is None: - id_to_labels[for_id] = [label] - else: - coll.append(label) - forms = [] - for (name, action, method, enctype), attrs, controls in fp.forms: - if action is None: - action = base_uri - else: - action = _urljoin(base_uri, action) - # would be nice to make HTMLForm class (form builder) pluggable - form = HTMLForm( - action, method, enctype, name, attrs, request_class, - forms, labels, id_to_labels, backwards_compat) - form._urlparse = _urlparse - form._urlunparse = _urlunparse - for ii in range(len(controls)): - type, name, attrs = controls[ii] - # index=ii*10 allows ImageControl to return multiple ordered pairs - form.new_control( - type, name, attrs, select_default=select_default, index=ii*10) - forms.append(form) - for form in forms: - form.fixup() - return forms - - -class Label: - def __init__(self, attrs): - self.id = attrs.get("for") - self._text = attrs.get("__text").strip() - self._ctext = compress_text(self._text) - self.attrs = attrs - self._backwards_compat = False # maintained by HTMLForm - - def __getattr__(self, name): - if name == "text": - if self._backwards_compat: - return self._text - else: - return self._ctext - return getattr(Label, name) - - def __setattr__(self, name, value): - if name == "text": - # don't see any need for this, so make it read-only - raise AttributeError("text attribute is read-only") - self.__dict__[name] = value - - def __str__(self): - return "" % (self.id, self.text) - - -def _get_label(attrs): - text = attrs.get("__label") - if text is not None: - return Label(text) - else: - return None - -class Control: - """An HTML form control. - - An HTMLForm contains a sequence of Controls. The Controls in an HTMLForm - are accessed using the HTMLForm.find_control method or the - HTMLForm.controls attribute. - - Control instances are usually constructed using the ParseFile / - ParseResponse functions. If you use those functions, you can ignore the - rest of this paragraph. A Control is only properly initialised after the - fixup method has been called. In fact, this is only strictly necessary for - ListControl instances. This is necessary because ListControls are built up - from ListControls each containing only a single item, and their initial - value(s) can only be known after the sequence is complete. - - The types and values that are acceptable for assignment to the value - attribute are defined by subclasses. - - If the disabled attribute is true, this represents the state typically - represented by browsers by 'greying out' a control. If the disabled - attribute is true, the Control will raise AttributeError if an attempt is - made to change its value. In addition, the control will not be considered - 'successful' as defined by the W3C HTML 4 standard -- ie. it will - contribute no data to the return value of the HTMLForm.click* methods. To - enable a control, set the disabled attribute to a false value. - - If the readonly attribute is true, the Control will raise AttributeError if - an attempt is made to change its value. To make a control writable, set - the readonly attribute to a false value. - - All controls have the disabled and readonly attributes, not only those that - may have the HTML attributes of the same names. - - On assignment to the value attribute, the following exceptions are raised: - TypeError, AttributeError (if the value attribute should not be assigned - to, because the control is disabled, for example) and ValueError. - - If the name or value attributes are None, or the value is an empty list, or - if the control is disabled, the control is not successful. - - Public attributes: - - type: string describing type of control (see the keys of the - HTMLForm.type2class dictionary for the allowable values) (readonly) - name: name of control (readonly) - value: current value of control (subclasses may allow a single value, a - sequence of values, or either) - disabled: disabled state - readonly: readonly state - id: value of id HTML attribute - - """ - def __init__(self, type, name, attrs, index=None): - """ - type: string describing type of control (see the keys of the - HTMLForm.type2class dictionary for the allowable values) - name: control name - attrs: HTML attributes of control's HTML element - - """ - raise NotImplementedError() - - def add_to_form(self, form): - self._form = form - form.controls.append(self) - - def fixup(self): - pass - - def is_of_kind(self, kind): - raise NotImplementedError() - - def clear(self): - raise NotImplementedError() - - def __getattr__(self, name): raise NotImplementedError() - def __setattr__(self, name, value): raise NotImplementedError() - - def pairs(self): - """Return list of (key, value) pairs suitable for passing to urlencode. - """ - return [(k, v) for (i, k, v) in self._totally_ordered_pairs()] - - def _totally_ordered_pairs(self): - """Return list of (key, value, index) tuples. - - Like pairs, but allows preserving correct ordering even where several - controls are involved. - - """ - raise NotImplementedError() - - def _write_mime_data(self, mw, name, value): - """Write data for a subitem of this control to a MimeWriter.""" - # called by HTMLForm - mw2 = mw.nextpart() - mw2.addheader("Content-Disposition", - 'form-data; name="%s"' % name, 1) - f = mw2.startbody(prefix=0) - f.write(value) - - def __str__(self): - raise NotImplementedError() - - def get_labels(self): - """Return all labels (Label instances) for this control. - - If the control was surrounded by a