Merge remote-tracking branch 'alfa-addon/master'
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<addon id="plugin.video.alfa" name="Alfa" version="2.7.3" provider-name="Alfa Addon">
<addon id="plugin.video.alfa" name="Alfa" version="2.7.4" provider-name="Alfa Addon">
<requires>
<import addon="xbmc.python" version="2.1.0"/>
<import addon="script.module.libtorrent" optional="true"/>
@@ -19,17 +19,17 @@
</assets>
<news>[B]Estos son los cambios para esta versión:[/B]
[COLOR green][B]Canales agregados y arreglos[/B][/COLOR]
¤ allcalidad ¤ cinecalidad
¤ repelis ¤ cumlouder
¤ porntrex ¤ crunchyroll
¤ pedropolis ¤ pepecine
¤ repelis ¤ thevid
¤ vevio ¤ danimados
¤ sipeliculas ¤ cinecalidad
¤ locopelis ¤ pelisipad
¤ divxtotal ¤ elitetorrent
¤ estrenosgo ¤ grantorrent
¤ mejortorrent1 ¤ newpct1
¤ danimados ¤ fanpelis
¤ repelis
¤ tvvip ¤ zonatorrent
¤ maxipelis24 ¤ wikiseries
¤ arreglos internos
¤ Agradecimientos a @angedam, @chivmalev, @alaquepasa por colaborar en ésta versión
¤ Agradecimientos a @angedam y @chivmalev por colaborar en ésta versión

</news>
<description lang="es">Navega con Kodi por páginas web para ver sus videos de manera fácil.</description>

@@ -161,6 +161,7 @@ def findvideos(item):
itemlist.extend(servertools.find_video_items(data=data))

for videoitem in itemlist:
videoitem.channel = item.channel
videoitem.title = '[%s]' % videoitem.server.capitalize()

return itemlist

@@ -1,61 +0,0 @@
|
||||
{
|
||||
"id": "cineasiaenlinea",
|
||||
"name": "CineAsiaEnLinea",
|
||||
"active": true,
|
||||
"adult": false,
|
||||
"language": ["cast", "lat"],
|
||||
"thumbnail": "http://i.imgur.com/5KOU8uy.png?3",
|
||||
"banner": "cineasiaenlinea.png",
|
||||
"categories": [
|
||||
"movie",
|
||||
"vos"
|
||||
],
|
||||
"settings": [
|
||||
{
|
||||
"id": "modo_grafico",
|
||||
"type": "bool",
|
||||
"label": "Buscar información extra",
|
||||
"default": true,
|
||||
"enabled": true,
|
||||
"visible": true
|
||||
},
|
||||
{
|
||||
"id": "include_in_global_search",
|
||||
"type": "bool",
|
||||
"label": "Incluir en búsqueda global",
|
||||
"default": true,
|
||||
"enabled": true,
|
||||
"visible": true
|
||||
},
|
||||
{
|
||||
"id": "include_in_newest_peliculas",
|
||||
"type": "bool",
|
||||
"label": "Incluir en Novedades - Películas",
|
||||
"default": true,
|
||||
"enabled": true,
|
||||
"visible": true
|
||||
},
|
||||
{
|
||||
"id": "include_in_newest_terror",
|
||||
"type": "bool",
|
||||
"label": "Incluir en Novedades - terror",
|
||||
"default": true,
|
||||
"enabled": true,
|
||||
"visible": true
|
||||
},
|
||||
{
|
||||
"id": "perfil",
|
||||
"type": "list",
|
||||
"label": "Perfil de color",
|
||||
"default": 3,
|
||||
"enabled": true,
|
||||
"visible": true,
|
||||
"lvalues": [
|
||||
"Sin color",
|
||||
"Perfil 3",
|
||||
"Perfil 2",
|
||||
"Perfil 1"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -1,177 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import re
|
||||
|
||||
from core import httptools
|
||||
from core import scrapertools
|
||||
from core import servertools
|
||||
from core import tmdb
|
||||
from core.item import Item
|
||||
from platformcode import config, logger
|
||||
from channelselector import get_thumb
|
||||
|
||||
host = "http://www.cineasiaenlinea.com/"
|
||||
__channel__='cineasiaenlinea'
|
||||
|
||||
try:
|
||||
__modo_grafico__ = config.get_setting('modo_grafico', __channel__)
|
||||
except:
|
||||
__modo_grafico__ = True
|
||||
|
||||
# Configuracion del canal
|
||||
__perfil__ = int(config.get_setting('perfil', 'cineasiaenlinea'))
|
||||
|
||||
# Fijar perfil de color
|
||||
perfil = [['0xFFFFE6CC', '0xFFFFCE9C', '0xFF994D00'],
|
||||
['0xFFA5F6AF', '0xFF5FDA6D', '0xFF11811E'],
|
||||
['0xFF58D3F7', '0xFF2E9AFE', '0xFF2E64FE']]
|
||||
|
||||
if __perfil__ - 1 >= 0:
|
||||
color1, color2, color3 = perfil[__perfil__ - 1]
|
||||
else:
|
||||
color1 = color2 = color3 = ""
|
||||
|
||||
|
||||
def mainlist(item):
|
||||
logger.info()
|
||||
itemlist = []
|
||||
|
||||
itemlist.append(item.clone(action="peliculas", title="Novedades", url=host + "archivos/peliculas",
|
||||
thumbnail=get_thumb('newest', auto=True), text_color=color1,))
|
||||
itemlist.append(item.clone(action="peliculas", title="Estrenos", url=host + "archivos/estrenos",
|
||||
thumbnail=get_thumb('premieres', auto=True), text_color=color1))
|
||||
itemlist.append(item.clone(action="indices", title="Por géneros", url=host,
|
||||
thumbnail=get_thumb('genres', auto=True), text_color=color1))
|
||||
itemlist.append(item.clone(action="indices", title="Por país", url=host, text_color=color1,
|
||||
thumbnail=get_thumb('country', auto=True)))
|
||||
itemlist.append(item.clone(action="indices", title="Por año", url=host, text_color=color1,
|
||||
thumbnail=get_thumb('year', auto=True)))
|
||||
|
||||
itemlist.append(item.clone(title="", action=""))
|
||||
itemlist.append(item.clone(action="search", title="Buscar...", text_color=color3,
|
||||
thumbnail=get_thumb('search', auto=True)))
|
||||
itemlist.append(item.clone(action="configuracion", title="Configurar canal...", text_color="gold", folder=False))
|
||||
|
||||
return itemlist
|
||||
|
||||
|
||||
def configuracion(item):
|
||||
from platformcode import platformtools
|
||||
ret = platformtools.show_channel_settings()
|
||||
platformtools.itemlist_refresh()
|
||||
return ret
|
||||
|
||||
|
||||
def search(item, texto):
|
||||
logger.info()
|
||||
|
||||
item.url = "%s?s=%s" % (host, texto.replace(" ", "+"))
|
||||
|
||||
try:
|
||||
return peliculas(item)
|
||||
# Se captura la excepción, para no interrumpir al buscador global si un canal falla
|
||||
except:
|
||||
import sys
|
||||
for line in sys.exc_info():
|
||||
logger.error("%s" % line)
|
||||
return []
|
||||
|
||||
|
||||
def newest(categoria):
|
||||
logger.info()
|
||||
itemlist = []
|
||||
item = Item()
|
||||
try:
|
||||
if categoria == 'peliculas':
|
||||
item.url = host + "archivos/peliculas"
|
||||
elif categoria == 'terror':
|
||||
item.url = host + "genero/terror"
|
||||
item.action = "peliculas"
|
||||
itemlist = peliculas(item)
|
||||
|
||||
if itemlist[-1].action == "peliculas":
|
||||
itemlist.pop()
|
||||
|
||||
# Se captura la excepción, para no interrumpir al canal novedades si un canal falla
|
||||
except:
|
||||
import sys
|
||||
for line in sys.exc_info():
|
||||
logger.error("{0}".format(line))
|
||||
return []
|
||||
|
||||
return itemlist
|
||||
|
||||
|
||||
def peliculas(item):
|
||||
logger.info()
|
||||
itemlist = []
|
||||
item.text_color = color2
|
||||
|
||||
# Descarga la página
|
||||
data = httptools.downloadpage(item.url).data
|
||||
|
||||
patron = '<h3><a href="([^"]+)">([^<]+)<.*?src="([^"]+)".*?<a rel="tag">([^<]+)<' \
|
||||
'.*?<a rel="tag">([^<]+)<'
|
||||
matches = scrapertools.find_multiple_matches(data, patron)
|
||||
for scrapedurl, scrapedtitle, scrapedthumbnail, year, calidad in matches:
|
||||
title = re.sub(r' \((\d+)\)', '', scrapedtitle)
|
||||
scrapedtitle += " [%s]" % calidad
|
||||
infolab = {'year': year}
|
||||
itemlist.append(item.clone(action="findvideos", title=scrapedtitle, url=scrapedurl,
|
||||
thumbnail=scrapedthumbnail, infoLabels=infolab,
|
||||
contentTitle=title, contentType="movie", quality=calidad))
|
||||
|
||||
tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)
|
||||
next_page = scrapertools.find_single_match(data, '<a class="nextpostslink" rel="next" href="([^"]+)"')
|
||||
if next_page:
|
||||
itemlist.append(item.clone(title=">> Página Siguiente", url=next_page))
|
||||
|
||||
return itemlist
|
||||
|
||||
|
||||
def indices(item):
|
||||
logger.info()
|
||||
itemlist = []
|
||||
|
||||
# Descarga la página
|
||||
data = httptools.downloadpage(item.url).data
|
||||
logger.info(data)
|
||||
if "géneros" in item.title:
|
||||
bloque = scrapertools.find_single_match(data, '(?i)<h4>Peliculas por genero</h4>(.*?)</ul>')
|
||||
matches = scrapertools.find_multiple_matches(bloque, '<a href="([^"]+)".*?>([^<]+)<')
|
||||
elif "año" in item.title:
|
||||
bloque = scrapertools.find_single_match(data, '(?i)<h4>Peliculas por Año</h4>(.*?)</select>')
|
||||
matches = scrapertools.find_multiple_matches(bloque, '<option value="([^"]+)">([^<]+)<')
|
||||
else:
|
||||
bloque = scrapertools.find_single_match(data, '(?i)<h4>Peliculas por Pais</h4>(.*?)</ul>')
|
||||
matches = scrapertools.find_multiple_matches(bloque, '<a href="([^"]+)".*?>([^<]+)<')
|
||||
|
||||
for scrapedurl, scrapedtitle in matches:
|
||||
if "año" in item.title:
|
||||
scrapedurl = "%sfecha-estreno/%s" % (host, scrapedurl)
|
||||
itemlist.append(Item(channel=item.channel, action="peliculas", title=scrapedtitle, url=scrapedurl,
|
||||
thumbnail=item.thumbnail, text_color=color3))
|
||||
|
||||
return itemlist
|
||||
|
||||
|
||||
def findvideos(item):
|
||||
logger.info()
|
||||
data = httptools.downloadpage(item.url).data
|
||||
item.infoLabels["plot"] = scrapertools.find_single_match(data, '(?i)<h2>SINOPSIS.*?<p>(.*?)</p>')
|
||||
item.infoLabels["trailer"] = scrapertools.find_single_match(data, 'src="(http://www.youtube.com/embed/[^"]+)"')
|
||||
|
||||
itemlist = servertools.find_video_items(item=item, data=data)
|
||||
for it in itemlist:
|
||||
it.thumbnail = item.thumbnail
|
||||
it.text_color = color2
|
||||
|
||||
itemlist.append(item.clone(action="add_pelicula_to_library", title="Añadir película a la videoteca"))
|
||||
if item.infoLabels["trailer"]:
|
||||
folder = True
|
||||
if config.is_xbmc():
|
||||
folder = False
|
||||
itemlist.append(item.clone(channel="trailertools", action="buscartrailer", title="Ver Trailer", folder=folder,
|
||||
contextual=not folder))
|
||||
|
||||
return itemlist
|
||||
@@ -324,7 +324,7 @@ def findvideos(item):
url = server_url[server_id] + video_id + '.html'
elif server_id == 'BitTorrent':
import urllib
base_url = '%sprotect/v.php' % host
base_url = '%s/protect/v.php' % host
post = {'i':video_id, 'title':item.title}
post = urllib.urlencode(post)
headers = {'Referer':item.url}

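The BitTorrent branch above only shows the request being prepared; the hunk is cut off before the download call. As a rough sketch of how such a protected link is typically fetched in this codebase (assuming httptools.downloadpage accepts post and headers keyword arguments, as other hunks in this diff do; the function name below is illustrative):

import urllib
from core import httptools

def fetch_protected_torrent(host, video_id, title, referer):
    # The hunk adds the missing '/' so the endpoint resolves correctly.
    base_url = '%s/protect/v.php' % host
    post = urllib.urlencode({'i': video_id, 'title': title})
    headers = {'Referer': referer}   # the site checks where the request came from
    return httptools.downloadpage(base_url, post=post, headers=headers).data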
@@ -8,5 +8,15 @@
"banner": "https://imgur.com/xG5xqBq.png",
"categories": [
"tvshow"
],
"settings": [
{
"id": "include_in_global_search",
"type": "bool",
"label": "Incluir en busqueda global",
"default": true,
"enabled": true,
"visible": true
}
]
}

@@ -1,6 +1,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import re
|
||||
import base64
|
||||
|
||||
from channelselector import get_thumb
|
||||
from core import httptools
|
||||
@@ -22,48 +23,64 @@ list_quality = ['default']
|
||||
|
||||
def mainlist(item):
|
||||
logger.info()
|
||||
|
||||
thumb_series = get_thumb("channels_tvshow.png")
|
||||
autoplay.init(item.channel, list_servers, list_quality)
|
||||
|
||||
itemlist = list()
|
||||
|
||||
itemlist.append(Item(channel=item.channel, action="mainpage", title="Categorías", url=host,
|
||||
thumbnail=thumb_series))
|
||||
itemlist.append(Item(channel=item.channel, action="mainpage", title="Más Populares", url=host,
|
||||
thumbnail=thumb_series))
|
||||
itemlist.append(Item(channel=item.channel, action="lista", title="Peliculas Animadas", url=host+"peliculas/",
|
||||
thumbnail=thumb_series))
|
||||
itemlist.append(Item(channel=item.channel, action="search", title="Buscar", url=host + "?s=",
|
||||
thumbnail=thumb_series))
|
||||
autoplay.show_option(item.channel, itemlist)
|
||||
return itemlist
|
||||
|
||||
|
||||
"""
|
||||
def search(item, texto):
|
||||
logger.info()
|
||||
texto = texto.replace(" ","+")
|
||||
item.url = item.url+texto
|
||||
item.url = host + "?s=" + texto
|
||||
if texto!='':
|
||||
return lista(item)
|
||||
"""
|
||||
return sub_search(item)
|
||||
|
||||
|
||||
def sub_search(item):
|
||||
logger.info()
|
||||
itemlist = []
|
||||
data = httptools.downloadpage(item.url).data
|
||||
patron = 'class="thumbnail animation-.*?href="([^"]+).*?'
|
||||
patron += 'img src="([^"]+).*?'
|
||||
patron += 'alt="([^"]+).*?'
|
||||
patron += 'class="year">(\d{4})'
|
||||
matches = scrapertools.find_multiple_matches(data, patron)
|
||||
for scrapedurl, scrapedthumbnail, scrapedtitle, scrapedyear in matches:
|
||||
item.action = "findvideos"
|
||||
item.contentTitle = scrapedtitle
|
||||
item.contentSerieName = ""
|
||||
if "serie" in scrapedurl:
|
||||
item.action = "episodios"
|
||||
item.contentTitle = ""
|
||||
item.contentSerieName = scrapedtitle
|
||||
title = scrapedtitle
|
||||
if scrapedyear:
|
||||
item.infoLabels['year'] = int(scrapedyear)
|
||||
title += " (%s)" %item.infoLabels['year']
|
||||
itemlist.append(item.clone(thumbnail = scrapedthumbnail,
|
||||
title = title,
|
||||
url = scrapedurl
|
||||
))
|
||||
tmdb.set_infoLabels(itemlist)
|
||||
return itemlist
|
||||
|
||||
|
||||
def mainpage(item):
|
||||
logger.info()
|
||||
|
||||
itemlist = []
|
||||
|
||||
data1 = httptools.downloadpage(item.url).data
|
||||
data1 = re.sub(r"\n|\r|\t|\s{2}| ", "", data1)
|
||||
if item.title=="Más Populares":
|
||||
patron_sec='<a class="lglossary" data-type.+?>(.+?)<\/ul>'
|
||||
patron='<img .+? src="([^"]+)".+?<a href="([^"]+)".+?>([^"]+)<\/a>' #scrapedthumbnail, #scrapedurl, #scrapedtitle
|
||||
if item.title=="Categorías":
|
||||
patron_sec='<ul id="main_header".+?>(.+?)<\/ul><\/div>'
|
||||
patron='<a href="([^"]+)">([^"]+)<\/a>'#scrapedurl, #scrapedtitle
|
||||
|
||||
patron_sec='<ul id="main_header".+?>(.+?)<\/ul><\/div>'
|
||||
patron='<a href="([^"]+)">([^"]+)<\/a>'#scrapedurl, #scrapedtitle
|
||||
data = scrapertools.find_single_match(data1, patron_sec)
|
||||
|
||||
matches = scrapertools.find_multiple_matches(data, patron)
|
||||
if item.title=="Géneros" or item.title=="Categorías":
|
||||
for scrapedurl, scrapedtitle in matches:
|
||||
@@ -82,11 +99,10 @@ def mainpage(item):
|
||||
return itemlist
|
||||
return itemlist
|
||||
|
||||
|
||||
def lista(item):
|
||||
logger.info()
|
||||
|
||||
itemlist = []
|
||||
|
||||
data = httptools.downloadpage(item.url).data
|
||||
data = re.sub(r"\n|\r|\t|\s{2}| ", "", data)
|
||||
if item.title=="Peliculas Animadas":
|
||||
@@ -114,8 +130,8 @@ def lista(item):
|
||||
|
||||
def episodios(item):
|
||||
logger.info()
|
||||
|
||||
itemlist = []
|
||||
infoLabels = {}
|
||||
data = httptools.downloadpage(item.url).data
|
||||
data = re.sub(r"\n|\r|\t|\s{2}| ", "", data)
|
||||
data_lista = scrapertools.find_single_match(data,
|
||||
@@ -123,51 +139,52 @@ def episodios(item):
|
||||
show = item.title
|
||||
patron_caps = '<img alt=".+?" src="([^"]+)"><\/a><\/div><div class=".+?">([^"]+)<\/div>.+?'
|
||||
patron_caps += '<a .+? href="([^"]+)">([^"]+)<\/a>'
|
||||
#scrapedthumbnail,#scrapedtempepi, #scrapedurl, #scrapedtitle
|
||||
matches = scrapertools.find_multiple_matches(data_lista, patron_caps)
|
||||
for scrapedthumbnail, scrapedtempepi, scrapedurl, scrapedtitle in matches:
|
||||
tempepi=scrapedtempepi.split(" - ")
|
||||
if tempepi[0]=='Pel':
|
||||
tempepi[0]=0
|
||||
title="{0}x{1} - ({2})".format(tempepi[0], tempepi[1].zfill(2), scrapedtitle)
|
||||
itemlist.append(Item(channel=item.channel, thumbnail=scrapedthumbnail,
|
||||
action="findvideos", title=title, url=scrapedurl, show=show))
|
||||
|
||||
item.infoLabels["season"] = tempepi[0]
|
||||
item.infoLabels["episode"] = tempepi[1]
|
||||
itemlist.append(item.clone(thumbnail=scrapedthumbnail,
|
||||
action="findvideos", title=title, url=scrapedurl))
|
||||
if config.get_videolibrary_support() and len(itemlist) > 0:
|
||||
itemlist.append(Item(channel=item.channel, title="[COLOR yellow]Añadir " + show + " a la videoteca[/COLOR]", url=item.url,
|
||||
action="add_serie_to_library", extra="episodios", show=show))
|
||||
|
||||
|
||||
return itemlist
|
||||
|
||||
|
||||
def findvideos(item):
|
||||
logger.info()
|
||||
import base64
|
||||
|
||||
itemlist = []
|
||||
|
||||
data = httptools.downloadpage(item.url).data
|
||||
data = re.sub(r"\n|\r|\t|\s{2}| ", "", data)
|
||||
data1 = scrapertools.find_single_match(data,
|
||||
'<div id="playex" .+?>(.+?)<\/nav>?\s<\/div><\/div>')
|
||||
patron = "changeLink\('([^']+)'\)"
|
||||
matches = re.compile(patron, re.DOTALL).findall(data1)
|
||||
|
||||
for url64 in matches:
|
||||
url =base64.b64decode(url64)
|
||||
if 'danimados' in url:
|
||||
new_data = httptools.downloadpage('https:'+url.replace('stream', 'stream_iframe')).data
|
||||
url = scrapertools.find_single_match(new_data, '<source src="([^"]+)"')
|
||||
|
||||
itemlist.append(item.clone(title='%s',url=url, action="play"))
|
||||
|
||||
url1 =base64.b64decode(url64)
|
||||
if 'danimados' in url1:
|
||||
new_data = httptools.downloadpage('https:'+url1.replace('stream', 'stream_iframe')).data
|
||||
logger.info("Intel33 %s" %new_data)
|
||||
url = scrapertools.find_single_match(new_data, "sources: \[\{file:'([^']+)")
|
||||
if "zkstream" in url:
|
||||
url1 = httptools.downloadpage(url, follow_redirects=False, only_headers=True).headers.get("location", "")
|
||||
else:
|
||||
url1 = url
|
||||
itemlist.append(item.clone(title='%s',url=url1, action="play"))
|
||||
tmdb.set_infoLabels(itemlist)
|
||||
itemlist = servertools.get_servers_itemlist(itemlist, lambda i: i.title % i.server.capitalize())
|
||||
|
||||
if config.get_videolibrary_support() and len(itemlist) > 0 and item.contentType=="movie" and item.contentChannel!='videolibrary':
|
||||
itemlist.append(
|
||||
item.clone(channel=item.channel, title='[COLOR yellow]Añadir esta pelicula a la videoteca[/COLOR]', url=item.url,
|
||||
action="add_pelicula_to_library", contentTitle=item.show))
|
||||
|
||||
action="add_pelicula_to_library"))
|
||||
autoplay.start(itemlist, item)
|
||||
return itemlist
|
||||
|
||||
|
||||
def play(item):
|
||||
item.thumbnail = item.contentThumbnail
|
||||
return [item]
|
||||
|
||||
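For reference, the player links handled in the danimados findvideos() hunk above arrive base64-encoded inside changeLink('...') calls; a minimal, self-contained illustration of that decoding step (the sample HTML and URL are made up):

import re
import base64

sample = "<a onclick=\"changeLink('aHR0cHM6Ly9leGFtcGxlLmNvbS92LzEyMw==')\">Opcion 1</a>"

for url64 in re.findall(r"changeLink\('([^']+)'\)", sample):
    url = base64.b64decode(url64)   # -> https://example.com/v/123
    print(url)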
@@ -519,17 +519,35 @@ def findvideos(item):
|
||||
item, itemlist = generictools.post_tmdb_findvideos(item, itemlist)
|
||||
|
||||
#Ahora tratamos los enlaces .torrent
|
||||
for scrapedurl in matches: #leemos los torrents con la diferentes calidades
|
||||
for scrapedurl in matches: #leemos los torrents con la diferentes calidades
|
||||
#Generamos una copia de Item para trabajar sobre ella
|
||||
item_local = item.clone()
|
||||
|
||||
#Buscamos si ya tiene tamaño, si no, los buscamos en el archivo .torrent
|
||||
size = scrapertools.find_single_match(item_local.quality, '\s\[(\d+,?\d*?\s\w\s?[b|B])\]')
|
||||
if not size:
|
||||
size = generictools.get_torrent_size(item_local.url) #Buscamos el tamaño en el .torrent
|
||||
if size:
|
||||
item_local.title = re.sub(r'\s\[\d+,?\d*?\s\w[b|B]\]', '', item_local.title) #Quitamos size de título, si lo traía
|
||||
item_local.title = '%s [%s]' % (item_local.title, size) #Agregamos size al final del título
|
||||
size = size.replace('GB', 'G B').replace('Gb', 'G b').replace('MB', 'M B').replace('Mb', 'M b')
|
||||
item_local.quality = re.sub(r'\s\[\d+,?\d*?\s\w\s?[b|B]\]', '', item_local.quality) #Quitamos size de calidad, si lo traía
|
||||
item_local.quality = '%s [%s]' % (item_local.quality, size) #Agregamos size al final de la calidad
|
||||
|
||||
#Ahora pintamos el link del Torrent
|
||||
item_local.url = scrapedurl
|
||||
if host not in item_local.url and host.replace('https', 'http') not in item_local.url :
|
||||
item_local.url = host + item_local.url
|
||||
item_local.title = '[COLOR yellow][?][/COLOR] [COLOR yellow][Torrent][/COLOR] [COLOR limegreen][%s][/COLOR] [COLOR red]%s[/COLOR]' % (item_local.quality, str(item_local.language)) #Preparamos título de Torrent
|
||||
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\[?\]?\]\[\/COLOR\]', '', item_local.title) #Quitamos etiquetas vacías
|
||||
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\/COLOR\]', '', item_local.title) #Quitamos colores vacíos
|
||||
item_local.title = '[COLOR yellow][?][/COLOR] [COLOR yellow][Torrent][/COLOR] [COLOR limegreen][%s][/COLOR] [COLOR red]%s[/COLOR]' % (item_local.quality, str(item_local.language))
|
||||
|
||||
#Preparamos título y calidad, quitamos etiquetas vacías
|
||||
item_local.title = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', item_local.title)
|
||||
item_local.title = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', item_local.title)
|
||||
item_local.title = item_local.title.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
|
||||
item_local.quality = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', item_local.quality)
|
||||
item_local.quality = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', item_local.quality)
|
||||
item_local.quality = item_local.quality.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
|
||||
|
||||
item_local.alive = "??" #Calidad del link sin verificar
|
||||
item_local.action = "play" #Visualizar vídeo
|
||||
item_local.server = "torrent" #Seridor Torrent
|
||||
|
||||
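The title/quality clean-up that follows the torrent handling above is repeated, line for line, in several hunks below. A small helper (hypothetical, not part of this diff) makes the intent easier to read: drop Kodi [COLOR] blocks that ended up empty and strip leftover bracket pairs:

import re

def clean_empty_tags(text):
    # [COLOR xxx][ ][/COLOR] or [COLOR xxx][/COLOR] left behind by missing values
    text = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', text)
    text = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', text)
    for junk in ("--", "[]", "()", "(/)", "[/]"):
        text = text.replace(junk, "")
    return text.strip()

# clean_empty_tags('Pelicula [COLOR limegreen][][/COLOR]') -> 'Pelicula'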
@@ -171,8 +171,11 @@ def listado(item):
|
||||
|
||||
#Limpiamos el título de la basura innecesaria
|
||||
title = title.replace("Dual", "").replace("dual", "").replace("Subtitulada", "").replace("subtitulada", "").replace("Subt", "").replace("subt", "").replace("Sub", "").replace("sub", "").replace("(Proper)", "").replace("(proper)", "").replace("Proper", "").replace("proper", "").replace("#", "").replace("(Latino)", "").replace("Latino", "")
|
||||
title = title.replace("- HDRip", "").replace("(HDRip)", "").replace("- Hdrip", "").replace("(microHD)", "").replace("(DVDRip)", "").replace("(HDRip)", "").replace("(BR-LINE)", "").replace("(HDTS-SCREENER)", "").replace("(BDRip)", "").replace("(BR-Screener)", "").replace("(DVDScreener)", "").replace("TS-Screener", "").replace(" TS", "").replace(" Ts", "")
|
||||
title = title.replace("- HDRip", "").replace("(HDRip)", "").replace("- Hdrip", "").replace("(microHD)", "").replace("(DVDRip)", "").replace("(HDRip)", "").replace("(BR-LINE)", "").replace("(HDTS-SCREENER)", "").replace("(BDRip)", "").replace("(BR-Screener)", "").replace("(DVDScreener)", "").replace("TS-Screener", "").replace(" TS", "").replace(" Ts", "").replace("temporada", "").replace("Temporada", "").replace("capitulo", "").replace("Capitulo", "")
|
||||
|
||||
title = re.sub(r'(?:\d+)?x.?\s?\d+', '', title)
|
||||
title = re.sub(r'\??\s?\d*?\&.*', '', title).title().strip()
|
||||
|
||||
item_local.from_title = title #Guardamos esta etiqueta para posible desambiguación de título
|
||||
|
||||
if item_local.extra == "peliculas": #preparamos Item para películas
|
||||
@@ -190,16 +193,17 @@ def listado(item):
|
||||
item_local.contentType = "episode"
|
||||
item_local.extra = "series"
|
||||
epi_mult = scrapertools.find_single_match(item_local.url, r'cap.*?-\d+-al-(\d+)')
|
||||
item_local.contentSeason = scrapertools.find_single_match(item_local.url, r'temp.*?-(\d+)')
|
||||
item_local.contentSeason = scrapertools.find_single_match(item_local.url, r'temporada-(\d+)')
|
||||
item_local.contentEpisodeNumber = scrapertools.find_single_match(item_local.url, r'cap.*?-(\d+)')
|
||||
if not item_local.contentSeason:
|
||||
item_local.contentSeason = scrapertools.find_single_match(item_local.url, r'-(\d+)[x|X]\d+')
|
||||
if not item_local.contentEpisodeNumber:
|
||||
item_local.contentEpisodeNumber = scrapertools.find_single_match(item_local.url, r'-\d+[x|X](\d+)')
|
||||
if item_local.contentSeason < 1:
|
||||
item_local.contentSeason = 1
|
||||
if not item_local.contentSeason or item_local.contentSeason < 1:
|
||||
item_local.contentSeason = 0
|
||||
if item_local.contentEpisodeNumber < 1:
|
||||
item_local.contentEpisodeNumber = 1
|
||||
|
||||
item_local.contentSerieName = title
|
||||
if epi_mult:
|
||||
title = "%sx%s al %s -" % (item_local.contentSeason, str(item_local.contentEpisodeNumber).zfill(2), str(epi_mult).zfill(2)) #Creamos un título con el rango de episodios
|
||||
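The listado() hunk above reads season and episode straight from the URL slug, with an NxM fallback. A compact sketch of that parsing, on a made-up URL (regexes simplified from the originals):

import re

def parse_season_episode(url):
    season = re.search(r'temporada-(\d+)', url)
    episode = re.search(r'cap\w*?-(\d+)', url)
    if season and episode:
        return int(season.group(1)), int(episode.group(1))
    nxm = re.search(r'-(\d+)[xX](\d+)', url)   # fallback, e.g. "...-3x07"
    if nxm:
        return int(nxm.group(1)), int(nxm.group(2))
    return 0, 1                                # defaults similar to the hunk above

# parse_season_episode('serie-temporada-2-capitulo-13') -> (2, 13)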
@@ -269,11 +273,11 @@ def findvideos(item):
|
||||
#data = unicode(data, "utf-8", errors="replace")
|
||||
|
||||
#Añadimos el tamaño para todos
|
||||
size = scrapertools.find_single_match(item.quality, '\s\[(\d+,?\d*?\s\w[b|B]s)\]')
|
||||
size = scrapertools.find_single_match(item.quality, '\s\[(\d+,?\d*?\s\w\s?[b|B]s)\]')
|
||||
if size:
|
||||
item.title = re.sub('\s\[\d+,?\d*?\s\w[b|B]s\]', '', item.title) #Quitamos size de título, si lo traía
|
||||
item.title = '%s [%s]' % (item.title, size) #Agregamos size al final del título
|
||||
item.quality = re.sub('\s\[\d+,?\d*?\s\w[b|B]s\]', '', item.quality) #Quitamos size de calidad, si lo traía
|
||||
item.quality = re.sub('\s\[\d+,?\d*?\s\w\s?[b|B]s\]', '', item.quality) #Quitamos size de calidad, si lo traía
|
||||
|
||||
patron_t = '<div class="enlace_descarga".*?<a href="(.*?\.torrent)"'
|
||||
link_torrent = scrapertools.find_single_match(data, patron_t)
|
||||
@@ -299,9 +303,11 @@ def findvideos(item):
|
||||
#Llamamos al método para crear el título general del vídeo, con toda la información obtenida de TMDB
|
||||
item, itemlist = generictools.post_tmdb_findvideos(item, itemlist)
|
||||
|
||||
if not size:
|
||||
size = generictools.get_torrent_size(link_torrent) #Buscamos el tamaño en el .torrent
|
||||
if size:
|
||||
item.quality = '%s [%s]' % (item.quality, size) #Agregamos size al final de calidad
|
||||
item.quality = item.quality.replace("G", "G ").replace("M", "M ") #Se evita la palabra reservada en Unify
|
||||
item.quality = item.quality.replace("GB", "G B").replace("MB", "M B") #Se evita la palabra reservada en Unify
|
||||
|
||||
#Generamos una copia de Item para trabajar sobre ella
|
||||
item_local = item.clone()
|
||||
@@ -313,8 +319,15 @@ def findvideos(item):
|
||||
item_local.quality += "[Torrent]"
|
||||
item_local.url = link_torrent
|
||||
item_local.title = '[COLOR yellow][?][/COLOR] [COLOR yellow][Torrent][/COLOR] [COLOR limegreen][%s][/COLOR] [COLOR red]%s[/COLOR]' % (item_local.quality, str(item_local.language)) #Preparamos título de Torrent
|
||||
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\[?\]?\]\[\/COLOR\]', '', item_local.title) #Quitamos etiquetas vacías
|
||||
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\/COLOR\]', '', item_local.title) #Quitamos colores vacíos
|
||||
|
||||
#Preparamos título y calidad, quitamos etiquetas vacías
|
||||
item_local.title = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', item_local.title)
|
||||
item_local.title = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', item_local.title)
|
||||
item_local.title = item_local.title.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
|
||||
item_local.quality = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', item_local.quality)
|
||||
item_local.quality = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', item_local.quality)
|
||||
item_local.quality = item_local.quality.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
|
||||
|
||||
item_local.alive = "??" #Calidad del link sin verificar
|
||||
item_local.action = "play" #Visualizar vídeo
|
||||
item_local.server = "torrent" #Seridor Torrent
|
||||
|
||||
@@ -682,7 +682,7 @@ def findvideos(item):
|
||||
#Ahora tratamos los enlaces .torrent
|
||||
itemlist_alt = [] #Usamos una lista intermedia para poder ordenar los episodios
|
||||
if matches_torrent:
|
||||
for scrapedurl, scrapedquality, scrapedlang in matches_torrent: #leemos los torrents con la diferentes calidades
|
||||
for scrapedurl, scrapedquality, scrapedlang in matches_torrent: #leemos los torrents con la diferentes calidades
|
||||
#Generamos una copia de Item para trabajar sobre ella
|
||||
item_local = item.clone()
|
||||
|
||||
@@ -756,9 +756,19 @@ def findvideos(item):
|
||||
|
||||
#Ahora pintamos el link del Torrent
|
||||
item_local.url = host + scrapedtorrent
|
||||
item_local.title = '[COLOR yellow][?][/COLOR] [COLOR yellow][Torrent][/COLOR] [COLOR limegreen][%s][/COLOR] [COLOR red]%s[/COLOR]' % (quality, str(item_local.language)) #Preparamos título de Torrent
|
||||
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\[?\]?\]\[\/COLOR\]', '', item_local.title) #Quitamos etiquetas vacías
|
||||
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\/COLOR\]', '', item_local.title) #Quitamos colores vacíos
|
||||
size = generictools.get_torrent_size(item_local.url) #Buscamos el tamaño en el .torrent
|
||||
if size:
|
||||
quality += ' [%s]' % size
|
||||
item_local.title = '[COLOR yellow][?][/COLOR] [COLOR yellow][Torrent][/COLOR] [COLOR limegreen][%s][/COLOR] [COLOR red]%s[/COLOR]' % (quality, str(item_local.language))
|
||||
|
||||
#Preparamos título y calidad, quitamos etiquetas vacías
|
||||
item_local.title = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', item_local.title)
|
||||
item_local.title = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', item_local.title)
|
||||
item_local.title = item_local.title.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
|
||||
quality = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', quality)
|
||||
quality = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', quality)
|
||||
quality = quality.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
|
||||
|
||||
item_local.alive = "??" #Calidad del link sin verificar
|
||||
item_local.action = "play" #Visualizar vídeo
|
||||
item_local.server = "torrent" #Seridor Torrent
|
||||
@@ -896,8 +906,15 @@ def findvideos(item):
|
||||
|
||||
#Ahora pintamos el link Directo
|
||||
item_local.url = enlace
|
||||
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\[?\]?\]\[\/COLOR\]', '', item_local.title) #Quitamos etiquetas vacías
|
||||
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\/COLOR\]', '', item_local.title) #Quitamos colores vacíos
|
||||
|
||||
#Preparamos título y calidad, quitamos etiquetas vacías
|
||||
item_local.title = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', item_local.title)
|
||||
item_local.title = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', item_local.title)
|
||||
item_local.title = item_local.title.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
|
||||
quality = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', quality)
|
||||
quality = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', quality)
|
||||
quality = quality.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
|
||||
|
||||
item_local.action = "play" #Visualizar vídeo
|
||||
item_local.server = servidor #Seridor Directo
|
||||
|
||||
|
||||
@@ -474,14 +474,17 @@ def findvideos(item):
|
||||
#Añadimos la duración, que estará en item.quility
|
||||
if scrapertools.find_single_match(item.quality, '(\[\d+:\d+)') and not scrapertools.find_single_match(item_local.quality, '(\[\d+:\d+)'):
|
||||
item_local.quality = '%s [/COLOR][COLOR white][%s h]' % (item_local.quality, scrapertools.find_single_match(item.quality, '(\d+:\d+)'))
|
||||
|
||||
#if size and item_local.contentType != "episode":
|
||||
if not size:
|
||||
size = generictools.get_torrent_size(scrapedurl) #Buscamos el tamaño en el .torrent
|
||||
if size:
|
||||
size = size.replace(".", ",").replace("B,", " B").replace("b,", " b")
|
||||
if '[/COLOR][COLOR white]' in item_local.quality:
|
||||
item_local.quality = '%s [%s]' % (item_local.quality, size)
|
||||
else:
|
||||
item_local.quality = '%s [/COLOR][COLOR white][%s]' % (item_local.quality, size)
|
||||
if item_local.action == 'show_result': #Viene de una búsqueda global
|
||||
if item_local.action == 'show_result': #Viene de una búsqueda global
|
||||
channel = item_local.channel.capitalize()
|
||||
if item_local.from_channel:
|
||||
channel = item_local.from_channel.capitalize()
|
||||
@@ -491,8 +494,15 @@ def findvideos(item):
|
||||
if scrapedurl:
|
||||
item_local.url = scrapedurl
|
||||
item_local.title = '[COLOR yellow][?][/COLOR] [COLOR yellow][Torrent][/COLOR] [COLOR limegreen][%s][/COLOR] [COLOR red]%s[/COLOR]' % (item_local.quality, str(item_local.language)) #Preparamos título de Torrent
|
||||
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\[?\]?\]\[\/COLOR\]', '', item_local.title).strip() #Quitamos etiquetas vacías
|
||||
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\/COLOR\]', '', item_local.title).strip() #Quitamos colores vacíos
|
||||
|
||||
#Preparamos título y calidad, quitamos etiquetas vacías
|
||||
item_local.title = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', item_local.title)
|
||||
item_local.title = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', item_local.title)
|
||||
item_local.title = item_local.title.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
|
||||
item_local.quality = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', item_local.quality)
|
||||
item_local.quality = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', item_local.quality)
|
||||
item_local.quality = item_local.quality.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
|
||||
|
||||
item_local.alive = "??" #Calidad del link sin verificar
|
||||
item_local.action = "play" #Visualizar vídeo
|
||||
item_local.server = "torrent" #Seridor Torrent
|
||||
|
||||
2
plugin.video.alfa/channels/locopelis.py
Executable file → Normal file
@@ -355,7 +355,7 @@ def findvideos(item):
new_url = get_link(get_source(item.url))
new_url = get_link(get_source(new_url))
video_id = scrapertools.find_single_match(new_url, 'http.*?h=(\w+)')
new_url = '%s%s' % (host, 'playeropstream/api.php')
new_url = '%s%s' % (host.replace('.com','.tv'), 'playeropstream/api.php')
post = {'h': video_id}
post = urllib.urlencode(post)
data = httptools.downloadpage(new_url, post=post).data

12
plugin.video.alfa/channels/maxipelis24.json
Normal file
@@ -0,0 +1,12 @@
{
"id": "maxipelis24",
"name": "Maxipelis24",
"active": true,
"adult": false,
"language": ["lat"],
"thumbnail": "maxipelis24.png",
"banner": "",
"categories": [
"movie"
]
}
125
plugin.video.alfa/channels/maxipelis24.py
Normal file
@@ -0,0 +1,125 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import re
|
||||
import urlparse
|
||||
import urllib
|
||||
|
||||
from core import servertools
|
||||
from core import httptools
|
||||
from core import scrapertools
|
||||
from core.item import Item
|
||||
from platformcode import config, logger
|
||||
from channelselector import get_thumb
|
||||
|
||||
host="http://maxipelis24.com"
|
||||
|
||||
|
||||
def mainlist(item):
|
||||
logger.info()
|
||||
|
||||
itemlist = []
|
||||
|
||||
itemlist.append(Item(channel=item.channel, title="peliculas", action="movies", url=host, thumbnail=get_thumb('movies', auto=True)))
|
||||
itemlist.append(Item(channel=item.channel, action="category", title="Año de Estreno", url=host, cat='year', thumbnail=get_thumb('year', auto=True)))
|
||||
itemlist.append(Item(channel=item.channel, action="category", title="Géneros", url=host, cat='genre', thumbnail=get_thumb('genres', auto=True)))
|
||||
itemlist.append(Item(channel=item.channel, action="category", title="Calidad", url=host, cat='quality', thumbnail=get_thumb("quality", auto=True)))
|
||||
itemlist.append(Item(channel=item.channel, title="Buscar", action="search", url=host+"?s=", thumbnail=get_thumb("search", auto=True)))
|
||||
|
||||
return itemlist
|
||||
|
||||
def search(item, texto):
|
||||
logger.info()
|
||||
texto = texto.replace(" ", "+")
|
||||
item.url = host + "?s=" + texto
|
||||
if texto != '':
|
||||
return movies(item)
|
||||
|
||||
def category(item):
|
||||
logger.info()
|
||||
itemlist = []
|
||||
data = httptools.downloadpage(item.url).data
|
||||
data = re.sub(r"\n|\r|\t|\s{2}| ","", data)
|
||||
|
||||
if item.cat == 'genre':
|
||||
data = scrapertools.find_single_match(data, '<h3>Géneros.*?</div>')
|
||||
patron = '<a href="([^"]+)">([^<]+)<'
|
||||
elif item.cat == 'year':
|
||||
data = scrapertools.find_single_match(data, '<h3>Año de estreno.*?</div>')
|
||||
patron = 'li><a href="([^"]+)">([^<]+).*?<'
|
||||
elif item.cat == 'quality':
|
||||
data = scrapertools.find_single_match(data, '<h3>Calidad.*?</div>')
|
||||
patron = 'li><a href="([^"]+)">([^<]+)<'
|
||||
|
||||
matches = re.compile(patron, re.DOTALL).findall(data)
|
||||
for scrapedurl , scrapedtitle in matches:
|
||||
itemlist.append(Item(channel=item.channel, action='movies', title=scrapedtitle, url=scrapedurl, type='cat', first=0))
|
||||
return itemlist
|
||||
|
||||
def movies(item):
|
||||
logger.info()
|
||||
itemlist = []
|
||||
|
||||
data = httptools.downloadpage(item.url).data
|
||||
data = re.sub(r"\n|\r|\t|\s{2}| ","", data)
|
||||
|
||||
patron = '<div id="mt.+?href="([^"]+)".+?'
|
||||
patron += '<img src="([^"]+)" alt="([^"]+)".+?'
|
||||
patron += '<span class="imdb">.*?>([^<]+)<.*?'
|
||||
patron += '<span class="ttx">([^<]+).*?'
|
||||
patron += 'class="year">([^<]+).+?class="calidad2">([^<]+)<'
|
||||
|
||||
matches = re.compile(patron, re.DOTALL).findall(data)
|
||||
for scrapedurl, img, scrapedtitle, ranking, resto, year, quality in matches:
|
||||
plot = scrapertools.htmlclean(resto).strip()
|
||||
title = '%s [COLOR yellow](%s)[/COLOR] [COLOR red][%s][/COLOR]'% (scrapedtitle, ranking, quality)
|
||||
itemlist.append(Item(channel=item.channel,
|
||||
title=title,
|
||||
url=scrapedurl,
|
||||
action="findvideos",
|
||||
plot=plot,
|
||||
thumbnail=img,
|
||||
contentTitle = scrapedtitle,
|
||||
contentType = "movie",
|
||||
quality=quality))
|
||||
|
||||
#Paginacion
|
||||
next_page = '<div class="pag_.*?href="([^"]+)">Siguiente<'
|
||||
matches = re.compile(next_page, re.DOTALL).findall(data)
|
||||
if matches:
|
||||
url = urlparse.urljoin(item.url, matches[0])
|
||||
itemlist.append(Item(channel=item.channel, action = "movies", title = "Página siguiente >>",url = url))
|
||||
|
||||
return itemlist
|
||||
|
||||
def findvideos(item):
|
||||
logger.info()
|
||||
itemlist=[]
|
||||
|
||||
data = httptools.downloadpage(item.url).data
|
||||
|
||||
data = scrapertools.get_match(data, '<div id="contenedor">(.*?)</div></div></div>')
|
||||
|
||||
# Busca los enlaces a los videos
|
||||
listavideos = servertools.findvideos(data)
|
||||
|
||||
for video in listavideos:
|
||||
videotitle = scrapertools.unescape(video[0])
|
||||
url = video[1]
|
||||
server = video[2]
|
||||
|
||||
itemlist.append(Item(channel=item.channel, action="play", server=server, title=videotitle, url=url,
|
||||
thumbnail=item.thumbnail, plot=item.plot, fulltitle=item.title, folder=False))
|
||||
|
||||
# Opción "Añadir esta película a la biblioteca de KODI"
|
||||
if config.get_videolibrary_support() and len(itemlist) > 0 and item.extra != 'findvideos':
|
||||
itemlist.append(
|
||||
Item(channel=item.channel,
|
||||
title='[COLOR yellow]Añadir esta pelicula a la videoteca[/COLOR]',
|
||||
url=item.url,
|
||||
action="add_pelicula_to_library",
|
||||
extra="findvideos",
|
||||
contentTitle=item.contentTitle,
|
||||
thumbnail=item.thumbnail
|
||||
))
|
||||
|
||||
return itemlist
|
||||
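One detail worth noting in the new maxipelis24.py above: the "Página siguiente" link is combined with urlparse.urljoin, so relative and absolute hrefs both work. Tiny illustration with made-up URLs:

import urlparse   # Python 2; use urllib.parse on Python 3

print(urlparse.urljoin('http://maxipelis24.com/page/2/', '/page/3/'))
# -> http://maxipelis24.com/page/3/
print(urlparse.urljoin('http://maxipelis24.com/page/2/', 'http://maxipelis24.com/genero/accion/'))
# -> http://maxipelis24.com/genero/accion/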
@@ -845,18 +845,21 @@ def findvideos(item):
|
||||
|
||||
# Poner la calidad, si es necesario
|
||||
if not item_local.quality:
|
||||
item_local.quality = ''
|
||||
if scrapertools.find_single_match(data, '<b>Formato:<\/b>&\w+;\s?([^<]+)<br>'):
|
||||
item_local.quality = scrapertools.find_single_match(data, '<b>Formato:<\/b>&\w+;\s?([^<]+)<br>')
|
||||
elif "hdtv" in item_local.url.lower() or "720p" in item_local.url.lower() or "1080p" in item_local.url.lower() or "4k" in item_local.url.lower():
|
||||
item_local.quality = scrapertools.find_single_match(item_local.url, '.*?_([H|7|1|4].*?)\.torrent')
|
||||
item_local.quality = item_local.quality.replace("_", " ")
|
||||
|
||||
|
||||
# Extrae el tamaño del vídeo
|
||||
if scrapertools.find_single_match(data, '<b>Tama.*?:<\/b>&\w+;\s?([^<]+B)<?'):
|
||||
size = scrapertools.find_single_match(data, '<b>Tama.*?:<\/b>&\w+;\s?([^<]+B)<?')
|
||||
else:
|
||||
size = scrapertools.find_single_match(item_local.url, '(\d{1,3},\d{1,2}?\w+)\.torrent')
|
||||
size = size.upper().replace(".", ",").replace("G", " G ").replace("M", " M ") #sustituimos . por , porque Unify lo borra
|
||||
if not size:
|
||||
size = generictools.get_torrent_size(item_local.url) #Buscamos el tamaño en el .torrent
|
||||
if size:
|
||||
item_local.title = re.sub('\s\[\d+,?\d*?\s\w[b|B]\]', '', item_local.title) #Quitamos size de título, si lo traía
|
||||
item_local.title = '%s [%s]' % (item_local.title, size) #Agregamos size al final del título
|
||||
@@ -866,8 +869,15 @@ def findvideos(item):
|
||||
#Ahora pintamos el link del Torrent, si lo hay
|
||||
if item_local.url: # Hay Torrent ?
|
||||
item_local.title = '[COLOR yellow][?][/COLOR] [COLOR yellow][Torrent][/COLOR] [COLOR limegreen][%s][/COLOR] [COLOR red]%s[/COLOR]' % (item_local.quality, str(item_local.language)) #Preparamos título de Torrent
|
||||
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\[?\]?\]\[\/COLOR\]', '', item_local.title) #Quitamos etiquetas vacías
|
||||
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\/COLOR\]', '', item_local.title) #Quitamos colores vacíos
|
||||
|
||||
#Preparamos título y calidad, quitamos etiquetas vacías
|
||||
item_local.title = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', item_local.title)
|
||||
item_local.title = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', item_local.title)
|
||||
item_local.title = item_local.title.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
|
||||
item_local.quality = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', item_local.quality)
|
||||
item_local.quality = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', item_local.quality)
|
||||
item_local.quality = item_local.quality.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
|
||||
|
||||
item_local.alive = "??" #Calidad del link sin verificar
|
||||
item_local.action = "play" #Visualizar vídeo
|
||||
item_local.server = "torrent" #Seridor Torrent
|
||||
|
||||
@@ -1368,12 +1368,14 @@ def findvideos(item):
|
||||
size = scrapertools.find_single_match(data, '<div class="fichas-box"><div class="entry-right"><div style="[^"]+"><span class="[^"]+"><strong>Size:<\/strong>?\s(\d+?\.?\d*?\s\w[b|B])<\/span>')
|
||||
size = size.replace(".", ",") #sustituimos . por , porque Unify lo borra
|
||||
if not size:
|
||||
size = scrapertools.find_single_match(item.quality, '\s\[(\d+,?\d*?\s\w[b|B])\]')
|
||||
size = scrapertools.find_single_match(item.quality, '\s\[(\d+,?\d*?\s\w\s?[b|B])\]')
|
||||
if not size:
|
||||
size = generictools.get_torrent_size(item.url) #Buscamos el tamaño en el .torrent
|
||||
if size:
|
||||
item.title = re.sub(r'\s\[\d+,?\d*?\s\w[b|B]\]', '', item.title) #Quitamos size de título, si lo traía
|
||||
item.title = '%s [%s]' % (item.title, size) #Agregamos size al final del título
|
||||
size = size.replace('GB', 'G B').replace('Gb', 'G b').replace('MB', 'M B').replace('Mb', 'M b')
|
||||
item.quality = re.sub(r'\s\[\d+,?\d*?\s\w[b|B]\]', '', item.quality) #Quitamos size de calidad, si lo traía
|
||||
item.quality = re.sub(r'\s\[\d+,?\d*?\s\w\s?[b|B]\]', '', item.quality) #Quitamos size de calidad, si lo traía
|
||||
|
||||
#Llamamos al método para crear el título general del vídeo, con toda la información obtenida de TMDB
|
||||
item, itemlist = generictools.post_tmdb_findvideos(item, itemlist)
|
||||
@@ -1399,8 +1401,15 @@ def findvideos(item):
|
||||
else:
|
||||
quality = item_local.quality
|
||||
item_local.title = '[COLOR yellow][?][/COLOR] [COLOR yellow][Torrent][/COLOR] [COLOR limegreen][%s][/COLOR] [COLOR red]%s[/COLOR]' % (quality, str(item_local.language)) #Preparamos título de Torrent
|
||||
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\[?\]?\]\[\/COLOR\]', '', item_local.title).strip() #Quitamos etiquetas vacías
|
||||
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\/COLOR\]', '', item_local.title).strip() #Quitamos colores vacíos
|
||||
|
||||
#Preparamos título y calidad, quitamos etiquetas vacías
|
||||
item_local.title = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', item_local.title)
|
||||
item_local.title = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', item_local.title)
|
||||
item_local.title = item_local.title.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
|
||||
quality = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', quality)
|
||||
quality = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', quality)
|
||||
quality = quality.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
|
||||
|
||||
item_local.alive = "??" #Calidad del link sin verificar
|
||||
item_local.action = "play" #Visualizar vídeo
|
||||
item_local.server = "torrent" #Servidor
|
||||
@@ -1485,9 +1494,15 @@ def findvideos(item):
|
||||
item_local.action = "play"
|
||||
item_local.server = servidor
|
||||
item_local.url = enlace
|
||||
item_local.title = item_local.title.replace("[]", "").strip()
|
||||
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\[?\]?\]\[\/COLOR\]', '', item_local.title).strip()
|
||||
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\/COLOR\]', '', item_local.title).strip()
|
||||
|
||||
#Preparamos título y calidad, quitamos etiquetas vacías
|
||||
item_local.title = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', item_local.title)
|
||||
item_local.title = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', item_local.title)
|
||||
item_local.title = item_local.title.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
|
||||
item_local.quality = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', item_local.quality)
|
||||
item_local.quality = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', item_local.quality)
|
||||
item_local.quality = item_local.quality.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
|
||||
|
||||
itemlist.append(item_local.clone())
|
||||
|
||||
except:
|
||||
@@ -1582,9 +1597,16 @@ def findvideos(item):
|
||||
item_local.action = "play"
|
||||
item_local.server = servidor
|
||||
item_local.url = enlace
|
||||
item_local.title = parte_title.replace("[]", "").strip()
|
||||
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\[?\]?\]\[\/COLOR\]', '', item_local.title).strip()
|
||||
item_local.title = re.sub(r'\[COLOR \w+\]-\[\/COLOR\]', '', item_local.title).strip()
|
||||
item_local.title = parte_title.strip()
|
||||
|
||||
#Preparamos título y calidad, quitamos etiquetas vacías
|
||||
item_local.title = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', item_local.title)
|
||||
item_local.title = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', item_local.title)
|
||||
item_local.title = item_local.title.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
|
||||
item_local.quality = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', item_local.quality)
|
||||
item_local.quality = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', item_local.quality)
|
||||
item_local.quality = item_local.quality.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
|
||||
|
||||
itemlist.append(item_local.clone())
|
||||
|
||||
except:
|
||||
|
||||
@@ -89,6 +89,7 @@ def search(item, texto):
logger.info()
texto = texto.replace(" ", "+")
item.url = host + "/search/%s" % texto
if item.contentType == '': item.contentType = 'movie'
try:
return scraper(item)
# Se captura la excepción, para no interrumpir al buscador global si un canal falla

2
plugin.video.alfa/channels/pelisipad.py
Executable file → Normal file
@@ -519,6 +519,7 @@ def findvideos(item):
|
||||
if item.video_urls:
|
||||
import random
|
||||
import base64
|
||||
|
||||
item.video_urls.sort(key=lambda it: (it[1], random.random()), reverse=True)
|
||||
i = 0
|
||||
actual_quality = ""
|
||||
@@ -534,6 +535,7 @@ def findvideos(item):
|
||||
title += " [COLOR green]Mirror %s[/COLOR] - %s" % (str(i + 1), item.fulltitle)
|
||||
url = vid % "%s" % base64.b64decode("dHQ9MTQ4MDE5MDQ1MSZtbT1NRzZkclhFand6QmVzbmxSMHNZYXhBJmJiPUUwb1dVVVgx"
|
||||
"WTBCQTdhWENpeU9paUE=")
|
||||
url += '|User-Agent=%s' % httptools.get_user_agent
|
||||
itemlist.append(item.clone(title=title, action="play", url=url, video_urls=""))
|
||||
i += 1
|
||||
|
||||
|
||||
@@ -356,7 +356,7 @@ def get_links_by_language(item, data):
patron = 'data-source=(.*?)data.*?srt=(.*?)data-iframe.*?Opci.*?<.*?hidden>[^\(]\((.*?)\)'
matches = re.compile(patron, re.DOTALL).findall(data)
if language in IDIOMAS:
language == IDIOMAS[language]
language = IDIOMAS[language]

for url, sub, quality in matches:
if 'http' not in url:
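The change above fixes a classic slip: the old line used == (a comparison whose result is discarded), so the language code was never translated through IDIOMAS. Minimal illustration; the mapping values here mirror the idio table used elsewhere in this diff but are only a stand-in:

IDIOMAS = {'es-es': 'ESP', 'es-mx': 'LAT', 'en': 'VO'}
language = 'es-es'
language == IDIOMAS[language]   # old code: evaluates a boolean and throws it away
language = IDIOMAS[language]    # fixed: language is now 'ESP'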
@@ -403,7 +403,7 @@ def findvideos(item):
|
||||
i.quality) )
|
||||
# Requerido para FilterTools
|
||||
|
||||
itemlist = filtertools.get_links(video_list, item, list_language)
|
||||
video_list = filtertools.get_links(video_list, item, list_language)
|
||||
|
||||
# Requerido para AutoPlay
|
||||
|
||||
|
||||
@@ -9,16 +9,16 @@ from channelselector import get_thumb
|
||||
from channels import autoplay
|
||||
from channels import filtertools
|
||||
from core import httptools
|
||||
from core import jsontools
|
||||
from core import scrapertools
|
||||
from core import servertools
|
||||
from core import tmdb
|
||||
from core.item import Item
|
||||
from platformcode import config, logger
|
||||
from lib import jsunpack
|
||||
from platformcode import config, logger, platformtools
|
||||
|
||||
|
||||
idio = {'es-mx': 'LAT','es-es': 'ESP','en': 'VO'}
|
||||
cali = {'poor': 'SD','low': 'SD','high': 'HD'}
|
||||
cali = {'poor': 'SD','low': 'SD','medium': 'HD','high': 'HD'}
|
||||
|
||||
list_language = idio.values()
|
||||
list_quality = ["SD","HD"]
|
||||
@@ -44,9 +44,17 @@ def mainlist(item):
|
||||
itemlist.append(Item(channel = item.channel, title = "Por género", action = "generos", url = host, extra = "Genero", thumbnail = get_thumb("genres", auto = True) ))
|
||||
itemlist.append(Item(channel = item.channel, title = ""))
|
||||
itemlist.append(Item(channel = item.channel, title = "Buscar", action = "search", url = host + "/search?term=", thumbnail = get_thumb("search", auto = True)))
|
||||
itemlist.append(item.clone(title="Configurar canal...", text_color="gold", action="configuracion", folder=False))
|
||||
autoplay.show_option(item.channel, itemlist)
|
||||
return itemlist
|
||||
|
||||
|
||||
def configuracion(item):
|
||||
ret = platformtools.show_channel_settings()
|
||||
platformtools.itemlist_refresh()
|
||||
return ret
|
||||
|
||||
|
||||
def destacadas(item):
|
||||
logger.info()
|
||||
itemlist = []
|
||||
@@ -178,12 +186,10 @@ def findvideos(item):
|
||||
dict = jsontools.load(bloque)
|
||||
urlx = httptools.downloadpage(host + dict[0]["url"]) #Para que pueda saltar el cloudflare, se tiene que descargar la página completa
|
||||
for datos in dict:
|
||||
url1 = httptools.downloadpage(host + datos["url"], follow_redirects=False, only_headers=True).headers.get("location", "")
|
||||
titulo = "Ver en: %s (" + cali[datos["quality"]] + ") (" + idio[datos["audio"]] + ")"
|
||||
text_color = "white"
|
||||
if "youtube" in url1:
|
||||
titulo = "Ver trailer: %s"
|
||||
text_color = "yellow"
|
||||
url1 = datos["url"]
|
||||
hostname = scrapertools.find_single_match(datos["hostname"].replace("www.",""), "(.*?)\.")
|
||||
if hostname == "my": hostname = "mailru"
|
||||
titulo = "Ver en: " + hostname.capitalize() + " (" + cali[datos["quality"]] + ") (" + idio[datos["audio"]] + ")"
|
||||
itemlist.append(
|
||||
item.clone(channel = item.channel,
|
||||
action = "play",
|
||||
@@ -192,7 +198,6 @@ def findvideos(item):
|
||||
title = titulo,
|
||||
url = url1
|
||||
))
|
||||
itemlist = servertools.get_servers_itemlist(itemlist, lambda i: i.title % i.server.capitalize())
|
||||
itemlist.sort(key=lambda it: (it.language, it.server))
|
||||
tmdb.set_infoLabels(itemlist, __modo_grafico__)
|
||||
# Requerido para FilterTools
|
||||
@@ -217,5 +222,11 @@ def findvideos(item):
|
||||
|
||||
|
||||
def play(item):
|
||||
item.thumbnail = item.contentThumbnail
|
||||
return [item]
|
||||
itemlist = []
|
||||
url1 = httptools.downloadpage(host + item.url, follow_redirects=False, only_headers=True).headers.get("location", "")
|
||||
if "storage" in url1:
|
||||
url1 = scrapertools.find_single_match(url1, "src=(.*mp4)").replace("%3A",":").replace("%2F","/")
|
||||
itemlist.append(item.clone(url=url1))
|
||||
itemlist = servertools.get_servers_itemlist(itemlist)
|
||||
itemlist[0].thumbnail = item.contentThumbnail
|
||||
return itemlist
|
||||
|
||||
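The rewritten play() above resolves the real media URL by asking the downloader for headers only and reading the Location header, instead of following the redirect. A minimal sketch of that pattern (assumes httptools.downloadpage supports these keyword arguments, as used in this diff):

from core import httptools

def resolve_redirect(url):
    response = httptools.downloadpage(url, follow_redirects=False, only_headers=True)
    # Empty string when the server did not answer with a redirect
    return response.headers.get("location", "")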
@@ -143,6 +143,85 @@ def settings(item):
|
||||
|
||||
|
||||
def setting_channel(item):
|
||||
if config.get_platform(True)['num_version'] >= 17.0: # A partir de Kodi 16 se puede usar multiselect, y de 17 con preselect
|
||||
return setting_channel_new(item)
|
||||
else:
|
||||
return setting_channel_old(item)
|
||||
|
||||
def setting_channel_new(item):
|
||||
import channelselector, xbmcgui
|
||||
from core import channeltools
|
||||
|
||||
# Cargar lista de opciones (canales activos del usuario y que permitan búsqueda global)
|
||||
# ------------------------
|
||||
lista = []; ids = []; lista_lang = []
|
||||
channels_list = channelselector.filterchannels('all')
|
||||
for channel in channels_list:
|
||||
channel_parameters = channeltools.get_channel_parameters(channel.channel)
|
||||
|
||||
# No incluir si en la configuracion del canal no existe "include_in_global_search"
|
||||
if not channel_parameters['include_in_global_search']:
|
||||
continue
|
||||
|
||||
lbl = '%s' % channel_parameters['language']
|
||||
lbl += ' %s' % ', '.join(config.get_localized_category(categ) for categ in channel_parameters['categories'])
|
||||
|
||||
it = xbmcgui.ListItem(channel.title, lbl)
|
||||
it.setArt({ 'thumb': channel.thumbnail, 'fanart': channel.fanart })
|
||||
lista.append(it)
|
||||
ids.append(channel.channel)
|
||||
lista_lang.append(channel_parameters['language'])
|
||||
|
||||
# Diálogo para pre-seleccionar
|
||||
# ----------------------------
|
||||
preselecciones_std = ['Modificar selección actual', 'Modificar partiendo de Todos', 'Modificar partiendo de Ninguno', 'Modificar partiendo de Castellano', 'Modificar partiendo de Latino']
|
||||
if item.action == 'setting_channel':
|
||||
# Configuración de los canales incluídos en la búsqueda
|
||||
preselecciones = preselecciones_std
|
||||
presel_values = [1, 2, 3, 4, 5]
|
||||
else:
|
||||
# Llamada desde "buscar en otros canales" (se puede saltar la selección e ir directo a la búsqueda)
|
||||
preselecciones = ['Buscar con la selección actual'] + preselecciones_std
|
||||
presel_values = [0, 1, 2, 3, 4, 5]
|
||||
|
||||
ret = platformtools.dialog_select(config.get_localized_string(59994), preselecciones)
|
||||
if ret == -1: return False # pedido cancel
|
||||
if presel_values[ret] == 0: return True # continuar sin modificar
|
||||
elif presel_values[ret] == 3: preselect = []
|
||||
elif presel_values[ret] == 2: preselect = range(len(ids))
|
||||
elif presel_values[ret] in [4, 5]:
|
||||
busca = 'cast' if presel_values[ret] == 4 else 'lat'
|
||||
preselect = []
|
||||
for i, lg in enumerate(lista_lang):
|
||||
if busca in lg or '*' in lg:
|
||||
preselect.append(i)
|
||||
else:
|
||||
preselect = []
|
||||
for i, canal in enumerate(ids):
|
||||
channel_status = config.get_setting('include_in_global_search', canal)
|
||||
if channel_status:
|
||||
preselect.append(i)
|
||||
|
||||
# Diálogo para seleccionar
|
||||
# ------------------------
|
||||
ret = xbmcgui.Dialog().multiselect(config.get_localized_string(59994), lista, preselect=preselect, useDetails=True)
|
||||
if ret == None: return False # pedido cancel
|
||||
seleccionados = [ids[i] for i in ret]
|
||||
|
||||
# Guardar cambios en canales para la búsqueda
|
||||
# -------------------------------------------
|
||||
for canal in ids:
|
||||
channel_status = config.get_setting('include_in_global_search', canal)
|
||||
if channel_status is None: channel_status = True
|
||||
|
||||
if channel_status and canal not in seleccionados:
|
||||
config.set_setting('include_in_global_search', False, canal)
|
||||
elif not channel_status and canal in seleccionados:
|
||||
config.set_setting('include_in_global_search', True, canal)
|
||||
|
||||
return True
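Hypothetical helper (the name is illustrative, not from the add-on) showing how the per-channel flag saved above is read back, keeping the same default-to-enabled behaviour when the setting was never stored:

from platformcode import config

def channels_for_global_search(all_channel_ids):
    selected = []
    for canal in all_channel_ids:
        status = config.get_setting('include_in_global_search', canal)
        if status is None or status:   # unset counts as enabled, as in the save loop above
            selected.append(canal)
    return selected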
|
||||
|
||||
def setting_channel_old(item):
|
||||
channels_path = os.path.join(config.get_runtime_path(), "channels", '*.json')
|
||||
channel_language = config.get_setting("channel_language", default="all")
|
||||
|
||||
@@ -204,6 +283,7 @@ def save_settings(item, dict_values):
|
||||
config.set_setting("include_in_global_search", dict_values[v], v)
|
||||
|
||||
progreso.close()
|
||||
return True
|
||||
|
||||
|
||||
def cb_custom_button(item, dict_values):
|
||||
@@ -354,8 +434,8 @@ def do_search(item, categories=None):
|
||||
categories = ["Películas"]
|
||||
setting_item = Item(channel=item.channel, title=config.get_localized_string(59994), folder=False,
|
||||
thumbnail=get_thumb("search.png"))
|
||||
setting_channel(setting_item)
|
||||
|
||||
if not setting_channel(setting_item):
|
||||
return False
|
||||
|
||||
if categories is None:
|
||||
categories = []
|
||||
@@ -474,8 +554,8 @@ def do_search(item, categories=None):
|
||||
# es compatible tanto con versiones antiguas de python como nuevas
|
||||
if multithread:
|
||||
pendent = [a for a in threads if a.isAlive()]
|
||||
t = float(100) / len(pendent)
|
||||
while pendent:
|
||||
if len(pendent) > 0: t = float(100) / len(pendent)
|
||||
while len(pendent) > 0:
|
||||
index = (len(threads) - len(pendent)) + 1
|
||||
percentage = int(math.ceil(index * t))
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"id": "seriecanal",
|
||||
"name": "Seriecanal",
|
||||
"active": true,
|
||||
"active": false,
|
||||
"adult": false,
|
||||
"language": ["cast"],
|
||||
"thumbnail": "http://i.imgur.com/EwMK8Yd.png",
|
||||
|
||||
@@ -4,12 +4,14 @@ import re
|
||||
import urllib
|
||||
import urlparse
|
||||
|
||||
from core import httptools
|
||||
from core import scrapertools
|
||||
from core import servertools
|
||||
from core import tmdb
|
||||
from platformcode import config, logger
|
||||
|
||||
__modo_grafico__ = config.get_setting('modo_grafico', "seriecanal")
|
||||
__perfil__ = config.get_setting('perfil', "descargasmix")
|
||||
__perfil__ = config.get_setting('perfil', "seriecanal")
|
||||
|
||||
# Fijar perfil de color
|
||||
perfil = [['0xFFFFE6CC', '0xFFFFCE9C', '0xFF994D00'],
|
||||
@@ -17,23 +19,21 @@ perfil = [['0xFFFFE6CC', '0xFFFFCE9C', '0xFF994D00'],
|
||||
['0xFF58D3F7', '0xFF2E9AFE', '0xFF2E64FE']]
|
||||
color1, color2, color3 = perfil[__perfil__]
|
||||
|
||||
URL_BASE = "http://www.seriecanal.com/"
|
||||
host = "https://www.seriecanal.com/"
|
||||
|
||||
|
||||
def login():
|
||||
logger.info()
|
||||
data = scrapertools.downloadpage(URL_BASE)
|
||||
data = httptools.downloadpage(host).data
|
||||
if "Cerrar Sesion" in data:
|
||||
return True, ""
|
||||
|
||||
usuario = config.get_setting("user", "seriecanal")
|
||||
password = config.get_setting("password", "seriecanal")
|
||||
if usuario == "" or password == "":
|
||||
return False, 'Regístrate en www.seriecanal.com e introduce tus datos en "Configurar Canal"'
|
||||
else:
|
||||
post = urllib.urlencode({'username': usuario, 'password': password})
|
||||
data = scrapertools.downloadpage("http://www.seriecanal.com/index.php?page=member&do=login&tarea=acceder",
|
||||
post=post)
|
||||
data = httptools.downloadpage(host + "index.php?page=member&do=login&tarea=acceder", post=post).data
|
||||
if "Bienvenid@, se ha identificado correctamente en nuestro sistema" in data:
|
||||
return True, ""
|
||||
else:
|
||||
@@ -44,18 +44,15 @@ def mainlist(item):
|
||||
logger.info()
|
||||
itemlist = []
|
||||
item.text_color = color1
|
||||
|
||||
result, message = login()
|
||||
if result:
|
||||
itemlist.append(item.clone(action="series", title="Últimos episodios", url=URL_BASE))
|
||||
itemlist.append(item.clone(action="series", title="Últimos episodios", url=host))
|
||||
itemlist.append(item.clone(action="genero", title="Series por género"))
|
||||
itemlist.append(item.clone(action="alfabetico", title="Series por orden alfabético"))
|
||||
itemlist.append(item.clone(action="search", title="Buscar..."))
|
||||
else:
|
||||
itemlist.append(item.clone(action="", title=message, text_color="red"))
|
||||
|
||||
itemlist.append(item.clone(action="configuracion", title="Configurar canal...", text_color="gold", folder=False))
|
||||
|
||||
return itemlist
|
||||
|
||||
|
||||
@@ -68,7 +65,7 @@ def configuracion(item):
|
||||
|
||||
def search(item, texto):
|
||||
logger.info()
|
||||
item.url = "http://www.seriecanal.com/index.php?page=portada&do=category&method=post&category_id=0&order=" \
|
||||
item.url = host + "index.php?page=portada&do=category&method=post&category_id=0&order=" \
|
||||
"C_Create&view=thumb&pgs=1&p2=1"
|
||||
try:
|
||||
post = "keyserie=" + texto
|
||||
@@ -85,27 +82,24 @@ def search(item, texto):
|
||||
def genero(item):
|
||||
logger.info()
|
||||
itemlist = []
|
||||
data = scrapertools.downloadpage(URL_BASE)
|
||||
data = httptools.downloadpage(host).data
|
||||
data = scrapertools.find_single_match(data, '<ul class="tag-cloud">(.*?)</ul>')
|
||||
|
||||
matches = scrapertools.find_multiple_matches(data, '<a.*?href="([^"]+)">([^"]+)</a>')
|
||||
for scrapedurl, scrapedtitle in matches:
|
||||
scrapedtitle = scrapedtitle.capitalize()
|
||||
url = urlparse.urljoin(URL_BASE, scrapedurl)
|
||||
url = urlparse.urljoin(host, scrapedurl)
|
||||
itemlist.append(item.clone(action="series", title=scrapedtitle, url=url))
|
||||
|
||||
return itemlist
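A small self-contained sketch of the scrapertools pattern used in genero() above; the sample HTML is made up and only illustrates what the two regexes return:

from core import scrapertools

sample = '<ul class="tag-cloud"><a href="/genero/accion">accion</a><a href="/genero/drama">drama</a></ul>'
block = scrapertools.find_single_match(sample, '<ul class="tag-cloud">(.*?)</ul>')
links = scrapertools.find_multiple_matches(block, '<a.*?href="([^"]+)">([^"]+)</a>')
# links == [('/genero/accion', 'accion'), ('/genero/drama', 'drama')]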
|
||||
|
||||
|
||||
def alfabetico(item):
|
||||
logger.info()
|
||||
itemlist = []
|
||||
data = scrapertools.downloadpage(URL_BASE)
|
||||
data = httptools.downloadpage(host).data
|
||||
data = scrapertools.find_single_match(data, '<ul class="pagination pagination-sm" style="margin:5px 0;">(.*?)</ul>')
|
||||
|
||||
matches = scrapertools.find_multiple_matches(data, '<a.*?href="([^"]+)">([^"]+)</a>')
|
||||
for scrapedurl, scrapedtitle in matches:
|
||||
url = urlparse.urljoin(URL_BASE, scrapedurl)
|
||||
url = urlparse.urljoin(host, scrapedurl)
|
||||
itemlist.append(item.clone(action="series", title=scrapedtitle, url=url))
|
||||
return itemlist
|
||||
|
||||
@@ -115,45 +109,38 @@ def series(item):
|
||||
itemlist = []
|
||||
item.infoLabels = {}
|
||||
item.text_color = color2
|
||||
|
||||
if item.extra != "":
|
||||
data = scrapertools.downloadpage(item.url, post=item.extra)
|
||||
data = httptools.downloadpage(item.url, post=item.extra).data
|
||||
else:
|
||||
data = scrapertools.downloadpage(item.url)
|
||||
data = httptools.downloadpage(item.url).data
|
||||
data = re.sub(r"\n|\r|\t|\s{2}| ", "", data)
|
||||
|
||||
patron = '<div class="item-inner" style="margin: 0 20px 0px 0\;"><img src="([^"]+)".*?' \
|
||||
'href="([^"]+)" title="Click para Acceder a la Ficha(?:\|([^"]+)|)".*?' \
|
||||
'<strong>([^"]+)</strong></a>.*?<strong>([^"]+)</strong></p>.*?' \
|
||||
'<p class="text-warning".*?\;">(.*?)</p>'
|
||||
|
||||
matches = scrapertools.find_multiple_matches(data, patron)
|
||||
|
||||
for scrapedthumbnail, scrapedurl, scrapedplot, scrapedtitle, scrapedtemp, scrapedepi in matches:
|
||||
title = scrapedtitle + " - " + scrapedtemp + " - " + scrapedepi
|
||||
url = urlparse.urljoin(URL_BASE, scrapedurl)
|
||||
temporada = scrapertools.find_single_match(scrapedtemp, "(\d+)")
|
||||
new_item = item.clone()
|
||||
new_item.contentType = "tvshow"
|
||||
url = urlparse.urljoin(host, scrapedurl)
|
||||
temporada = scrapertools.find_single_match(scrapedtemp, "\d+")
|
||||
episode = scrapertools.find_single_match(scrapedepi, "\d+")
|
||||
#item.contentType = "tvshow"
|
||||
if temporada != "":
|
||||
new_item.infoLabels['season'] = temporada
|
||||
new_item.contentType = "season"
|
||||
|
||||
logger.debug("title=[" + title + "], url=[" + url + "], thumbnail=[" + scrapedthumbnail + "]")
|
||||
itemlist.append(new_item.clone(action="findvideos", title=title, fulltitle=scrapedtitle, url=url,
|
||||
thumbnail=scrapedthumbnail, plot=scrapedplot, contentTitle=scrapedtitle,
|
||||
context=["buscar_trailer"], show=scrapedtitle))
|
||||
|
||||
try:
|
||||
from core import tmdb
|
||||
tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)
|
||||
except:
|
||||
pass
|
||||
item.infoLabels['season'] = temporada
|
||||
#item.contentType = "season"
|
||||
if episode != "":
|
||||
item.infoLabels['episode'] = episode
|
||||
#item.contentType = "episode"
|
||||
itemlist.append(item.clone(action="findvideos", title=title, url=url,
|
||||
contentSerieName=scrapedtitle,
|
||||
context=["buscar_trailer"]))
|
||||
tmdb.set_infoLabels(itemlist)
|
||||
# Extra marca siguiente página
|
||||
next_page = scrapertools.find_single_match(data, '<a href="([^"]+)" (?:onclick="return false;" |)title='
|
||||
'"Página Siguiente"')
|
||||
if next_page != "/":
|
||||
url = urlparse.urljoin(URL_BASE, next_page)
|
||||
url = urlparse.urljoin(host, next_page)
|
||||
itemlist.append(item.clone(action="series", title=">> Siguiente", url=url, text_color=color3))
|
||||
|
||||
return itemlist
|
||||
@@ -163,10 +150,8 @@ def findvideos(item):
|
||||
logger.info()
|
||||
itemlist = []
|
||||
item.text_color = color3
|
||||
|
||||
data = scrapertools.downloadpage(item.url)
|
||||
data = httptools.downloadpage(item.url).data
|
||||
data = scrapertools.decodeHtmlentities(data)
|
||||
|
||||
# Busca en la seccion descarga/torrent
|
||||
data_download = scrapertools.find_single_match(data, '<th>Episodio - Enlaces de Descarga</th>(.*?)</table>')
|
||||
patron = '<p class="item_name".*?<a href="([^"]+)".*?>([^"]+)</a>'
|
||||
@@ -178,18 +163,15 @@ def findvideos(item):
|
||||
else:
|
||||
scrapedtitle = "[Torrent] " + scrapedepi
|
||||
scrapedtitle = scrapertools.htmlclean(scrapedtitle)
|
||||
|
||||
new_item.infoLabels['episode'] = scrapertools.find_single_match(scrapedtitle, "Episodio (\d+)")
|
||||
logger.debug("title=[" + scrapedtitle + "], url=[" + scrapedurl + "]")
|
||||
itemlist.append(new_item.clone(action="play", title=scrapedtitle, url=scrapedurl, server="torrent",
|
||||
contentType="episode"))
|
||||
|
||||
# Busca en la seccion online
|
||||
data_online = scrapertools.find_single_match(data, "<th>Enlaces de Visionado Online</th>(.*?)</table>")
|
||||
patron = '<a href="([^"]+)\\n.*?src="([^"]+)".*?' \
|
||||
'title="Enlace de Visionado Online">([^"]+)</a>'
|
||||
matches = scrapertools.find_multiple_matches(data_online, patron)
|
||||
|
||||
for scrapedurl, scrapedthumb, scrapedtitle in matches:
|
||||
# Deshecha enlaces de trailers
|
||||
scrapedtitle = scrapertools.htmlclean(scrapedtitle)
|
||||
@@ -200,7 +182,6 @@ def findvideos(item):
|
||||
|
||||
new_item.infoLabels['episode'] = scrapertools.find_single_match(scrapedtitle, "Episodio (\d+)")
|
||||
itemlist.append(new_item.clone(action="play", title=title, url=scrapedurl, contentType="episode"))
|
||||
|
||||
# Comprueba si hay otras temporadas
|
||||
if not "No hay disponible ninguna Temporada adicional" in data:
|
||||
data_temp = scrapertools.find_single_match(data, '<div class="panel panel-success">(.*?)</table>')
|
||||
@@ -210,7 +191,7 @@ def findvideos(item):
|
||||
matches = scrapertools.find_multiple_matches(data_temp, patron)
|
||||
for scrapedurl, scrapedtitle in matches:
|
||||
new_item = item.clone()
|
||||
url = urlparse.urljoin(URL_BASE, scrapedurl)
|
||||
url = urlparse.urljoin(host, scrapedurl)
|
||||
scrapedtitle = scrapedtitle.capitalize()
|
||||
temporada = scrapertools.find_single_match(scrapedtitle, "Temporada (\d+)")
|
||||
if temporada != "":
|
||||
@@ -218,13 +199,7 @@ def findvideos(item):
|
||||
new_item.infoLabels['episode'] = ""
|
||||
itemlist.append(new_item.clone(action="findvideos", title=scrapedtitle, url=url, text_color="red",
|
||||
contentType="season"))
|
||||
|
||||
try:
|
||||
from core import tmdb
|
||||
tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)
|
||||
except:
|
||||
pass
|
||||
|
||||
tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)
|
||||
new_item = item.clone()
|
||||
if config.is_xbmc():
|
||||
new_item.contextual = True
|
||||
@@ -236,7 +211,6 @@ def findvideos(item):
|
||||
def play(item):
|
||||
logger.info()
|
||||
itemlist = []
|
||||
|
||||
if item.extra == "torrent":
|
||||
itemlist.append(item.clone())
|
||||
else:
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import re
|
||||
import urlparse
|
||||
|
||||
from core import httptools
|
||||
from core import scrapertools
|
||||
from core import servertools
|
||||
@@ -12,10 +9,8 @@ from platformcode import logger
|
||||
|
||||
host = 'http://www.sipeliculas.com'
|
||||
|
||||
|
||||
def mainlist(item):
|
||||
logger.info()
|
||||
|
||||
itemlist = []
|
||||
itemlist.append(item.clone(title="Novedades", action="lista", url=host + "/cartelera/"))
|
||||
itemlist.append(item.clone(title="Actualizadas", action="lista", url=host + "/peliculas-actualizadas/"))
|
||||
@@ -24,7 +19,6 @@ def mainlist(item):
|
||||
itemlist.append(item.clone(title="Año", action="menuseccion", url=host, extra="/estrenos-gratis/"))
|
||||
itemlist.append(item.clone(title="Alfabetico", action="alfabetica", url=host + '/mirar/'))
|
||||
itemlist.append(item.clone(title="Buscar", action="search", url=host + "/ver/"))
|
||||
|
||||
return itemlist
|
||||
|
||||
|
||||
@@ -33,7 +27,6 @@ def alfabetica(item):
|
||||
itemlist = []
|
||||
for letra in "1abcdefghijklmnopqrstuvwxyz":
|
||||
itemlist.append(item.clone(title=letra.upper(), url=item.url + letra, action="lista"))
|
||||
|
||||
return itemlist
|
||||
|
||||
|
||||
@@ -42,7 +35,6 @@ def menuseccion(item):
|
||||
itemlist = []
|
||||
seccion = item.extra
|
||||
data = httptools.downloadpage(item.url).data
|
||||
|
||||
if seccion == '/online/':
|
||||
data = scrapertools.find_single_match(data,
|
||||
'<h2 class="[^"]+"><i class="[^"]+"></i>Películas por géneros<u class="[^"]+"></u></h2>(.*?)<ul class="abc">')
|
||||
@@ -50,8 +42,7 @@ def menuseccion(item):
|
||||
elif seccion == '/estrenos-gratis/':
|
||||
data = scrapertools.find_single_match(data, '<ul class="lista-anio" id="lista-anio">(.*?)</ul>')
|
||||
patron = '<li ><a href="([^"]+)" title="[^"]+">([^<]+)</a></li>'
|
||||
|
||||
matches = re.compile(patron, re.DOTALL).findall(data)
|
||||
matches = scrapertools.find_multiple_matches(data, patron)
|
||||
for scrapedurl, extra in matches:
|
||||
itemlist.append(Item(channel=item.channel, action='lista', title=extra, url=scrapedurl))
|
||||
return itemlist
|
||||
@@ -64,22 +55,19 @@ def lista(item):
|
||||
listado = scrapertools.find_single_match(data,
|
||||
'<div id="sipeliculas" class="borde"><div class="izquierda">(.*?)<div class="derecha"><h2')
|
||||
patron = '<a class="i" href="(.*?)".*?src="(.*?)".*?title=.*?>(.*?)<.*?span>(.*?)<.*?<p><span>(.*?)<'
|
||||
|
||||
matches = re.compile(patron, re.DOTALL).findall(listado)
|
||||
|
||||
matches = scrapertools.find_multiple_matches(listado, patron)
|
||||
for scrapedurl, scrapedthumbnail, scrapedtitle, year, plot in matches:
|
||||
itemlist.append(Item(channel=item.channel, action='findvideos', title=scrapedtitle, url=scrapedurl,
|
||||
thumbnail=scrapedthumbnail, plot=plot, contentTitle=scrapedtitle, extra=item.extra,
|
||||
itemlist.append(Item(channel=item.channel, action='findvideos', title=scrapedtitle + " (%s)" %year, url=scrapedurl,
|
||||
thumbnail=scrapedthumbnail, contentTitle=scrapedtitle, extra=item.extra,
|
||||
infoLabels ={'year':year}))
|
||||
|
||||
tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
|
||||
# Paginacion
|
||||
if itemlist != []:
|
||||
patron = '<li[^<]+<a href="([^"]+)" title="[^"]+">Siguiente[^<]+</a></li>'
|
||||
matches = re.compile(patron, re.DOTALL).findall(data)
|
||||
matches = scrapertools.find_multiple_matches(data, patron)
|
||||
if matches:
|
||||
itemlist.append(
|
||||
item.clone(title="Pagina Siguiente", action='lista', url=urlparse.urljoin(host, matches[0])))
|
||||
item.clone(title="Pagina Siguiente", action='lista', url=host + "/" + matches[0]))
|
||||
return itemlist
|
||||
|
||||
|
||||
@@ -97,11 +85,10 @@ def findvideos(item):
|
||||
logger.info()
|
||||
itemlist = []
|
||||
data = httptools.downloadpage(item.url).data
|
||||
|
||||
listado1 = scrapertools.find_single_match(data,
|
||||
'<div class="links" id="ver-mas-opciones"><h2 class="h2"><i class="[^"]+"></i>[^<]+</h2><ul class="opciones">(.*?)</ul>')
|
||||
patron1 = '<li ><a id="([^"]+)" rel="nofollow" href="([^"]+)" title="[^"]+" alt="([^"]+)"><span class="opcion"><i class="[^"]+"></i><u>[^<]+</u>[^<]+</span><span class="ico"><img src="[^"]+" alt="[^"]+"/>[^<]+</span><span>([^"]+)</span><span>([^"]+)</span></a></li>'
|
||||
matches = matches = re.compile(patron1, re.DOTALL).findall(listado1)
|
||||
matches = matches = scrapertools.find_multiple_matches(listado1, patron1)
|
||||
for vidId, vidUrl, vidServer, language, quality in matches:
|
||||
server = servertools.get_server_name(vidServer)
|
||||
if 'Sub' in language:
|
||||
@@ -109,39 +96,32 @@ def findvideos(item):
|
||||
itemlist.append(Item(channel=item.channel, action='play', url=vidUrl, extra=vidId,
|
||||
title='Ver en ' + vidServer + ' | ' + language + ' | ' + quality,
|
||||
thumbnail=item.thumbnail, server=server, language=language, quality=quality ))
|
||||
|
||||
listado2 = scrapertools.find_single_match(data, '<ul class="opciones-tab">(.*?)</ul>')
|
||||
patron2 = '<li ><a id="([^"]+)" rel="nofollow" href="([^"]+)" title="[^"]+" alt="([^"]+)"><img src="[^"]+" alt="[^"]+"/>[^<]+</a></li>'
|
||||
matches = matches = re.compile(patron2, re.DOTALL).findall(listado2)
|
||||
matches = matches = scrapertools.find_multiple_matches(listado2, patron2)
|
||||
for vidId, vidUrl, vidServer in matches:
|
||||
server = servertools.get_server_name(vidServer)
|
||||
itemlist.append(Item(channel=item.channel, action='play', url=vidUrl, extra=vidId, title='Ver en ' + vidServer,
|
||||
thumbnail=item.thumbnail, server=server))
|
||||
|
||||
for videoitem in itemlist:
|
||||
videoitem.fulltitle = item.title
|
||||
videoitem.folder = False
|
||||
|
||||
return itemlist
|
||||
|
||||
|
||||
def play(item):
|
||||
logger.info()
|
||||
itemlist = []
|
||||
|
||||
video = httptools.downloadpage(host + '/ajax.public.php', 'acc=ver_opc&f=' + item.extra).data
|
||||
logger.info("video=" + video)
|
||||
enlaces = servertools.findvideos(video)
|
||||
if enlaces:
|
||||
logger.info("server=" + enlaces[0][2])
|
||||
thumbnail = servertools.guess_server_thumbnail(video)
|
||||
# Añade al listado de XBMC
|
||||
data = httptools.downloadpage(item.url).data
|
||||
video = scrapertools.find_single_match(data, '</div><iframe src="([^"]+)')
|
||||
if video:
|
||||
itemlist.append(
|
||||
Item(channel=item.channel, action="play", title=item.title, fulltitle=item.fulltitle, url=enlaces[0][1],
|
||||
server=enlaces[0][2], thumbnail=thumbnail, folder=False))
|
||||
|
||||
item.clone(action="play", url=video, folder=False, server=""))
|
||||
itemlist = servertools.get_servers_itemlist(itemlist)
|
||||
itemlist[0].thumbnail = item.contentThumbnail
|
||||
return itemlist
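Sketch of what get_servers_itemlist() does with the single item built above (the embed URL is a placeholder): it matches item.url against the known server patterns and fills in item.server so play() can resolve it.

from core import servertools
from core.item import Item

items = [Item(channel='sipeliculas', action='play', url='https://openload.co/embed/abc123')]
items = servertools.get_servers_itemlist(items)
# items[0].server would be 'openload' if the URL matches that server's pattern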
|
||||
|
||||
|
||||
def newest(categoria):
|
||||
logger.info()
|
||||
itemlist = []
|
||||
@@ -155,16 +135,13 @@ def newest(categoria):
|
||||
item.url = host + "/online/terror/"
|
||||
else:
|
||||
return []
|
||||
|
||||
itemlist = lista(item)
|
||||
if itemlist[-1].title == "» Siguiente »":
|
||||
itemlist.pop()
|
||||
|
||||
# Se captura la excepción, para no interrumpir al canal novedades si un canal falla
|
||||
except:
|
||||
import sys
|
||||
for line in sys.exc_info():
|
||||
logger.error("{0}".format(line))
|
||||
return []
|
||||
|
||||
return itemlist
|
||||
|
||||
@@ -620,7 +620,7 @@ def play(item):
|
||||
data['a']['tt']) + \
|
||||
"&mm=" + data['a']['mm'] + "&bb=" + data['a']['bb']
|
||||
|
||||
url += "|User-Agent=Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Mobile Safari/537.36"
|
||||
url += "|User-Agent=%s" % httptools.get_user_agent()
|
||||
|
||||
itemlist.append(item.clone(action="play", server="directo", url=url, folder=False))
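The '|User-Agent=...' suffix above relies on Kodi's convention of appending HTTP headers to a playable URL after a pipe; a generic sketch (the URL and header values are placeholders):

url = 'http://example.com/video.mp4'
headers = {'User-Agent': 'Mozilla/5.0', 'Referer': 'http://example.com/'}
url += '|' + '&'.join('%s=%s' % (k, v) for k, v in headers.items())
# e.g. 'http://example.com/video.mp4|User-Agent=Mozilla/5.0&Referer=http://example.com/'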
|
||||
|
||||
|
||||
plugin.video.alfa/channels/ultrapeliculashd.json
Executable file → Normal file
@@ -19,6 +19,20 @@
|
||||
"enabled": true,
|
||||
"visible": true
|
||||
},
|
||||
{
|
||||
"id": "filter_languages",
|
||||
"type": "list",
|
||||
"label": "Mostrar enlaces en idioma...",
|
||||
"default": 0,
|
||||
"enabled": true,
|
||||
"visible": true,
|
||||
"lvalues": [
|
||||
"No filtrar",
|
||||
"LAT",
|
||||
"CAST",
|
||||
"VOSE"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "include_in_newest_latino",
|
||||
"type": "bool",
|
||||
|
||||
@@ -8,6 +8,7 @@ from core import servertools
|
||||
from core import jsontools
|
||||
from core import tmdb
|
||||
from core.item import Item
|
||||
from channels import filtertools, autoplay
|
||||
from platformcode import config, logger
|
||||
|
||||
host = 'http://www.ultrapeliculashd.com'
|
||||
@@ -63,39 +64,51 @@ tcalidad = {'1080P': 'https://s21.postimg.cc/4h1s0t1wn/hd1080.png',
|
||||
'720P': 'https://s12.postimg.cc/lthu7v4q5/hd720.png', "HD": "https://s27.postimg.cc/m2dhhkrur/image.png"}
|
||||
|
||||
|
||||
IDIOMAS = {'Latino': 'LAT', 'Español': 'CAST', 'SUB':'VOSE'}
|
||||
list_language = IDIOMAS.values()
|
||||
list_quality = ['default', '1080p']
|
||||
list_servers = ['openload','directo']
|
||||
|
||||
__comprueba_enlaces__ = config.get_setting('comprueba_enlaces', 'ultrapeliculashd')
|
||||
__comprueba_enlaces_num__ = config.get_setting('comprueba_enlaces_num', 'ultrapeliculashd')
|
||||
|
||||
def mainlist(item):
|
||||
logger.info()
|
||||
|
||||
autoplay.init(item.channel, list_servers, list_quality)
|
||||
|
||||
itemlist = []
|
||||
|
||||
itemlist.append(item.clone(title="Todas",
|
||||
action="lista",
|
||||
thumbnail='https://s18.postimg.cc/fwvaeo6qh/todas.png',
|
||||
fanart='https://s18.postimg.cc/fwvaeo6qh/todas.png',
|
||||
url=host + '/movies/'
|
||||
))
|
||||
itemlist.append(Item(channel=item.channel, title="Todas",
|
||||
action="lista",
|
||||
thumbnail='https://s18.postimg.cc/fwvaeo6qh/todas.png',
|
||||
fanart='https://s18.postimg.cc/fwvaeo6qh/todas.png',
|
||||
url=host + '/movies/'
|
||||
))
|
||||
|
||||
itemlist.append(item.clone(title="Generos",
|
||||
action="generos",
|
||||
url=host,
|
||||
thumbnail='https://s3.postimg.cc/5s9jg2wtf/generos.png',
|
||||
fanart='https://s3.postimg.cc/5s9jg2wtf/generos.png'
|
||||
))
|
||||
itemlist.append(Item(channel=item.channel, title="Generos",
|
||||
action="generos",
|
||||
url=host,
|
||||
thumbnail='https://s3.postimg.cc/5s9jg2wtf/generos.png',
|
||||
fanart='https://s3.postimg.cc/5s9jg2wtf/generos.png'
|
||||
))
|
||||
|
||||
itemlist.append(item.clone(title="Alfabetico",
|
||||
action="seccion",
|
||||
url=host,
|
||||
thumbnail='https://s17.postimg.cc/fwi1y99en/a-z.png',
|
||||
fanart='https://s17.postimg.cc/fwi1y99en/a-z.png',
|
||||
extra='alfabetico'
|
||||
))
|
||||
itemlist.append(Item(channel=item.channel, title="Alfabetico",
|
||||
action="seccion",
|
||||
url=host,
|
||||
thumbnail='https://s17.postimg.cc/fwi1y99en/a-z.png',
|
||||
fanart='https://s17.postimg.cc/fwi1y99en/a-z.png',
|
||||
extra='alfabetico'
|
||||
))
|
||||
|
||||
itemlist.append(item.clone(title="Buscar",
|
||||
action="search",
|
||||
url=host + '/?s=',
|
||||
thumbnail='https://s30.postimg.cc/pei7txpa9/buscar.png',
|
||||
fanart='https://s30.postimg.cc/pei7txpa9/buscar.png'
|
||||
))
|
||||
itemlist.append(Item(channel=item.channel, title="Buscar",
|
||||
action="search",
|
||||
url=host + '/?s=',
|
||||
thumbnail='https://s30.postimg.cc/pei7txpa9/buscar.png',
|
||||
fanart='https://s30.postimg.cc/pei7txpa9/buscar.png'
|
||||
))
|
||||
|
||||
autoplay.show_option(item.channel, itemlist)
|
||||
|
||||
return itemlist
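The change above swaps item.clone(...) for a fresh Item(...); a short sketch of the practical difference (the field values are illustrative):

from core.item import Item

parent = Item(channel='ultrapeliculashd', extra='findvideos', infoLabels={'year': 2018})
a = parent.clone(title='Todas')                    # inherits channel, extra, infoLabels
b = Item(channel=parent.channel, title='Todas')    # starts clean; only what is passed is set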
|
||||
|
||||
@@ -160,13 +173,13 @@ def generos(item):
|
||||
title = scrapedtitle
|
||||
url = scrapedurl
|
||||
if scrapedtitle not in ['PRÓXIMAMENTE', 'EN CINE']:
|
||||
itemlist.append(item.clone(action="lista",
|
||||
title=title,
|
||||
fulltitle=item.title,
|
||||
url=url,
|
||||
thumbnail=thumbnail,
|
||||
fanart=fanart
|
||||
))
|
||||
itemlist.append(Item(channel=item.channel, action="lista",
|
||||
title=title,
|
||||
fulltitle=item.title,
|
||||
url=url,
|
||||
thumbnail=thumbnail,
|
||||
fanart=fanart
|
||||
))
|
||||
return itemlist
|
||||
|
||||
|
||||
@@ -209,15 +222,33 @@ def alpha(item):
|
||||
|
||||
|
||||
def findvideos(item):
|
||||
from lib import jsunpack
|
||||
logger.info()
|
||||
itemlist = []
|
||||
data = httptools.downloadpage(item.url).data
|
||||
data = re.sub(r'"|\n|\r|\t| |<br>|\s{2,}', "", data)
|
||||
patron = '<iframe.*?rptss src=(.*?) (?:width.*?|frameborder.*?) allowfullscreen><\/iframe>'
|
||||
patron = '<div id=(option.*?) class=play.*?<iframe.*?'
|
||||
patron += 'rptss src=(.*?) (?:width.*?|frameborder.*?) allowfullscreen><\/iframe>'
|
||||
matches = re.compile(patron, re.DOTALL).findall(data)
|
||||
|
||||
for video_url in matches:
|
||||
if 'stream' in video_url and 'streamango' not in video_url:
|
||||
for option, video_url in matches:
|
||||
language = scrapertools.find_single_match(data, '#%s>.*?-->(.*?)(?:\s|<)' % option)
|
||||
if 'sub' in language.lower():
|
||||
language = 'SUB'
|
||||
language = IDIOMAS[language]
|
||||
if 'ultrapeliculashd' in video_url:
|
||||
new_data = httptools.downloadpage(video_url).data
|
||||
new_data = re.sub(r'"|\n|\r|\t| |<br>|\s{2,}', "", new_data)
|
||||
if 'drive' not in video_url:
|
||||
quality= '1080p'
|
||||
packed = scrapertools.find_single_match(new_data, '<script>(eval\(.*?)eval')
|
||||
unpacked = jsunpack.unpack(packed)
|
||||
url = scrapertools.find_single_match(unpacked, 'file:(http.?:.*?)\}')
|
||||
else:
|
||||
quality= '1080p'
|
||||
url = scrapertools.find_single_match(new_data, '</div><iframe src=([^\s]+) webkitallowfullscreen')
|
||||
|
||||
elif 'stream' in video_url and 'streamango' not in video_url:
|
||||
data = httptools.downloadpage('https:'+video_url).data
|
||||
if not 'iframe' in video_url:
|
||||
new_url=scrapertools.find_single_match(data, 'iframe src="(.*?)"')
|
||||
@@ -233,26 +264,42 @@ def findvideos(item):
|
||||
url = url.replace('download', 'preview')+headers_string
|
||||
|
||||
sub = scrapertools.find_single_match(new_data, 'file:.*?"(.*?srt)"')
|
||||
new_item = (Item(title=item.title, url=url, quality=quality, subtitle=sub, server='directo'))
|
||||
new_item = (Item(title=item.title, url=url, quality=quality, subtitle=sub, server='directo',
|
||||
language = language))
|
||||
itemlist.append(new_item)
|
||||
|
||||
else:
|
||||
itemlist.extend(servertools.find_video_items(data=video_url))
|
||||
url = video_url
|
||||
quality = 'default'
|
||||
|
||||
for videoitem in itemlist:
|
||||
videoitem.channel = item.channel
|
||||
videoitem.action = 'play'
|
||||
videoitem.thumbnail = item.thumbnail
|
||||
videoitem.infoLabels = item.infoLabels
|
||||
videoitem.title = item.contentTitle + ' (' + videoitem.server + ')'
|
||||
if 'youtube' in videoitem.url:
|
||||
videoitem.title = '[COLOR orange]Trailer en Youtube[/COLOR]'
|
||||
if not config.get_setting("unify"):
|
||||
title = ' [%s] [%s]' % (quality, language)
|
||||
else:
|
||||
title = ''
|
||||
|
||||
itemlist = servertools.get_servers_itemlist(itemlist)
|
||||
new_item = (Item(channel=item.channel, title='%s'+title, url=url, action='play', quality=quality,
|
||||
language=language, infoLabels=item.infoLabels))
|
||||
itemlist.append(new_item)
|
||||
|
||||
|
||||
itemlist = servertools.get_servers_itemlist(itemlist, lambda i: i.title % i.server.capitalize())
|
||||
|
||||
if __comprueba_enlaces__:
|
||||
itemlist = servertools.check_list_links(itemlist, __comprueba_enlaces_num__)
|
||||
|
||||
# Requerido para FilterTools
|
||||
|
||||
itemlist = filtertools.get_links(itemlist, item, list_language)
|
||||
|
||||
# Requerido para AutoPlay
|
||||
|
||||
autoplay.start(itemlist, item)
|
||||
|
||||
if config.get_videolibrary_support() and len(itemlist) > 0 and item.extra != 'findvideos':
|
||||
itemlist.append(
|
||||
Item(channel=item.channel, title='[COLOR yellow]Añadir esta pelicula a la videoteca[/COLOR]', url=item.url,
|
||||
action="add_pelicula_to_library", extra="findvideos", contentTitle=item.contentTitle))
|
||||
|
||||
return itemlist
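A sketch of the title templating used in the loop above: '%s' is left as a placeholder and get_servers_itemlist() fills it per item through the lambda (the URL is a placeholder):

from core import servertools
from core.item import Item

items = [Item(title='%s [1080p] [LAT]', url='https://example.com/embed/xyz', action='play')]
items = servertools.get_servers_itemlist(items, lambda i: i.title % i.server.capitalize())
# if the URL resolves to a known server, the title becomes e.g. 'Openload [1080p] [LAT]'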
|
||||
|
||||
|
||||
|
||||
@@ -221,7 +221,7 @@ def findvideos(item):
|
||||
language = ''
|
||||
if 'latino' in link.lower():
|
||||
language='Latino'
|
||||
elif 'español' in link.lower():
|
||||
elif 'espaÑol' in link.lower():
|
||||
language = 'Español'
|
||||
elif 'subtitulado' in link.lower():
|
||||
language = 'VOSE'
|
||||
|
||||
@@ -1,24 +0,0 @@
|
||||
{
|
||||
"id": "zentorrents",
|
||||
"name": "Zentorrent",
|
||||
"active": false,
|
||||
"adult": false,
|
||||
"language": ["cast"],
|
||||
"banner": "zentorrents.png",
|
||||
"thumbnail": "http://s6.postimg.cc/9zv90yjip/zentorrentlogo.jpg",
|
||||
"categories": [
|
||||
"torrent",
|
||||
"movie",
|
||||
"tvshow"
|
||||
],
|
||||
"settings": [
|
||||
{
|
||||
"id": "include_in_global_search",
|
||||
"type": "bool",
|
||||
"label": "Incluir en busqueda global",
|
||||
"default": true,
|
||||
"enabled": true,
|
||||
"visible": true
|
||||
}
|
||||
]
|
||||
}
|
||||
File diff suppressed because it is too large
@@ -5,28 +5,59 @@
|
||||
"adult": false,
|
||||
"language": ["cast", "lat"],
|
||||
"banner": "",
|
||||
"thumbnail": "https://zonatorrent.org/wp-content/uploads/2017/04/zonatorrent-New-Logo.png",
|
||||
"thumbnail": "zonatorrent.png",
|
||||
"version": 1,
|
||||
"categories": [
|
||||
"torrent",
|
||||
"movie"
|
||||
"torrent",
|
||||
"movie",
|
||||
"tvshow",
|
||||
"vos"
|
||||
],
|
||||
"settings": [
|
||||
{
|
||||
"id": "include_in_global_search",
|
||||
"type": "bool",
|
||||
"label": "Incluir en busqueda global",
|
||||
"default": true,
|
||||
"enabled": true,
|
||||
"visible": true
|
||||
},
|
||||
{
|
||||
"id": "modo_grafico",
|
||||
"type": "bool",
|
||||
"label": "Buscar información extra",
|
||||
"default": true,
|
||||
"enabled": true,
|
||||
"visible": true
|
||||
{
|
||||
"id": "include_in_global_search",
|
||||
"type": "bool",
|
||||
"label": "Incluir en busqueda global",
|
||||
"default": true,
|
||||
"enabled": true,
|
||||
"visible": true
|
||||
},
|
||||
{
|
||||
"id": "modo_grafico",
|
||||
"type": "bool",
|
||||
"label": "Buscar información extra",
|
||||
"default": true,
|
||||
"enabled": true,
|
||||
"visible": true
|
||||
},
|
||||
{
|
||||
"id": "timeout_downloadpage",
|
||||
"type": "list",
|
||||
"label": "Timeout (segs.) en descarga de páginas o verificación de servidores",
|
||||
"default": 5,
|
||||
"enabled": true,
|
||||
"visible": true,
|
||||
"lvalues": [
|
||||
"None",
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
"5",
|
||||
"6",
|
||||
"7",
|
||||
"8",
|
||||
"9",
|
||||
"10"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "seleccionar_ult_temporadda_activa",
|
||||
"type": "bool",
|
||||
"label": "Seleccionar para Videoteca si estará activa solo la última Temporada",
|
||||
"default": true,
|
||||
"enabled": true,
|
||||
"visible": true
|
||||
},
|
||||
{
|
||||
"id": "include_in_newest_peliculas",
|
||||
|
||||
File diff suppressed because it is too large
@@ -24,9 +24,9 @@ def getmainlist(view="thumb_"):
|
||||
thumbnail=get_thumb("channels.png", view), view=view,
|
||||
category=config.get_localized_string(30119), viewmode="thumbnails"))
|
||||
|
||||
itemlist.append(Item(title='Mis enlaces', channel="alfavorites", action="mainlist",
|
||||
thumbnail=get_thumb("favorites.png", view), view=view,
|
||||
category='Mis enlaces', viewmode="thumbnails"))
|
||||
itemlist.append(Item(title=config.get_localized_string(70527), channel="alfavorites", action="mainlist",
|
||||
thumbnail=get_thumb("mylink.png", view), view=view,
|
||||
category=config.get_localized_string(70527), viewmode="thumbnails"))
|
||||
|
||||
itemlist.append(Item(title=config.get_localized_string(30103), channel="search", action="mainlist",
|
||||
thumbnail=get_thumb("search.png", view),
|
||||
@@ -197,7 +197,7 @@ def filterchannels(category, view="thumb_"):
|
||||
thumbnail=channel_parameters["thumbnail"], type="generic", viewmode="list"))
|
||||
|
||||
if category in ['movie', 'tvshow']:
|
||||
titles = [config.get_localized_string(70028), config.get_localized_string(30985), config.get_localized_string(70527), config.get_localized_string(60264), config.get_localized_string(70528)]
|
||||
titles = [config.get_localized_string(70028), config.get_localized_string(30985), config.get_localized_string(70559), config.get_localized_string(60264), config.get_localized_string(70560)]
|
||||
ids = ['popular', 'top_rated', 'now_playing', 'on_the_air']
|
||||
for x in range(0,3):
|
||||
if x == 2 and category != 'movie':
|
||||
@@ -267,4 +267,4 @@ def set_channel_info(parameters):
|
||||
content = config.get_localized_category(cat)
|
||||
|
||||
info = '[COLOR yellow]Tipo de contenido:[/COLOR] %s\n\n[COLOR yellow]Idiomas:[/COLOR] %s' % (content, language)
|
||||
return info
|
||||
return info
|
||||
|
||||
plugin.video.alfa/core/httptools.py
Executable file → Normal file
@@ -56,6 +56,9 @@ default_headers["Accept-Encoding"] = "gzip"
|
||||
HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT = config.get_setting('httptools_timeout', default=15)
|
||||
if HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT == 0: HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT = None
|
||||
|
||||
def get_user_agent():
|
||||
# Devuelve el user agent global para ser utilizado cuando es necesario para la url.
|
||||
return default_headers["User-Agent"]
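Usage sketch (an assumed call site, mirroring the play() hunk elsewhere in this merge):

url = 'http://example.com/video.mp4'
url += '|User-Agent=%s' % get_user_agent()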
|
||||
|
||||
def get_url_headers(url):
|
||||
domain_cookies = cj._cookies.get("." + urlparse.urlparse(url)[1], {}).get("/", {})
|
||||
|
||||
@@ -319,7 +319,7 @@ def set_infoLabels_item(item, seekTmdb=True, idioma_busqueda='es', lock=None):
|
||||
|
||||
__leer_datos(otmdb_global)
|
||||
|
||||
if lock:
|
||||
if lock and lock.locked():
|
||||
lock.release()
|
||||
|
||||
if item.infoLabels['episode']:
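The guard added above ('if lock and lock.locked()') avoids releasing a lock the thread does not hold; a minimal standalone illustration:

import threading

lock = threading.Lock()
lock.acquire()
# ... shared work between the TMDB lookup threads ...
if lock.locked():
    lock.release()   # without the locked() check, releasing an unheld lock raises an error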
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
# ------------------------------------------------------------
|
||||
|
||||
import re
|
||||
import os
|
||||
import sys
|
||||
import urllib
|
||||
import urlparse
|
||||
@@ -236,8 +237,7 @@ def post_tmdb_listado(item, itemlist):
|
||||
del item.channel_alt
|
||||
if item.url_alt:
|
||||
del item.url_alt
|
||||
if item.extra2:
|
||||
del item.extra2
|
||||
|
||||
#Ajustamos el nombre de la categoría
|
||||
if not item.category_new:
|
||||
item.category_new = ''
|
||||
@@ -389,8 +389,8 @@ def post_tmdb_listado(item, itemlist):
|
||||
if item_local.infoLabels['episodio_titulo']:
|
||||
item_local.infoLabels['episodio_titulo'] = item_local.infoLabels['episodio_titulo'].replace(" []", "").strip()
|
||||
title = title.replace("--", "").replace(" []", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
|
||||
title = re.sub(r'\s\[COLOR \w+\]\[\[?\]?\]\[\/COLOR\]', '', title).strip()
|
||||
title = re.sub(r'\s\[COLOR \w+\]\[\/COLOR\]', '', title).strip()
|
||||
title = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', title).strip()
|
||||
title = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', title).strip()
|
||||
|
||||
if item.category_new == "newest": #Viene de Novedades. Marcamos el título con el nombre del canal
|
||||
title += ' -%s-' % scrapertools.find_single_match(item_local.url, 'http.?\:\/\/(?:www.)?(\w+)\.\w+\/').capitalize()
|
||||
@@ -766,6 +766,7 @@ def post_tmdb_episodios(item, itemlist):
|
||||
#Si no está el título del episodio, pero sí está en "title", lo rescatamos
|
||||
if not item_local.infoLabels['episodio_titulo'] and item_local.infoLabels['title'].lower() != item_local.infoLabels['tvshowtitle'].lower():
|
||||
item_local.infoLabels['episodio_titulo'] = item_local.infoLabels['title']
|
||||
item_local.infoLabels['episodio_titulo'] = item_local.infoLabels['episodio_titulo'].replace('GB', 'G B').replace('MB', 'M B')
|
||||
|
||||
#Preparamos el título para que sea compatible con Añadir Serie a Videoteca
|
||||
if "Temporada" in item_local.title: #Compatibilizamos "Temporada" con Unify
|
||||
@@ -792,8 +793,8 @@ def post_tmdb_episodios(item, itemlist):
|
||||
item_local.infoLabels['episodio_titulo'] = item_local.infoLabels['episodio_titulo'].replace(" []", "").strip()
|
||||
item_local.infoLabels['title'] = item_local.infoLabels['title'].replace(" []", "").strip()
|
||||
item_local.title = item_local.title.replace(" []", "").strip()
|
||||
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\[?\]?\]\[\/COLOR\]', '', item_local.title).strip()
|
||||
item_local.title = re.sub(r'\s\[COLOR \w+\]-\[\/COLOR\]', '', item_local.title).strip()
|
||||
item_local.title = re.sub(r'\s?\[COLOR \w+\]\[\[?-?\s?\]?\]\[\/COLOR\]', '', item_local.title).strip()
|
||||
item_local.title = re.sub(r'\s?\[COLOR \w+\]-?\s?\[\/COLOR\]', '', item_local.title).strip()
|
||||
|
||||
#Si la información de num. total de episodios de TMDB no es correcta, tratamos de calcularla
|
||||
if num_episodios < item_local.contentEpisodeNumber:
|
||||
@@ -1054,8 +1055,8 @@ def post_tmdb_findvideos(item, itemlist):
|
||||
title_gen = item.title
|
||||
|
||||
#Limpiamos etiquetas vacías
|
||||
title_gen = re.sub(r'\s\[COLOR \w+\]\[\[?\]?\]\[\/COLOR\]', '', title_gen).strip() #Quitamos etiquetas vacías
|
||||
title_gen = re.sub(r'\s\[COLOR \w+\]\[\/COLOR\]', '', title_gen).strip() #Quitamos colores vacíos
|
||||
title_gen = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', title_gen).strip() #Quitamos etiquetas vacías
|
||||
title_gen = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', title_gen).strip() #Quitamos colores vacíos
|
||||
title_gen = title_gen.replace(" []", "").strip() #Quitamos etiquetas vacías
|
||||
title_videoteca = title_gen #Salvamos el título para Videoteca
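A worked example of the two widened regexes above (the title string is made up); they strip the empty [COLOR]...[/COLOR] decorations whether or not a leading space is present:

import re

title = 'Movie [COLOR limegreen][ ][/COLOR] [COLOR yellow][/COLOR]'
title = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', title).strip()
title = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', title).strip()
# title == 'Movie'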
|
||||
|
||||
@@ -1103,7 +1104,131 @@ def post_tmdb_findvideos(item, itemlist):
|
||||
|
||||
return (item, itemlist)
|
||||
|
||||
|
||||
def get_torrent_size(url):
|
||||
logger.info()
|
||||
|
||||
"""
|
||||
|
||||
Módulo extraido del antiguo canal ZenTorrent
|
||||
|
||||
Calcula el tamaño de los archivos que contienen un .torrent. Descarga el archivo .torrent en una carpeta,
|
||||
lo lee y descodifica. Si contiene múltiples archivos, suma el tamaño de todos ellos
|
||||
|
||||
Llamada: generictools.get_torrent_size(url)
|
||||
Entrada: url: url del archivo .torrent
|
||||
Salida: size: str con el tamaño y tipo de medida ( MB, GB, etc)
|
||||
|
||||
"""
|
||||
|
||||
def convert_size(size):
|
||||
import math
|
||||
if (size == 0):
|
||||
return '0B'
|
||||
size_name = ("B", "KB", "M B", "G B", "TB", "PB", "EB", "ZB", "YB")
|
||||
i = int(math.floor(math.log(size, 1024)))
|
||||
p = math.pow(1024, i)
|
||||
s = round(size / p, 2)
|
||||
return '%s %s' % (s, size_name[i])
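# e.g. convert_size(7340032) -> '7.0 M B' (7 * 1024**2 bytes); the spaced unit
# names ("M B", "G B") match the strings used when composing titles elsewhere.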
|
||||
|
||||
def decode(text):
|
||||
try:
|
||||
src = tokenize(text)
|
||||
data = decode_item(src.next, src.next())
|
||||
for token in src: # look for more tokens
|
||||
raise SyntaxError("trailing junk")
|
||||
except (AttributeError, ValueError, StopIteration):
|
||||
try:
|
||||
data = data
|
||||
except:
|
||||
data = src
|
||||
|
||||
return data
|
||||
|
||||
def tokenize(text, match=re.compile("([idel])|(\d+):|(-?\d+)").match):
|
||||
i = 0
|
||||
while i < len(text):
|
||||
m = match(text, i)
|
||||
s = m.group(m.lastindex)
|
||||
i = m.end()
|
||||
if m.lastindex == 2:
|
||||
yield "s"
|
||||
yield text[i:i + int(s)]
|
||||
i = i + int(s)
|
||||
else:
|
||||
yield s
|
||||
|
||||
def decode_item(next, token):
|
||||
if token == "i":
|
||||
# integer: "i" value "e"
|
||||
data = int(next())
|
||||
if next() != "e":
|
||||
raise ValueError
|
||||
elif token == "s":
|
||||
# string: "s" value (virtual tokens)
|
||||
data = next()
|
||||
elif token == "l" or token == "d":
|
||||
# container: "l" (or "d") values "e"
|
||||
data = []
|
||||
tok = next()
|
||||
while tok != "e":
|
||||
data.append(decode_item(next, tok))
|
||||
tok = next()
|
||||
if token == "d":
|
||||
data = dict(zip(data[0::2], data[1::2]))
|
||||
else:
|
||||
raise ValueError
|
||||
return data
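# e.g. decode("d6:lengthi1024ee") -> {'length': 1024}; a real .torrent exposes the same
# 'length' key (or a 'files' list) under its 'info' dictionary.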
|
||||
|
||||
|
||||
#Módulo principal
|
||||
size = ""
|
||||
try:
|
||||
torrents_path = config.get_videolibrary_path() + '/torrents' #path para dejar el .torrent
|
||||
|
||||
if not os.path.exists(torrents_path):
|
||||
os.mkdir(torrents_path) #si no está la carpeta la creamos
|
||||
|
||||
urllib.URLopener.version = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36 SE 2.X MetaSr 1.0'
|
||||
urllib.urlretrieve(url, torrents_path + "/generictools.torrent") #desacargamos el .torrent a la carpeta
|
||||
torrent_file = open(torrents_path + "/generictools.torrent", "rb").read() #leemos el .torrent
|
||||
|
||||
if "used CloudFlare" in torrent_file: #Si tiene CloudFlare, usamos este proceso
|
||||
try:
|
||||
urllib.urlretrieve("http://anonymouse.org/cgi-bin/anon-www.cgi/" + url.strip(),
|
||||
torrents_path + "/generictools.torrent")
|
||||
torrent_file = open(torrents_path + "/generictools.torrent", "rb").read()
|
||||
except:
|
||||
torrent_file = ""
|
||||
|
||||
torrent = decode(torrent_file) #decodificamos el .torrent
|
||||
|
||||
#si sólo tiene un archivo, tomamos la longitud y la convertimos a una unidad legible, si no dará error
|
||||
try:
|
||||
sizet = torrent["info"]['length']
|
||||
size = convert_size(sizet)
|
||||
except:
|
||||
pass
|
||||
|
||||
#si tiene múltiples archivos sumamos la longitud de todos
|
||||
if not size:
|
||||
check_video = scrapertools.find_multiple_matches(str(torrent["info"]["files"]), "'length': (\d+)}")
|
||||
sizet = sum([int(i) for i in check_video])
|
||||
size = convert_size(sizet)
|
||||
|
||||
except:
|
||||
logger.error('ERROR al buscar el tamaño de un .Torrent: ' + url)
|
||||
|
||||
try:
|
||||
os.remove(torrents_path + "/generictools.torrent") #borramos el .torrent
|
||||
except:
|
||||
pass
|
||||
|
||||
#logger.debug(url + ' / ' + size)
|
||||
|
||||
return size
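Hedged usage sketch (the .torrent URL is a placeholder; the function returns '' when the size cannot be determined):

size = get_torrent_size('http://example.com/some.torrent')
if size:
    title = 'Some release [%s]' % size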
|
||||
|
||||
|
||||
def get_field_from_kodi_DB(item, from_fields='*', files='file'):
|
||||
logger.info()
|
||||
"""
|
||||
|
||||
@@ -1,211 +0,0 @@
|
||||
__all__ = [
|
||||
'AbstractBasicAuthHandler',
|
||||
'AbstractDigestAuthHandler',
|
||||
'BaseHandler',
|
||||
'Browser',
|
||||
'BrowserStateError',
|
||||
'CacheFTPHandler',
|
||||
'ContentTooShortError',
|
||||
'Cookie',
|
||||
'CookieJar',
|
||||
'CookiePolicy',
|
||||
'DefaultCookiePolicy',
|
||||
'DefaultFactory',
|
||||
'FTPHandler',
|
||||
'Factory',
|
||||
'FileCookieJar',
|
||||
'FileHandler',
|
||||
'FormNotFoundError',
|
||||
'FormsFactory',
|
||||
'HTTPBasicAuthHandler',
|
||||
'HTTPCookieProcessor',
|
||||
'HTTPDefaultErrorHandler',
|
||||
'HTTPDigestAuthHandler',
|
||||
'HTTPEquivProcessor',
|
||||
'HTTPError',
|
||||
'HTTPErrorProcessor',
|
||||
'HTTPHandler',
|
||||
'HTTPPasswordMgr',
|
||||
'HTTPPasswordMgrWithDefaultRealm',
|
||||
'HTTPProxyPasswordMgr',
|
||||
'HTTPRedirectDebugProcessor',
|
||||
'HTTPRedirectHandler',
|
||||
'HTTPRefererProcessor',
|
||||
'HTTPRefreshProcessor',
|
||||
'HTTPResponseDebugProcessor',
|
||||
'HTTPRobotRulesProcessor',
|
||||
'HTTPSClientCertMgr',
|
||||
'HeadParser',
|
||||
'History',
|
||||
'LWPCookieJar',
|
||||
'Link',
|
||||
'LinkNotFoundError',
|
||||
'LinksFactory',
|
||||
'LoadError',
|
||||
'MSIECookieJar',
|
||||
'MozillaCookieJar',
|
||||
'OpenerDirector',
|
||||
'OpenerFactory',
|
||||
'ParseError',
|
||||
'ProxyBasicAuthHandler',
|
||||
'ProxyDigestAuthHandler',
|
||||
'ProxyHandler',
|
||||
'Request',
|
||||
'RobotExclusionError',
|
||||
'RobustFactory',
|
||||
'RobustFormsFactory',
|
||||
'RobustLinksFactory',
|
||||
'RobustTitleFactory',
|
||||
'SeekableResponseOpener',
|
||||
'TitleFactory',
|
||||
'URLError',
|
||||
'USE_BARE_EXCEPT',
|
||||
'UnknownHandler',
|
||||
'UserAgent',
|
||||
'UserAgentBase',
|
||||
'XHTMLCompatibleHeadParser',
|
||||
'__version__',
|
||||
'build_opener',
|
||||
'install_opener',
|
||||
'lwp_cookie_str',
|
||||
'make_response',
|
||||
'request_host',
|
||||
'response_seek_wrapper', # XXX deprecate in public interface?
|
||||
'seek_wrapped_response', # XXX should probably use this internally in place of response_seek_wrapper()
|
||||
'str2time',
|
||||
'urlopen',
|
||||
'urlretrieve',
|
||||
'urljoin',
|
||||
|
||||
# ClientForm API
|
||||
'AmbiguityError',
|
||||
'ControlNotFoundError',
|
||||
'FormParser',
|
||||
'ItemCountError',
|
||||
'ItemNotFoundError',
|
||||
'LocateError',
|
||||
'Missing',
|
||||
'ParseFile',
|
||||
'ParseFileEx',
|
||||
'ParseResponse',
|
||||
'ParseResponseEx',
|
||||
'ParseString',
|
||||
'XHTMLCompatibleFormParser',
|
||||
# deprecated
|
||||
'CheckboxControl',
|
||||
'Control',
|
||||
'FileControl',
|
||||
'HTMLForm',
|
||||
'HiddenControl',
|
||||
'IgnoreControl',
|
||||
'ImageControl',
|
||||
'IsindexControl',
|
||||
'Item',
|
||||
'Label',
|
||||
'ListControl',
|
||||
'PasswordControl',
|
||||
'RadioControl',
|
||||
'ScalarControl',
|
||||
'SelectControl',
|
||||
'SubmitButtonControl',
|
||||
'SubmitControl',
|
||||
'TextControl',
|
||||
'TextareaControl',
|
||||
]
|
||||
|
||||
import logging
|
||||
import sys
|
||||
|
||||
from _version import __version__
|
||||
|
||||
# high-level stateful browser-style interface
|
||||
from _mechanize import \
|
||||
Browser, History, \
|
||||
BrowserStateError, LinkNotFoundError, FormNotFoundError
|
||||
|
||||
# configurable URL-opener interface
|
||||
from _useragent import UserAgentBase, UserAgent
|
||||
from _html import \
|
||||
Link, \
|
||||
Factory, DefaultFactory, RobustFactory, \
|
||||
FormsFactory, LinksFactory, TitleFactory, \
|
||||
RobustFormsFactory, RobustLinksFactory, RobustTitleFactory
|
||||
|
||||
# urllib2 work-alike interface. This is a superset of the urllib2 interface.
|
||||
from _urllib2 import *
|
||||
import _urllib2
|
||||
if hasattr(_urllib2, "HTTPSHandler"):
|
||||
__all__.append("HTTPSHandler")
|
||||
del _urllib2
|
||||
|
||||
# misc
|
||||
from _http import HeadParser
|
||||
from _http import XHTMLCompatibleHeadParser
|
||||
from _opener import ContentTooShortError, OpenerFactory, urlretrieve
|
||||
from _response import \
|
||||
response_seek_wrapper, seek_wrapped_response, make_response
|
||||
from _rfc3986 import urljoin
|
||||
from _util import http2time as str2time
|
||||
|
||||
# cookies
|
||||
from _clientcookie import Cookie, CookiePolicy, DefaultCookiePolicy, \
|
||||
CookieJar, FileCookieJar, LoadError, request_host_lc as request_host, \
|
||||
effective_request_host
|
||||
from _lwpcookiejar import LWPCookieJar, lwp_cookie_str
|
||||
# 2.4 raises SyntaxError due to generator / try/finally use
|
||||
if sys.version_info[:2] > (2,4):
|
||||
try:
|
||||
import sqlite3
|
||||
except ImportError:
|
||||
pass
|
||||
else:
|
||||
from _firefox3cookiejar import Firefox3CookieJar
|
||||
from _mozillacookiejar import MozillaCookieJar
|
||||
from _msiecookiejar import MSIECookieJar
|
||||
|
||||
# forms
|
||||
from _form import (
|
||||
AmbiguityError,
|
||||
ControlNotFoundError,
|
||||
FormParser,
|
||||
ItemCountError,
|
||||
ItemNotFoundError,
|
||||
LocateError,
|
||||
Missing,
|
||||
ParseError,
|
||||
ParseFile,
|
||||
ParseFileEx,
|
||||
ParseResponse,
|
||||
ParseResponseEx,
|
||||
ParseString,
|
||||
XHTMLCompatibleFormParser,
|
||||
# deprecated
|
||||
CheckboxControl,
|
||||
Control,
|
||||
FileControl,
|
||||
HTMLForm,
|
||||
HiddenControl,
|
||||
IgnoreControl,
|
||||
ImageControl,
|
||||
IsindexControl,
|
||||
Item,
|
||||
Label,
|
||||
ListControl,
|
||||
PasswordControl,
|
||||
RadioControl,
|
||||
ScalarControl,
|
||||
SelectControl,
|
||||
SubmitButtonControl,
|
||||
SubmitControl,
|
||||
TextControl,
|
||||
TextareaControl,
|
||||
)
|
||||
|
||||
# If you hate the idea of turning bugs into warnings, do:
|
||||
# import mechanize; mechanize.USE_BARE_EXCEPT = False
|
||||
USE_BARE_EXCEPT = True
|
||||
|
||||
logger = logging.getLogger("mechanize")
|
||||
if logger.level is logging.NOTSET:
|
||||
logger.setLevel(logging.CRITICAL)
|
||||
del logger
|
||||
@@ -1,68 +0,0 @@
|
||||
"""HTTP Authentication and Proxy support.
|
||||
|
||||
|
||||
Copyright 2006 John J. Lee <jjl@pobox.com>
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it under
|
||||
the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
|
||||
included with the distribution).
|
||||
|
||||
"""
|
||||
|
||||
from _urllib2_fork import HTTPPasswordMgr
|
||||
|
||||
|
||||
# TODO: stop deriving from HTTPPasswordMgr
|
||||
class HTTPProxyPasswordMgr(HTTPPasswordMgr):
|
||||
# has default realm and host/port
|
||||
def add_password(self, realm, uri, user, passwd):
|
||||
# uri could be a single URI or a sequence
|
||||
if uri is None or isinstance(uri, basestring):
|
||||
uris = [uri]
|
||||
else:
|
||||
uris = uri
|
||||
passwd_by_domain = self.passwd.setdefault(realm, {})
|
||||
for uri in uris:
|
||||
for default_port in True, False:
|
||||
reduced_uri = self.reduce_uri(uri, default_port)
|
||||
passwd_by_domain[reduced_uri] = (user, passwd)
|
||||
|
||||
def find_user_password(self, realm, authuri):
|
||||
attempts = [(realm, authuri), (None, authuri)]
|
||||
# bleh, want default realm to take precedence over default
|
||||
# URI/authority, hence this outer loop
|
||||
for default_uri in False, True:
|
||||
for realm, authuri in attempts:
|
||||
authinfo_by_domain = self.passwd.get(realm, {})
|
||||
for default_port in True, False:
|
||||
reduced_authuri = self.reduce_uri(authuri, default_port)
|
||||
for uri, authinfo in authinfo_by_domain.iteritems():
|
||||
if uri is None and not default_uri:
|
||||
continue
|
||||
if self.is_suburi(uri, reduced_authuri):
|
||||
return authinfo
|
||||
user, password = None, None
|
||||
|
||||
if user is not None:
|
||||
break
|
||||
return user, password
|
||||
|
||||
def reduce_uri(self, uri, default_port=True):
|
||||
if uri is None:
|
||||
return None
|
||||
return HTTPPasswordMgr.reduce_uri(self, uri, default_port)
|
||||
|
||||
def is_suburi(self, base, test):
|
||||
if base is None:
|
||||
# default to the proxy's host/port
|
||||
hostport, path = test
|
||||
base = (hostport, "/")
|
||||
return HTTPPasswordMgr.is_suburi(self, base, test)
|
||||
|
||||
|
||||
class HTTPSClientCertMgr(HTTPPasswordMgr):
|
||||
# implementation inheritance: this is not a proper subclass
|
||||
def add_key_cert(self, uri, key_file, cert_file):
|
||||
self.add_password(None, uri, key_file, cert_file)
|
||||
def find_key_cert(self, authuri):
|
||||
return HTTPPasswordMgr.find_user_password(self, None, authuri)
|
||||
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -1,28 +0,0 @@
|
||||
import logging
|
||||
|
||||
from _response import response_seek_wrapper
|
||||
from _urllib2_fork import BaseHandler
|
||||
|
||||
|
||||
class HTTPResponseDebugProcessor(BaseHandler):
|
||||
handler_order = 900 # before redirections, after everything else
|
||||
|
||||
def http_response(self, request, response):
|
||||
if not hasattr(response, "seek"):
|
||||
response = response_seek_wrapper(response)
|
||||
info = logging.getLogger("mechanize.http_responses").info
|
||||
try:
|
||||
info(response.read())
|
||||
finally:
|
||||
response.seek(0)
|
||||
info("*****************************************************")
|
||||
return response
|
||||
|
||||
https_response = http_response
|
||||
|
||||
class HTTPRedirectDebugProcessor(BaseHandler):
|
||||
def http_request(self, request):
|
||||
if hasattr(request, "redirect_dict"):
|
||||
info = logging.getLogger("mechanize.http_redirects").info
|
||||
info("redirecting to %s", request.get_full_url())
|
||||
return request
|
||||
@@ -1,248 +0,0 @@
|
||||
"""Firefox 3 "cookies.sqlite" cookie persistence.
|
||||
|
||||
Copyright 2008 John J Lee <jjl@pobox.com>
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it
|
||||
under the terms of the BSD or ZPL 2.1 licenses (see the file
|
||||
COPYING.txt included with the distribution).
|
||||
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
|
||||
from _clientcookie import CookieJar, Cookie, MappingIterator
|
||||
from _util import isstringlike, experimental
|
||||
debug = logging.getLogger("mechanize.cookies").debug
|
||||
|
||||
|
||||
class Firefox3CookieJar(CookieJar):
|
||||
|
||||
"""Firefox 3 cookie jar.
|
||||
|
||||
The cookies are stored in Firefox 3's "cookies.sqlite" format.
|
||||
|
||||
Constructor arguments:
|
||||
|
||||
filename: filename of cookies.sqlite (typically found at the top level
|
||||
of a firefox profile directory)
|
||||
autoconnect: as a convenience, connect to the SQLite cookies database at
|
||||
Firefox3CookieJar construction time (default True)
|
||||
policy: an object satisfying the mechanize.CookiePolicy interface
|
||||
|
||||
Note that this is NOT a FileCookieJar, and there are no .load(),
|
||||
.save() or .restore() methods. The database is in sync with the
|
||||
cookiejar object's state after each public method call.
|
||||
|
||||
Following Firefox's own behaviour, session cookies are never saved to
|
||||
the database.
|
||||
|
||||
The file is created, and an sqlite database written to it, if it does
|
||||
not already exist. The moz_cookies database table is created if it does
|
||||
not already exist.
|
||||
"""
|
||||
|
||||
# XXX
|
||||
# handle DatabaseError exceptions
|
||||
# add a FileCookieJar (explicit .save() / .revert() / .load() methods)
|
||||
|
||||
def __init__(self, filename, autoconnect=True, policy=None):
|
||||
experimental("Firefox3CookieJar is experimental code")
|
||||
CookieJar.__init__(self, policy)
|
||||
if filename is not None and not isstringlike(filename):
|
||||
raise ValueError("filename must be string-like")
|
||||
self.filename = filename
|
||||
self._conn = None
|
||||
if autoconnect:
|
||||
self.connect()
|
||||
|
||||
def connect(self):
|
||||
import sqlite3 # not available in Python 2.4 stdlib
|
||||
self._conn = sqlite3.connect(self.filename)
|
||||
self._conn.isolation_level = "DEFERRED"
|
||||
self._create_table_if_necessary()
|
||||
|
||||
def close(self):
|
||||
self._conn.close()
|
||||
|
||||
def _transaction(self, func):
|
||||
try:
|
||||
cur = self._conn.cursor()
|
||||
try:
|
||||
result = func(cur)
|
||||
finally:
|
||||
cur.close()
|
||||
except:
|
||||
self._conn.rollback()
|
||||
raise
|
||||
else:
|
||||
self._conn.commit()
|
||||
return result
|
||||
|
||||
def _execute(self, query, params=()):
|
||||
return self._transaction(lambda cur: cur.execute(query, params))
|
||||
|
||||
def _query(self, query, params=()):
|
||||
# XXX should we bother with a transaction?
|
||||
cur = self._conn.cursor()
|
||||
try:
|
||||
cur.execute(query, params)
|
||||
return cur.fetchall()
|
||||
finally:
|
||||
cur.close()
|
||||
|
||||
def _create_table_if_necessary(self):
|
||||
self._execute("""\
|
||||
CREATE TABLE IF NOT EXISTS moz_cookies (id INTEGER PRIMARY KEY, name TEXT,
|
||||
value TEXT, host TEXT, path TEXT,expiry INTEGER,
|
||||
lastAccessed INTEGER, isSecure INTEGER, isHttpOnly INTEGER)""")
|
||||
|
||||
def _cookie_from_row(self, row):
|
||||
(pk, name, value, domain, path, expires,
|
||||
last_accessed, secure, http_only) = row
|
||||
|
||||
version = 0
|
||||
domain = domain.encode("ascii", "ignore")
|
||||
path = path.encode("ascii", "ignore")
|
||||
name = name.encode("ascii", "ignore")
|
||||
value = value.encode("ascii", "ignore")
|
||||
secure = bool(secure)
|
||||
|
||||
# last_accessed isn't a cookie attribute, so isn't added to rest
|
||||
rest = {}
|
||||
if http_only:
|
||||
rest["HttpOnly"] = None
|
||||
|
||||
if name == "":
|
||||
name = value
|
||||
value = None
|
||||
|
||||
initial_dot = domain.startswith(".")
|
||||
domain_specified = initial_dot
|
||||
|
||||
discard = False
|
||||
if expires == "":
|
||||
expires = None
|
||||
discard = True
|
||||
|
||||
return Cookie(version, name, value,
|
||||
None, False,
|
||||
domain, domain_specified, initial_dot,
|
||||
path, False,
|
||||
secure,
|
||||
expires,
|
||||
discard,
|
||||
None,
|
||||
None,
|
||||
rest)
|
||||
|
||||
def clear(self, domain=None, path=None, name=None):
|
||||
CookieJar.clear(self, domain, path, name)
|
||||
where_parts = []
|
||||
sql_params = []
|
||||
if domain is not None:
|
||||
where_parts.append("host = ?")
|
||||
sql_params.append(domain)
|
||||
if path is not None:
|
||||
where_parts.append("path = ?")
|
||||
sql_params.append(path)
|
||||
if name is not None:
|
||||
where_parts.append("name = ?")
|
||||
sql_params.append(name)
|
||||
where = " AND ".join(where_parts)
|
||||
if where:
|
||||
where = " WHERE " + where
|
||||
def clear(cur):
|
||||
cur.execute("DELETE FROM moz_cookies%s" % where,
|
||||
tuple(sql_params))
|
||||
self._transaction(clear)
|
||||
|
||||
def _row_from_cookie(self, cookie, cur):
|
||||
expires = cookie.expires
|
||||
if cookie.discard:
|
||||
expires = ""
|
||||
|
||||
domain = unicode(cookie.domain)
|
||||
path = unicode(cookie.path)
|
||||
name = unicode(cookie.name)
|
||||
value = unicode(cookie.value)
|
||||
secure = bool(int(cookie.secure))
|
||||
|
||||
if value is None:
|
||||
value = name
|
||||
name = ""
|
||||
|
||||
last_accessed = int(time.time())
|
||||
http_only = cookie.has_nonstandard_attr("HttpOnly")
|
||||
|
||||
query = cur.execute("""SELECT MAX(id) + 1 from moz_cookies""")
|
||||
pk = query.fetchone()[0]
|
||||
if pk is None:
|
||||
pk = 1
|
||||
|
||||
return (pk, name, value, domain, path, expires,
|
||||
last_accessed, secure, http_only)
|
||||
|
||||
def set_cookie(self, cookie):
|
||||
if cookie.discard:
|
||||
CookieJar.set_cookie(self, cookie)
|
||||
return
|
||||
|
||||
def set_cookie(cur):
|
||||
# XXX
|
||||
# is this RFC 2965-correct?
|
||||
# could this do an UPDATE instead?
|
||||
row = self._row_from_cookie(cookie, cur)
|
||||
name, unused, domain, path = row[1:5]
|
||||
cur.execute("""\
|
||||
DELETE FROM moz_cookies WHERE host = ? AND path = ? AND name = ?""",
|
||||
(domain, path, name))
|
||||
cur.execute("""\
|
||||
INSERT INTO moz_cookies VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""", row)
|
||||
self._transaction(set_cookie)
|
||||
|
||||
def __iter__(self):
|
||||
# session (non-persistent) cookies
|
||||
for cookie in MappingIterator(self._cookies):
|
||||
yield cookie
|
||||
# persistent cookies
|
||||
for row in self._query("""\
|
||||
SELECT * FROM moz_cookies ORDER BY name, path, host"""):
|
||||
yield self._cookie_from_row(row)
|
||||
|
||||
def _cookies_for_request(self, request):
|
||||
session_cookies = CookieJar._cookies_for_request(self, request)
|
||||
def get_cookies(cur):
|
||||
query = cur.execute("SELECT host from moz_cookies")
|
||||
domains = [row[0] for row in query.fetchall()]
|
||||
cookies = []
|
||||
for domain in domains:
|
||||
cookies += self._persistent_cookies_for_domain(domain,
|
||||
request, cur)
|
||||
return cookies
|
||||
persistent_cookies = self._transaction(get_cookies)
|
||||
return session_cookies + persistent_cookies
|
||||
|
||||
def _persistent_cookies_for_domain(self, domain, request, cur):
|
||||
cookies = []
|
||||
if not self._policy.domain_return_ok(domain, request):
|
||||
return []
|
||||
debug("Checking %s for cookies to return", domain)
|
||||
query = cur.execute("""\
|
||||
SELECT * from moz_cookies WHERE host = ? ORDER BY path""",
|
||||
(domain,))
|
||||
cookies = [self._cookie_from_row(row) for row in query.fetchall()]
|
||||
last_path = None
|
||||
r = []
|
||||
for cookie in cookies:
|
||||
if (cookie.path != last_path and
|
||||
not self._policy.path_return_ok(cookie.path, request)):
|
||||
last_path = cookie.path
|
||||
continue
|
||||
if not self._policy.return_ok(cookie, request):
|
||||
debug(" not returning cookie")
|
||||
continue
|
||||
debug(" it's a match")
|
||||
r.append(cookie)
|
||||
return r
|
||||
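For context, a minimal sketch of how the Firefox3CookieJar above is normally driven from mechanize; the cookies.sqlite path is an assumption and must point at a real Firefox 3 profile database.

import mechanize

cj = mechanize.Firefox3CookieJar("cookies.sqlite")  # autoconnect=True opens the sqlite file immediately
br = mechanize.Browser()
br.set_cookiejar(cj)
br.open("http://example.com/")
for cookie in cj:   # session cookies first, then the persistent sqlite rows
    print cookie.domain, cookie.name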
File diff suppressed because it is too large
@@ -1,105 +0,0 @@
|
||||
from cStringIO import StringIO
|
||||
|
||||
import _response
|
||||
import _urllib2_fork
|
||||
|
||||
|
||||
# GzipConsumer was taken from Fredrik Lundh's effbot.org-0.1-20041009 library
|
||||
class GzipConsumer:
|
||||
|
||||
def __init__(self, consumer):
|
||||
self.__consumer = consumer
|
||||
self.__decoder = None
|
||||
self.__data = ""
|
||||
|
||||
def __getattr__(self, key):
|
||||
return getattr(self.__consumer, key)
|
||||
|
||||
def feed(self, data):
|
||||
if self.__decoder is None:
|
||||
# check if we have a full gzip header
|
||||
data = self.__data + data
|
||||
try:
|
||||
i = 10
|
||||
flag = ord(data[3])
|
||||
if flag & 4: # extra
|
||||
x = ord(data[i]) + 256*ord(data[i+1])
|
||||
i = i + 2 + x
|
||||
if flag & 8: # filename
|
||||
while ord(data[i]):
|
||||
i = i + 1
|
||||
i = i + 1
|
||||
if flag & 16: # comment
|
||||
while ord(data[i]):
|
||||
i = i + 1
|
||||
i = i + 1
|
||||
if flag & 2: # crc
|
||||
i = i + 2
|
||||
if len(data) < i:
|
||||
raise IndexError("not enough data")
|
||||
if data[:3] != "\x1f\x8b\x08":
|
||||
raise IOError("invalid gzip data")
|
||||
data = data[i:]
|
||||
except IndexError:
|
||||
self.__data = data
|
||||
return # need more data
|
||||
import zlib
|
||||
self.__data = ""
|
||||
self.__decoder = zlib.decompressobj(-zlib.MAX_WBITS)
|
||||
data = self.__decoder.decompress(data)
|
||||
if data:
|
||||
self.__consumer.feed(data)
|
||||
|
||||
def close(self):
|
||||
if self.__decoder:
|
||||
data = self.__decoder.flush()
|
||||
if data:
|
||||
self.__consumer.feed(data)
|
||||
self.__consumer.close()
|
||||
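A small self-contained sketch of the consumer protocol GzipConsumer expects; the Collector class is illustrative only, and the import path assumes an upstream mechanize layout for the module being deleted here.

from cStringIO import StringIO
import gzip

from mechanize._gzip import GzipConsumer

class Collector:                          # anything with feed()/close() will do
    def __init__(self): self.chunks = []
    def feed(self, data): self.chunks.append(data)
    def close(self): pass

buf = StringIO()
gz = gzip.GzipFile(fileobj=buf, mode="wb")
gz.write("hello world")
gz.close()

sink = Collector()
consumer = GzipConsumer(sink)
consumer.feed(buf.getvalue())             # strips the gzip header, inflates the rest
consumer.close()
print "".join(sink.chunks)                # -> hello world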
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
|
||||
# the rest of this module is John Lee's stupid code, not
|
||||
# Fredrik's nice code :-)
|
||||
|
||||
class stupid_gzip_consumer:
|
||||
def __init__(self): self.data = []
|
||||
def feed(self, data): self.data.append(data)
|
||||
|
||||
class stupid_gzip_wrapper(_response.closeable_response):
|
||||
def __init__(self, response):
|
||||
self._response = response
|
||||
|
||||
c = stupid_gzip_consumer()
|
||||
gzc = GzipConsumer(c)
|
||||
gzc.feed(response.read())
|
||||
self.__data = StringIO("".join(c.data))
|
||||
|
||||
def read(self, size=-1):
|
||||
return self.__data.read(size)
|
||||
def readline(self, size=-1):
|
||||
return self.__data.readline(size)
|
||||
def readlines(self, sizehint=-1):
|
||||
return self.__data.readlines(sizehint)
|
||||
|
||||
def __getattr__(self, name):
|
||||
# delegate unknown methods/attributes
|
||||
return getattr(self._response, name)
|
||||
|
||||
class HTTPGzipProcessor(_urllib2_fork.BaseHandler):
|
||||
handler_order = 200 # response processing before HTTPEquivProcessor
|
||||
|
||||
def http_request(self, request):
|
||||
request.add_header("Accept-Encoding", "gzip")
|
||||
return request
|
||||
|
||||
def http_response(self, request, response):
|
||||
# post-process response
|
||||
enc_hdrs = response.info().getheaders("Content-encoding")
|
||||
for enc_hdr in enc_hdrs:
|
||||
if ("gzip" in enc_hdr) or ("compress" in enc_hdr):
|
||||
return stupid_gzip_wrapper(response)
|
||||
return response
|
||||
|
||||
https_response = http_response
|
||||
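At the Browser level, the handler in this deleted module is what the (experimental) gzip switch installs; a sketch using the stock mechanize API.

import mechanize

br = mechanize.Browser()
br.set_handle_gzip(True)       # installs HTTPGzipProcessor; marked experimental upstream
response = br.open("http://example.com/")
print response.read()[:200]    # body arrives already decompressed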
@@ -1,241 +0,0 @@
|
||||
"""Utility functions for HTTP header value parsing and construction.
|
||||
|
||||
Copyright 1997-1998, Gisle Aas
|
||||
Copyright 2002-2006, John J. Lee
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it
|
||||
under the terms of the BSD or ZPL 2.1 licenses (see the file
|
||||
COPYING.txt included with the distribution).
|
||||
|
||||
"""
|
||||
|
||||
import os, re
|
||||
from types import StringType
|
||||
from types import UnicodeType
|
||||
STRING_TYPES = StringType, UnicodeType
|
||||
|
||||
from _util import http2time
|
||||
import _rfc3986
|
||||
|
||||
|
||||
def is_html_file_extension(url, allow_xhtml):
|
||||
ext = os.path.splitext(_rfc3986.urlsplit(url)[2])[1]
|
||||
html_exts = [".htm", ".html"]
|
||||
if allow_xhtml:
|
||||
html_exts += [".xhtml"]
|
||||
return ext in html_exts
|
||||
|
||||
|
||||
def is_html(ct_headers, url, allow_xhtml=False):
|
||||
"""
|
||||
ct_headers: Sequence of Content-Type headers
|
||||
url: Response URL
|
||||
|
||||
"""
|
||||
if not ct_headers:
|
||||
return is_html_file_extension(url, allow_xhtml)
|
||||
headers = split_header_words(ct_headers)
|
||||
if len(headers) < 1:
|
||||
return is_html_file_extension(url, allow_xhtml)
|
||||
first_header = headers[0]
|
||||
first_parameter = first_header[0]
|
||||
ct = first_parameter[0]
|
||||
html_types = ["text/html"]
|
||||
if allow_xhtml:
|
||||
html_types += [
|
||||
"text/xhtml", "text/xml",
|
||||
"application/xml", "application/xhtml+xml",
|
||||
]
|
||||
return ct in html_types
|
||||
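A quick sketch of what the two helpers above report; the import path assumes an upstream mechanize install of this (deleted) module.

from mechanize._headersutil import is_html

print is_html(['text/html; charset="iso-8859-1"'], "http://example.com/x.cgi")  # True
print is_html([], "http://example.com/page.html")                               # True, extension fallback
print is_html([], "http://example.com/report.pdf")                              # False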
|
||||
|
||||
def unmatched(match):
|
||||
"""Return unmatched part of re.Match object."""
|
||||
start, end = match.span(0)
|
||||
return match.string[:start]+match.string[end:]
|
||||
|
||||
token_re = re.compile(r"^\s*([^=\s;,]+)")
|
||||
quoted_value_re = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
|
||||
value_re = re.compile(r"^\s*=\s*([^\s;,]*)")
|
||||
escape_re = re.compile(r"\\(.)")
|
||||
def split_header_words(header_values):
|
||||
r"""Parse header values into a list of lists containing key,value pairs.
|
||||
|
||||
The function knows how to deal with ",", ";" and "=" as well as quoted
|
||||
values after "=". A list of space separated tokens are parsed as if they
|
||||
were separated by ";".
|
||||
|
||||
If the header_values passed as argument contains multiple values, then they
|
||||
are treated as if they were a single value separated by comma ",".
|
||||
|
||||
This means that this function is useful for parsing header fields that
|
||||
follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
|
||||
the requirement for tokens).
|
||||
|
||||
headers = #header
|
||||
header = (token | parameter) *( [";"] (token | parameter))
|
||||
|
||||
token = 1*<any CHAR except CTLs or separators>
|
||||
separators = "(" | ")" | "<" | ">" | "@"
|
||||
| "," | ";" | ":" | "\" | <">
|
||||
| "/" | "[" | "]" | "?" | "="
|
||||
| "{" | "}" | SP | HT
|
||||
|
||||
quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
|
||||
qdtext = <any TEXT except <">>
|
||||
quoted-pair = "\" CHAR
|
||||
|
||||
parameter = attribute "=" value
|
||||
attribute = token
|
||||
value = token | quoted-string
|
||||
|
||||
Each header is represented by a list of key/value pairs. The value for a
|
||||
simple token (not part of a parameter) is None. Syntactically incorrect
|
||||
headers will not necessarily be parsed as you would want.
|
||||
|
||||
This is easier to describe with some examples:
|
||||
|
||||
>>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
|
||||
[[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
|
||||
>>> split_header_words(['text/html; charset="iso-8859-1"'])
|
||||
[[('text/html', None), ('charset', 'iso-8859-1')]]
|
||||
>>> split_header_words([r'Basic realm="\"foo\bar\""'])
|
||||
[[('Basic', None), ('realm', '"foobar"')]]
|
||||
|
||||
"""
|
||||
assert type(header_values) not in STRING_TYPES
|
||||
result = []
|
||||
for text in header_values:
|
||||
orig_text = text
|
||||
pairs = []
|
||||
while text:
|
||||
m = token_re.search(text)
|
||||
if m:
|
||||
text = unmatched(m)
|
||||
name = m.group(1)
|
||||
m = quoted_value_re.search(text)
|
||||
if m: # quoted value
|
||||
text = unmatched(m)
|
||||
value = m.group(1)
|
||||
value = escape_re.sub(r"\1", value)
|
||||
else:
|
||||
m = value_re.search(text)
|
||||
if m: # unquoted value
|
||||
text = unmatched(m)
|
||||
value = m.group(1)
|
||||
value = value.rstrip()
|
||||
else:
|
||||
# no value, a lone token
|
||||
value = None
|
||||
pairs.append((name, value))
|
||||
elif text.lstrip().startswith(","):
|
||||
# concatenated headers, as per RFC 2616 section 4.2
|
||||
text = text.lstrip()[1:]
|
||||
if pairs: result.append(pairs)
|
||||
pairs = []
|
||||
else:
|
||||
# skip junk
|
||||
non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text)
|
||||
assert nr_junk_chars > 0, (
|
||||
"split_header_words bug: '%s', '%s', %s" %
|
||||
(orig_text, text, pairs))
|
||||
text = non_junk
|
||||
if pairs: result.append(pairs)
|
||||
return result
|
||||
|
||||
join_escape_re = re.compile(r"([\"\\])")
|
||||
def join_header_words(lists):
|
||||
"""Do the inverse of the conversion done by split_header_words.
|
||||
|
||||
Takes a list of lists of (key, value) pairs and produces a single header
|
||||
value. Attribute values are quoted if needed.
|
||||
|
||||
>>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
|
||||
'text/plain; charset="iso-8859/1"'
|
||||
>>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
|
||||
'text/plain, charset="iso-8859/1"'
|
||||
|
||||
"""
|
||||
headers = []
|
||||
for pairs in lists:
|
||||
attr = []
|
||||
for k, v in pairs:
|
||||
if v is not None:
|
||||
if not re.search(r"^\w+$", v):
|
||||
v = join_escape_re.sub(r"\\\1", v) # escape " and \
|
||||
v = '"%s"' % v
|
||||
if k is None: # Netscape cookies may have no name
|
||||
k = v
|
||||
else:
|
||||
k = "%s=%s" % (k, v)
|
||||
attr.append(k)
|
||||
if attr: headers.append("; ".join(attr))
|
||||
return ", ".join(headers)
|
||||
|
||||
def strip_quotes(text):
|
||||
if text.startswith('"'):
|
||||
text = text[1:]
|
||||
if text.endswith('"'):
|
||||
text = text[:-1]
|
||||
return text
|
||||
|
||||
def parse_ns_headers(ns_headers):
|
||||
"""Ad-hoc parser for Netscape protocol cookie-attributes.
|
||||
|
||||
The old Netscape cookie format for Set-Cookie can for instance contain
|
||||
an unquoted "," in the expires field, so we have to use this ad-hoc
|
||||
parser instead of split_header_words.
|
||||
|
||||
XXX This may not make the best possible effort to parse all the crap
|
||||
that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient
|
||||
parser is probably better, so could do worse than following that if
|
||||
this ever gives any trouble.
|
||||
|
||||
Currently, this is also used for parsing RFC 2109 cookies.
|
||||
|
||||
"""
|
||||
known_attrs = ("expires", "domain", "path", "secure",
|
||||
# RFC 2109 attrs (may turn up in Netscape cookies, too)
|
||||
"version", "port", "max-age")
|
||||
|
||||
result = []
|
||||
for ns_header in ns_headers:
|
||||
pairs = []
|
||||
version_set = False
|
||||
params = re.split(r";\s*", ns_header)
|
||||
for ii in range(len(params)):
|
||||
param = params[ii]
|
||||
param = param.rstrip()
|
||||
if param == "": continue
|
||||
if "=" not in param:
|
||||
k, v = param, None
|
||||
else:
|
||||
k, v = re.split(r"\s*=\s*", param, 1)
|
||||
k = k.lstrip()
|
||||
if ii != 0:
|
||||
lc = k.lower()
|
||||
if lc in known_attrs:
|
||||
k = lc
|
||||
if k == "version":
|
||||
# This is an RFC 2109 cookie.
|
||||
v = strip_quotes(v)
|
||||
version_set = True
|
||||
if k == "expires":
|
||||
# convert expires date to seconds since epoch
|
||||
v = http2time(strip_quotes(v)) # None if invalid
|
||||
pairs.append((k, v))
|
||||
|
||||
if pairs:
|
||||
if not version_set:
|
||||
pairs.append(("version", "0"))
|
||||
result.append(pairs)
|
||||
|
||||
return result
|
||||
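For illustration, the kind of structure the parser above produces for a typical Netscape-style Set-Cookie value (import path assumed as for the other helpers in this module).

from mechanize._headersutil import parse_ns_headers

hdr = "SID=abc123; Path=/; expires=Wed, 09 Jun 2021 10:18:14 GMT; secure"
print parse_ns_headers([hdr])
# -> [[('SID', 'abc123'), ('path', '/'),
#      ('expires', <seconds since epoch>), ('secure', None), ('version', '0')]]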
|
||||
|
||||
def _test():
|
||||
import doctest, _headersutil
|
||||
return doctest.testmod(_headersutil)
|
||||
|
||||
if __name__ == "__main__":
|
||||
_test()
|
||||
@@ -1,629 +0,0 @@
|
||||
"""HTML handling.
|
||||
|
||||
Copyright 2003-2006 John J. Lee <jjl@pobox.com>
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it under
|
||||
the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
|
||||
included with the distribution).
|
||||
|
||||
"""
|
||||
|
||||
import codecs
|
||||
import copy
|
||||
import htmlentitydefs
|
||||
import re
|
||||
|
||||
import _sgmllib_copy as sgmllib
|
||||
|
||||
import _beautifulsoup
|
||||
import _form
|
||||
from _headersutil import split_header_words, is_html as _is_html
|
||||
import _request
|
||||
import _rfc3986
|
||||
|
||||
DEFAULT_ENCODING = "latin-1"
|
||||
|
||||
COMPRESS_RE = re.compile(r"\s+")
|
||||
|
||||
|
||||
class CachingGeneratorFunction(object):
|
||||
"""Caching wrapper around a no-arguments iterable."""
|
||||
|
||||
def __init__(self, iterable):
|
||||
self._cache = []
|
||||
# wrap iterable to make it non-restartable (otherwise, repeated
|
||||
# __call__ would give incorrect results)
|
||||
self._iterator = iter(iterable)
|
||||
|
||||
def __call__(self):
|
||||
cache = self._cache
|
||||
for item in cache:
|
||||
yield item
|
||||
for item in self._iterator:
|
||||
cache.append(item)
|
||||
yield item
|
||||
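A minimal sketch of the caching behaviour, which is what lets Browser.forms() and Browser.links() be called repeatedly without re-parsing the response (import path assumes an upstream mechanize layout).

from mechanize._html import CachingGeneratorFunction

def gen():
    for i in range(3):
        yield i

cached = CachingGeneratorFunction(gen())
print list(cached())   # [0, 1, 2]  (pulled from the wrapped iterator)
print list(cached())   # [0, 1, 2]  (replayed from the cache)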
|
||||
|
||||
class EncodingFinder:
|
||||
def __init__(self, default_encoding):
|
||||
self._default_encoding = default_encoding
|
||||
def encoding(self, response):
|
||||
# HTTPEquivProcessor may be in use, so both HTTP and HTTP-EQUIV
|
||||
# headers may be in the response. HTTP-EQUIV headers come last,
|
||||
# so try in order from first to last.
|
||||
for ct in response.info().getheaders("content-type"):
|
||||
for k, v in split_header_words([ct])[0]:
|
||||
if k == "charset":
|
||||
encoding = v
|
||||
try:
|
||||
codecs.lookup(v)
|
||||
except LookupError:
|
||||
continue
|
||||
else:
|
||||
return encoding
|
||||
return self._default_encoding
|
||||
|
||||
|
||||
class ResponseTypeFinder:
|
||||
def __init__(self, allow_xhtml):
|
||||
self._allow_xhtml = allow_xhtml
|
||||
def is_html(self, response, encoding):
|
||||
ct_hdrs = response.info().getheaders("content-type")
|
||||
url = response.geturl()
|
||||
# XXX encoding
|
||||
return _is_html(ct_hdrs, url, self._allow_xhtml)
|
||||
|
||||
|
||||
class Args(object):
|
||||
|
||||
# idea for this argument-processing trick is from Peter Otten
|
||||
|
||||
def __init__(self, args_map):
|
||||
self.__dict__["dictionary"] = dict(args_map)
|
||||
|
||||
def __getattr__(self, key):
|
||||
try:
|
||||
return self.dictionary[key]
|
||||
except KeyError:
|
||||
return getattr(self.__class__, key)
|
||||
|
||||
def __setattr__(self, key, value):
|
||||
if key == "dictionary":
|
||||
raise AttributeError()
|
||||
self.dictionary[key] = value
|
||||
|
||||
|
||||
def form_parser_args(
|
||||
select_default=False,
|
||||
form_parser_class=None,
|
||||
request_class=None,
|
||||
backwards_compat=False,
|
||||
):
|
||||
return Args(locals())
|
||||
|
||||
|
||||
class Link:
|
||||
def __init__(self, base_url, url, text, tag, attrs):
|
||||
assert None not in [url, tag, attrs]
|
||||
self.base_url = base_url
|
||||
self.absolute_url = _rfc3986.urljoin(base_url, url)
|
||||
self.url, self.text, self.tag, self.attrs = url, text, tag, attrs
|
||||
def __cmp__(self, other):
|
||||
try:
|
||||
for name in "url", "text", "tag", "attrs":
|
||||
if getattr(self, name) != getattr(other, name):
|
||||
return -1
|
||||
except AttributeError:
|
||||
return -1
|
||||
return 0
|
||||
def __repr__(self):
|
||||
return "Link(base_url=%r, url=%r, text=%r, tag=%r, attrs=%r)" % (
|
||||
self.base_url, self.url, self.text, self.tag, self.attrs)
|
||||
|
||||
|
||||
class LinksFactory:
|
||||
|
||||
def __init__(self,
|
||||
link_parser_class=None,
|
||||
link_class=Link,
|
||||
urltags=None,
|
||||
):
|
||||
import _pullparser
|
||||
if link_parser_class is None:
|
||||
link_parser_class = _pullparser.TolerantPullParser
|
||||
self.link_parser_class = link_parser_class
|
||||
self.link_class = link_class
|
||||
if urltags is None:
|
||||
urltags = {
|
||||
"a": "href",
|
||||
"area": "href",
|
||||
"frame": "src",
|
||||
"iframe": "src",
|
||||
}
|
||||
self.urltags = urltags
|
||||
self._response = None
|
||||
self._encoding = None
|
||||
|
||||
def set_response(self, response, base_url, encoding):
|
||||
self._response = response
|
||||
self._encoding = encoding
|
||||
self._base_url = base_url
|
||||
|
||||
def links(self):
|
||||
"""Return an iterator that provides links of the document."""
|
||||
response = self._response
|
||||
encoding = self._encoding
|
||||
base_url = self._base_url
|
||||
p = self.link_parser_class(response, encoding=encoding)
|
||||
|
||||
try:
|
||||
for token in p.tags(*(self.urltags.keys()+["base"])):
|
||||
if token.type == "endtag":
|
||||
continue
|
||||
if token.data == "base":
|
||||
base_href = dict(token.attrs).get("href")
|
||||
if base_href is not None:
|
||||
base_url = base_href
|
||||
continue
|
||||
attrs = dict(token.attrs)
|
||||
tag = token.data
|
||||
text = None
|
||||
# XXX use attr_encoding for ref'd doc if that doc does not
|
||||
# provide one by other means
|
||||
#attr_encoding = attrs.get("charset")
|
||||
url = attrs.get(self.urltags[tag]) # XXX is "" a valid URL?
|
||||
if not url:
|
||||
# Probably an <A NAME="blah"> link or <AREA NOHREF...>.
|
||||
# For our purposes a link is something with a URL, so
|
||||
# ignore this.
|
||||
continue
|
||||
|
||||
url = _rfc3986.clean_url(url, encoding)
|
||||
if tag == "a":
|
||||
if token.type != "startendtag":
|
||||
# hmm, this'd break if end tag is missing
|
||||
text = p.get_compressed_text(("endtag", tag))
|
||||
# but this doesn't work for e.g.
|
||||
# <a href="blah"><b>Andy</b></a>
|
||||
#text = p.get_compressed_text()
|
||||
|
||||
yield Link(base_url, url, text, tag, token.attrs)
|
||||
except sgmllib.SGMLParseError, exc:
|
||||
raise _form.ParseError(exc)
|
||||
|
||||
class FormsFactory:
|
||||
|
||||
"""Makes a sequence of objects satisfying HTMLForm interface.
|
||||
|
||||
After calling .forms(), the .global_form attribute is a form object
|
||||
containing all controls not a descendant of any FORM element.
|
||||
|
||||
For constructor argument docs, see ParseResponse argument docs.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
select_default=False,
|
||||
form_parser_class=None,
|
||||
request_class=None,
|
||||
backwards_compat=False,
|
||||
):
|
||||
self.select_default = select_default
|
||||
if form_parser_class is None:
|
||||
form_parser_class = _form.FormParser
|
||||
self.form_parser_class = form_parser_class
|
||||
if request_class is None:
|
||||
request_class = _request.Request
|
||||
self.request_class = request_class
|
||||
self.backwards_compat = backwards_compat
|
||||
self._response = None
|
||||
self.encoding = None
|
||||
self.global_form = None
|
||||
|
||||
def set_response(self, response, encoding):
|
||||
self._response = response
|
||||
self.encoding = encoding
|
||||
self.global_form = None
|
||||
|
||||
def forms(self):
|
||||
encoding = self.encoding
|
||||
forms = _form.ParseResponseEx(
|
||||
self._response,
|
||||
select_default=self.select_default,
|
||||
form_parser_class=self.form_parser_class,
|
||||
request_class=self.request_class,
|
||||
encoding=encoding,
|
||||
_urljoin=_rfc3986.urljoin,
|
||||
_urlparse=_rfc3986.urlsplit,
|
||||
_urlunparse=_rfc3986.urlunsplit,
|
||||
)
|
||||
self.global_form = forms[0]
|
||||
return forms[1:]
|
||||
|
||||
class TitleFactory:
|
||||
def __init__(self):
|
||||
self._response = self._encoding = None
|
||||
|
||||
def set_response(self, response, encoding):
|
||||
self._response = response
|
||||
self._encoding = encoding
|
||||
|
||||
def _get_title_text(self, parser):
|
||||
import _pullparser
|
||||
text = []
|
||||
tok = None
|
||||
while 1:
|
||||
try:
|
||||
tok = parser.get_token()
|
||||
except _pullparser.NoMoreTokensError:
|
||||
break
|
||||
if tok.type == "data":
|
||||
text.append(str(tok))
|
||||
elif tok.type == "entityref":
|
||||
t = unescape("&%s;" % tok.data,
|
||||
parser._entitydefs, parser.encoding)
|
||||
text.append(t)
|
||||
elif tok.type == "charref":
|
||||
t = unescape_charref(tok.data, parser.encoding)
|
||||
text.append(t)
|
||||
elif tok.type in ["starttag", "endtag", "startendtag"]:
|
||||
tag_name = tok.data
|
||||
if tok.type == "endtag" and tag_name == "title":
|
||||
break
|
||||
text.append(str(tok))
|
||||
return COMPRESS_RE.sub(" ", "".join(text).strip())
|
||||
|
||||
def title(self):
|
||||
import _pullparser
|
||||
p = _pullparser.TolerantPullParser(
|
||||
self._response, encoding=self._encoding)
|
||||
try:
|
||||
try:
|
||||
p.get_tag("title")
|
||||
except _pullparser.NoMoreTokensError:
|
||||
return None
|
||||
else:
|
||||
return self._get_title_text(p)
|
||||
except sgmllib.SGMLParseError, exc:
|
||||
raise _form.ParseError(exc)
|
||||
|
||||
|
||||
def unescape(data, entities, encoding):
|
||||
if data is None or "&" not in data:
|
||||
return data
|
||||
|
||||
def replace_entities(match):
|
||||
ent = match.group()
|
||||
if ent[1] == "#":
|
||||
return unescape_charref(ent[2:-1], encoding)
|
||||
|
||||
repl = entities.get(ent[1:-1])
|
||||
if repl is not None:
|
||||
repl = unichr(repl)
|
||||
if type(repl) != type(""):
|
||||
try:
|
||||
repl = repl.encode(encoding)
|
||||
except UnicodeError:
|
||||
repl = ent
|
||||
else:
|
||||
repl = ent
|
||||
return repl
|
||||
|
||||
return re.sub(r"&#?[A-Za-z0-9]+?;", replace_entities, data)
|
||||
|
||||
def unescape_charref(data, encoding):
|
||||
name, base = data, 10
|
||||
if name.startswith("x"):
|
||||
name, base= name[1:], 16
|
||||
uc = unichr(int(name, base))
|
||||
if encoding is None:
|
||||
return uc
|
||||
else:
|
||||
try:
|
||||
repl = uc.encode(encoding)
|
||||
except UnicodeError:
|
||||
repl = "&#%s;" % data
|
||||
return repl
|
||||
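A short sketch of the two entity helpers above; the entity table is the same htmlentitydefs mapping that MechanizeBs below uses, and the import path assumes an upstream mechanize layout.

import htmlentitydefs
from mechanize._html import unescape, unescape_charref

print unescape("caf&eacute; &#38; co", htmlentitydefs.name2codepoint, "utf-8")
# -> UTF-8 bytes for 'café & co'
print unescape_charref("xA9", "latin-1")
# -> '\xa9', the copyright sign (hex character reference)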
|
||||
|
||||
class MechanizeBs(_beautifulsoup.BeautifulSoup):
|
||||
_entitydefs = htmlentitydefs.name2codepoint
|
||||
# don't want the magic Microsoft-char workaround
|
||||
PARSER_MASSAGE = [(re.compile('(<[^<>]*)/>'),
|
||||
lambda(x):x.group(1) + ' />'),
|
||||
(re.compile('<!\s+([^<>]*)>'),
|
||||
lambda(x):'<!' + x.group(1) + '>')
|
||||
]
|
||||
|
||||
def __init__(self, encoding, text=None, avoidParserProblems=True,
|
||||
initialTextIsEverything=True):
|
||||
self._encoding = encoding
|
||||
_beautifulsoup.BeautifulSoup.__init__(
|
||||
self, text, avoidParserProblems, initialTextIsEverything)
|
||||
|
||||
def handle_charref(self, ref):
|
||||
t = unescape("&#%s;"%ref, self._entitydefs, self._encoding)
|
||||
self.handle_data(t)
|
||||
def handle_entityref(self, ref):
|
||||
t = unescape("&%s;"%ref, self._entitydefs, self._encoding)
|
||||
self.handle_data(t)
|
||||
def unescape_attrs(self, attrs):
|
||||
escaped_attrs = []
|
||||
for key, val in attrs:
|
||||
val = unescape(val, self._entitydefs, self._encoding)
|
||||
escaped_attrs.append((key, val))
|
||||
return escaped_attrs
|
||||
|
||||
class RobustLinksFactory:
|
||||
|
||||
compress_re = COMPRESS_RE
|
||||
|
||||
def __init__(self,
|
||||
link_parser_class=None,
|
||||
link_class=Link,
|
||||
urltags=None,
|
||||
):
|
||||
if link_parser_class is None:
|
||||
link_parser_class = MechanizeBs
|
||||
self.link_parser_class = link_parser_class
|
||||
self.link_class = link_class
|
||||
if urltags is None:
|
||||
urltags = {
|
||||
"a": "href",
|
||||
"area": "href",
|
||||
"frame": "src",
|
||||
"iframe": "src",
|
||||
}
|
||||
self.urltags = urltags
|
||||
self._bs = None
|
||||
self._encoding = None
|
||||
self._base_url = None
|
||||
|
||||
def set_soup(self, soup, base_url, encoding):
|
||||
self._bs = soup
|
||||
self._base_url = base_url
|
||||
self._encoding = encoding
|
||||
|
||||
def links(self):
|
||||
bs = self._bs
|
||||
base_url = self._base_url
|
||||
encoding = self._encoding
|
||||
for ch in bs.recursiveChildGenerator():
|
||||
if (isinstance(ch, _beautifulsoup.Tag) and
|
||||
ch.name in self.urltags.keys()+["base"]):
|
||||
link = ch
|
||||
attrs = bs.unescape_attrs(link.attrs)
|
||||
attrs_dict = dict(attrs)
|
||||
if link.name == "base":
|
||||
base_href = attrs_dict.get("href")
|
||||
if base_href is not None:
|
||||
base_url = base_href
|
||||
continue
|
||||
url_attr = self.urltags[link.name]
|
||||
url = attrs_dict.get(url_attr)
|
||||
if not url:
|
||||
continue
|
||||
url = _rfc3986.clean_url(url, encoding)
|
||||
text = link.fetchText(lambda t: True)
|
||||
if not text:
|
||||
# follow _pullparser's weird behaviour rigidly
|
||||
if link.name == "a":
|
||||
text = ""
|
||||
else:
|
||||
text = None
|
||||
else:
|
||||
text = self.compress_re.sub(" ", " ".join(text).strip())
|
||||
yield Link(base_url, url, text, link.name, attrs)
|
||||
|
||||
|
||||
class RobustFormsFactory(FormsFactory):
|
||||
def __init__(self, *args, **kwds):
|
||||
args = form_parser_args(*args, **kwds)
|
||||
if args.form_parser_class is None:
|
||||
args.form_parser_class = _form.RobustFormParser
|
||||
FormsFactory.__init__(self, **args.dictionary)
|
||||
|
||||
def set_response(self, response, encoding):
|
||||
self._response = response
|
||||
self.encoding = encoding
|
||||
|
||||
|
||||
class RobustTitleFactory:
|
||||
def __init__(self):
|
||||
self._bs = self._encoding = None
|
||||
|
||||
def set_soup(self, soup, encoding):
|
||||
self._bs = soup
|
||||
self._encoding = encoding
|
||||
|
||||
def title(self):
|
||||
title = self._bs.first("title")
|
||||
if title == _beautifulsoup.Null:
|
||||
return None
|
||||
else:
|
||||
inner_html = "".join([str(node) for node in title.contents])
|
||||
return COMPRESS_RE.sub(" ", inner_html.strip())
|
||||
|
||||
|
||||
class Factory:
|
||||
"""Factory for forms, links, etc.
|
||||
|
||||
This interface may expand in future.
|
||||
|
||||
Public methods:
|
||||
|
||||
set_request_class(request_class)
|
||||
set_response(response)
|
||||
forms()
|
||||
links()
|
||||
|
||||
Public attributes:
|
||||
|
||||
Note that accessing these attributes may raise ParseError.
|
||||
|
||||
encoding: string specifying the encoding of response if it contains a text
|
||||
document (this value is left unspecified for documents that do not have
|
||||
an encoding, e.g. an image file)
|
||||
is_html: true if response contains an HTML document (XHTML may be
|
||||
regarded as HTML too)
|
||||
title: page title, or None if no title or not HTML
|
||||
global_form: form object containing all controls that are not descendants
|
||||
of any FORM element, or None if the forms_factory does not support
|
||||
supplying a global form
|
||||
|
||||
"""
|
||||
|
||||
LAZY_ATTRS = ["encoding", "is_html", "title", "global_form"]
|
||||
|
||||
def __init__(self, forms_factory, links_factory, title_factory,
|
||||
encoding_finder=EncodingFinder(DEFAULT_ENCODING),
|
||||
response_type_finder=ResponseTypeFinder(allow_xhtml=False),
|
||||
):
|
||||
"""
|
||||
|
||||
Pass keyword arguments only.
|
||||
|
||||
default_encoding: character encoding to use if encoding cannot be
|
||||
determined (or guessed) from the response. You should turn on
|
||||
HTTP-EQUIV handling if you want the best chance of getting this right
|
||||
without resorting to this default. The default value of this
|
||||
parameter (currently latin-1) may change in future.
|
||||
|
||||
"""
|
||||
self._forms_factory = forms_factory
|
||||
self._links_factory = links_factory
|
||||
self._title_factory = title_factory
|
||||
self._encoding_finder = encoding_finder
|
||||
self._response_type_finder = response_type_finder
|
||||
|
||||
self.set_response(None)
|
||||
|
||||
def set_request_class(self, request_class):
|
||||
"""Set request class (mechanize.Request by default).
|
||||
|
||||
HTMLForm instances returned by .forms() will return instances of this
|
||||
class when .click()ed.
|
||||
|
||||
"""
|
||||
self._forms_factory.request_class = request_class
|
||||
|
||||
def set_response(self, response):
|
||||
"""Set response.
|
||||
|
||||
The response must either be None or implement the same interface as
|
||||
objects returned by mechanize.urlopen().
|
||||
|
||||
"""
|
||||
self._response = response
|
||||
self._forms_genf = self._links_genf = None
|
||||
self._get_title = None
|
||||
for name in self.LAZY_ATTRS:
|
||||
try:
|
||||
delattr(self, name)
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
def __getattr__(self, name):
|
||||
if name not in self.LAZY_ATTRS:
|
||||
return getattr(self.__class__, name)
|
||||
|
||||
if name == "encoding":
|
||||
self.encoding = self._encoding_finder.encoding(
|
||||
copy.copy(self._response))
|
||||
return self.encoding
|
||||
elif name == "is_html":
|
||||
self.is_html = self._response_type_finder.is_html(
|
||||
copy.copy(self._response), self.encoding)
|
||||
return self.is_html
|
||||
elif name == "title":
|
||||
if self.is_html:
|
||||
self.title = self._title_factory.title()
|
||||
else:
|
||||
self.title = None
|
||||
return self.title
|
||||
elif name == "global_form":
|
||||
self.forms()
|
||||
return self.global_form
|
||||
|
||||
def forms(self):
|
||||
"""Return iterable over HTMLForm-like objects.
|
||||
|
||||
Raises mechanize.ParseError on failure.
|
||||
"""
|
||||
# this implementation sets .global_form as a side-effect, for benefit
|
||||
# of __getattr__ impl
|
||||
if self._forms_genf is None:
|
||||
try:
|
||||
self._forms_genf = CachingGeneratorFunction(
|
||||
self._forms_factory.forms())
|
||||
except: # XXXX define exception!
|
||||
self.set_response(self._response)
|
||||
raise
|
||||
self.global_form = getattr(
|
||||
self._forms_factory, "global_form", None)
|
||||
return self._forms_genf()
|
||||
|
||||
def links(self):
|
||||
"""Return iterable over mechanize.Link-like objects.
|
||||
|
||||
Raises mechanize.ParseError on failure.
|
||||
"""
|
||||
if self._links_genf is None:
|
||||
try:
|
||||
self._links_genf = CachingGeneratorFunction(
|
||||
self._links_factory.links())
|
||||
except: # XXXX define exception!
|
||||
self.set_response(self._response)
|
||||
raise
|
||||
return self._links_genf()
|
||||
|
||||
class DefaultFactory(Factory):
|
||||
"""Based on sgmllib."""
|
||||
def __init__(self, i_want_broken_xhtml_support=False):
|
||||
Factory.__init__(
|
||||
self,
|
||||
forms_factory=FormsFactory(),
|
||||
links_factory=LinksFactory(),
|
||||
title_factory=TitleFactory(),
|
||||
response_type_finder=ResponseTypeFinder(
|
||||
allow_xhtml=i_want_broken_xhtml_support),
|
||||
)
|
||||
|
||||
def set_response(self, response):
|
||||
Factory.set_response(self, response)
|
||||
if response is not None:
|
||||
self._forms_factory.set_response(
|
||||
copy.copy(response), self.encoding)
|
||||
self._links_factory.set_response(
|
||||
copy.copy(response), response.geturl(), self.encoding)
|
||||
self._title_factory.set_response(
|
||||
copy.copy(response), self.encoding)
|
||||
|
||||
class RobustFactory(Factory):
|
||||
"""Based on BeautifulSoup, hopefully a bit more robust to bad HTML than is
|
||||
DefaultFactory.
|
||||
|
||||
"""
|
||||
def __init__(self, i_want_broken_xhtml_support=False,
|
||||
soup_class=None):
|
||||
Factory.__init__(
|
||||
self,
|
||||
forms_factory=RobustFormsFactory(),
|
||||
links_factory=RobustLinksFactory(),
|
||||
title_factory=RobustTitleFactory(),
|
||||
response_type_finder=ResponseTypeFinder(
|
||||
allow_xhtml=i_want_broken_xhtml_support),
|
||||
)
|
||||
if soup_class is None:
|
||||
soup_class = MechanizeBs
|
||||
self._soup_class = soup_class
|
||||
|
||||
def set_response(self, response):
|
||||
Factory.set_response(self, response)
|
||||
if response is not None:
|
||||
data = response.read()
|
||||
soup = self._soup_class(self.encoding, data)
|
||||
self._forms_factory.set_response(
|
||||
copy.copy(response), self.encoding)
|
||||
self._links_factory.set_soup(
|
||||
soup, response.geturl(), self.encoding)
|
||||
self._title_factory.set_soup(soup, self.encoding)
|
||||
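How these factories surface in everyday use of the standard mechanize Browser API; RobustFactory is the BeautifulSoup-backed variant defined above.

import mechanize

br = mechanize.Browser(factory=mechanize.RobustFactory())  # tolerant of broken HTML
br.open("http://example.com/")
print br.title()
for link in br.links():
    print link.url, link.text
for form in br.forms():
    print form.action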
@@ -1,447 +0,0 @@
|
||||
"""HTTP related handlers.
|
||||
|
||||
Note that some other HTTP handlers live in more specific modules: _auth.py,
|
||||
_gzip.py, etc.
|
||||
|
||||
|
||||
Copyright 2002-2006 John J Lee <jjl@pobox.com>
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it
|
||||
under the terms of the BSD or ZPL 2.1 licenses (see the file
|
||||
COPYING.txt included with the distribution).
|
||||
|
||||
"""
|
||||
|
||||
import HTMLParser
|
||||
from cStringIO import StringIO
|
||||
import htmlentitydefs
|
||||
import logging
|
||||
import robotparser
|
||||
import socket
|
||||
import time
|
||||
|
||||
import _sgmllib_copy as sgmllib
|
||||
from _urllib2_fork import HTTPError, BaseHandler
|
||||
|
||||
from _headersutil import is_html
|
||||
from _html import unescape, unescape_charref
|
||||
from _request import Request
|
||||
from _response import response_seek_wrapper
|
||||
import _rfc3986
|
||||
import _sockettimeout
|
||||
|
||||
debug = logging.getLogger("mechanize").debug
|
||||
debug_robots = logging.getLogger("mechanize.robots").debug
|
||||
|
||||
# monkeypatch urllib2.HTTPError to show URL
|
||||
## import urllib2
|
||||
## def urllib2_str(self):
|
||||
## return 'HTTP Error %s: %s (%s)' % (
|
||||
## self.code, self.msg, self.geturl())
|
||||
## urllib2.HTTPError.__str__ = urllib2_str
|
||||
|
||||
|
||||
CHUNK = 1024 # size of chunks fed to HTML HEAD parser, in bytes
|
||||
DEFAULT_ENCODING = 'latin-1'
|
||||
|
||||
# XXX would self.reset() work, instead of raising this exception?
|
||||
class EndOfHeadError(Exception): pass
|
||||
class AbstractHeadParser:
|
||||
# only these elements are allowed in or before HEAD of document
|
||||
head_elems = ("html", "head",
|
||||
"title", "base",
|
||||
"script", "style", "meta", "link", "object")
|
||||
_entitydefs = htmlentitydefs.name2codepoint
|
||||
_encoding = DEFAULT_ENCODING
|
||||
|
||||
def __init__(self):
|
||||
self.http_equiv = []
|
||||
|
||||
def start_meta(self, attrs):
|
||||
http_equiv = content = None
|
||||
for key, value in attrs:
|
||||
if key == "http-equiv":
|
||||
http_equiv = self.unescape_attr_if_required(value)
|
||||
elif key == "content":
|
||||
content = self.unescape_attr_if_required(value)
|
||||
if http_equiv is not None and content is not None:
|
||||
self.http_equiv.append((http_equiv, content))
|
||||
|
||||
def end_head(self):
|
||||
raise EndOfHeadError()
|
||||
|
||||
def handle_entityref(self, name):
|
||||
#debug("%s", name)
|
||||
self.handle_data(unescape(
|
||||
'&%s;' % name, self._entitydefs, self._encoding))
|
||||
|
||||
def handle_charref(self, name):
|
||||
#debug("%s", name)
|
||||
self.handle_data(unescape_charref(name, self._encoding))
|
||||
|
||||
def unescape_attr(self, name):
|
||||
#debug("%s", name)
|
||||
return unescape(name, self._entitydefs, self._encoding)
|
||||
|
||||
def unescape_attrs(self, attrs):
|
||||
#debug("%s", attrs)
|
||||
escaped_attrs = {}
|
||||
for key, val in attrs.items():
|
||||
escaped_attrs[key] = self.unescape_attr(val)
|
||||
return escaped_attrs
|
||||
|
||||
def unknown_entityref(self, ref):
|
||||
self.handle_data("&%s;" % ref)
|
||||
|
||||
def unknown_charref(self, ref):
|
||||
self.handle_data("&#%s;" % ref)
|
||||
|
||||
|
||||
class XHTMLCompatibleHeadParser(AbstractHeadParser,
|
||||
HTMLParser.HTMLParser):
|
||||
def __init__(self):
|
||||
HTMLParser.HTMLParser.__init__(self)
|
||||
AbstractHeadParser.__init__(self)
|
||||
|
||||
def handle_starttag(self, tag, attrs):
|
||||
if tag not in self.head_elems:
|
||||
raise EndOfHeadError()
|
||||
try:
|
||||
method = getattr(self, 'start_' + tag)
|
||||
except AttributeError:
|
||||
try:
|
||||
method = getattr(self, 'do_' + tag)
|
||||
except AttributeError:
|
||||
pass # unknown tag
|
||||
else:
|
||||
method(attrs)
|
||||
else:
|
||||
method(attrs)
|
||||
|
||||
def handle_endtag(self, tag):
|
||||
if tag not in self.head_elems:
|
||||
raise EndOfHeadError()
|
||||
try:
|
||||
method = getattr(self, 'end_' + tag)
|
||||
except AttributeError:
|
||||
pass # unknown tag
|
||||
else:
|
||||
method()
|
||||
|
||||
def unescape(self, name):
|
||||
# Use the entitydefs passed into constructor, not
|
||||
# HTMLParser.HTMLParser's entitydefs.
|
||||
return self.unescape_attr(name)
|
||||
|
||||
def unescape_attr_if_required(self, name):
|
||||
return name # HTMLParser.HTMLParser already did it
|
||||
|
||||
class HeadParser(AbstractHeadParser, sgmllib.SGMLParser):
|
||||
|
||||
def _not_called(self):
|
||||
assert False
|
||||
|
||||
def __init__(self):
|
||||
sgmllib.SGMLParser.__init__(self)
|
||||
AbstractHeadParser.__init__(self)
|
||||
|
||||
def handle_starttag(self, tag, method, attrs):
|
||||
if tag not in self.head_elems:
|
||||
raise EndOfHeadError()
|
||||
if tag == "meta":
|
||||
method(attrs)
|
||||
|
||||
def unknown_starttag(self, tag, attrs):
|
||||
self.handle_starttag(tag, self._not_called, attrs)
|
||||
|
||||
def handle_endtag(self, tag, method):
|
||||
if tag in self.head_elems:
|
||||
method()
|
||||
else:
|
||||
raise EndOfHeadError()
|
||||
|
||||
def unescape_attr_if_required(self, name):
|
||||
return self.unescape_attr(name)
|
||||
|
||||
def parse_head(fileobj, parser):
|
||||
"""Return a list of key, value pairs."""
|
||||
while 1:
|
||||
data = fileobj.read(CHUNK)
|
||||
try:
|
||||
parser.feed(data)
|
||||
except EndOfHeadError:
|
||||
break
|
||||
if len(data) != CHUNK:
|
||||
# this should only happen if there is no HTML body, or if
|
||||
# CHUNK is big
|
||||
break
|
||||
return parser.http_equiv
|
||||
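A small sketch of the head parser in isolation; HeadParser and parse_head are the names defined in this module, here imported from an assumed upstream mechanize layout.

from cStringIO import StringIO
from mechanize._http import HeadParser, parse_head

doc = ('<html><head>'
       '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
       '</head><body>ignored</body></html>')
print parse_head(StringIO(doc), HeadParser())
# -> [('Content-Type', 'text/html; charset=utf-8')]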
|
||||
class HTTPEquivProcessor(BaseHandler):
|
||||
"""Append META HTTP-EQUIV headers to regular HTTP headers."""
|
||||
|
||||
handler_order = 300 # before handlers that look at HTTP headers
|
||||
|
||||
def __init__(self, head_parser_class=HeadParser,
|
||||
i_want_broken_xhtml_support=False,
|
||||
):
|
||||
self.head_parser_class = head_parser_class
|
||||
self._allow_xhtml = i_want_broken_xhtml_support
|
||||
|
||||
def http_response(self, request, response):
|
||||
if not hasattr(response, "seek"):
|
||||
response = response_seek_wrapper(response)
|
||||
http_message = response.info()
|
||||
url = response.geturl()
|
||||
ct_hdrs = http_message.getheaders("content-type")
|
||||
if is_html(ct_hdrs, url, self._allow_xhtml):
|
||||
try:
|
||||
try:
|
||||
html_headers = parse_head(response,
|
||||
self.head_parser_class())
|
||||
finally:
|
||||
response.seek(0)
|
||||
except (HTMLParser.HTMLParseError,
|
||||
sgmllib.SGMLParseError):
|
||||
pass
|
||||
else:
|
||||
for hdr, val in html_headers:
|
||||
# add a header
|
||||
http_message.dict[hdr.lower()] = val
|
||||
text = hdr + ": " + val
|
||||
for line in text.split("\n"):
|
||||
http_message.headers.append(line + "\n")
|
||||
return response
|
||||
|
||||
https_response = http_response
|
||||
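At the Browser level this processor sits behind set_handle_equiv(); a sketch with the stock mechanize API.

import mechanize

br = mechanize.Browser()
br.set_handle_equiv(True)          # on by default; installs HTTPEquivProcessor
response = br.open("http://example.com/")
# any <meta http-equiv="..."> values are now visible as ordinary headers
print response.info().getheaders("content-type")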
|
||||
|
||||
class MechanizeRobotFileParser(robotparser.RobotFileParser):
|
||||
|
||||
def __init__(self, url='', opener=None):
|
||||
robotparser.RobotFileParser.__init__(self, url)
|
||||
self._opener = opener
|
||||
self._timeout = _sockettimeout._GLOBAL_DEFAULT_TIMEOUT
|
||||
|
||||
def set_opener(self, opener=None):
|
||||
import _opener
|
||||
if opener is None:
|
||||
opener = _opener.OpenerDirector()
|
||||
self._opener = opener
|
||||
|
||||
def set_timeout(self, timeout):
|
||||
self._timeout = timeout
|
||||
|
||||
def read(self):
|
||||
"""Reads the robots.txt URL and feeds it to the parser."""
|
||||
if self._opener is None:
|
||||
self.set_opener()
|
||||
req = Request(self.url, unverifiable=True, visit=False,
|
||||
timeout=self._timeout)
|
||||
try:
|
||||
f = self._opener.open(req)
|
||||
except HTTPError, f:
|
||||
pass
|
||||
except (IOError, socket.error, OSError), exc:
|
||||
debug_robots("ignoring error opening %r: %s" %
|
||||
(self.url, exc))
|
||||
return
|
||||
lines = []
|
||||
line = f.readline()
|
||||
while line:
|
||||
lines.append(line.strip())
|
||||
line = f.readline()
|
||||
status = f.code
|
||||
if status == 401 or status == 403:
|
||||
self.disallow_all = True
|
||||
debug_robots("disallow all")
|
||||
elif status >= 400:
|
||||
self.allow_all = True
|
||||
debug_robots("allow all")
|
||||
elif status == 200 and lines:
|
||||
debug_robots("parse lines")
|
||||
self.parse(lines)
|
||||
|
||||
class RobotExclusionError(HTTPError):
|
||||
def __init__(self, request, *args):
|
||||
apply(HTTPError.__init__, (self,)+args)
|
||||
self.request = request
|
||||
|
||||
class HTTPRobotRulesProcessor(BaseHandler):
|
||||
# before redirections, after everything else
|
||||
handler_order = 800
|
||||
|
||||
try:
|
||||
from httplib import HTTPMessage
|
||||
except:
|
||||
from mimetools import Message
|
||||
http_response_class = Message
|
||||
else:
|
||||
http_response_class = HTTPMessage
|
||||
|
||||
def __init__(self, rfp_class=MechanizeRobotFileParser):
|
||||
self.rfp_class = rfp_class
|
||||
self.rfp = None
|
||||
self._host = None
|
||||
|
||||
def http_request(self, request):
|
||||
scheme = request.get_type()
|
||||
if scheme not in ["http", "https"]:
|
||||
# robots exclusion only applies to HTTP
|
||||
return request
|
||||
|
||||
if request.get_selector() == "/robots.txt":
|
||||
# /robots.txt is always OK to fetch
|
||||
return request
|
||||
|
||||
host = request.get_host()
|
||||
|
||||
# robots.txt requests don't need to be allowed by robots.txt :-)
|
||||
origin_req = getattr(request, "_origin_req", None)
|
||||
if (origin_req is not None and
|
||||
origin_req.get_selector() == "/robots.txt" and
|
||||
origin_req.get_host() == host
|
||||
):
|
||||
return request
|
||||
|
||||
if host != self._host:
|
||||
self.rfp = self.rfp_class()
|
||||
try:
|
||||
self.rfp.set_opener(self.parent)
|
||||
except AttributeError:
|
||||
debug("%r instance does not support set_opener" %
|
||||
self.rfp.__class__)
|
||||
self.rfp.set_url(scheme+"://"+host+"/robots.txt")
|
||||
self.rfp.set_timeout(request.timeout)
|
||||
self.rfp.read()
|
||||
self._host = host
|
||||
|
||||
ua = request.get_header("User-agent", "")
|
||||
if self.rfp.can_fetch(ua, request.get_full_url()):
|
||||
return request
|
||||
else:
|
||||
# XXX This should really have raised URLError. Too late now...
|
||||
msg = "request disallowed by robots.txt"
|
||||
raise RobotExclusionError(
|
||||
request,
|
||||
request.get_full_url(),
|
||||
403, msg,
|
||||
self.http_response_class(StringIO()), StringIO(msg))
|
||||
|
||||
https_request = http_request
|
||||
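The usual way this handler is seen from the outside, using the standard mechanize API; RobotExclusionError is the class defined above.

import mechanize

br = mechanize.Browser()            # robots.txt handling is on by default
try:
    br.open("http://example.com/members-only/")
except mechanize.RobotExclusionError:
    br.set_handle_robots(False)     # opt out of robots.txt checking
    br.open("http://example.com/members-only/")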
|
||||
class HTTPRefererProcessor(BaseHandler):
|
||||
"""Add Referer header to requests.
|
||||
|
||||
This only makes sense if you use each RefererProcessor for a single
|
||||
chain of requests only (so, for example, if you use a single
|
||||
HTTPRefererProcessor to fetch a series of URLs extracted from a single
|
||||
page, this will break).
|
||||
|
||||
There's a proper implementation of this in mechanize.Browser.
|
||||
|
||||
"""
|
||||
def __init__(self):
|
||||
self.referer = None
|
||||
|
||||
def http_request(self, request):
|
||||
if ((self.referer is not None) and
|
||||
not request.has_header("Referer")):
|
||||
request.add_unredirected_header("Referer", self.referer)
|
||||
return request
|
||||
|
||||
def http_response(self, request, response):
|
||||
self.referer = response.geturl()
|
||||
return response
|
||||
|
||||
https_request = http_request
|
||||
https_response = http_response
|
||||
|
||||
|
||||
def clean_refresh_url(url):
|
||||
# e.g. Firefox 1.5 does (something like) this
|
||||
if ((url.startswith('"') and url.endswith('"')) or
|
||||
(url.startswith("'") and url.endswith("'"))):
|
||||
url = url[1:-1]
|
||||
return _rfc3986.clean_url(url, "latin-1") # XXX encoding
|
||||
|
||||
def parse_refresh_header(refresh):
|
||||
"""
|
||||
>>> parse_refresh_header("1; url=http://example.com/")
|
||||
(1.0, 'http://example.com/')
|
||||
>>> parse_refresh_header("1; url='http://example.com/'")
|
||||
(1.0, 'http://example.com/')
|
||||
>>> parse_refresh_header("1")
|
||||
(1.0, None)
|
||||
>>> parse_refresh_header("blah") # doctest: +IGNORE_EXCEPTION_DETAIL
|
||||
Traceback (most recent call last):
|
||||
ValueError: invalid literal for float(): blah
|
||||
|
||||
"""
|
||||
|
||||
ii = refresh.find(";")
|
||||
if ii != -1:
|
||||
pause, newurl_spec = float(refresh[:ii]), refresh[ii+1:]
|
||||
jj = newurl_spec.find("=")
|
||||
key = None
|
||||
if jj != -1:
|
||||
key, newurl = newurl_spec[:jj], newurl_spec[jj+1:]
|
||||
newurl = clean_refresh_url(newurl)
|
||||
if key is None or key.strip().lower() != "url":
|
||||
raise ValueError()
|
||||
else:
|
||||
pause, newurl = float(refresh), None
|
||||
return pause, newurl
|
||||
|
||||
class HTTPRefreshProcessor(BaseHandler):
|
||||
"""Perform HTTP Refresh redirections.
|
||||
|
||||
Note that if a non-200 HTTP code has occurred (for example, a 30x
|
||||
redirect), this processor will do nothing.
|
||||
|
||||
By default, only zero-time Refresh headers are redirected. Use the
|
||||
max_time attribute / constructor argument to allow Refresh with longer
|
||||
pauses. Use the honor_time attribute / constructor argument to control
|
||||
whether the requested pause is honoured (with a time.sleep()) or
|
||||
skipped in favour of immediate redirection.
|
||||
|
||||
Public attributes:
|
||||
|
||||
max_time: see above
|
||||
honor_time: see above
|
||||
|
||||
"""
|
||||
handler_order = 1000
|
||||
|
||||
def __init__(self, max_time=0, honor_time=True):
|
||||
self.max_time = max_time
|
||||
self.honor_time = honor_time
|
||||
self._sleep = time.sleep
|
||||
|
||||
def http_response(self, request, response):
|
||||
code, msg, hdrs = response.code, response.msg, response.info()
|
||||
|
||||
if code == 200 and hdrs.has_key("refresh"):
|
||||
refresh = hdrs.getheaders("refresh")[0]
|
||||
try:
|
||||
pause, newurl = parse_refresh_header(refresh)
|
||||
except ValueError:
|
||||
debug("bad Refresh header: %r" % refresh)
|
||||
return response
|
||||
|
||||
if newurl is None:
|
||||
newurl = response.geturl()
|
||||
if (self.max_time is None) or (pause <= self.max_time):
|
||||
if pause > 1E-3 and self.honor_time:
|
||||
self._sleep(pause)
|
||||
hdrs["location"] = newurl
|
||||
# hardcoded http is NOT a bug
|
||||
response = self.parent.error(
|
||||
"http", request, response,
|
||||
"refresh", msg, hdrs)
|
||||
else:
|
||||
debug("Refresh header ignored: %r" % refresh)
|
||||
|
||||
return response
|
||||
|
||||
https_response = http_response
|
||||
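And the Browser-level switch for the Refresh processor above, with the signature used by upstream mechanize.

import mechanize

br = mechanize.Browser()
# follow Refresh headers with pauses of up to 10 seconds, but do not sleep
br.set_handle_refresh(True, max_time=10, honor_time=False)
br.open("http://example.com/")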
@@ -1,185 +0,0 @@
|
||||
"""Load / save to libwww-perl (LWP) format files.
|
||||
|
||||
Actually, the format is slightly extended from that used by LWP's
|
||||
(libwww-perl's) HTTP::Cookies, to avoid losing some RFC 2965 information
|
||||
not recorded by LWP.
|
||||
|
||||
It uses the version string "2.0", though really there isn't an LWP Cookies
|
||||
2.0 format. This indicates that there is extra information in here
|
||||
(domain_dot and port_spec) while still being compatible with libwww-perl,
|
||||
I hope.
|
||||
|
||||
Copyright 2002-2006 John J Lee <jjl@pobox.com>
|
||||
Copyright 1997-1999 Gisle Aas (original libwww-perl code)
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it
|
||||
under the terms of the BSD or ZPL 2.1 licenses (see the file
|
||||
COPYING.txt included with the distribution).
|
||||
|
||||
"""
|
||||
|
||||
import time, re, logging
|
||||
|
||||
from _clientcookie import reraise_unmasked_exceptions, FileCookieJar, Cookie, \
|
||||
MISSING_FILENAME_TEXT, LoadError
|
||||
from _headersutil import join_header_words, split_header_words
|
||||
from _util import iso2time, time2isoz
|
||||
|
||||
debug = logging.getLogger("mechanize").debug
|
||||
|
||||
|
||||
def lwp_cookie_str(cookie):
|
||||
"""Return string representation of Cookie in an the LWP cookie file format.
|
||||
|
||||
Actually, the format is extended a bit -- see module docstring.
|
||||
|
||||
"""
|
||||
h = [(cookie.name, cookie.value),
|
||||
("path", cookie.path),
|
||||
("domain", cookie.domain)]
|
||||
if cookie.port is not None: h.append(("port", cookie.port))
|
||||
if cookie.path_specified: h.append(("path_spec", None))
|
||||
if cookie.port_specified: h.append(("port_spec", None))
|
||||
if cookie.domain_initial_dot: h.append(("domain_dot", None))
|
||||
if cookie.secure: h.append(("secure", None))
|
||||
if cookie.expires: h.append(("expires",
|
||||
time2isoz(float(cookie.expires))))
|
||||
if cookie.discard: h.append(("discard", None))
|
||||
if cookie.comment: h.append(("comment", cookie.comment))
|
||||
if cookie.comment_url: h.append(("commenturl", cookie.comment_url))
|
||||
if cookie.rfc2109: h.append(("rfc2109", None))
|
||||
|
||||
keys = cookie.nonstandard_attr_keys()
|
||||
keys.sort()
|
||||
for k in keys:
|
||||
h.append((k, str(cookie.get_nonstandard_attr(k))))
|
||||
|
||||
h.append(("version", str(cookie.version)))
|
||||
|
||||
return join_header_words([h])
|
||||
|
||||
class LWPCookieJar(FileCookieJar):
|
||||
"""
|
||||
The LWPCookieJar saves a sequence of "Set-Cookie3" lines.
|
||||
"Set-Cookie3" is the format used by the libwww-perl libary, not known
|
||||
to be compatible with any browser, but which is easy to read and
|
||||
doesn't lose information about RFC 2965 cookies.
|
||||
|
||||
Additional methods
|
||||
|
||||
as_lwp_str(ignore_discard=True, ignore_expires=True)
|
||||
|
||||
"""
|
||||
|
||||
magic_re = r"^\#LWP-Cookies-(\d+\.\d+)"
|
||||
|
||||
def as_lwp_str(self, ignore_discard=True, ignore_expires=True):
|
||||
"""Return cookies as a string of "\n"-separated "Set-Cookie3" headers.
|
||||
|
||||
ignore_discard and ignore_expires: see docstring for FileCookieJar.save
|
||||
|
||||
"""
|
||||
now = time.time()
|
||||
r = []
|
||||
for cookie in self:
|
||||
if not ignore_discard and cookie.discard:
|
||||
debug(" Not saving %s: marked for discard", cookie.name)
|
||||
continue
|
||||
if not ignore_expires and cookie.is_expired(now):
|
||||
debug(" Not saving %s: expired", cookie.name)
|
||||
continue
|
||||
r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie))
|
||||
return "\n".join(r+[""])
|
||||
|
||||
def save(self, filename=None, ignore_discard=False, ignore_expires=False):
|
||||
if filename is None:
|
||||
if self.filename is not None: filename = self.filename
|
||||
else: raise ValueError(MISSING_FILENAME_TEXT)
|
||||
|
||||
f = open(filename, "w")
|
||||
try:
|
||||
debug("Saving LWP cookies file")
|
||||
# There really isn't an LWP Cookies 2.0 format, but this indicates
|
||||
# that there is extra information in here (domain_dot and
|
||||
# port_spec) while still being compatible with libwww-perl, I hope.
|
||||
f.write("#LWP-Cookies-2.0\n")
|
||||
f.write(self.as_lwp_str(ignore_discard, ignore_expires))
|
||||
finally:
|
||||
f.close()
|
||||
|
||||
def _really_load(self, f, filename, ignore_discard, ignore_expires):
|
||||
magic = f.readline()
|
||||
if not re.search(self.magic_re, magic):
|
||||
msg = "%s does not seem to contain cookies" % filename
|
||||
raise LoadError(msg)
|
||||
|
||||
now = time.time()
|
||||
|
||||
header = "Set-Cookie3:"
|
||||
boolean_attrs = ("port_spec", "path_spec", "domain_dot",
|
||||
"secure", "discard", "rfc2109")
|
||||
value_attrs = ("version",
|
||||
"port", "path", "domain",
|
||||
"expires",
|
||||
"comment", "commenturl")
|
||||
|
||||
try:
|
||||
while 1:
|
||||
line = f.readline()
|
||||
if line == "": break
|
||||
if not line.startswith(header):
|
||||
continue
|
||||
line = line[len(header):].strip()
|
||||
|
||||
for data in split_header_words([line]):
|
||||
name, value = data[0]
|
||||
standard = {}
|
||||
rest = {}
|
||||
for k in boolean_attrs:
|
||||
standard[k] = False
|
||||
for k, v in data[1:]:
|
||||
if k is not None:
|
||||
lc = k.lower()
|
||||
else:
|
||||
lc = None
|
||||
# don't lose case distinction for unknown fields
|
||||
if (lc in value_attrs) or (lc in boolean_attrs):
|
||||
k = lc
|
||||
if k in boolean_attrs:
|
||||
if v is None: v = True
|
||||
standard[k] = v
|
||||
elif k in value_attrs:
|
||||
standard[k] = v
|
||||
else:
|
||||
rest[k] = v
|
||||
|
||||
h = standard.get
|
||||
expires = h("expires")
|
||||
discard = h("discard")
|
||||
if expires is not None:
|
||||
expires = iso2time(expires)
|
||||
if expires is None:
|
||||
discard = True
|
||||
domain = h("domain")
|
||||
domain_specified = domain.startswith(".")
|
||||
c = Cookie(h("version"), name, value,
|
||||
h("port"), h("port_spec"),
|
||||
domain, domain_specified, h("domain_dot"),
|
||||
h("path"), h("path_spec"),
|
||||
h("secure"),
|
||||
expires,
|
||||
discard,
|
||||
h("comment"),
|
||||
h("commenturl"),
|
||||
rest,
|
||||
h("rfc2109"),
|
||||
)
|
||||
if not ignore_discard and c.discard:
|
||||
continue
|
||||
if not ignore_expires and c.is_expired(now):
|
||||
continue
|
||||
self.set_cookie(c)
|
||||
except:
|
||||
reraise_unmasked_exceptions((IOError,))
|
||||
raise LoadError("invalid Set-Cookie3 format file %s" % filename)
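A minimal save/load round trip with the jar defined above, using the usual FileCookieJar-style API.

import mechanize

cj = mechanize.LWPCookieJar("cookies.lwp")
br = mechanize.Browser()
br.set_cookiejar(cj)
br.open("http://example.com/")
cj.save(ignore_discard=True, ignore_expires=True)    # writes Set-Cookie3 lines

# later, in another run
cj2 = mechanize.LWPCookieJar()
cj2.load("cookies.lwp", ignore_discard=True, ignore_expires=True)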
|
||||
|
||||
@@ -1,393 +0,0 @@
|
||||
# Taken from Python 2.6.4 for use by _sgmllib.py
|
||||
"""Shared support for scanning document type declarations in HTML and XHTML.
|
||||
|
||||
This module is used as a foundation for the HTMLParser and sgmllib
|
||||
modules (indirectly, for htmllib as well). It has no documented
|
||||
public API and should not be used directly.
|
||||
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*').match
|
||||
_declstringlit_match = re.compile(r'(\'[^\']*\'|"[^"]*")\s*').match
|
||||
_commentclose = re.compile(r'--\s*>')
|
||||
_markedsectionclose = re.compile(r']\s*]\s*>')
|
||||
|
||||
# An analysis of the MS-Word extensions is available at
|
||||
# http://www.planetpublish.com/xmlarena/xap/Thursday/WordtoXML.pdf
|
||||
|
||||
_msmarkedsectionclose = re.compile(r']\s*>')
|
||||
|
||||
del re
|
||||
|
||||
|
||||
class ParserBase:
|
||||
"""Parser base class which provides some common support methods used
|
||||
by the SGML/HTML and XHTML parsers."""
|
||||
|
||||
def __init__(self):
|
||||
if self.__class__ is ParserBase:
|
||||
raise RuntimeError(
|
||||
"markupbase.ParserBase must be subclassed")
|
||||
|
||||
def error(self, message):
|
||||
raise NotImplementedError(
|
||||
"subclasses of ParserBase must override error()")
|
||||
|
||||
def reset(self):
|
||||
self.lineno = 1
|
||||
self.offset = 0
|
||||
|
||||
def getpos(self):
|
||||
"""Return current line number and offset."""
|
||||
return self.lineno, self.offset
|
||||
|
||||
# Internal -- update line number and offset. This should be
|
||||
# called for each piece of data exactly once, in order -- in other
|
||||
# words the concatenation of all the input strings to this
|
||||
# function should be exactly the entire input.
|
||||
def updatepos(self, i, j):
|
||||
if i >= j:
|
||||
return j
|
||||
rawdata = self.rawdata
|
||||
nlines = rawdata.count("\n", i, j)
|
||||
if nlines:
|
||||
self.lineno = self.lineno + nlines
|
||||
pos = rawdata.rindex("\n", i, j) # Should not fail
|
||||
self.offset = j-(pos+1)
|
||||
else:
|
||||
self.offset = self.offset + j-i
|
||||
return j
|
||||
|
||||
_decl_otherchars = ''
|
||||
|
||||
# Internal -- parse declaration (for use by subclasses).
|
||||
def parse_declaration(self, i):
|
||||
# This is some sort of declaration; in "HTML as
|
||||
# deployed," this should only be the document type
|
||||
# declaration ("<!DOCTYPE html...>").
|
||||
# ISO 8879:1986, however, has more complex
|
||||
# declaration syntax for elements in <!...>, including:
|
||||
# --comment--
|
||||
# [marked section]
|
||||
# name in the following list: ENTITY, DOCTYPE, ELEMENT,
|
||||
# ATTLIST, NOTATION, SHORTREF, USEMAP,
|
||||
# LINKTYPE, LINK, IDLINK, USELINK, SYSTEM
|
||||
rawdata = self.rawdata
|
||||
j = i + 2
|
||||
assert rawdata[i:j] == "<!", "unexpected call to parse_declaration"
|
||||
if rawdata[j:j+1] == ">":
|
||||
# the empty comment <!>
|
||||
return j + 1
|
||||
if rawdata[j:j+1] in ("-", ""):
|
||||
# Start of comment followed by buffer boundary,
|
||||
# or just a buffer boundary.
|
||||
return -1
|
||||
# A simple, practical version could look like: ((name|stringlit) S*) + '>'
|
||||
n = len(rawdata)
|
||||
if rawdata[j:j+2] == '--': #comment
|
||||
# Locate --.*-- as the body of the comment
|
||||
return self.parse_comment(i)
|
||||
elif rawdata[j] == '[': #marked section
|
||||
# Locate [statusWord [...arbitrary SGML...]] as the body of the marked section
|
||||
# Where statusWord is one of TEMP, CDATA, IGNORE, INCLUDE, RCDATA
|
||||
# Note that this is extended by Microsoft Office "Save as Web" function
|
||||
# to include [if...] and [endif].
|
||||
return self.parse_marked_section(i)
|
||||
else: #all other declaration elements
|
||||
decltype, j = self._scan_name(j, i)
|
||||
if j < 0:
|
||||
return j
|
||||
if decltype == "doctype":
|
||||
self._decl_otherchars = ''
|
||||
while j < n:
|
||||
c = rawdata[j]
|
||||
if c == ">":
|
||||
# end of declaration syntax
|
||||
data = rawdata[i+2:j]
|
||||
if decltype == "doctype":
|
||||
self.handle_decl(data)
|
||||
else:
|
||||
self.unknown_decl(data)
|
||||
return j + 1
|
||||
if c in "\"'":
|
||||
m = _declstringlit_match(rawdata, j)
|
||||
if not m:
|
||||
return -1 # incomplete
|
||||
j = m.end()
|
||||
elif c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
|
||||
name, j = self._scan_name(j, i)
|
||||
elif c in self._decl_otherchars:
|
||||
j = j + 1
|
||||
elif c == "[":
|
||||
# this could be handled in a separate doctype parser
|
||||
if decltype == "doctype":
|
||||
j = self._parse_doctype_subset(j + 1, i)
|
||||
elif decltype in ("attlist", "linktype", "link", "element"):
|
||||
# must tolerate []'d groups in a content model in an element declaration
|
||||
# also in data attribute specifications of attlist declaration
|
||||
# also link type declaration subsets in linktype declarations
|
||||
# also link attribute specification lists in link declarations
|
||||
self.error("unsupported '[' char in %s declaration" % decltype)
|
||||
else:
|
||||
self.error("unexpected '[' char in declaration")
|
||||
else:
|
||||
self.error(
|
||||
"unexpected %r char in declaration" % rawdata[j])
|
||||
if j < 0:
|
||||
return j
|
||||
return -1 # incomplete
|
||||
|
||||
# Internal -- parse a marked section
|
||||
# Override this to handle MS-word extension syntax <![if word]>content<![endif]>
|
||||
def parse_marked_section(self, i, report=1):
|
||||
rawdata= self.rawdata
|
||||
assert rawdata[i:i+3] == '<![', "unexpected call to parse_marked_section()"
|
||||
sectName, j = self._scan_name( i+3, i )
|
||||
if j < 0:
|
||||
return j
|
||||
if sectName in ("temp", "cdata", "ignore", "include", "rcdata"):
|
||||
# look for standard ]]> ending
|
||||
match= _markedsectionclose.search(rawdata, i+3)
|
||||
elif sectName in ("if", "else", "endif"):
|
||||
# look for MS Office ]> ending
|
||||
match= _msmarkedsectionclose.search(rawdata, i+3)
|
||||
else:
|
||||
self.error('unknown status keyword %r in marked section' % rawdata[i+3:j])
|
||||
if not match:
|
||||
return -1
|
||||
if report:
|
||||
j = match.start(0)
|
||||
self.unknown_decl(rawdata[i+3: j])
|
||||
return match.end(0)
|
||||
|
||||
# Internal -- parse comment, return length or -1 if not terminated
|
||||
def parse_comment(self, i, report=1):
|
||||
rawdata = self.rawdata
|
||||
if rawdata[i:i+4] != '<!--':
|
||||
self.error('unexpected call to parse_comment()')
|
||||
match = _commentclose.search(rawdata, i+4)
|
||||
if not match:
|
||||
return -1
|
||||
if report:
|
||||
j = match.start(0)
|
||||
self.handle_comment(rawdata[i+4: j])
|
||||
return match.end(0)
|
||||
|
||||
# Internal -- scan past the internal subset in a <!DOCTYPE declaration,
|
||||
# returning the index just past any whitespace following the trailing ']'.
|
||||
def _parse_doctype_subset(self, i, declstartpos):
|
||||
rawdata = self.rawdata
|
||||
n = len(rawdata)
|
||||
j = i
|
||||
while j < n:
|
||||
c = rawdata[j]
|
||||
if c == "<":
|
||||
s = rawdata[j:j+2]
|
||||
if s == "<":
|
||||
# end of buffer; incomplete
|
||||
return -1
|
||||
if s != "<!":
|
||||
self.updatepos(declstartpos, j + 1)
|
||||
self.error("unexpected char in internal subset (in %r)" % s)
|
||||
if (j + 2) == n:
|
||||
# end of buffer; incomplete
|
||||
return -1
|
||||
if (j + 4) > n:
|
||||
# end of buffer; incomplete
|
||||
return -1
|
||||
if rawdata[j:j+4] == "<!--":
|
||||
j = self.parse_comment(j, report=0)
|
||||
if j < 0:
|
||||
return j
|
||||
continue
|
||||
name, j = self._scan_name(j + 2, declstartpos)
|
||||
if j == -1:
|
||||
return -1
|
||||
if name not in ("attlist", "element", "entity", "notation"):
|
||||
self.updatepos(declstartpos, j + 2)
|
||||
self.error(
|
||||
"unknown declaration %r in internal subset" % name)
|
||||
# handle the individual names
|
||||
meth = getattr(self, "_parse_doctype_" + name)
|
||||
j = meth(j, declstartpos)
|
||||
if j < 0:
|
||||
return j
|
||||
elif c == "%":
|
||||
# parameter entity reference
|
||||
if (j + 1) == n:
|
||||
# end of buffer; incomplete
|
||||
return -1
|
||||
s, j = self._scan_name(j + 1, declstartpos)
|
||||
if j < 0:
|
||||
return j
|
||||
if rawdata[j] == ";":
|
||||
j = j + 1
|
||||
elif c == "]":
|
||||
j = j + 1
|
||||
while j < n and rawdata[j].isspace():
|
||||
j = j + 1
|
||||
if j < n:
|
||||
if rawdata[j] == ">":
|
||||
return j
|
||||
self.updatepos(declstartpos, j)
|
||||
self.error("unexpected char after internal subset")
|
||||
else:
|
||||
return -1
|
||||
elif c.isspace():
|
||||
j = j + 1
|
||||
else:
|
||||
self.updatepos(declstartpos, j)
|
||||
self.error("unexpected char %r in internal subset" % c)
|
||||
# end of buffer reached
|
||||
return -1
|
||||
|
||||
# Internal -- scan past <!ELEMENT declarations
|
||||
def _parse_doctype_element(self, i, declstartpos):
|
||||
name, j = self._scan_name(i, declstartpos)
|
||||
if j == -1:
|
||||
return -1
|
||||
# style content model; just skip until '>'
|
||||
rawdata = self.rawdata
|
||||
if '>' in rawdata[j:]:
|
||||
return rawdata.find(">", j) + 1
|
||||
return -1
|
||||
|
||||
# Internal -- scan past <!ATTLIST declarations
|
||||
def _parse_doctype_attlist(self, i, declstartpos):
|
||||
rawdata = self.rawdata
|
||||
name, j = self._scan_name(i, declstartpos)
|
||||
c = rawdata[j:j+1]
|
||||
if c == "":
|
||||
return -1
|
||||
if c == ">":
|
||||
return j + 1
|
||||
while 1:
|
||||
# scan a series of attribute descriptions; simplified:
|
||||
# name type [value] [#constraint]
|
||||
name, j = self._scan_name(j, declstartpos)
|
||||
if j < 0:
|
||||
return j
|
||||
c = rawdata[j:j+1]
|
||||
if c == "":
|
||||
return -1
|
||||
if c == "(":
|
||||
# an enumerated type; look for ')'
|
||||
if ")" in rawdata[j:]:
|
||||
j = rawdata.find(")", j) + 1
|
||||
else:
|
||||
return -1
|
||||
while rawdata[j:j+1].isspace():
|
||||
j = j + 1
|
||||
if not rawdata[j:]:
|
||||
# end of buffer, incomplete
|
||||
return -1
|
||||
else:
|
||||
name, j = self._scan_name(j, declstartpos)
|
||||
c = rawdata[j:j+1]
|
||||
if not c:
|
||||
return -1
|
||||
if c in "'\"":
|
||||
m = _declstringlit_match(rawdata, j)
|
||||
if m:
|
||||
j = m.end()
|
||||
else:
|
||||
return -1
|
||||
c = rawdata[j:j+1]
|
||||
if not c:
|
||||
return -1
|
||||
if c == "#":
|
||||
if rawdata[j:] == "#":
|
||||
# end of buffer
|
||||
return -1
|
||||
name, j = self._scan_name(j + 1, declstartpos)
|
||||
if j < 0:
|
||||
return j
|
||||
c = rawdata[j:j+1]
|
||||
if not c:
|
||||
return -1
|
||||
if c == '>':
|
||||
# all done
|
||||
return j + 1
|
||||
|
||||
# Internal -- scan past <!NOTATION declarations
|
||||
def _parse_doctype_notation(self, i, declstartpos):
|
||||
name, j = self._scan_name(i, declstartpos)
|
||||
if j < 0:
|
||||
return j
|
||||
rawdata = self.rawdata
|
||||
while 1:
|
||||
c = rawdata[j:j+1]
|
||||
if not c:
|
||||
# end of buffer; incomplete
|
||||
return -1
|
||||
if c == '>':
|
||||
return j + 1
|
||||
if c in "'\"":
|
||||
m = _declstringlit_match(rawdata, j)
|
||||
if not m:
|
||||
return -1
|
||||
j = m.end()
|
||||
else:
|
||||
name, j = self._scan_name(j, declstartpos)
|
||||
if j < 0:
|
||||
return j
|
||||
|
||||
# Internal -- scan past <!ENTITY declarations
|
||||
def _parse_doctype_entity(self, i, declstartpos):
|
||||
rawdata = self.rawdata
|
||||
if rawdata[i:i+1] == "%":
|
||||
j = i + 1
|
||||
while 1:
|
||||
c = rawdata[j:j+1]
|
||||
if not c:
|
||||
return -1
|
||||
if c.isspace():
|
||||
j = j + 1
|
||||
else:
|
||||
break
|
||||
else:
|
||||
j = i
|
||||
name, j = self._scan_name(j, declstartpos)
|
||||
if j < 0:
|
||||
return j
|
||||
while 1:
|
||||
c = self.rawdata[j:j+1]
|
||||
if not c:
|
||||
return -1
|
||||
if c in "'\"":
|
||||
m = _declstringlit_match(rawdata, j)
|
||||
if m:
|
||||
j = m.end()
|
||||
else:
|
||||
return -1 # incomplete
|
||||
elif c == ">":
|
||||
return j + 1
|
||||
else:
|
||||
name, j = self._scan_name(j, declstartpos)
|
||||
if j < 0:
|
||||
return j
|
||||
|
||||
# Internal -- scan a name token and the new position and the token, or
|
||||
# return -1 if we've reached the end of the buffer.
|
||||
def _scan_name(self, i, declstartpos):
|
||||
rawdata = self.rawdata
|
||||
n = len(rawdata)
|
||||
if i == n:
|
||||
return None, -1
|
||||
m = _declname_match(rawdata, i)
|
||||
if m:
|
||||
s = m.group()
|
||||
name = s.strip()
|
||||
if (i + len(s)) == n:
|
||||
return None, -1 # end of buffer
|
||||
return name.lower(), m.end()
|
||||
else:
|
||||
self.updatepos(declstartpos, i)
|
||||
self.error("expected name token at %r"
|
||||
% rawdata[declstartpos:declstartpos+20])
|
||||
|
||||
# To be overridden -- handlers for unknown objects
|
||||
def unknown_decl(self, data):
|
||||
pass
|
||||
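ParserBase above is only a mixin; in practice it is driven through a subclass such as the Python 2 sgmllib.SGMLParser (or the _sgmllib copy this file was taken for), whose feed() loop hands "<!...>" chunks to parse_declaration() and parse_comment(). An illustrative sketch, not part of the diff, using the standard-library sgmllib:

import sgmllib

class DeclCollector(sgmllib.SGMLParser):
    # parse_comment() ends up calling handle_comment(); parse_declaration()
    # calls handle_decl() for DOCTYPE declarations
    def __init__(self):
        sgmllib.SGMLParser.__init__(self)
        self.comments = []
        self.doctype = None

    def handle_comment(self, data):
        self.comments.append(data)

    def handle_decl(self, decl):
        self.doctype = decl

p = DeclCollector()
p.feed('<!DOCTYPE html><!-- cached copy --><html></html>')
p.close()
print p.doctype, p.comments   # DOCTYPE html [' cached copy ']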
@@ -1,669 +0,0 @@
|
||||
"""Stateful programmatic WWW navigation, after Perl's WWW::Mechanize.
|
||||
|
||||
Copyright 2003-2006 John J. Lee <jjl@pobox.com>
|
||||
Copyright 2003 Andy Lester (original Perl code)
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it
|
||||
under the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
|
||||
included with the distribution).
|
||||
|
||||
"""
|
||||
|
||||
import copy, re, os, urllib, urllib2
|
||||
|
||||
from _html import DefaultFactory
|
||||
import _response
|
||||
import _request
|
||||
import _rfc3986
|
||||
import _sockettimeout
|
||||
import _urllib2_fork
|
||||
from _useragent import UserAgentBase
|
||||
|
||||
class BrowserStateError(Exception): pass
|
||||
class LinkNotFoundError(Exception): pass
|
||||
class FormNotFoundError(Exception): pass
|
||||
|
||||
|
||||
def sanepathname2url(path):
|
||||
urlpath = urllib.pathname2url(path)
|
||||
if os.name == "nt" and urlpath.startswith("///"):
|
||||
urlpath = urlpath[2:]
|
||||
# XXX don't ask me about the mac...
|
||||
return urlpath
|
||||
|
||||
|
||||
class History:
|
||||
"""
|
||||
|
||||
Though this will become public, the implied interface is not yet stable.
|
||||
|
||||
"""
|
||||
def __init__(self):
|
||||
self._history = [] # LIFO
|
||||
def add(self, request, response):
|
||||
self._history.append((request, response))
|
||||
def back(self, n, _response):
|
||||
response = _response # XXX move Browser._response into this class?
|
||||
while n > 0 or response is None:
|
||||
try:
|
||||
request, response = self._history.pop()
|
||||
except IndexError:
|
||||
raise BrowserStateError("already at start of history")
|
||||
n -= 1
|
||||
return request, response
|
||||
def clear(self):
|
||||
del self._history[:]
|
||||
def close(self):
|
||||
for request, response in self._history:
|
||||
if response is not None:
|
||||
response.close()
|
||||
del self._history[:]
|
||||
|
||||
|
||||
class HTTPRefererProcessor(_urllib2_fork.BaseHandler):
|
||||
def http_request(self, request):
|
||||
# See RFC 2616 14.36. The only times we know the source of the
|
||||
# request URI has a URI associated with it are redirect, and
|
||||
# Browser.click() / Browser.submit() / Browser.follow_link().
|
||||
# Otherwise, it's the user's job to add any Referer header before
|
||||
# .open()ing.
|
||||
if hasattr(request, "redirect_dict"):
|
||||
request = self.parent._add_referer_header(
|
||||
request, origin_request=False)
|
||||
return request
|
||||
|
||||
https_request = http_request
|
||||
|
||||
|
||||
class Browser(UserAgentBase):
|
||||
"""Browser-like class with support for history, forms and links.
|
||||
|
||||
BrowserStateError is raised whenever the browser is in the wrong state to
|
||||
complete the requested operation - e.g., when .back() is called when the
|
||||
browser history is empty, or when .follow_link() is called when the current
|
||||
response does not contain HTML data.
|
||||
|
||||
Public attributes:
|
||||
|
||||
request: current request (mechanize.Request)
|
||||
form: currently selected form (see .select_form())
|
||||
|
||||
"""
|
||||
|
||||
handler_classes = copy.copy(UserAgentBase.handler_classes)
|
||||
handler_classes["_referer"] = HTTPRefererProcessor
|
||||
default_features = copy.copy(UserAgentBase.default_features)
|
||||
default_features.append("_referer")
|
||||
|
||||
def __init__(self,
|
||||
factory=None,
|
||||
history=None,
|
||||
request_class=None,
|
||||
):
|
||||
"""
|
||||
|
||||
Only named arguments should be passed to this constructor.
|
||||
|
||||
factory: object implementing the mechanize.Factory interface.
|
||||
history: object implementing the mechanize.History interface. Note
|
||||
this interface is still experimental and may change in future.
|
||||
request_class: Request class to use. Defaults to mechanize.Request
|
||||
|
||||
The Factory and History objects passed in are 'owned' by the Browser,
|
||||
so they should not be shared across Browsers. In particular,
|
||||
factory.set_response() should not be called except by the owning
|
||||
Browser itself.
|
||||
|
||||
Note that the supplied factory's request_class is overridden by this
|
||||
constructor, to ensure only one Request class is used.
|
||||
|
||||
"""
|
||||
self._handle_referer = True
|
||||
|
||||
if history is None:
|
||||
history = History()
|
||||
self._history = history
|
||||
|
||||
if request_class is None:
|
||||
request_class = _request.Request
|
||||
|
||||
if factory is None:
|
||||
factory = DefaultFactory()
|
||||
factory.set_request_class(request_class)
|
||||
self._factory = factory
|
||||
self.request_class = request_class
|
||||
|
||||
self.request = None
|
||||
self._set_response(None, False)
|
||||
|
||||
# do this last to avoid __getattr__ problems
|
||||
UserAgentBase.__init__(self)
|
||||
|
||||
def close(self):
|
||||
UserAgentBase.close(self)
|
||||
if self._response is not None:
|
||||
self._response.close()
|
||||
if self._history is not None:
|
||||
self._history.close()
|
||||
self._history = None
|
||||
|
||||
# make use after .close easy to spot
|
||||
self.form = None
|
||||
self.request = self._response = None
|
||||
self.request = self.response = self.set_response = None
|
||||
self.geturl = self.reload = self.back = None
|
||||
self.clear_history = self.set_cookie = self.links = self.forms = None
|
||||
self.viewing_html = self.encoding = self.title = None
|
||||
self.select_form = self.click = self.submit = self.click_link = None
|
||||
self.follow_link = self.find_link = None
|
||||
|
||||
def set_handle_referer(self, handle):
|
||||
"""Set whether to add Referer header to each request."""
|
||||
self._set_handler("_referer", handle)
|
||||
self._handle_referer = bool(handle)
|
||||
|
||||
def _add_referer_header(self, request, origin_request=True):
|
||||
if self.request is None:
|
||||
return request
|
||||
scheme = request.get_type()
|
||||
original_scheme = self.request.get_type()
|
||||
if scheme not in ["http", "https"]:
|
||||
return request
|
||||
if not origin_request and not self.request.has_header("Referer"):
|
||||
return request
|
||||
|
||||
if (self._handle_referer and
|
||||
original_scheme in ["http", "https"] and
|
||||
not (original_scheme == "https" and scheme != "https")):
|
||||
# strip URL fragment (RFC 2616 14.36)
|
||||
parts = _rfc3986.urlsplit(self.request.get_full_url())
|
||||
parts = parts[:-1]+(None,)
|
||||
referer = _rfc3986.urlunsplit(parts)
|
||||
request.add_unredirected_header("Referer", referer)
|
||||
return request
|
||||
|
||||
def open_novisit(self, url, data=None,
|
||||
timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
|
||||
"""Open a URL without visiting it.
|
||||
|
||||
Browser state (including request, response, history, forms and links)
|
||||
is left unchanged by calling this function.
|
||||
|
||||
The interface is the same as for .open().
|
||||
|
||||
This is useful for things like fetching images.
|
||||
|
||||
See also .retrieve().
|
||||
|
||||
"""
|
||||
return self._mech_open(url, data, visit=False, timeout=timeout)
|
||||
|
||||
def open(self, url, data=None,
|
||||
timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
|
||||
return self._mech_open(url, data, timeout=timeout)
|
||||
|
||||
def _mech_open(self, url, data=None, update_history=True, visit=None,
|
||||
timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
|
||||
try:
|
||||
url.get_full_url
|
||||
except AttributeError:
|
||||
# string URL -- convert to absolute URL if required
|
||||
scheme, authority = _rfc3986.urlsplit(url)[:2]
|
||||
if scheme is None:
|
||||
# relative URL
|
||||
if self._response is None:
|
||||
raise BrowserStateError(
|
||||
"can't fetch relative reference: "
|
||||
"not viewing any document")
|
||||
url = _rfc3986.urljoin(self._response.geturl(), url)
|
||||
|
||||
request = self._request(url, data, visit, timeout)
|
||||
visit = request.visit
|
||||
if visit is None:
|
||||
visit = True
|
||||
|
||||
if visit:
|
||||
self._visit_request(request, update_history)
|
||||
|
||||
success = True
|
||||
try:
|
||||
response = UserAgentBase.open(self, request, data)
|
||||
except urllib2.HTTPError, error:
|
||||
success = False
|
||||
if error.fp is None: # not a response
|
||||
raise
|
||||
response = error
|
||||
## except (IOError, socket.error, OSError), error:
|
||||
## # Yes, urllib2 really does raise all these :-((
|
||||
## # See test_urllib2.py for examples of socket.gaierror and OSError,
|
||||
## # plus note that FTPHandler raises IOError.
|
||||
## # XXX I don't seem to have an example of exactly socket.error being
|
||||
## # raised, only socket.gaierror...
|
||||
## # I don't want to start fixing these here, though, since this is a
|
||||
## # subclass of OpenerDirector, and it would break old code. Even in
|
||||
## # Python core, a fix would need some backwards-compat. hack to be
|
||||
## # acceptable.
|
||||
## raise
|
||||
|
||||
if visit:
|
||||
self._set_response(response, False)
|
||||
response = copy.copy(self._response)
|
||||
elif response is not None:
|
||||
response = _response.upgrade_response(response)
|
||||
|
||||
if not success:
|
||||
raise response
|
||||
return response
|
||||
|
||||
def __str__(self):
|
||||
text = []
|
||||
text.append("<%s " % self.__class__.__name__)
|
||||
if self._response:
|
||||
text.append("visiting %s" % self._response.geturl())
|
||||
else:
|
||||
text.append("(not visiting a URL)")
|
||||
if self.form:
|
||||
text.append("\n selected form:\n %s\n" % str(self.form))
|
||||
text.append(">")
|
||||
return "".join(text)
|
||||
|
||||
def response(self):
|
||||
"""Return a copy of the current response.
|
||||
|
||||
The returned object has the same interface as the object returned by
|
||||
.open() (or mechanize.urlopen()).
|
||||
|
||||
"""
|
||||
return copy.copy(self._response)
|
||||
|
||||
def open_local_file(self, filename):
|
||||
path = sanepathname2url(os.path.abspath(filename))
|
||||
url = 'file://'+path
|
||||
return self.open(url)
|
||||
|
||||
def set_response(self, response):
|
||||
"""Replace current response with (a copy of) response.
|
||||
|
||||
response may be None.
|
||||
|
||||
This is intended mostly for HTML-preprocessing.
|
||||
"""
|
||||
self._set_response(response, True)
|
||||
|
||||
def _set_response(self, response, close_current):
|
||||
# sanity check, necessary but far from sufficient
|
||||
if not (response is None or
|
||||
(hasattr(response, "info") and hasattr(response, "geturl") and
|
||||
hasattr(response, "read")
|
||||
)
|
||||
):
|
||||
raise ValueError("not a response object")
|
||||
|
||||
self.form = None
|
||||
if response is not None:
|
||||
response = _response.upgrade_response(response)
|
||||
if close_current and self._response is not None:
|
||||
self._response.close()
|
||||
self._response = response
|
||||
self._factory.set_response(response)
|
||||
|
||||
def visit_response(self, response, request=None):
|
||||
"""Visit the response, as if it had been .open()ed.
|
||||
|
||||
Unlike .set_response(), this updates history rather than replacing the
|
||||
current response.
|
||||
"""
|
||||
if request is None:
|
||||
request = _request.Request(response.geturl())
|
||||
self._visit_request(request, True)
|
||||
self._set_response(response, False)
|
||||
|
||||
def _visit_request(self, request, update_history):
|
||||
if self._response is not None:
|
||||
self._response.close()
|
||||
if self.request is not None and update_history:
|
||||
self._history.add(self.request, self._response)
|
||||
self._response = None
|
||||
# we want self.request to be assigned even if UserAgentBase.open
|
||||
# fails
|
||||
self.request = request
|
||||
|
||||
def geturl(self):
|
||||
"""Get URL of current document."""
|
||||
if self._response is None:
|
||||
raise BrowserStateError("not viewing any document")
|
||||
return self._response.geturl()
|
||||
|
||||
def reload(self):
|
||||
"""Reload current document, and return response object."""
|
||||
if self.request is None:
|
||||
raise BrowserStateError("no URL has yet been .open()ed")
|
||||
if self._response is not None:
|
||||
self._response.close()
|
||||
return self._mech_open(self.request, update_history=False)
|
||||
|
||||
def back(self, n=1):
|
||||
"""Go back n steps in history, and return response object.
|
||||
|
||||
n: go back this number of steps (default 1 step)
|
||||
|
||||
"""
|
||||
if self._response is not None:
|
||||
self._response.close()
|
||||
self.request, response = self._history.back(n, self._response)
|
||||
self.set_response(response)
|
||||
if not response.read_complete:
|
||||
return self.reload()
|
||||
return copy.copy(response)
|
||||
|
||||
def clear_history(self):
|
||||
self._history.clear()
|
||||
|
||||
def set_cookie(self, cookie_string):
|
||||
"""Request to set a cookie.
|
||||
|
||||
Note that it is NOT necessary to call this method under ordinary
|
||||
circumstances: cookie handling is normally entirely automatic. The
|
||||
intended use case is rather to simulate the setting of a cookie by
|
||||
client script in a web page (e.g. JavaScript). In that case, use of
|
||||
this method is necessary because mechanize currently does not support
|
||||
JavaScript, VBScript, etc.
|
||||
|
||||
The cookie is added in the same way as if it had arrived with the
|
||||
current response, as a result of the current request. This means that,
|
||||
for example, if it is not appropriate to set the cookie based on the
|
||||
current request, no cookie will be set.
|
||||
|
||||
The cookie will be returned automatically with subsequent responses
|
||||
made by the Browser instance whenever that's appropriate.
|
||||
|
||||
cookie_string should be a valid value of the Set-Cookie header.
|
||||
|
||||
For example:
|
||||
|
||||
browser.set_cookie(
|
||||
"sid=abcdef; expires=Wednesday, 09-Nov-06 23:12:40 GMT")
|
||||
|
||||
Currently, this method does not allow for adding RFC 2986 cookies.
|
||||
This limitation will be lifted if anybody requests it.
|
||||
|
||||
"""
|
||||
if self._response is None:
|
||||
raise BrowserStateError("not viewing any document")
|
||||
if self.request.get_type() not in ["http", "https"]:
|
||||
raise BrowserStateError("can't set cookie for non-HTTP/HTTPS "
|
||||
"transactions")
|
||||
cookiejar = self._ua_handlers["_cookies"].cookiejar
|
||||
response = self.response() # copy
|
||||
headers = response.info()
|
||||
headers["Set-cookie"] = cookie_string
|
||||
cookiejar.extract_cookies(response, self.request)
|
||||
|
||||
def links(self, **kwds):
|
||||
"""Return iterable over links (mechanize.Link objects)."""
|
||||
if not self.viewing_html():
|
||||
raise BrowserStateError("not viewing HTML")
|
||||
links = self._factory.links()
|
||||
if kwds:
|
||||
return self._filter_links(links, **kwds)
|
||||
else:
|
||||
return links
|
||||
|
||||
def forms(self):
|
||||
"""Return iterable over forms.
|
||||
|
||||
The returned form objects implement the mechanize.HTMLForm interface.
|
||||
|
||||
"""
|
||||
if not self.viewing_html():
|
||||
raise BrowserStateError("not viewing HTML")
|
||||
return self._factory.forms()
|
||||
|
||||
def global_form(self):
|
||||
"""Return the global form object, or None if the factory implementation
|
||||
did not supply one.
|
||||
|
||||
The "global" form object contains all controls that are not descendants
|
||||
of any FORM element.
|
||||
|
||||
The returned form object implements the mechanize.HTMLForm interface.
|
||||
|
||||
This is a separate method since the global form is not regarded as part
|
||||
of the sequence of forms in the document -- mostly for
|
||||
backwards-compatibility.
|
||||
|
||||
"""
|
||||
if not self.viewing_html():
|
||||
raise BrowserStateError("not viewing HTML")
|
||||
return self._factory.global_form
|
||||
|
||||
def viewing_html(self):
|
||||
"""Return whether the current response contains HTML data."""
|
||||
if self._response is None:
|
||||
raise BrowserStateError("not viewing any document")
|
||||
return self._factory.is_html
|
||||
|
||||
def encoding(self):
|
||||
if self._response is None:
|
||||
raise BrowserStateError("not viewing any document")
|
||||
return self._factory.encoding
|
||||
|
||||
def title(self):
|
||||
r"""Return title, or None if there is no title element in the document.
|
||||
|
||||
Treatment of any tag children of attempts to follow Firefox and IE
|
||||
(currently, tags are preserved).
|
||||
|
||||
"""
|
||||
if not self.viewing_html():
|
||||
raise BrowserStateError("not viewing HTML")
|
||||
return self._factory.title
|
||||
|
||||
def select_form(self, name=None, predicate=None, nr=None):
|
||||
"""Select an HTML form for input.
|
||||
|
||||
This is a bit like giving a form the "input focus" in a browser.
|
||||
|
||||
If a form is selected, the Browser object supports the HTMLForm
|
||||
interface, so you can call methods like .set_value(), .set(), and
|
||||
.click().
|
||||
|
||||
Another way to select a form is to assign to the .form attribute. The
|
||||
form assigned should be one of the objects returned by the .forms()
|
||||
method.
|
||||
|
||||
At least one of the name, predicate and nr arguments must be supplied.
|
||||
If no matching form is found, mechanize.FormNotFoundError is raised.
|
||||
|
||||
If name is specified, then the form must have the indicated name.
|
||||
|
||||
If predicate is specified, then the form must match that function. The
|
||||
predicate function is passed the HTMLForm as its single argument, and
|
||||
should return a boolean value indicating whether the form matched.
|
||||
|
||||
nr, if supplied, is the sequence number of the form (where 0 is the
|
||||
first). Note that control 0 is the first form matching all the other
|
||||
arguments (if supplied); it is not necessarily the first control in the
|
||||
form. The "global form" (consisting of all form controls not contained
|
||||
in any FORM element) is considered not to be part of this sequence and
|
||||
to have no name, so will not be matched unless both name and nr are
|
||||
None.
|
||||
|
||||
"""
|
||||
if not self.viewing_html():
|
||||
raise BrowserStateError("not viewing HTML")
|
||||
if (name is None) and (predicate is None) and (nr is None):
|
||||
raise ValueError(
|
||||
"at least one argument must be supplied to specify form")
|
||||
|
||||
global_form = self._factory.global_form
|
||||
if nr is None and name is None and \
|
||||
predicate is not None and predicate(global_form):
|
||||
self.form = global_form
|
||||
return
|
||||
|
||||
orig_nr = nr
|
||||
for form in self.forms():
|
||||
if name is not None and name != form.name:
|
||||
continue
|
||||
if predicate is not None and not predicate(form):
|
||||
continue
|
||||
if nr:
|
||||
nr -= 1
|
||||
continue
|
||||
self.form = form
|
||||
break # success
|
||||
else:
|
||||
# failure
|
||||
description = []
|
||||
if name is not None: description.append("name '%s'" % name)
|
||||
if predicate is not None:
|
||||
description.append("predicate %s" % predicate)
|
||||
if orig_nr is not None: description.append("nr %d" % orig_nr)
|
||||
description = ", ".join(description)
|
||||
raise FormNotFoundError("no form matching "+description)
|
||||
|
||||
def click(self, *args, **kwds):
|
||||
"""See mechanize.HTMLForm.click for documentation."""
|
||||
if not self.viewing_html():
|
||||
raise BrowserStateError("not viewing HTML")
|
||||
request = self.form.click(*args, **kwds)
|
||||
return self._add_referer_header(request)
|
||||
|
||||
def submit(self, *args, **kwds):
|
||||
"""Submit current form.
|
||||
|
||||
Arguments are as for mechanize.HTMLForm.click().
|
||||
|
||||
Return value is same as for Browser.open().
|
||||
|
||||
"""
|
||||
return self.open(self.click(*args, **kwds))
|
||||
|
||||
def click_link(self, link=None, **kwds):
|
||||
"""Find a link and return a Request object for it.
|
||||
|
||||
Arguments are as for .find_link(), except that a link may be supplied
|
||||
as the first argument.
|
||||
|
||||
"""
|
||||
if not self.viewing_html():
|
||||
raise BrowserStateError("not viewing HTML")
|
||||
if not link:
|
||||
link = self.find_link(**kwds)
|
||||
else:
|
||||
if kwds:
|
||||
raise ValueError(
|
||||
"either pass a Link, or keyword arguments, not both")
|
||||
request = self.request_class(link.absolute_url)
|
||||
return self._add_referer_header(request)
|
||||
|
||||
def follow_link(self, link=None, **kwds):
|
||||
"""Find a link and .open() it.
|
||||
|
||||
Arguments are as for .click_link().
|
||||
|
||||
Return value is same as for Browser.open().
|
||||
|
||||
"""
|
||||
return self.open(self.click_link(link, **kwds))
|
||||
|
||||
def find_link(self, **kwds):
|
||||
"""Find a link in current page.
|
||||
|
||||
Links are returned as mechanize.Link objects.
|
||||
|
||||
# Return third link that .search()-matches the regexp "python"
|
||||
# (by ".search()-matches", I mean that the regular expression method
|
||||
# .search() is used, rather than .match()).
|
||||
find_link(text_regex=re.compile("python"), nr=2)
|
||||
|
||||
# Return first http link in the current page that points to somewhere
|
||||
# on python.org whose link text (after tags have been removed) is
|
||||
# exactly "monty python".
|
||||
find_link(text="monty python",
|
||||
url_regex=re.compile("http.*python.org"))
|
||||
|
||||
# Return first link with exactly three HTML attributes.
|
||||
find_link(predicate=lambda link: len(link.attrs) == 3)
|
||||
|
||||
Links include anchors (<a>), image maps (<area>), and frames (<frame>,
|
||||
<iframe>).
|
||||
|
||||
All arguments must be passed by keyword, not position. Zero or more
|
||||
arguments may be supplied. In order to find a link, all arguments
|
||||
supplied must match.
|
||||
|
||||
If a matching link is not found, mechanize.LinkNotFoundError is raised.
|
||||
|
||||
text: link text between link tags: e.g. <a href="blah">this bit</a> (as
|
||||
returned by pullparser.get_compressed_text(), ie. without tags but
|
||||
with opening tags "textified" as per the pullparser docs) must compare
|
||||
equal to this argument, if supplied
|
||||
text_regex: link text between tag (as defined above) must match the
|
||||
regular expression object or regular expression string passed as this
|
||||
argument, if supplied
|
||||
name, name_regex: as for text and text_regex, but matched against the
|
||||
name HTML attribute of the link tag
|
||||
url, url_regex: as for text and text_regex, but matched against the
|
||||
URL of the link tag (note this matches against Link.url, which is a
|
||||
relative or absolute URL according to how it was written in the HTML)
|
||||
tag: element name of opening tag, e.g. "a"
|
||||
predicate: a function taking a Link object as its single argument,
|
||||
returning a boolean result, indicating whether the links
|
||||
nr: matches the nth link that matches all other criteria (default 0)
|
||||
|
||||
"""
|
||||
try:
|
||||
return self._filter_links(self._factory.links(), **kwds).next()
|
||||
except StopIteration:
|
||||
raise LinkNotFoundError()
|
||||
|
||||
def __getattr__(self, name):
|
||||
# pass through _form.HTMLForm methods and attributes
|
||||
form = self.__dict__.get("form")
|
||||
if form is None:
|
||||
raise AttributeError(
|
||||
"%s instance has no attribute %s (perhaps you forgot to "
|
||||
".select_form()?)" % (self.__class__, name))
|
||||
return getattr(form, name)
|
||||
|
||||
def _filter_links(self, links,
|
||||
text=None, text_regex=None,
|
||||
name=None, name_regex=None,
|
||||
url=None, url_regex=None,
|
||||
tag=None,
|
||||
predicate=None,
|
||||
nr=0
|
||||
):
|
||||
if not self.viewing_html():
|
||||
raise BrowserStateError("not viewing HTML")
|
||||
|
||||
orig_nr = nr
|
||||
|
||||
for link in links:
|
||||
if url is not None and url != link.url:
|
||||
continue
|
||||
if url_regex is not None and not re.search(url_regex, link.url):
|
||||
continue
|
||||
if (text is not None and
|
||||
(link.text is None or text != link.text)):
|
||||
continue
|
||||
if (text_regex is not None and
|
||||
(link.text is None or not re.search(text_regex, link.text))):
|
||||
continue
|
||||
if name is not None and name != dict(link.attrs).get("name"):
|
||||
continue
|
||||
if name_regex is not None:
|
||||
link_name = dict(link.attrs).get("name")
|
||||
if link_name is None or not re.search(name_regex, link_name):
|
||||
continue
|
||||
if tag is not None and tag != link.tag:
|
||||
continue
|
||||
if predicate is not None and not predicate(link):
|
||||
continue
|
||||
if nr:
|
||||
nr -= 1
|
||||
continue
|
||||
yield link
|
||||
nr = orig_nr
|
||||
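A hedged usage sketch (not part of the diff) of the Browser class documented above, exercising select_form(), the HTMLForm pass-through described in __getattr__, and find_link()/follow_link(); the URL, form index and field names are placeholders.

import re
import mechanize

br = mechanize.Browser()
br.set_handle_robots(False)            # optional: don't fetch robots.txt
br.open("http://example.com/login")    # BrowserStateError if state calls precede .open()

br.select_form(nr=0)                   # give the first form the "input focus"
br["user"] = "alice"                   # proxied through to the selected HTMLForm
br["password"] = "secret"
response = br.submit()                 # equivalent to br.open(br.click())

# the keyword filters are the ones _filter_links() implements above
link = br.find_link(text_regex=re.compile("logout", re.I))
br.follow_link(link)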
@@ -1,161 +0,0 @@
"""Mozilla / Netscape cookie loading / saving.

Copyright 2002-2006 John J Lee <jjl@pobox.com>
Copyright 1997-1999 Gisle Aas (original libwww-perl code)

This code is free software; you can redistribute it and/or modify it
under the terms of the BSD or ZPL 2.1 licenses (see the file
COPYING.txt included with the distribution).

"""

import re, time, logging

from _clientcookie import reraise_unmasked_exceptions, FileCookieJar, Cookie, \
     MISSING_FILENAME_TEXT, LoadError
debug = logging.getLogger("ClientCookie").debug


class MozillaCookieJar(FileCookieJar):
    """

    WARNING: you may want to backup your browser's cookies file if you use
    this class to save cookies. I *think* it works, but there have been
    bugs in the past!

    This class differs from CookieJar only in the format it uses to save and
    load cookies to and from a file. This class uses the Mozilla/Netscape
    `cookies.txt' format. lynx uses this file format, too.

    Don't expect cookies saved while the browser is running to be noticed by
    the browser (in fact, Mozilla on unix will overwrite your saved cookies if
    you change them on disk while it's running; on Windows, you probably can't
    save at all while the browser is running).

    Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to
    Netscape cookies on saving.

    In particular, the cookie version and port number information is lost,
    together with information about whether or not Path, Port and Discard were
    specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the
    domain as set in the HTTP header started with a dot (yes, I'm aware some
    domains in Netscape files start with a dot and some don't -- trust me, you
    really don't want to know any more about this).

    Note that though Mozilla and Netscape use the same format, they use
    slightly different headers. The class saves cookies using the Netscape
    header by default (Mozilla can cope with that).

    """
    magic_re = "#( Netscape)? HTTP Cookie File"
    header = """\
# Netscape HTTP Cookie File
# http://www.netscape.com/newsref/std/cookie_spec.html
# This is a generated file! Do not edit.

"""

    def _really_load(self, f, filename, ignore_discard, ignore_expires):
        now = time.time()

        magic = f.readline()
        if not re.search(self.magic_re, magic):
            f.close()
            raise LoadError(
                "%s does not look like a Netscape format cookies file" %
                filename)

        try:
            while 1:
                line = f.readline()
                if line == "": break

                # last field may be absent, so keep any trailing tab
                if line.endswith("\n"): line = line[:-1]

                # skip comments and blank lines XXX what is $ for?
                if (line.strip().startswith("#") or
                    line.strip().startswith("$") or
                    line.strip() == ""):
                    continue

                domain, domain_specified, path, secure, expires, name, value = \
                    line.split("\t", 6)
                secure = (secure == "TRUE")
                domain_specified = (domain_specified == "TRUE")
                if name == "":
                    name = value
                    value = None

                initial_dot = domain.startswith(".")
                if domain_specified != initial_dot:
                    raise LoadError("domain and domain specified flag don't "
                                    "match in %s: %s" % (filename, line))

                discard = False
                if expires == "":
                    expires = None
                    discard = True

                # assume path_specified is false
                c = Cookie(0, name, value,
                           None, False,
                           domain, domain_specified, initial_dot,
                           path, False,
                           secure,
                           expires,
                           discard,
                           None,
                           None,
                           {})
                if not ignore_discard and c.discard:
                    continue
                if not ignore_expires and c.is_expired(now):
                    continue
                self.set_cookie(c)

        except:
            reraise_unmasked_exceptions((IOError, LoadError))
            raise LoadError("invalid Netscape format file %s: %s" %
                            (filename, line))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        f = open(filename, "w")
        try:
            debug("Saving Netscape cookies.txt file")
            f.write(self.header)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    debug("   Not saving %s: marked for discard", cookie.name)
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    debug("   Not saving %s: expired", cookie.name)
                    continue
                if cookie.secure: secure = "TRUE"
                else: secure = "FALSE"
                if cookie.domain.startswith("."): initial_dot = "TRUE"
                else: initial_dot = "FALSE"
                if cookie.expires is not None:
                    expires = str(cookie.expires)
                else:
                    expires = ""
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas cookielib regards it as a
                    # cookie with no value.
                    name = ""
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    "\t".join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value])+
                    "\n")
        finally:
            f.close()
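A small sketch (not part of the diff) tying the class above to the Browser: load a Netscape/Mozilla cookies.txt, attach it with set_cookiejar(), and write it back. It assumes the bundled mechanize exports MozillaCookieJar and Browser.set_cookiejar() as upstream does; the file name and URL are placeholders.

import mechanize

cj = mechanize.MozillaCookieJar()
cj.load("cookies.txt", ignore_discard=True, ignore_expires=True)  # LoadError if the magic header is missing

br = mechanize.Browser()
br.set_cookiejar(cj)
br.open("http://example.com/")

# remember: saving downgrades RFC 2965 cookies to plain Netscape cookies
cj.save("cookies.txt", ignore_discard=True, ignore_expires=True)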
@@ -1,388 +0,0 @@
|
||||
"""Microsoft Internet Explorer cookie loading on Windows.
|
||||
|
||||
Copyright 2002-2003 Johnny Lee <typo_pl@hotmail.com> (MSIE Perl code)
|
||||
Copyright 2002-2006 John J Lee <jjl@pobox.com> (The Python port)
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it
|
||||
under the terms of the BSD or ZPL 2.1 licenses (see the file
|
||||
COPYING.txt included with the distribution).
|
||||
|
||||
"""
|
||||
|
||||
# XXX names and comments are not great here
|
||||
|
||||
import os, re, time, struct, logging
|
||||
if os.name == "nt":
|
||||
import _winreg
|
||||
|
||||
from _clientcookie import FileCookieJar, CookieJar, Cookie, \
|
||||
MISSING_FILENAME_TEXT, LoadError
|
||||
|
||||
debug = logging.getLogger("mechanize").debug
|
||||
|
||||
|
||||
def regload(path, leaf):
|
||||
key = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, path, 0,
|
||||
_winreg.KEY_ALL_ACCESS)
|
||||
try:
|
||||
value = _winreg.QueryValueEx(key, leaf)[0]
|
||||
except WindowsError:
|
||||
value = None
|
||||
return value
|
||||
|
||||
WIN32_EPOCH = 0x019db1ded53e8000L # 1970 Jan 01 00:00:00 in Win32 FILETIME
|
||||
|
||||
def epoch_time_offset_from_win32_filetime(filetime):
|
||||
"""Convert from win32 filetime to seconds-since-epoch value.
|
||||
|
||||
MSIE stores create and expire times as Win32 FILETIME, which is 64
|
||||
bits of 100 nanosecond intervals since Jan 01 1601.
|
||||
|
||||
mechanize expects time in 32-bit value expressed in seconds since the
|
||||
epoch (Jan 01 1970).
|
||||
|
||||
"""
|
||||
if filetime < WIN32_EPOCH:
|
||||
raise ValueError("filetime (%d) is before epoch (%d)" %
|
||||
(filetime, WIN32_EPOCH))
|
||||
|
||||
return divmod((filetime - WIN32_EPOCH), 10000000L)[0]
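A worked example of the conversion above (illustrative only, not part of the diff): FILETIME ticks are 100 ns, so one second past the Unix epoch is WIN32_EPOCH plus 10**7 ticks.

WIN32_EPOCH = 0x019db1ded53e8000L   # 1970-01-01 00:00:00 expressed as a Win32 FILETIME

filetime = WIN32_EPOCH + 10000000L  # +1 second, in 100 ns ticks
print divmod(filetime - WIN32_EPOCH, 10000000L)[0]   # -> 1 (second since the epoch)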
|
||||
|
||||
def binary_to_char(c): return "%02X" % ord(c)
|
||||
def binary_to_str(d): return "".join(map(binary_to_char, list(d)))
|
||||
|
||||
class MSIEBase:
|
||||
magic_re = re.compile(r"Client UrlCache MMF Ver \d\.\d.*")
|
||||
padding = "\x0d\xf0\xad\x0b"
|
||||
|
||||
msie_domain_re = re.compile(r"^([^/]+)(/.*)$")
|
||||
cookie_re = re.compile("Cookie\:.+\@([\x21-\xFF]+).*?"
|
||||
"(.+\@[\x21-\xFF]+\.txt)")
|
||||
|
||||
# path under HKEY_CURRENT_USER from which to get location of index.dat
|
||||
reg_path = r"software\microsoft\windows" \
|
||||
r"\currentversion\explorer\shell folders"
|
||||
reg_key = "Cookies"
|
||||
|
||||
def __init__(self):
|
||||
self._delayload_domains = {}
|
||||
|
||||
def _delayload_domain(self, domain):
|
||||
# if necessary, lazily load cookies for this domain
|
||||
delayload_info = self._delayload_domains.get(domain)
|
||||
if delayload_info is not None:
|
||||
cookie_file, ignore_discard, ignore_expires = delayload_info
|
||||
try:
|
||||
self.load_cookie_data(cookie_file,
|
||||
ignore_discard, ignore_expires)
|
||||
except (LoadError, IOError):
|
||||
debug("error reading cookie file, skipping: %s", cookie_file)
|
||||
else:
|
||||
del self._delayload_domains[domain]
|
||||
|
||||
def _load_cookies_from_file(self, filename):
|
||||
debug("Loading MSIE cookies file: %s", filename)
|
||||
cookies = []
|
||||
|
||||
cookies_fh = open(filename)
|
||||
|
||||
try:
|
||||
while 1:
|
||||
key = cookies_fh.readline()
|
||||
if key == "": break
|
||||
|
||||
rl = cookies_fh.readline
|
||||
def getlong(rl=rl): return long(rl().rstrip())
|
||||
def getstr(rl=rl): return rl().rstrip()
|
||||
|
||||
key = key.rstrip()
|
||||
value = getstr()
|
||||
domain_path = getstr()
|
||||
flags = getlong() # 0x2000 bit is for secure I think
|
||||
lo_expire = getlong()
|
||||
hi_expire = getlong()
|
||||
lo_create = getlong()
|
||||
hi_create = getlong()
|
||||
sep = getstr()
|
||||
|
||||
if "" in (key, value, domain_path, flags, hi_expire, lo_expire,
|
||||
hi_create, lo_create, sep) or (sep != "*"):
|
||||
break
|
||||
|
||||
m = self.msie_domain_re.search(domain_path)
|
||||
if m:
|
||||
domain = m.group(1)
|
||||
path = m.group(2)
|
||||
|
||||
cookies.append({"KEY": key, "VALUE": value,
|
||||
"DOMAIN": domain, "PATH": path,
|
||||
"FLAGS": flags, "HIXP": hi_expire,
|
||||
"LOXP": lo_expire, "HICREATE": hi_create,
|
||||
"LOCREATE": lo_create})
|
||||
finally:
|
||||
cookies_fh.close()
|
||||
|
||||
return cookies
|
||||
|
||||
def load_cookie_data(self, filename,
|
||||
ignore_discard=False, ignore_expires=False):
|
||||
"""Load cookies from file containing actual cookie data.
|
||||
|
||||
Old cookies are kept unless overwritten by newly loaded ones.
|
||||
|
||||
You should not call this method if the delayload attribute is set.
|
||||
|
||||
I think each of these files contain all cookies for one user, domain,
|
||||
and path.
|
||||
|
||||
filename: file containing cookies -- usually found in a file like
|
||||
C:\WINNT\Profiles\joe\Cookies\joe@blah[1].txt
|
||||
|
||||
"""
|
||||
now = int(time.time())
|
||||
|
||||
cookie_data = self._load_cookies_from_file(filename)
|
||||
|
||||
for cookie in cookie_data:
|
||||
flags = cookie["FLAGS"]
|
||||
secure = ((flags & 0x2000) != 0)
|
||||
filetime = (cookie["HIXP"] << 32) + cookie["LOXP"]
|
||||
expires = epoch_time_offset_from_win32_filetime(filetime)
|
||||
if expires < now:
|
||||
discard = True
|
||||
else:
|
||||
discard = False
|
||||
domain = cookie["DOMAIN"]
|
||||
initial_dot = domain.startswith(".")
|
||||
if initial_dot:
|
||||
domain_specified = True
|
||||
else:
|
||||
# MSIE 5 does not record whether the domain cookie-attribute
|
||||
# was specified.
|
||||
# Assuming it wasn't is conservative, because with strict
|
||||
# domain matching this will match less frequently; with regular
|
||||
# Netscape tail-matching, this will match at exactly the same
|
||||
# times that domain_specified = True would. It also means we
|
||||
# don't have to prepend a dot to achieve consistency with our
|
||||
# own & Mozilla's domain-munging scheme.
|
||||
domain_specified = False
|
||||
|
||||
# assume path_specified is false
|
||||
# XXX is there other stuff in here? -- e.g. comment, commentURL?
|
||||
c = Cookie(0,
|
||||
cookie["KEY"], cookie["VALUE"],
|
||||
None, False,
|
||||
domain, domain_specified, initial_dot,
|
||||
cookie["PATH"], False,
|
||||
secure,
|
||||
expires,
|
||||
discard,
|
||||
None,
|
||||
None,
|
||||
{"flags": flags})
|
||||
if not ignore_discard and c.discard:
|
||||
continue
|
||||
if not ignore_expires and c.is_expired(now):
|
||||
continue
|
||||
CookieJar.set_cookie(self, c)
|
||||
|
||||
def load_from_registry(self, ignore_discard=False, ignore_expires=False,
|
||||
username=None):
|
||||
"""
|
||||
username: only required on win9x
|
||||
|
||||
"""
|
||||
cookies_dir = regload(self.reg_path, self.reg_key)
|
||||
filename = os.path.normpath(os.path.join(cookies_dir, "INDEX.DAT"))
|
||||
self.load(filename, ignore_discard, ignore_expires, username)
|
||||
|
||||
def _really_load(self, index, filename, ignore_discard, ignore_expires,
|
||||
username):
|
||||
now = int(time.time())
|
||||
|
||||
if username is None:
|
||||
username = os.environ['USERNAME'].lower()
|
||||
|
||||
cookie_dir = os.path.dirname(filename)
|
||||
|
||||
data = index.read(256)
|
||||
if len(data) != 256:
|
||||
raise LoadError("%s file is too short" % filename)
|
||||
|
||||
# Cookies' index.dat file starts with 32 bytes of signature
|
||||
# followed by an offset to the first record, stored as a little-
|
||||
# endian DWORD.
|
||||
sig, size, data = data[:32], data[32:36], data[36:]
|
||||
size = struct.unpack("<L", size)[0]
|
||||
|
||||
# check that sig is valid
|
||||
if not self.magic_re.match(sig) or size != 0x4000:
|
||||
raise LoadError("%s ['%s' %s] does not seem to contain cookies" %
|
||||
(str(filename), sig, size))
|
||||
|
||||
# skip to start of first record
|
||||
index.seek(size, 0)
|
||||
|
||||
sector = 128 # size of sector in bytes
|
||||
|
||||
while 1:
|
||||
data = ""
|
||||
|
||||
# Cookies are usually in two contiguous sectors, so read in two
|
||||
# sectors and adjust if not a Cookie.
|
||||
to_read = 2 * sector
|
||||
d = index.read(to_read)
|
||||
if len(d) != to_read:
|
||||
break
|
||||
data = data + d
|
||||
|
||||
# Each record starts with a 4-byte signature and a count
|
||||
# (little-endian DWORD) of sectors for the record.
|
||||
sig, size, data = data[:4], data[4:8], data[8:]
|
||||
size = struct.unpack("<L", size)[0]
|
||||
|
||||
to_read = (size - 2) * sector
|
||||
|
||||
## from urllib import quote
|
||||
## print "data", quote(data)
|
||||
## print "sig", quote(sig)
|
||||
## print "size in sectors", size
|
||||
## print "size in bytes", size*sector
|
||||
## print "size in units of 16 bytes", (size*sector) / 16
|
||||
## print "size to read in bytes", to_read
|
||||
## print
|
||||
|
||||
if sig != "URL ":
|
||||
assert sig in ("HASH", "LEAK", \
|
||||
self.padding, "\x00\x00\x00\x00"), \
|
||||
"unrecognized MSIE index.dat record: %s" % \
|
||||
binary_to_str(sig)
|
||||
if sig == "\x00\x00\x00\x00":
|
||||
# assume we've got all the cookies, and stop
|
||||
break
|
||||
if sig == self.padding:
|
||||
continue
|
||||
# skip the rest of this record
|
||||
assert to_read >= 0
|
||||
if size != 2:
|
||||
assert to_read != 0
|
||||
index.seek(to_read, 1)
|
||||
continue
|
||||
|
||||
# read in rest of record if necessary
|
||||
if size > 2:
|
||||
more_data = index.read(to_read)
|
||||
if len(more_data) != to_read: break
|
||||
data = data + more_data
|
||||
|
||||
cookie_re = ("Cookie\:%s\@([\x21-\xFF]+).*?" % username +
|
||||
"(%s\@[\x21-\xFF]+\.txt)" % username)
|
||||
m = re.search(cookie_re, data, re.I)
|
||||
if m:
|
||||
cookie_file = os.path.join(cookie_dir, m.group(2))
|
||||
if not self.delayload:
|
||||
try:
|
||||
self.load_cookie_data(cookie_file,
|
||||
ignore_discard, ignore_expires)
|
||||
except (LoadError, IOError):
|
||||
debug("error reading cookie file, skipping: %s",
|
||||
cookie_file)
|
||||
else:
|
||||
domain = m.group(1)
|
||||
i = domain.find("/")
|
||||
if i != -1:
|
||||
domain = domain[:i]
|
||||
|
||||
self._delayload_domains[domain] = (
|
||||
cookie_file, ignore_discard, ignore_expires)
|
||||
|
||||
|
||||
class MSIECookieJar(MSIEBase, FileCookieJar):
|
||||
"""FileCookieJar that reads from the Windows MSIE cookies database.
|
||||
|
||||
MSIECookieJar can read the cookie files of Microsoft Internet Explorer
|
||||
(MSIE) for Windows version 5 on Windows NT and version 6 on Windows XP and
|
||||
Windows 98. Other configurations may also work, but are untested. Saving
|
||||
cookies in MSIE format is NOT supported. If you save cookies, they'll be
|
||||
in the usual Set-Cookie3 format, which you can read back in using an
|
||||
instance of the plain old CookieJar class. Don't save using the same
|
||||
filename that you loaded cookies from, because you may succeed in
|
||||
clobbering your MSIE cookies index file!
|
||||
|
||||
You should be able to have LWP share Internet Explorer's cookies like
|
||||
this (note you need to supply a username to load_from_registry if you're on
|
||||
Windows 9x or Windows ME):
|
||||
|
||||
cj = MSIECookieJar(delayload=1)
|
||||
# find cookies index file in registry and load cookies from it
|
||||
cj.load_from_registry()
|
||||
opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cj))
|
||||
response = opener.open("http://example.com/")
|
||||
|
||||
Iterating over a delayloaded MSIECookieJar instance will not cause any
|
||||
cookies to be read from disk. To force reading of all cookies from disk,
|
||||
call read_all_cookies. Note that the following methods iterate over self:
|
||||
clear_temporary_cookies, clear_expired_cookies, __len__, __repr__, __str__
|
||||
and as_string.
|
||||
|
||||
Additional methods:
|
||||
|
||||
load_from_registry(ignore_discard=False, ignore_expires=False,
|
||||
username=None)
|
||||
load_cookie_data(filename, ignore_discard=False, ignore_expires=False)
|
||||
read_all_cookies()
|
||||
|
||||
"""
|
||||
def __init__(self, filename=None, delayload=False, policy=None):
|
||||
MSIEBase.__init__(self)
|
||||
FileCookieJar.__init__(self, filename, delayload, policy)
|
||||
|
||||
def set_cookie(self, cookie):
|
||||
if self.delayload:
|
||||
self._delayload_domain(cookie.domain)
|
||||
CookieJar.set_cookie(self, cookie)
|
||||
|
||||
def _cookies_for_request(self, request):
|
||||
"""Return a list of cookies to be returned to server."""
|
||||
domains = self._cookies.copy()
|
||||
domains.update(self._delayload_domains)
|
||||
domains = domains.keys()
|
||||
|
||||
cookies = []
|
||||
for domain in domains:
|
||||
cookies.extend(self._cookies_for_domain(domain, request))
|
||||
return cookies
|
||||
|
||||
def _cookies_for_domain(self, domain, request):
|
||||
if not self._policy.domain_return_ok(domain, request):
|
||||
return []
|
||||
debug("Checking %s for cookies to return", domain)
|
||||
if self.delayload:
|
||||
self._delayload_domain(domain)
|
||||
return CookieJar._cookies_for_domain(self, domain, request)
|
||||
|
||||
def read_all_cookies(self):
|
||||
"""Eagerly read in all cookies."""
|
||||
if self.delayload:
|
||||
for domain in self._delayload_domains.keys():
|
||||
self._delayload_domain(domain)
|
||||
|
||||
def load(self, filename, ignore_discard=False, ignore_expires=False,
|
||||
username=None):
|
||||
"""Load cookies from an MSIE 'index.dat' cookies index file.
|
||||
|
||||
filename: full path to cookie index file
|
||||
username: only required on win9x
|
||||
|
||||
"""
|
||||
if filename is None:
|
||||
if self.filename is not None: filename = self.filename
|
||||
else: raise ValueError(MISSING_FILENAME_TEXT)
|
||||
|
||||
index = open(filename, "rb")
|
||||
|
||||
try:
|
||||
self._really_load(index, filename, ignore_discard, ignore_expires,
|
||||
username)
|
||||
finally:
|
||||
index.close()
|
||||
@@ -1,442 +0,0 @@
"""URL opener.

Copyright 2004-2006 John J Lee <jjl@pobox.com>

This code is free software; you can redistribute it and/or modify it
under the terms of the BSD or ZPL 2.1 licenses (see the file
COPYING.txt included with the distribution).

"""

import os, urllib2, bisect, httplib, types, tempfile
try:
    import threading as _threading
except ImportError:
    import dummy_threading as _threading
try:
    set
except NameError:
    import sets
    set = sets.Set

from _request import Request
import _response
import _rfc3986
import _sockettimeout
import _urllib2_fork
from _util import isstringlike

open_file = open


class ContentTooShortError(urllib2.URLError):
    def __init__(self, reason, result):
        urllib2.URLError.__init__(self, reason)
        self.result = result


def set_request_attr(req, name, value, default):
    try:
        getattr(req, name)
    except AttributeError:
        setattr(req, name, default)
    if value is not default:
        setattr(req, name, value)


class OpenerDirector(_urllib2_fork.OpenerDirector):
    def __init__(self):
        _urllib2_fork.OpenerDirector.__init__(self)
        # really none of these are (sanely) public -- the lack of initial
        # underscore on some is just due to following urllib2
        self.process_response = {}
        self.process_request = {}
        self._any_request = {}
        self._any_response = {}
        self._handler_index_valid = True
        self._tempfiles = []

    def add_handler(self, handler):
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        if handler in self.handlers:
            return
        # XXX why does self.handlers need to be sorted?
        bisect.insort(self.handlers, handler)
        handler.add_parent(self)
        self._handler_index_valid = False

    def _maybe_reindex_handlers(self):
        if self._handler_index_valid:
            return

        handle_error = {}
        handle_open = {}
        process_request = {}
        process_response = {}
        any_request = set()
        any_response = set()
        unwanted = []

        for handler in self.handlers:
            added = False
            for meth in dir(handler):
                if meth in ["redirect_request", "do_open", "proxy_open"]:
                    # oops, coincidental match
                    continue

                if meth == "any_request":
                    any_request.add(handler)
                    added = True
                    continue
                elif meth == "any_response":
                    any_response.add(handler)
                    added = True
                    continue

                ii = meth.find("_")
                scheme = meth[:ii]
                condition = meth[ii+1:]

                if condition.startswith("error"):
                    jj = meth[ii+1:].find("_") + ii + 1
                    kind = meth[jj+1:]
                    try:
                        kind = int(kind)
                    except ValueError:
                        pass
                    lookup = handle_error.setdefault(scheme, {})
                elif condition == "open":
                    kind = scheme
                    lookup = handle_open
                elif condition == "request":
                    kind = scheme
                    lookup = process_request
                elif condition == "response":
                    kind = scheme
                    lookup = process_response
                else:
                    continue

                lookup.setdefault(kind, set()).add(handler)
                added = True

            if not added:
                unwanted.append(handler)

        for handler in unwanted:
            self.handlers.remove(handler)

        # sort indexed methods
        # XXX could be cleaned up
        for lookup in [process_request, process_response]:
            for scheme, handlers in lookup.iteritems():
                lookup[scheme] = handlers
        for scheme, lookup in handle_error.iteritems():
            for code, handlers in lookup.iteritems():
                handlers = list(handlers)
                handlers.sort()
                lookup[code] = handlers
        for scheme, handlers in handle_open.iteritems():
            handlers = list(handlers)
            handlers.sort()
            handle_open[scheme] = handlers

        # cache the indexes
        self.handle_error = handle_error
        self.handle_open = handle_open
        self.process_request = process_request
        self.process_response = process_response
        self._any_request = any_request
        self._any_response = any_response
|
||||
def _request(self, url_or_req, data, visit,
|
||||
timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
|
||||
if isstringlike(url_or_req):
|
||||
req = Request(url_or_req, data, visit=visit, timeout=timeout)
|
||||
else:
|
||||
# already a mechanize.Request instance
|
||||
req = url_or_req
|
||||
if data is not None:
|
||||
req.add_data(data)
|
||||
# XXX yuck
|
||||
set_request_attr(req, "visit", visit, None)
|
||||
set_request_attr(req, "timeout", timeout,
|
||||
_sockettimeout._GLOBAL_DEFAULT_TIMEOUT)
|
||||
return req
|
||||
|
||||
def open(self, fullurl, data=None,
|
||||
timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
|
||||
req = self._request(fullurl, data, None, timeout)
|
||||
req_scheme = req.get_type()
|
||||
|
||||
self._maybe_reindex_handlers()
|
||||
|
||||
# pre-process request
|
||||
# XXX should we allow a Processor to change the URL scheme
|
||||
# of the request?
|
||||
request_processors = set(self.process_request.get(req_scheme, []))
|
||||
request_processors.update(self._any_request)
|
||||
request_processors = list(request_processors)
|
||||
request_processors.sort()
|
||||
for processor in request_processors:
|
||||
for meth_name in ["any_request", req_scheme+"_request"]:
|
||||
meth = getattr(processor, meth_name, None)
|
||||
if meth:
|
||||
req = meth(req)
|
||||
|
||||
# In Python >= 2.4, .open() supports processors already, so we must
|
||||
# call ._open() instead.
|
||||
urlopen = _urllib2_fork.OpenerDirector._open
|
||||
response = urlopen(self, req, data)
|
||||
|
||||
# post-process response
|
||||
response_processors = set(self.process_response.get(req_scheme, []))
|
||||
response_processors.update(self._any_response)
|
||||
response_processors = list(response_processors)
|
||||
response_processors.sort()
|
||||
for processor in response_processors:
|
||||
for meth_name in ["any_response", req_scheme+"_response"]:
|
||||
meth = getattr(processor, meth_name, None)
|
||||
if meth:
|
||||
response = meth(req, response)
|
||||
|
||||
return response
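
# Hedged sketch of the processor convention indexed above (an assumption
# drawn from _maybe_reindex_handlers, not a documented API): a handler acts
# as a request processor simply by defining <scheme>_request methods.
#
#   class UserAgentProcessor(_urllib2_fork.BaseHandler):
#       def http_request(self, request):
#           request.add_header("User-agent", "example/1.0")
#           return request
#       https_request = http_request
#
#   opener = build_opener(UserAgentProcessor)
#   opener.open("http://example.com/")
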
def error(self, proto, *args):
|
||||
if proto in ['http', 'https']:
|
||||
# XXX http[s] protocols are special-cased
|
||||
dict = self.handle_error['http'] # https is not different than http
|
||||
proto = args[2] # YUCK!
|
||||
meth_name = 'http_error_%s' % proto
|
||||
http_err = 1
|
||||
orig_args = args
|
||||
else:
|
||||
dict = self.handle_error
|
||||
meth_name = proto + '_error'
|
||||
http_err = 0
|
||||
args = (dict, proto, meth_name) + args
|
||||
result = apply(self._call_chain, args)
|
||||
if result:
|
||||
return result
|
||||
|
||||
if http_err:
|
||||
args = (dict, 'default', 'http_error_default') + orig_args
|
||||
return apply(self._call_chain, args)
|
||||
|
||||
BLOCK_SIZE = 1024*8
|
||||
def retrieve(self, fullurl, filename=None, reporthook=None, data=None,
|
||||
timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT,
|
||||
open=open_file):
"""Returns (filename, headers).

For remote objects, the default filename will refer to a temporary
file. Temporary files are removed when the OpenerDirector.close()
method is called.

For file: URLs, at present the returned filename is None. This may
change in future.

If the actual number of bytes read is less than indicated by the
Content-Length header, raises ContentTooShortError (a URLError
subclass). The exception's .result attribute contains the (filename,
headers) that would have been returned.

"""
|
||||
req = self._request(fullurl, data, False, timeout)
|
||||
scheme = req.get_type()
|
||||
fp = self.open(req)
|
||||
try:
|
||||
headers = fp.info()
|
||||
if filename is None and scheme == 'file':
|
||||
# XXX req.get_selector() seems broken here, return None,
|
||||
# pending sanity :-/
|
||||
return None, headers
|
||||
#return urllib.url2pathname(req.get_selector()), headers
|
||||
if filename:
|
||||
tfp = open(filename, 'wb')
|
||||
else:
|
||||
path = _rfc3986.urlsplit(req.get_full_url())[2]
|
||||
suffix = os.path.splitext(path)[1]
|
||||
fd, filename = tempfile.mkstemp(suffix)
|
||||
self._tempfiles.append(filename)
|
||||
tfp = os.fdopen(fd, 'wb')
|
||||
try:
|
||||
result = filename, headers
|
||||
bs = self.BLOCK_SIZE
|
||||
size = -1
|
||||
read = 0
|
||||
blocknum = 0
|
||||
if reporthook:
|
||||
if "content-length" in headers:
|
||||
size = int(headers["Content-Length"])
|
||||
reporthook(blocknum, bs, size)
|
||||
while 1:
|
||||
block = fp.read(bs)
|
||||
if block == "":
|
||||
break
|
||||
read += len(block)
|
||||
tfp.write(block)
|
||||
blocknum += 1
|
||||
if reporthook:
|
||||
reporthook(blocknum, bs, size)
|
||||
finally:
|
||||
tfp.close()
|
||||
finally:
|
||||
fp.close()
|
||||
|
||||
# raise exception if actual size does not match content-length header
|
||||
if size >= 0 and read < size:
|
||||
raise ContentTooShortError(
|
||||
"retrieval incomplete: "
|
||||
"got only %i out of %i bytes" % (read, size),
|
||||
result
|
||||
)
|
||||
|
||||
return result
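
# Hedged sketch of retrieve() in use; retrieve, ContentTooShortError and the
# reporthook(blocknum, blocksize, totalsize) convention are all defined in
# this file, the example URL is a placeholder:
#
#   def report(blocknum, blocksize, totalsize):
#       print "block %d (%d bytes each, %d total)" % (blocknum, blocksize, totalsize)
#
#   opener = build_opener()
#   try:
#       filename, headers = opener.retrieve("http://example.com/big.bin",
#                                           reporthook=report)
#   except ContentTooShortError, exc:
#       filename, headers = exc.result   # the partial download is kept
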
def close(self):
|
||||
_urllib2_fork.OpenerDirector.close(self)
|
||||
|
||||
# make it very obvious this object is no longer supposed to be used
|
||||
self.open = self.error = self.retrieve = self.add_handler = None
|
||||
|
||||
if self._tempfiles:
|
||||
for filename in self._tempfiles:
|
||||
try:
|
||||
os.unlink(filename)
|
||||
except OSError:
|
||||
pass
|
||||
del self._tempfiles[:]
|
||||
|
||||
|
||||
def wrapped_open(urlopen, process_response_object, fullurl, data=None,
|
||||
timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
|
||||
success = True
|
||||
try:
|
||||
response = urlopen(fullurl, data, timeout)
|
||||
except urllib2.HTTPError, error:
|
||||
success = False
|
||||
if error.fp is None: # not a response
|
||||
raise
|
||||
response = error
|
||||
|
||||
if response is not None:
|
||||
response = process_response_object(response)
|
||||
|
||||
if not success:
|
||||
raise response
|
||||
return response
|
||||
|
||||
class ResponseProcessingOpener(OpenerDirector):
|
||||
|
||||
def open(self, fullurl, data=None,
|
||||
timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
|
||||
def bound_open(fullurl, data=None,
|
||||
timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
|
||||
return OpenerDirector.open(self, fullurl, data, timeout)
|
||||
return wrapped_open(
|
||||
bound_open, self.process_response_object, fullurl, data, timeout)
|
||||
|
||||
def process_response_object(self, response):
|
||||
return response
|
||||
|
||||
|
||||
class SeekableResponseOpener(ResponseProcessingOpener):
|
||||
def process_response_object(self, response):
|
||||
return _response.seek_wrapped_response(response)
|
||||
|
||||
|
||||
def isclass(obj):
|
||||
return isinstance(obj, (types.ClassType, type))
|
||||
|
||||
|
||||
class OpenerFactory:
|
||||
"""This class's interface is quite likely to change."""
|
||||
|
||||
default_classes = [
|
||||
# handlers
|
||||
_urllib2_fork.ProxyHandler,
|
||||
_urllib2_fork.UnknownHandler,
|
||||
_urllib2_fork.HTTPHandler,
|
||||
_urllib2_fork.HTTPDefaultErrorHandler,
|
||||
_urllib2_fork.HTTPRedirectHandler,
|
||||
_urllib2_fork.FTPHandler,
|
||||
_urllib2_fork.FileHandler,
|
||||
# processors
|
||||
_urllib2_fork.HTTPCookieProcessor,
|
||||
_urllib2_fork.HTTPErrorProcessor,
|
||||
]
|
||||
if hasattr(httplib, 'HTTPS'):
|
||||
default_classes.append(_urllib2_fork.HTTPSHandler)
|
||||
handlers = []
|
||||
replacement_handlers = []
|
||||
|
||||
def __init__(self, klass=OpenerDirector):
|
||||
self.klass = klass
|
||||
|
||||
def build_opener(self, *handlers):
|
||||
"""Create an opener object from a list of handlers and processors.
|
||||
|
||||
The opener will use several default handlers and processors, including
|
||||
support for HTTP and FTP.
|
||||
|
||||
If any of the handlers passed as arguments are subclasses of the
|
||||
default handlers, the default handlers will not be used.
|
||||
|
||||
"""
|
||||
opener = self.klass()
|
||||
default_classes = list(self.default_classes)
|
||||
skip = set()
|
||||
for klass in default_classes:
|
||||
for check in handlers:
|
||||
if isclass(check):
|
||||
if issubclass(check, klass):
|
||||
skip.add(klass)
|
||||
elif isinstance(check, klass):
|
||||
skip.add(klass)
|
||||
for klass in skip:
|
||||
default_classes.remove(klass)
|
||||
|
||||
for klass in default_classes:
|
||||
opener.add_handler(klass())
|
||||
for h in handlers:
|
||||
if isclass(h):
|
||||
h = h()
|
||||
opener.add_handler(h)
|
||||
|
||||
return opener
|
||||
|
||||
|
||||
build_opener = OpenerFactory().build_opener
|
||||
|
||||
_opener = None
|
||||
urlopen_lock = _threading.Lock()
|
||||
def urlopen(url, data=None, timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
|
||||
global _opener
|
||||
if _opener is None:
|
||||
urlopen_lock.acquire()
|
||||
try:
|
||||
if _opener is None:
|
||||
_opener = build_opener()
|
||||
finally:
|
||||
urlopen_lock.release()
|
||||
return _opener.open(url, data, timeout)
|
||||
|
||||
def urlretrieve(url, filename=None, reporthook=None, data=None,
|
||||
timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
|
||||
global _opener
|
||||
if _opener is None:
|
||||
urlopen_lock.acquire()
|
||||
try:
|
||||
if _opener is None:
|
||||
_opener = build_opener()
|
||||
finally:
|
||||
urlopen_lock.release()
|
||||
return _opener.retrieve(url, filename, reporthook, data, timeout)
|
||||
|
||||
def install_opener(opener):
|
||||
global _opener
|
||||
_opener = opener
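
# Hedged sketch: the module-level helpers above mirror urllib's interface;
# build_opener, install_opener, urlopen and urlretrieve are all defined in
# this file (the URLs are placeholders):
#
#   opener = build_opener()          # default handler stack
#   install_opener(opener)
#   response = urlopen("http://example.com/")
#   print response.read()[:80]
#   filename, headers = urlretrieve("http://example.com/logo.png")
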
@@ -1,391 +0,0 @@
"""A simple "pull API" for HTML parsing, after Perl's HTML::TokeParser.

Examples

This program extracts all links from a document. It will print one
line for each link, containing the URL and the textual description
between the <A>...</A> tags:

import pullparser, sys
f = file(sys.argv[1])
p = pullparser.PullParser(f)
for token in p.tags("a"):
    if token.type == "endtag": continue
    url = dict(token.attrs).get("href", "-")
    text = p.get_compressed_text(endat=("endtag", "a"))
    print "%s\t%s" % (url, text)

This program extracts the <TITLE> from the document:

import pullparser, sys
f = file(sys.argv[1])
p = pullparser.PullParser(f)
if p.get_tag("title"):
    title = p.get_compressed_text()
    print "Title: %s" % title


Copyright 2003-2006 John J. Lee <jjl@pobox.com>
Copyright 1998-2001 Gisle Aas (original libwww-perl code)

This code is free software; you can redistribute it and/or modify it
under the terms of the BSD or ZPL 2.1 licenses.

"""
|
||||
|
||||
import re, htmlentitydefs
|
||||
import _sgmllib_copy as sgmllib
|
||||
import HTMLParser
|
||||
from xml.sax import saxutils
|
||||
|
||||
from _html import unescape, unescape_charref
|
||||
|
||||
|
||||
class NoMoreTokensError(Exception): pass
|
||||
|
||||
class Token:
"""Represents an HTML tag, declaration, processing instruction etc.

Behaves as both a tuple-like object (ie. iterable) and has attributes
.type, .data and .attrs.

>>> t = Token("starttag", "a", [("href", "http://www.python.org/")])
>>> t == ("starttag", "a", [("href", "http://www.python.org/")])
True
>>> (t.type, t.data) == ("starttag", "a")
True
>>> t.attrs == [("href", "http://www.python.org/")]
True

Public attributes

type: one of "starttag", "endtag", "startendtag", "charref", "entityref",
 "data", "comment", "decl", "pi", after the corresponding methods of
 HTMLParser.HTMLParser
data: For a tag, the tag name; otherwise, the relevant data carried by the
 tag, as a string
attrs: list of (name, value) pairs representing HTML attributes
 (or None if token does not represent an opening tag)

"""
|
||||
def __init__(self, type, data, attrs=None):
|
||||
self.type = type
|
||||
self.data = data
|
||||
self.attrs = attrs
|
||||
def __iter__(self):
|
||||
return iter((self.type, self.data, self.attrs))
|
||||
def __eq__(self, other):
|
||||
type, data, attrs = other
|
||||
if (self.type == type and
|
||||
self.data == data and
|
||||
self.attrs == attrs):
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
def __ne__(self, other): return not self.__eq__(other)
|
||||
def __repr__(self):
|
||||
args = ", ".join(map(repr, [self.type, self.data, self.attrs]))
|
||||
return self.__class__.__name__+"(%s)" % args
|
||||
|
||||
def __str__(self):
|
||||
"""
|
||||
>>> print Token("starttag", "br")
|
||||
<br>
|
||||
>>> print Token("starttag", "a",
|
||||
... [("href", "http://www.python.org/"), ("alt", '"foo"')])
|
||||
<a href="http://www.python.org/" alt='"foo"'>
|
||||
>>> print Token("startendtag", "br")
|
||||
<br />
|
||||
>>> print Token("startendtag", "br", [("spam", "eggs")])
|
||||
<br spam="eggs" />
|
||||
>>> print Token("endtag", "p")
|
||||
</p>
|
||||
>>> print Token("charref", "38")
|
||||
&
|
||||
>>> print Token("entityref", "amp")
|
||||
&
|
||||
>>> print Token("data", "foo\\nbar")
|
||||
foo
|
||||
bar
|
||||
>>> print Token("comment", "Life is a bowl\\nof cherries.")
|
||||
<!--Life is a bowl
|
||||
of cherries.-->
|
||||
>>> print Token("decl", "decl")
|
||||
<!decl>
|
||||
>>> print Token("pi", "pi")
|
||||
<?pi>
|
||||
"""
|
||||
if self.attrs is not None:
|
||||
attrs = "".join([" %s=%s" % (k, saxutils.quoteattr(v)) for
|
||||
k, v in self.attrs])
|
||||
else:
|
||||
attrs = ""
|
||||
if self.type == "starttag":
|
||||
return "<%s%s>" % (self.data, attrs)
|
||||
elif self.type == "startendtag":
|
||||
return "<%s%s />" % (self.data, attrs)
|
||||
elif self.type == "endtag":
|
||||
return "</%s>" % self.data
|
||||
elif self.type == "charref":
|
||||
return "&#%s;" % self.data
|
||||
elif self.type == "entityref":
|
||||
return "&%s;" % self.data
|
||||
elif self.type == "data":
|
||||
return self.data
|
||||
elif self.type == "comment":
|
||||
return "<!--%s-->" % self.data
|
||||
elif self.type == "decl":
|
||||
return "<!%s>" % self.data
|
||||
elif self.type == "pi":
|
||||
return "<?%s>" % self.data
|
||||
assert False
|
||||
|
||||
|
||||
def iter_until_exception(fn, exception, *args, **kwds):
|
||||
while 1:
|
||||
try:
|
||||
yield fn(*args, **kwds)
|
||||
except exception:
|
||||
raise StopIteration
|
||||
|
||||
|
||||
class _AbstractParser:
|
||||
chunk = 1024
|
||||
compress_re = re.compile(r"\s+")
|
||||
def __init__(self, fh, textify={"img": "alt", "applet": "alt"},
|
||||
encoding="ascii", entitydefs=None):
|
||||
"""
|
||||
fh: file-like object (only a .read() method is required) from which to
|
||||
read HTML to be parsed
|
||||
textify: mapping used by .get_text() and .get_compressed_text() methods
|
||||
to represent opening tags as text
|
||||
encoding: encoding used to encode numeric character references by
|
||||
.get_text() and .get_compressed_text() ("ascii" by default)
|
||||
|
||||
entitydefs: mapping like {"amp": "&", ...} containing HTML entity
|
||||
definitions (a sensible default is used). This is used to unescape
|
||||
entities in .get_text() (and .get_compressed_text()) and attribute
|
||||
values. If the encoding can not represent the character, the entity
|
||||
reference is left unescaped. Note that entity references (both
|
||||
numeric - e.g. { or ઼ - and non-numeric - e.g. &) are
|
||||
unescaped in attribute values and the return value of .get_text(), but
|
||||
not in data outside of tags. Instead, entity references outside of
|
||||
tags are represented as tokens. This is a bit odd, it's true :-/
|
||||
|
||||
If the element name of an opening tag matches a key in the textify
|
||||
mapping then that tag is converted to text. The corresponding value is
|
||||
used to specify which tag attribute to obtain the text from. textify
|
||||
maps from element names to either:
|
||||
|
||||
- an HTML attribute name, in which case the HTML attribute value is
|
||||
used as its text value along with the element name in square
|
||||
brackets (e.g. "alt text goes here[IMG]", or, if the alt attribute
|
||||
were missing, just "[IMG]")
|
||||
- a callable object (e.g. a function) which takes a Token and returns
|
||||
the string to be used as its text value
|
||||
|
||||
If textify has no key for an element name, nothing is substituted for
|
||||
the opening tag.
|
||||
|
||||
Public attributes:
|
||||
|
||||
encoding and textify: see above
|
||||
|
||||
"""
|
||||
self._fh = fh
|
||||
self._tokenstack = [] # FIFO
|
||||
self.textify = textify
|
||||
self.encoding = encoding
|
||||
if entitydefs is None:
|
||||
entitydefs = htmlentitydefs.name2codepoint
|
||||
self._entitydefs = entitydefs
|
||||
|
||||
def __iter__(self): return self
|
||||
|
||||
def tags(self, *names):
|
||||
return iter_until_exception(self.get_tag, NoMoreTokensError, *names)
|
||||
|
||||
def tokens(self, *tokentypes):
|
||||
return iter_until_exception(self.get_token, NoMoreTokensError,
|
||||
*tokentypes)
|
||||
|
||||
def next(self):
|
||||
try:
|
||||
return self.get_token()
|
||||
except NoMoreTokensError:
|
||||
raise StopIteration()
|
||||
|
||||
def get_token(self, *tokentypes):
|
||||
"""Pop the next Token object from the stack of parsed tokens.
|
||||
|
||||
If arguments are given, they are taken to be token types in which the
|
||||
caller is interested: tokens representing other elements will be
|
||||
skipped. Element names must be given in lower case.
|
||||
|
||||
Raises NoMoreTokensError.
|
||||
|
||||
"""
|
||||
while 1:
|
||||
while self._tokenstack:
|
||||
token = self._tokenstack.pop(0)
|
||||
if tokentypes:
|
||||
if token.type in tokentypes:
|
||||
return token
|
||||
else:
|
||||
return token
|
||||
data = self._fh.read(self.chunk)
|
||||
if not data:
|
||||
raise NoMoreTokensError()
|
||||
self.feed(data)
|
||||
|
||||
def unget_token(self, token):
|
||||
"""Push a Token back onto the stack."""
|
||||
self._tokenstack.insert(0, token)
|
||||
|
||||
def get_tag(self, *names):
|
||||
"""Return the next Token that represents an opening or closing tag.
|
||||
|
||||
If arguments are given, they are taken to be element names in which the
|
||||
caller is interested: tags representing other elements will be skipped.
|
||||
Element names must be given in lower case.
|
||||
|
||||
Raises NoMoreTokensError.
|
||||
|
||||
"""
|
||||
while 1:
|
||||
tok = self.get_token()
|
||||
if tok.type not in ["starttag", "endtag", "startendtag"]:
|
||||
continue
|
||||
if names:
|
||||
if tok.data in names:
|
||||
return tok
|
||||
else:
|
||||
return tok
|
||||
|
||||
def get_text(self, endat=None):
|
||||
"""Get some text.
|
||||
|
||||
endat: stop reading text at this tag (the tag is included in the
|
||||
returned text); endtag is a tuple (type, name) where type is
|
||||
"starttag", "endtag" or "startendtag", and name is the element name of
|
||||
the tag (element names must be given in lower case)
|
||||
|
||||
If endat is not given, .get_text() will stop at the next opening or
|
||||
closing tag, or when there are no more tokens (no exception is raised).
|
||||
Note that .get_text() includes the text representation (if any) of the
|
||||
opening tag, but pushes the opening tag back onto the stack. As a
|
||||
result, if you want to call .get_text() again, you need to call
|
||||
.get_tag() first (unless you want an empty string returned when you
|
||||
next call .get_text()).
|
||||
|
||||
Entity references are translated using the value of the entitydefs
|
||||
constructor argument (a mapping from names to characters like that
|
||||
provided by the standard module htmlentitydefs). Named entity
|
||||
references that are not in this mapping are left unchanged.
|
||||
|
||||
The textify attribute is used to translate opening tags into text: see
|
||||
the class docstring.
|
||||
|
||||
"""
|
||||
text = []
|
||||
tok = None
|
||||
while 1:
|
||||
try:
|
||||
tok = self.get_token()
|
||||
except NoMoreTokensError:
|
||||
# unget last token (not the one we just failed to get)
|
||||
if tok: self.unget_token(tok)
|
||||
break
|
||||
if tok.type == "data":
|
||||
text.append(tok.data)
|
||||
elif tok.type == "entityref":
|
||||
t = unescape("&%s;"%tok.data, self._entitydefs, self.encoding)
|
||||
text.append(t)
|
||||
elif tok.type == "charref":
|
||||
t = unescape_charref(tok.data, self.encoding)
|
||||
text.append(t)
|
||||
elif tok.type in ["starttag", "endtag", "startendtag"]:
|
||||
tag_name = tok.data
|
||||
if tok.type in ["starttag", "startendtag"]:
|
||||
alt = self.textify.get(tag_name)
|
||||
if alt is not None:
|
||||
if callable(alt):
|
||||
text.append(alt(tok))
|
||||
elif tok.attrs is not None:
|
||||
for k, v in tok.attrs:
|
||||
if k == alt:
|
||||
text.append(v)
|
||||
text.append("[%s]" % tag_name.upper())
|
||||
if endat is None or endat == (tok.type, tag_name):
|
||||
self.unget_token(tok)
|
||||
break
|
||||
return "".join(text)
|
||||
|
||||
def get_compressed_text(self, *args, **kwds):
|
||||
"""
|
||||
As .get_text(), but collapses each group of contiguous whitespace to a
|
||||
single space character, and removes all initial and trailing
|
||||
whitespace.
|
||||
|
||||
"""
|
||||
text = self.get_text(*args, **kwds)
|
||||
text = text.strip()
|
||||
return self.compress_re.sub(" ", text)
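
# Hedged sketch of get_tag()/get_text() in use (assumption: inside mechanize
# this module is importable as mechanize._pullparser; the module docstring
# uses the original standalone name, pullparser):
#
#   from mechanize import _pullparser
#   p = _pullparser.TolerantPullParser(open("page.html"))
#   for tok in p.tags("h2"):
#       if tok.type == "starttag":
#           print p.get_compressed_text(endat=("endtag", "h2"))
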
def handle_startendtag(self, tag, attrs):
|
||||
self._tokenstack.append(Token("startendtag", tag, attrs))
|
||||
def handle_starttag(self, tag, attrs):
|
||||
self._tokenstack.append(Token("starttag", tag, attrs))
|
||||
def handle_endtag(self, tag):
|
||||
self._tokenstack.append(Token("endtag", tag))
|
||||
def handle_charref(self, name):
|
||||
self._tokenstack.append(Token("charref", name))
|
||||
def handle_entityref(self, name):
|
||||
self._tokenstack.append(Token("entityref", name))
|
||||
def handle_data(self, data):
|
||||
self._tokenstack.append(Token("data", data))
|
||||
def handle_comment(self, data):
|
||||
self._tokenstack.append(Token("comment", data))
|
||||
def handle_decl(self, decl):
|
||||
self._tokenstack.append(Token("decl", decl))
|
||||
def unknown_decl(self, data):
|
||||
# XXX should this call self.error instead?
|
||||
#self.error("unknown declaration: " + `data`)
|
||||
self._tokenstack.append(Token("decl", data))
|
||||
def handle_pi(self, data):
|
||||
self._tokenstack.append(Token("pi", data))
|
||||
|
||||
def unescape_attr(self, name):
|
||||
return unescape(name, self._entitydefs, self.encoding)
|
||||
def unescape_attrs(self, attrs):
|
||||
escaped_attrs = []
|
||||
for key, val in attrs:
|
||||
escaped_attrs.append((key, self.unescape_attr(val)))
|
||||
return escaped_attrs
|
||||
|
||||
class PullParser(_AbstractParser, HTMLParser.HTMLParser):
|
||||
def __init__(self, *args, **kwds):
|
||||
HTMLParser.HTMLParser.__init__(self)
|
||||
_AbstractParser.__init__(self, *args, **kwds)
|
||||
def unescape(self, name):
|
||||
# Use the entitydefs passed into constructor, not
|
||||
# HTMLParser.HTMLParser's entitydefs.
|
||||
return self.unescape_attr(name)
|
||||
|
||||
class TolerantPullParser(_AbstractParser, sgmllib.SGMLParser):
|
||||
def __init__(self, *args, **kwds):
|
||||
sgmllib.SGMLParser.__init__(self)
|
||||
_AbstractParser.__init__(self, *args, **kwds)
|
||||
def unknown_starttag(self, tag, attrs):
|
||||
attrs = self.unescape_attrs(attrs)
|
||||
self._tokenstack.append(Token("starttag", tag, attrs))
|
||||
def unknown_endtag(self, tag):
|
||||
self._tokenstack.append(Token("endtag", tag))
|
||||
|
||||
|
||||
def _test():
|
||||
import doctest, _pullparser
|
||||
return doctest.testmod(_pullparser)
|
||||
|
||||
if __name__ == "__main__":
|
||||
_test()
|
||||
@@ -1,40 +0,0 @@
|
||||
"""Integration with Python standard library module urllib2: Request class.
|
||||
|
||||
Copyright 2004-2006 John J Lee <jjl@pobox.com>
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it
|
||||
under the terms of the BSD or ZPL 2.1 licenses (see the file
|
||||
COPYING.txt included with the distribution).
|
||||
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
import _rfc3986
|
||||
import _sockettimeout
|
||||
import _urllib2_fork
|
||||
|
||||
warn = logging.getLogger("mechanize").warning
|
||||
|
||||
|
||||
class Request(_urllib2_fork.Request):
|
||||
def __init__(self, url, data=None, headers={},
|
||||
origin_req_host=None, unverifiable=False, visit=None,
|
||||
timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
|
||||
# In mechanize 0.2, the interpretation of a unicode url argument will
|
||||
# change: A unicode url argument will be interpreted as an IRI, and a
|
||||
# bytestring as a URI. For now, we accept unicode or bytestring. We
|
||||
# don't insist that the value is always a URI (specifically, must only
|
||||
# contain characters which are legal), because that might break working
|
||||
# code (who knows what bytes some servers want to see, especially with
|
||||
# browser plugins for internationalised URIs).
|
||||
if not _rfc3986.is_clean_uri(url):
|
||||
warn("url argument is not a URI "
|
||||
"(contains illegal characters) %r" % url)
|
||||
_urllib2_fork.Request.__init__(self, url, data, headers)
|
||||
self.selector = None
|
||||
self.visit = visit
|
||||
self.timeout = timeout
|
||||
|
||||
def __str__(self):
|
||||
return "<Request for %s>" % self.get_full_url()
@@ -1,525 +0,0 @@
|
||||
"""Response classes.
|
||||
|
||||
The seek_wrapper code is not used if you're using UserAgent with
|
||||
.set_seekable_responses(False), or if you're using the urllib2-level interface
|
||||
HTTPEquivProcessor. Class closeable_response is instantiated by some handlers
|
||||
(AbstractHTTPHandler), but the closeable_response interface is only depended
|
||||
upon by Browser-level code. Function upgrade_response is only used if you're
|
||||
using Browser.
|
||||
|
||||
|
||||
Copyright 2006 John J. Lee <jjl@pobox.com>
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it
|
||||
under the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
|
||||
included with the distribution).
|
||||
|
||||
"""
|
||||
|
||||
import copy, mimetools, urllib2
|
||||
from cStringIO import StringIO
|
||||
|
||||
|
||||
def len_of_seekable(file_):
|
||||
# this function exists because evaluation of len(file_.getvalue()) on every
|
||||
# .read() from seek_wrapper would be O(N**2) in number of .read()s
|
||||
pos = file_.tell()
|
||||
file_.seek(0, 2) # to end
|
||||
try:
|
||||
return file_.tell()
|
||||
finally:
|
||||
file_.seek(pos)
|
||||
|
||||
|
||||
# XXX Andrew Dalke kindly sent me a similar class in response to my request on
|
||||
# comp.lang.python, which I then proceeded to lose. I wrote this class
|
||||
# instead, but I think he's released his code publicly since, could pinch the
|
||||
# tests from it, at least...
|
||||
|
||||
# For testing seek_wrapper invariant (note that
|
||||
# test_urllib2.HandlerTest.test_seekable is expected to fail when this
|
||||
# invariant checking is turned on). The invariant checking is done by module
|
||||
# ipdc, which is available here:
|
||||
# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/436834
|
||||
## from ipdbc import ContractBase
|
||||
## class seek_wrapper(ContractBase):
|
||||
class seek_wrapper:
"""Adds a seek method to a file object.

This is only designed for seeking on readonly file-like objects.

Wrapped file-like object must have a read method. The readline method is
only supported if that method is present on the wrapped object. The
readlines method is always supported. xreadlines and iteration are
supported only for Python 2.2 and above.

Public attributes:

wrapped: the wrapped file object
is_closed: true iff .close() has been called

WARNING: All other attributes of the wrapped object (ie. those that are not
one of wrapped, read, readline, readlines, xreadlines, __iter__ and next)
are passed through unaltered, which may or may not make sense for your
particular file object.

"""
# General strategy is to check that cache is full enough, then delegate to
|
||||
# the cache (self.__cache, which is a cStringIO.StringIO instance). A seek
|
||||
# position (self.__pos) is maintained independently of the cache, in order
|
||||
# that a single cache may be shared between multiple seek_wrapper objects.
|
||||
# Copying using module copy shares the cache in this way.
|
||||
|
||||
def __init__(self, wrapped):
|
||||
self.wrapped = wrapped
|
||||
self.__read_complete_state = [False]
|
||||
self.__is_closed_state = [False]
|
||||
self.__have_readline = hasattr(self.wrapped, "readline")
|
||||
self.__cache = StringIO()
|
||||
self.__pos = 0 # seek position
|
||||
|
||||
def invariant(self):
|
||||
# The end of the cache is always at the same place as the end of the
|
||||
# wrapped file (though the .tell() method is not required to be present
|
||||
# on wrapped file).
|
||||
return self.wrapped.tell() == len(self.__cache.getvalue())
|
||||
|
||||
def close(self):
|
||||
self.wrapped.close()
|
||||
self.is_closed = True
|
||||
|
||||
def __getattr__(self, name):
|
||||
if name == "is_closed":
|
||||
return self.__is_closed_state[0]
|
||||
elif name == "read_complete":
|
||||
return self.__read_complete_state[0]
|
||||
|
||||
wrapped = self.__dict__.get("wrapped")
|
||||
if wrapped:
|
||||
return getattr(wrapped, name)
|
||||
|
||||
return getattr(self.__class__, name)
|
||||
|
||||
def __setattr__(self, name, value):
|
||||
if name == "is_closed":
|
||||
self.__is_closed_state[0] = bool(value)
|
||||
elif name == "read_complete":
|
||||
if not self.is_closed:
|
||||
self.__read_complete_state[0] = bool(value)
|
||||
else:
|
||||
self.__dict__[name] = value
|
||||
|
||||
def seek(self, offset, whence=0):
|
||||
assert whence in [0,1,2]
|
||||
|
||||
# how much data, if any, do we need to read?
|
||||
if whence == 2: # 2: relative to end of *wrapped* file
|
||||
if offset < 0: raise ValueError("negative seek offset")
|
||||
# since we don't know yet where the end of that file is, we must
|
||||
# read everything
|
||||
to_read = None
|
||||
else:
|
||||
if whence == 0: # 0: absolute
|
||||
if offset < 0: raise ValueError("negative seek offset")
|
||||
dest = offset
|
||||
else: # 1: relative to current position
|
||||
pos = self.__pos
|
||||
if pos < offset:
|
||||
raise ValueError("seek to before start of file")
|
||||
dest = pos + offset
|
||||
end = len_of_seekable(self.__cache)
|
||||
to_read = dest - end
|
||||
if to_read < 0:
|
||||
to_read = 0
|
||||
|
||||
if to_read != 0:
|
||||
self.__cache.seek(0, 2)
|
||||
if to_read is None:
|
||||
assert whence == 2
|
||||
self.__cache.write(self.wrapped.read())
|
||||
self.read_complete = True
|
||||
self.__pos = self.__cache.tell() - offset
|
||||
else:
|
||||
data = self.wrapped.read(to_read)
|
||||
if not data:
|
||||
self.read_complete = True
|
||||
else:
|
||||
self.__cache.write(data)
|
||||
# Don't raise an exception even if we've seek()ed past the end
|
||||
# of .wrapped, since fseek() doesn't complain in that case.
|
||||
# Also like fseek(), pretend we have seek()ed past the end,
|
||||
# i.e. not:
|
||||
#self.__pos = self.__cache.tell()
|
||||
# but rather:
|
||||
self.__pos = dest
|
||||
else:
|
||||
self.__pos = dest
|
||||
|
||||
def tell(self):
|
||||
return self.__pos
|
||||
|
||||
def __copy__(self):
|
||||
cpy = self.__class__(self.wrapped)
|
||||
cpy.__cache = self.__cache
|
||||
cpy.__read_complete_state = self.__read_complete_state
|
||||
cpy.__is_closed_state = self.__is_closed_state
|
||||
return cpy
|
||||
|
||||
def get_data(self):
|
||||
pos = self.__pos
|
||||
try:
|
||||
self.seek(0)
|
||||
return self.read(-1)
|
||||
finally:
|
||||
self.__pos = pos
|
||||
|
||||
def read(self, size=-1):
|
||||
pos = self.__pos
|
||||
end = len_of_seekable(self.__cache)
|
||||
available = end - pos
|
||||
|
||||
# enough data already cached?
|
||||
if size <= available and size != -1:
|
||||
self.__cache.seek(pos)
|
||||
self.__pos = pos+size
|
||||
return self.__cache.read(size)
|
||||
|
||||
# no, so read sufficient data from wrapped file and cache it
|
||||
self.__cache.seek(0, 2)
|
||||
if size == -1:
|
||||
self.__cache.write(self.wrapped.read())
|
||||
self.read_complete = True
|
||||
else:
|
||||
to_read = size - available
|
||||
assert to_read > 0
|
||||
data = self.wrapped.read(to_read)
|
||||
if not data:
|
||||
self.read_complete = True
|
||||
else:
|
||||
self.__cache.write(data)
|
||||
self.__cache.seek(pos)
|
||||
|
||||
data = self.__cache.read(size)
|
||||
self.__pos = self.__cache.tell()
|
||||
assert self.__pos == pos + len(data)
|
||||
return data
|
||||
|
||||
def readline(self, size=-1):
|
||||
if not self.__have_readline:
|
||||
raise NotImplementedError("no readline method on wrapped object")
|
||||
|
||||
# line we're about to read might not be complete in the cache, so
|
||||
# read another line first
|
||||
pos = self.__pos
|
||||
self.__cache.seek(0, 2)
|
||||
data = self.wrapped.readline()
|
||||
if not data:
|
||||
self.read_complete = True
|
||||
else:
|
||||
self.__cache.write(data)
|
||||
self.__cache.seek(pos)
|
||||
|
||||
data = self.__cache.readline()
|
||||
if size != -1:
|
||||
r = data[:size]
|
||||
self.__pos = pos+size
|
||||
else:
|
||||
r = data
|
||||
self.__pos = pos+len(data)
|
||||
return r
|
||||
|
||||
def readlines(self, sizehint=-1):
|
||||
pos = self.__pos
|
||||
self.__cache.seek(0, 2)
|
||||
self.__cache.write(self.wrapped.read())
|
||||
self.read_complete = True
|
||||
self.__cache.seek(pos)
|
||||
data = self.__cache.readlines(sizehint)
|
||||
self.__pos = self.__cache.tell()
|
||||
return data
|
||||
|
||||
def __iter__(self): return self
|
||||
def next(self):
|
||||
line = self.readline()
|
||||
if line == "": raise StopIteration
|
||||
return line
|
||||
|
||||
xreadlines = __iter__
|
||||
|
||||
def __repr__(self):
|
||||
return ("<%s at %s whose wrapped object = %r>" %
|
||||
(self.__class__.__name__, hex(abs(id(self))), self.wrapped))
|
||||
|
||||
|
||||
class response_seek_wrapper(seek_wrapper):
|
||||
|
||||
"""
|
||||
Supports copying response objects and setting response body data.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, wrapped):
|
||||
seek_wrapper.__init__(self, wrapped)
|
||||
self._headers = self.wrapped.info()
|
||||
|
||||
def __copy__(self):
|
||||
cpy = seek_wrapper.__copy__(self)
|
||||
# copy headers from delegate
|
||||
cpy._headers = copy.copy(self.info())
|
||||
return cpy
|
||||
|
||||
# Note that .info() and .geturl() (the only two urllib2 response methods
|
||||
# that are not implemented by seek_wrapper) must be here explicitly rather
|
||||
# than by seek_wrapper's __getattr__ delegation) so that the nasty
|
||||
# dynamically-created HTTPError classes in get_seek_wrapper_class() get the
|
||||
# wrapped object's implementation, and not HTTPError's.
|
||||
|
||||
def info(self):
|
||||
return self._headers
|
||||
|
||||
def geturl(self):
|
||||
return self.wrapped.geturl()
|
||||
|
||||
def set_data(self, data):
|
||||
self.seek(0)
|
||||
self.read()
|
||||
self.close()
|
||||
cache = self._seek_wrapper__cache = StringIO()
|
||||
cache.write(data)
|
||||
self.seek(0)
|
||||
|
||||
|
||||
class eoffile:
|
||||
# file-like object that always claims to be at end-of-file...
|
||||
def read(self, size=-1): return ""
|
||||
def readline(self, size=-1): return ""
|
||||
def __iter__(self): return self
|
||||
def next(self): return ""
|
||||
def close(self): pass
|
||||
|
||||
class eofresponse(eoffile):
|
||||
def __init__(self, url, headers, code, msg):
|
||||
self._url = url
|
||||
self._headers = headers
|
||||
self.code = code
|
||||
self.msg = msg
|
||||
def geturl(self): return self._url
|
||||
def info(self): return self._headers
|
||||
|
||||
|
||||
class closeable_response:
|
||||
"""Avoids unnecessarily clobbering urllib.addinfourl methods on .close().
|
||||
|
||||
Only supports responses returned by mechanize.HTTPHandler.
|
||||
|
||||
After .close(), the following methods are supported:
|
||||
|
||||
.read()
|
||||
.readline()
|
||||
.info()
|
||||
.geturl()
|
||||
.__iter__()
|
||||
.next()
|
||||
.close()
|
||||
|
||||
and the following attributes are supported:
|
||||
|
||||
.code
|
||||
.msg
|
||||
|
||||
Also supports pickling (but the stdlib currently does something to prevent
|
||||
it: http://python.org/sf/1144636).
|
||||
|
||||
"""
|
||||
# presence of this attr indicates is useable after .close()
|
||||
closeable_response = None
|
||||
|
||||
def __init__(self, fp, headers, url, code, msg):
|
||||
self._set_fp(fp)
|
||||
self._headers = headers
|
||||
self._url = url
|
||||
self.code = code
|
||||
self.msg = msg
|
||||
|
||||
def _set_fp(self, fp):
|
||||
self.fp = fp
|
||||
self.read = self.fp.read
|
||||
self.readline = self.fp.readline
|
||||
if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
|
||||
if hasattr(self.fp, "fileno"):
|
||||
self.fileno = self.fp.fileno
|
||||
else:
|
||||
self.fileno = lambda: None
|
||||
self.__iter__ = self.fp.__iter__
|
||||
self.next = self.fp.next
|
||||
|
||||
def __repr__(self):
|
||||
return '<%s at %s whose fp = %r>' % (
|
||||
self.__class__.__name__, hex(abs(id(self))), self.fp)
|
||||
|
||||
def info(self):
|
||||
return self._headers
|
||||
|
||||
def geturl(self):
|
||||
return self._url
|
||||
|
||||
def close(self):
|
||||
wrapped = self.fp
|
||||
wrapped.close()
|
||||
new_wrapped = eofresponse(
|
||||
self._url, self._headers, self.code, self.msg)
|
||||
self._set_fp(new_wrapped)
|
||||
|
||||
def __getstate__(self):
|
||||
# There are three obvious options here:
|
||||
# 1. truncate
|
||||
# 2. read to end
|
||||
# 3. close socket, pickle state including read position, then open
|
||||
# again on unpickle and use Range header
|
||||
# XXXX um, 4. refuse to pickle unless .close()d. This is better,
|
||||
# actually ("errors should never pass silently"). Pickling doesn't
|
||||
# work anyway ATM, because of http://python.org/sf/1144636 so fix
|
||||
# this later
|
||||
|
||||
# 2 breaks pickle protocol, because one expects the original object
|
||||
# to be left unscathed by pickling. 3 is too complicated and
|
||||
# surprising (and too much work ;-) to happen in a sane __getstate__.
|
||||
# So we do 1.
|
||||
|
||||
state = self.__dict__.copy()
|
||||
new_wrapped = eofresponse(
|
||||
self._url, self._headers, self.code, self.msg)
|
||||
state["wrapped"] = new_wrapped
|
||||
return state
|
||||
|
||||
def test_response(data='test data', headers=[],
|
||||
url="http://example.com/", code=200, msg="OK"):
|
||||
return make_response(data, headers, url, code, msg)
|
||||
|
||||
def test_html_response(data='test data', headers=[],
|
||||
url="http://example.com/", code=200, msg="OK"):
|
||||
headers += [("Content-type", "text/html")]
|
||||
return make_response(data, headers, url, code, msg)
|
||||
|
||||
def make_response(data, headers, url, code, msg):
"""Convenient factory for objects implementing response interface.

data: string containing response body data
headers: sequence of (name, value) pairs
url: URL of response
code: integer response code (e.g. 200)
msg: string response code message (e.g. "OK")

"""
|
||||
mime_headers = make_headers(headers)
|
||||
r = closeable_response(StringIO(data), mime_headers, url, code, msg)
|
||||
return response_seek_wrapper(r)
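
# Hedged sketch: test_response()/make_response() above build seekable,
# in-memory responses, which is handy in unit tests:
#
#   r = test_response("hello world", headers=[("Content-type", "text/plain")])
#   assert r.read(5) == "hello"
#   r.seek(0)
#   assert r.get_data() == "hello world"
#   assert r.info()["Content-type"] == "text/plain"
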
def make_headers(headers):
|
||||
"""
|
||||
headers: sequence of (name, value) pairs
|
||||
"""
|
||||
hdr_text = []
|
||||
for name_value in headers:
|
||||
hdr_text.append("%s: %s" % name_value)
|
||||
return mimetools.Message(StringIO("\n".join(hdr_text)))
|
||||
|
||||
|
||||
# Rest of this module is especially horrible, but needed, at least until fork
|
||||
# urllib2. Even then, may want to preseve urllib2 compatibility.
|
||||
|
||||
def get_seek_wrapper_class(response):
|
||||
# in order to wrap response objects that are also exceptions, we must
|
||||
# dynamically subclass the exception :-(((
|
||||
if (isinstance(response, urllib2.HTTPError) and
|
||||
not hasattr(response, "seek")):
|
||||
if response.__class__.__module__ == "__builtin__":
|
||||
exc_class_name = response.__class__.__name__
|
||||
else:
|
||||
exc_class_name = "%s.%s" % (
|
||||
response.__class__.__module__, response.__class__.__name__)
|
||||
|
||||
class httperror_seek_wrapper(response_seek_wrapper, response.__class__):
|
||||
# this only derives from HTTPError in order to be a subclass --
|
||||
# the HTTPError behaviour comes from delegation
|
||||
|
||||
_exc_class_name = exc_class_name
|
||||
|
||||
def __init__(self, wrapped):
|
||||
response_seek_wrapper.__init__(self, wrapped)
|
||||
# be compatible with undocumented HTTPError attributes :-(
|
||||
self.hdrs = wrapped.info()
|
||||
self.filename = wrapped.geturl()
|
||||
|
||||
def __repr__(self):
|
||||
return (
|
||||
"<%s (%s instance) at %s "
|
||||
"whose wrapped object = %r>" % (
|
||||
self.__class__.__name__, self._exc_class_name,
|
||||
hex(abs(id(self))), self.wrapped)
|
||||
)
|
||||
wrapper_class = httperror_seek_wrapper
|
||||
else:
|
||||
wrapper_class = response_seek_wrapper
|
||||
return wrapper_class
|
||||
|
||||
def seek_wrapped_response(response):
|
||||
"""Return a copy of response that supports seekable response interface.
|
||||
|
||||
Accepts responses from both mechanize and urllib2 handlers.
|
||||
|
||||
Copes with both ordinary response instances and HTTPError instances (which
|
||||
can't be simply wrapped due to the requirement of preserving the exception
|
||||
base class).
|
||||
"""
|
||||
if not hasattr(response, "seek"):
|
||||
wrapper_class = get_seek_wrapper_class(response)
|
||||
response = wrapper_class(response)
|
||||
assert hasattr(response, "get_data")
|
||||
return response
|
||||
|
||||
def upgrade_response(response):
|
||||
"""Return a copy of response that supports Browser response interface.
|
||||
|
||||
Browser response interface is that of "seekable responses"
|
||||
(response_seek_wrapper), plus the requirement that responses must be
|
||||
useable after .close() (closeable_response).
|
||||
|
||||
Accepts responses from both mechanize and urllib2 handlers.
|
||||
|
||||
Copes with both ordinary response instances and HTTPError instances (which
|
||||
can't be simply wrapped due to the requirement of preserving the exception
|
||||
base class).
|
||||
"""
|
||||
wrapper_class = get_seek_wrapper_class(response)
|
||||
if hasattr(response, "closeable_response"):
|
||||
if not hasattr(response, "seek"):
|
||||
response = wrapper_class(response)
|
||||
assert hasattr(response, "get_data")
|
||||
return copy.copy(response)
|
||||
|
||||
# a urllib2 handler constructed the response, i.e. the response is an
|
||||
# urllib.addinfourl or a urllib2.HTTPError, instead of a
|
||||
# _Util.closeable_response as returned by e.g. mechanize.HTTPHandler
|
||||
try:
|
||||
code = response.code
|
||||
except AttributeError:
|
||||
code = None
|
||||
try:
|
||||
msg = response.msg
|
||||
except AttributeError:
|
||||
msg = None
|
||||
|
||||
# may have already-.read() data from .seek() cache
|
||||
data = None
|
||||
get_data = getattr(response, "get_data", None)
|
||||
if get_data:
|
||||
data = get_data()
|
||||
|
||||
response = closeable_response(
|
||||
response.fp, response.info(), response.geturl(), code, msg)
|
||||
response = wrapper_class(response)
|
||||
if data:
|
||||
response.set_data(data)
|
||||
return response
|
||||
@@ -1,245 +0,0 @@
|
||||
"""RFC 3986 URI parsing and relative reference resolution / absolutization.
|
||||
|
||||
(aka splitting and joining)
|
||||
|
||||
Copyright 2006 John J. Lee <jjl@pobox.com>
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it under
|
||||
the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
|
||||
included with the distribution).
|
||||
|
||||
"""
|
||||
|
||||
# XXX Wow, this is ugly. Overly-direct translation of the RFC ATM.
|
||||
|
||||
import re, urllib
|
||||
|
||||
## def chr_range(a, b):
|
||||
## return "".join(map(chr, range(ord(a), ord(b)+1)))
|
||||
|
||||
## UNRESERVED_URI_CHARS = ("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
## "abcdefghijklmnopqrstuvwxyz"
|
||||
## "0123456789"
|
||||
## "-_.~")
|
||||
## RESERVED_URI_CHARS = "!*'();:@&=+$,/?#[]"
|
||||
## URI_CHARS = RESERVED_URI_CHARS+UNRESERVED_URI_CHARS+'%'
|
||||
# this re matches any character that's not in URI_CHARS
|
||||
BAD_URI_CHARS_RE = re.compile("[^A-Za-z0-9\-_.~!*'();:@&=+$,/?%#[\]]")
|
||||
|
||||
|
||||
def clean_url(url, encoding):
|
||||
# percent-encode illegal URI characters
|
||||
# Trying to come up with test cases for this gave me a headache, revisit
|
||||
# when do switch to unicode.
|
||||
# Somebody else's comments (lost the attribution):
|
||||
## - IE will return you the url in the encoding you send it
|
||||
## - Mozilla/Firefox will send you latin-1 if there's no non latin-1
|
||||
## characters in your link. It will send you utf-8 however if there are...
|
||||
if type(url) == type(""):
|
||||
url = url.decode(encoding, "replace")
|
||||
url = url.strip()
|
||||
# for second param to urllib.quote(), we want URI_CHARS, minus the
|
||||
# 'always_safe' characters that urllib.quote() never percent-encodes
|
||||
return urllib.quote(url.encode(encoding), "!*'();:@&=+$,/?%#[]~")
|
||||
|
||||
def is_clean_uri(uri):
"""
>>> is_clean_uri("ABC!")
True
>>> is_clean_uri(u"ABC!")
True
>>> is_clean_uri("ABC|")
False
>>> is_clean_uri(u"ABC|")
False
>>> is_clean_uri("http://example.com/0")
True
>>> is_clean_uri(u"http://example.com/0")
True
"""
|
||||
# note module re treats bytestrings as through they were decoded as latin-1
|
||||
# so this function accepts both unicode and bytestrings
|
||||
return not bool(BAD_URI_CHARS_RE.search(uri))
|
||||
|
||||
|
||||
SPLIT_MATCH = re.compile(
|
||||
r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?").match
|
||||
def urlsplit(absolute_uri):
|
||||
"""Return scheme, authority, path, query, fragment."""
|
||||
match = SPLIT_MATCH(absolute_uri)
|
||||
if match:
|
||||
g = match.groups()
|
||||
return g[1], g[3], g[4], g[6], g[8]
|
||||
|
||||
def urlunsplit(parts):
|
||||
scheme, authority, path, query, fragment = parts
|
||||
r = []
|
||||
append = r.append
|
||||
if scheme is not None:
|
||||
append(scheme)
|
||||
append(":")
|
||||
if authority is not None:
|
||||
append("//")
|
||||
append(authority)
|
||||
append(path)
|
||||
if query is not None:
|
||||
append("?")
|
||||
append(query)
|
||||
if fragment is not None:
|
||||
append("#")
|
||||
append(fragment)
|
||||
return "".join(r)
|
||||
|
||||
def urljoin(base_uri, uri_reference):
|
||||
"""Join a base URI with a URI reference and return the resulting URI.
|
||||
|
||||
See RFC 3986.
|
||||
"""
|
||||
return urlunsplit(urljoin_parts(urlsplit(base_uri),
|
||||
urlsplit(uri_reference)))
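
# Hedged illustration of RFC 3986 reference resolution as implemented above:
#
#   print urljoin("http://example.com/a/b/c", "../d")
#   # -> http://example.com/a/d
#   print urljoin("http://example.com/a/b/c", "?q=1")
#   # -> http://example.com/a/b/c?q=1
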
# oops, this doesn't do the same thing as the literal translation
|
||||
# from the RFC below
|
||||
## import posixpath
|
||||
## def urljoin_parts(base_parts, reference_parts):
|
||||
## scheme, authority, path, query, fragment = base_parts
|
||||
## rscheme, rauthority, rpath, rquery, rfragment = reference_parts
|
||||
|
||||
## # compute target URI path
|
||||
## if rpath == "":
|
||||
## tpath = path
|
||||
## else:
|
||||
## tpath = rpath
|
||||
## if not tpath.startswith("/"):
|
||||
## tpath = merge(authority, path, tpath)
|
||||
## tpath = posixpath.normpath(tpath)
|
||||
|
||||
## if rscheme is not None:
|
||||
## return (rscheme, rauthority, tpath, rquery, rfragment)
|
||||
## elif rauthority is not None:
|
||||
## return (scheme, rauthority, tpath, rquery, rfragment)
|
||||
## elif rpath == "":
|
||||
## if rquery is not None:
|
||||
## tquery = rquery
|
||||
## else:
|
||||
## tquery = query
|
||||
## return (scheme, authority, tpath, tquery, rfragment)
|
||||
## else:
|
||||
## return (scheme, authority, tpath, rquery, rfragment)
|
||||
|
||||
def urljoin_parts(base_parts, reference_parts):
|
||||
scheme, authority, path, query, fragment = base_parts
|
||||
rscheme, rauthority, rpath, rquery, rfragment = reference_parts
|
||||
|
||||
if rscheme == scheme:
|
||||
rscheme = None
|
||||
|
||||
if rscheme is not None:
|
||||
tscheme, tauthority, tpath, tquery = (
|
||||
rscheme, rauthority, remove_dot_segments(rpath), rquery)
|
||||
else:
|
||||
if rauthority is not None:
|
||||
tauthority, tpath, tquery = (
|
||||
rauthority, remove_dot_segments(rpath), rquery)
|
||||
else:
|
||||
if rpath == "":
|
||||
tpath = path
|
||||
if rquery is not None:
|
||||
tquery = rquery
|
||||
else:
|
||||
tquery = query
|
||||
else:
|
||||
if rpath.startswith("/"):
|
||||
tpath = remove_dot_segments(rpath)
|
||||
else:
|
||||
tpath = merge(authority, path, rpath)
|
||||
tpath = remove_dot_segments(tpath)
|
||||
tquery = rquery
|
||||
tauthority = authority
|
||||
tscheme = scheme
|
||||
tfragment = rfragment
|
||||
return (tscheme, tauthority, tpath, tquery, tfragment)
|
||||
|
||||
# um, something *vaguely* like this is what I want, but I have to generate
|
||||
# lots of test cases first, if only to understand what it is that
|
||||
# remove_dot_segments really does...
|
||||
## def remove_dot_segments(path):
|
||||
## if path == '':
|
||||
## return ''
|
||||
## comps = path.split('/')
|
||||
## new_comps = []
|
||||
## for comp in comps:
|
||||
## if comp in ['.', '']:
|
||||
## if not new_comps or new_comps[-1]:
|
||||
## new_comps.append('')
|
||||
## continue
|
||||
## if comp != '..':
|
||||
## new_comps.append(comp)
|
||||
## elif new_comps:
|
||||
## new_comps.pop()
|
||||
## return '/'.join(new_comps)
|
||||
|
||||
|
||||
def remove_dot_segments(path):
|
||||
r = []
|
||||
while path:
|
||||
# A
|
||||
if path.startswith("../"):
|
||||
path = path[3:]
|
||||
continue
|
||||
if path.startswith("./"):
|
||||
path = path[2:]
|
||||
continue
|
||||
# B
|
||||
if path.startswith("/./"):
|
||||
path = path[2:]
|
||||
continue
|
||||
if path == "/.":
|
||||
path = "/"
|
||||
continue
|
||||
# C
|
||||
if path.startswith("/../"):
|
||||
path = path[3:]
|
||||
if r:
|
||||
r.pop()
|
||||
continue
|
||||
if path == "/..":
|
||||
path = "/"
|
||||
if r:
|
||||
r.pop()
|
||||
continue
|
||||
# D
|
||||
if path == ".":
|
||||
path = path[1:]
|
||||
continue
|
||||
if path == "..":
|
||||
path = path[2:]
|
||||
continue
|
||||
# E
|
||||
start = 0
|
||||
if path.startswith("/"):
|
||||
start = 1
|
||||
ii = path.find("/", start)
|
||||
if ii < 0:
|
||||
ii = None
|
||||
r.append(path[:ii])
|
||||
if ii is None:
|
||||
break
|
||||
path = path[ii:]
|
||||
return "".join(r)
def merge(base_authority, base_path, ref_path):
|
||||
# XXXX Oddly, the sample Perl implementation of this by Roy Fielding
|
||||
# doesn't even take base_authority as a parameter, despite the wording in
|
||||
# the RFC suggesting otherwise. Perhaps I'm missing some obvious identity.
|
||||
#if base_authority is not None and base_path == "":
|
||||
if base_path == "":
|
||||
return "/" + ref_path
|
||||
ii = base_path.rfind("/")
|
||||
if ii >= 0:
|
||||
return base_path[:ii+1] + ref_path
|
||||
return ref_path
|
||||
|
||||
if __name__ == "__main__":
|
||||
import doctest
|
||||
doctest.testmod()
|
||||
@@ -1,559 +0,0 @@
|
||||
# Taken from Python 2.6.4 and regexp module constants modified
|
||||
"""A parser for SGML, using the derived class as a static DTD."""
|
||||
|
||||
# XXX This only supports those SGML features used by HTML.
|
||||
|
||||
# XXX There should be a way to distinguish between PCDATA (parsed
|
||||
# character data -- the normal case), RCDATA (replaceable character
|
||||
# data -- only char and entity references and end tags are special)
|
||||
# and CDATA (character data -- only end tags are special). RCDATA is
|
||||
# not supported at all.
|
||||
|
||||
|
||||
# from warnings import warnpy3k
|
||||
# warnpy3k("the sgmllib module has been removed in Python 3.0",
|
||||
# stacklevel=2)
|
||||
# del warnpy3k
|
||||
|
||||
import markupbase
|
||||
import re
|
||||
|
||||
__all__ = ["SGMLParser", "SGMLParseError"]
|
||||
|
||||
# Regular expressions used for parsing
|
||||
|
||||
interesting = re.compile('[&<]')
|
||||
incomplete = re.compile('&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|'
|
||||
'<([a-zA-Z][^<>]*|'
|
||||
'/([a-zA-Z][^<>]*)?|'
|
||||
'![^<>]*)?')
|
||||
|
||||
entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
|
||||
# hack to fix http://bugs.python.org/issue803422
|
||||
# charref = re.compile('&#([0-9]+)[^0-9]')
|
||||
charref = re.compile("&#(x?[0-9a-fA-F]+)[^0-9a-fA-F]")
|
||||
|
||||
starttagopen = re.compile('<[>a-zA-Z]')
|
||||
shorttagopen = re.compile('<[a-zA-Z][-.a-zA-Z0-9]*/')
|
||||
shorttag = re.compile('<([a-zA-Z][-.a-zA-Z0-9]*)/([^/]*)/')
|
||||
piclose = re.compile('>')
|
||||
endbracket = re.compile('[<>]')
|
||||
# hack moved from _beautifulsoup.py (bundled BeautifulSoup version 2)
|
||||
#This code makes Beautiful Soup able to parse XML with namespaces
|
||||
# tagfind = re.compile('[a-zA-Z][-_.a-zA-Z0-9]*')
|
||||
tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*')
|
||||
attrfind = re.compile(
|
||||
r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*'
|
||||
r'(\'[^\']*\'|"[^"]*"|[][\-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?')
|
||||
|
||||
|
||||
class SGMLParseError(RuntimeError):
|
||||
"""Exception raised for all parse errors."""
|
||||
pass
|
||||
|
||||
|
||||
# SGML parser base class -- find tags and call handler functions.
|
||||
# Usage: p = SGMLParser(); p.feed(data); ...; p.close().
|
||||
# The dtd is defined by deriving a class which defines methods
|
||||
# with special names to handle tags: start_foo and end_foo to handle
|
||||
# <foo> and </foo>, respectively, or do_foo to handle <foo> by itself.
|
||||
# (Tags are converted to lower case for this purpose.) The data
|
||||
# between tags is passed to the parser by calling self.handle_data()
|
||||
# with some data as argument (the data may be split up in arbitrary
|
||||
# chunks). Entity references are passed by calling
|
||||
# self.handle_entityref() with the entity reference as argument.
|
||||
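# Illustrative sketch (not part of the original module): a hypothetical
# subclass using the naming convention described above -- start_a/end_a
# handle <a>...</a>, do_br handles the standalone <br> tag.
#
#     class LinkDumper(SGMLParser):
#         def start_a(self, attrs):           # called for <a ...>
#             for name, value in attrs:
#                 if name == "href":
#                     print value
#         def end_a(self):                    # called for </a>
#             pass
#         def do_br(self, attrs):             # called for <br> on its own
#             pass
#
#     parser = LinkDumper()
#     parser.feed('<a href="http://example.com/">link</a><br>')
#     parser.close()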
|
||||
class SGMLParser(markupbase.ParserBase):
|
||||
# Definition of entities -- derived classes may override
|
||||
entity_or_charref = re.compile('&(?:'
|
||||
'([a-zA-Z][-.a-zA-Z0-9]*)|#([0-9]+)'
|
||||
')(;?)')
|
||||
|
||||
def __init__(self, verbose=0):
|
||||
"""Initialize and reset this instance."""
|
||||
self.verbose = verbose
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
"""Reset this instance. Loses all unprocessed data."""
|
||||
self.__starttag_text = None
|
||||
self.rawdata = ''
|
||||
self.stack = []
|
||||
self.lasttag = '???'
|
||||
self.nomoretags = 0
|
||||
self.literal = 0
|
||||
markupbase.ParserBase.reset(self)
|
||||
|
||||
def setnomoretags(self):
|
||||
"""Enter literal mode (CDATA) till EOF.
|
||||
|
||||
Intended for derived classes only.
|
||||
"""
|
||||
self.nomoretags = self.literal = 1
|
||||
|
||||
def setliteral(self, *args):
|
||||
"""Enter literal mode (CDATA).
|
||||
|
||||
Intended for derived classes only.
|
||||
"""
|
||||
self.literal = 1
|
||||
|
||||
def feed(self, data):
|
||||
"""Feed some data to the parser.
|
||||
|
||||
Call this as often as you want, with as little or as much text
|
||||
as you want (may include '\n'). (This just saves the text,
|
||||
all the processing is done by goahead().)
|
||||
"""
|
||||
|
||||
self.rawdata = self.rawdata + data
|
||||
self.goahead(0)
|
||||
|
||||
def close(self):
|
||||
"""Handle the remaining data."""
|
||||
self.goahead(1)
|
||||
|
||||
def error(self, message):
|
||||
raise SGMLParseError(message)
|
||||
|
||||
# Internal -- handle data as far as reasonable. May leave state
|
||||
# and data to be processed by a subsequent call. If 'end' is
|
||||
# true, force handling all data as if followed by EOF marker.
|
||||
def goahead(self, end):
|
||||
rawdata = self.rawdata
|
||||
i = 0
|
||||
n = len(rawdata)
|
||||
while i < n:
|
||||
if self.nomoretags:
|
||||
self.handle_data(rawdata[i:n])
|
||||
i = n
|
||||
break
|
||||
match = interesting.search(rawdata, i)
|
||||
if match: j = match.start()
|
||||
else: j = n
|
||||
if i < j:
|
||||
self.handle_data(rawdata[i:j])
|
||||
i = j
|
||||
if i == n: break
|
||||
if rawdata[i] == '<':
|
||||
if starttagopen.match(rawdata, i):
|
||||
if self.literal:
|
||||
self.handle_data(rawdata[i])
|
||||
i = i+1
|
||||
continue
|
||||
k = self.parse_starttag(i)
|
||||
if k < 0: break
|
||||
i = k
|
||||
continue
|
||||
if rawdata.startswith("</", i):
|
||||
k = self.parse_endtag(i)
|
||||
if k < 0: break
|
||||
i = k
|
||||
self.literal = 0
|
||||
continue
|
||||
if self.literal:
|
||||
if n > (i + 1):
|
||||
self.handle_data("<")
|
||||
i = i+1
|
||||
else:
|
||||
# incomplete
|
||||
break
|
||||
continue
|
||||
if rawdata.startswith("<!--", i):
|
||||
# Strictly speaking, a comment is --.*--
|
||||
# within a declaration tag <!...>.
|
||||
# This should be removed,
|
||||
# and comments handled only in parse_declaration.
|
||||
k = self.parse_comment(i)
|
||||
if k < 0: break
|
||||
i = k
|
||||
continue
|
||||
if rawdata.startswith("<?", i):
|
||||
k = self.parse_pi(i)
|
||||
if k < 0: break
|
||||
i = i+k
|
||||
continue
|
||||
if rawdata.startswith("<!", i):
|
||||
# This is some sort of declaration; in "HTML as
|
||||
# deployed," this should only be the document type
|
||||
# declaration ("<!DOCTYPE html...>").
|
||||
k = self.parse_declaration(i)
|
||||
if k < 0: break
|
||||
i = k
|
||||
continue
|
||||
elif rawdata[i] == '&':
|
||||
if self.literal:
|
||||
self.handle_data(rawdata[i])
|
||||
i = i+1
|
||||
continue
|
||||
match = charref.match(rawdata, i)
|
||||
if match:
|
||||
name = match.group(1)
|
||||
self.handle_charref(name)
|
||||
i = match.end(0)
|
||||
if rawdata[i-1] != ';': i = i-1
|
||||
continue
|
||||
match = entityref.match(rawdata, i)
|
||||
if match:
|
||||
name = match.group(1)
|
||||
self.handle_entityref(name)
|
||||
i = match.end(0)
|
||||
if rawdata[i-1] != ';': i = i-1
|
||||
continue
|
||||
else:
|
||||
self.error('neither < nor & ??')
|
||||
# We get here only if incomplete matches but
|
||||
# nothing else
|
||||
match = incomplete.match(rawdata, i)
|
||||
if not match:
|
||||
self.handle_data(rawdata[i])
|
||||
i = i+1
|
||||
continue
|
||||
j = match.end(0)
|
||||
if j == n:
|
||||
break # Really incomplete
|
||||
self.handle_data(rawdata[i:j])
|
||||
i = j
|
||||
# end while
|
||||
if end and i < n:
|
||||
self.handle_data(rawdata[i:n])
|
||||
i = n
|
||||
self.rawdata = rawdata[i:]
|
||||
# XXX if end: check for empty stack
|
||||
|
||||
# Extensions for the DOCTYPE scanner:
|
||||
_decl_otherchars = '='
|
||||
|
||||
# Internal -- parse processing instr, return length or -1 if not terminated
|
||||
def parse_pi(self, i):
|
||||
rawdata = self.rawdata
|
||||
if rawdata[i:i+2] != '<?':
|
||||
self.error('unexpected call to parse_pi()')
|
||||
match = piclose.search(rawdata, i+2)
|
||||
if not match:
|
||||
return -1
|
||||
j = match.start(0)
|
||||
self.handle_pi(rawdata[i+2: j])
|
||||
j = match.end(0)
|
||||
return j-i
|
||||
|
||||
def get_starttag_text(self):
|
||||
return self.__starttag_text
|
||||
|
||||
# Internal -- handle starttag, return length or -1 if not terminated
|
||||
def parse_starttag(self, i):
|
||||
self.__starttag_text = None
|
||||
start_pos = i
|
||||
rawdata = self.rawdata
|
||||
if shorttagopen.match(rawdata, i):
|
||||
# SGML shorthand: <tag/data/ == <tag>data</tag>
|
||||
# XXX Can data contain &... (entity or char refs)?
|
||||
# XXX Can data contain < or > (tag characters)?
|
||||
# XXX Can there be whitespace before the first /?
|
||||
match = shorttag.match(rawdata, i)
|
||||
if not match:
|
||||
return -1
|
||||
tag, data = match.group(1, 2)
|
||||
self.__starttag_text = '<%s/' % tag
|
||||
tag = tag.lower()
|
||||
k = match.end(0)
|
||||
self.finish_shorttag(tag, data)
|
||||
self.__starttag_text = rawdata[start_pos:match.end(1) + 1]
|
||||
return k
|
||||
# XXX The following should skip matching quotes (' or ")
|
||||
# As a shortcut way to exit, this isn't so bad, but shouldn't
|
||||
# be used to locate the actual end of the start tag since the
|
||||
# < or > characters may be embedded in an attribute value.
|
||||
match = endbracket.search(rawdata, i+1)
|
||||
if not match:
|
||||
return -1
|
||||
j = match.start(0)
|
||||
# Now parse the data between i+1 and j into a tag and attrs
|
||||
attrs = []
|
||||
if rawdata[i:i+2] == '<>':
|
||||
# SGML shorthand: <> == <last open tag seen>
|
||||
k = j
|
||||
tag = self.lasttag
|
||||
else:
|
||||
match = tagfind.match(rawdata, i+1)
|
||||
if not match:
|
||||
self.error('unexpected call to parse_starttag')
|
||||
k = match.end(0)
|
||||
tag = rawdata[i+1:k].lower()
|
||||
self.lasttag = tag
|
||||
while k < j:
|
||||
match = attrfind.match(rawdata, k)
|
||||
if not match: break
|
||||
attrname, rest, attrvalue = match.group(1, 2, 3)
|
||||
if not rest:
|
||||
attrvalue = attrname
|
||||
else:
|
||||
if (attrvalue[:1] == "'" == attrvalue[-1:] or
|
||||
attrvalue[:1] == '"' == attrvalue[-1:]):
|
||||
# strip quotes
|
||||
attrvalue = attrvalue[1:-1]
|
||||
attrvalue = self.entity_or_charref.sub(
|
||||
self._convert_ref, attrvalue)
|
||||
attrs.append((attrname.lower(), attrvalue))
|
||||
k = match.end(0)
|
||||
if rawdata[j] == '>':
|
||||
j = j+1
|
||||
self.__starttag_text = rawdata[start_pos:j]
|
||||
self.finish_starttag(tag, attrs)
|
||||
return j
|
||||
|
||||
# Internal -- convert entity or character reference
|
||||
def _convert_ref(self, match):
|
||||
if match.group(2):
|
||||
return self.convert_charref(match.group(2)) or \
|
||||
'&#%s%s' % match.groups()[1:]
|
||||
elif match.group(3):
|
||||
return self.convert_entityref(match.group(1)) or \
|
||||
'&%s;' % match.group(1)
|
||||
else:
|
||||
return '&%s' % match.group(1)
|
||||
|
||||
# Internal -- parse endtag
|
||||
def parse_endtag(self, i):
|
||||
rawdata = self.rawdata
|
||||
match = endbracket.search(rawdata, i+1)
|
||||
if not match:
|
||||
return -1
|
||||
j = match.start(0)
|
||||
tag = rawdata[i+2:j].strip().lower()
|
||||
if rawdata[j] == '>':
|
||||
j = j+1
|
||||
self.finish_endtag(tag)
|
||||
return j
|
||||
|
||||
# Internal -- finish parsing of <tag/data/ (same as <tag>data</tag>)
|
||||
def finish_shorttag(self, tag, data):
|
||||
self.finish_starttag(tag, [])
|
||||
self.handle_data(data)
|
||||
self.finish_endtag(tag)
|
||||
|
||||
# Internal -- finish processing of start tag
|
||||
# Return -1 for unknown tag, 0 for open-only tag, 1 for balanced tag
|
||||
def finish_starttag(self, tag, attrs):
|
||||
try:
|
||||
method = getattr(self, 'start_' + tag)
|
||||
except AttributeError:
|
||||
try:
|
||||
method = getattr(self, 'do_' + tag)
|
||||
except AttributeError:
|
||||
self.unknown_starttag(tag, attrs)
|
||||
return -1
|
||||
else:
|
||||
self.handle_starttag(tag, method, attrs)
|
||||
return 0
|
||||
else:
|
||||
self.stack.append(tag)
|
||||
self.handle_starttag(tag, method, attrs)
|
||||
return 1
|
||||
|
||||
# Internal -- finish processing of end tag
|
||||
def finish_endtag(self, tag):
|
||||
if not tag:
|
||||
found = len(self.stack) - 1
|
||||
if found < 0:
|
||||
self.unknown_endtag(tag)
|
||||
return
|
||||
else:
|
||||
if tag not in self.stack:
|
||||
try:
|
||||
method = getattr(self, 'end_' + tag)
|
||||
except AttributeError:
|
||||
self.unknown_endtag(tag)
|
||||
else:
|
||||
self.report_unbalanced(tag)
|
||||
return
|
||||
found = len(self.stack)
|
||||
for i in range(found):
|
||||
if self.stack[i] == tag: found = i
|
||||
while len(self.stack) > found:
|
||||
tag = self.stack[-1]
|
||||
try:
|
||||
method = getattr(self, 'end_' + tag)
|
||||
except AttributeError:
|
||||
method = None
|
||||
if method:
|
||||
self.handle_endtag(tag, method)
|
||||
else:
|
||||
self.unknown_endtag(tag)
|
||||
del self.stack[-1]
|
||||
|
||||
# Overridable -- handle start tag
|
||||
def handle_starttag(self, tag, method, attrs):
|
||||
method(attrs)
|
||||
|
||||
# Overridable -- handle end tag
|
||||
def handle_endtag(self, tag, method):
|
||||
method()
|
||||
|
||||
# Example -- report an unbalanced </...> tag.
|
||||
def report_unbalanced(self, tag):
|
||||
if self.verbose:
|
||||
print '*** Unbalanced </' + tag + '>'
|
||||
print '*** Stack:', self.stack
|
||||
|
||||
def convert_charref(self, name):
|
||||
"""Convert character reference, may be overridden."""
|
||||
try:
|
||||
n = int(name)
|
||||
except ValueError:
|
||||
return
|
||||
if not 0 <= n <= 127:
|
||||
return
|
||||
return self.convert_codepoint(n)
|
||||
|
||||
def convert_codepoint(self, codepoint):
|
||||
return chr(codepoint)
|
||||
|
||||
def handle_charref(self, name):
|
||||
"""Handle character reference, no need to override."""
|
||||
replacement = self.convert_charref(name)
|
||||
if replacement is None:
|
||||
self.unknown_charref(name)
|
||||
else:
|
||||
self.handle_data(replacement)
|
||||
|
||||
# Definition of entities -- derived classes may override
|
||||
entitydefs = \
|
||||
{'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''}
|
||||
|
||||
def convert_entityref(self, name):
|
||||
"""Convert entity references.
|
||||
|
||||
As an alternative to overriding this method, one can tailor the
|
||||
results by setting up the self.entitydefs mapping appropriately.
|
||||
"""
|
||||
table = self.entitydefs
|
||||
if name in table:
|
||||
return table[name]
|
||||
else:
|
||||
return
|
||||
|
||||
def handle_entityref(self, name):
|
||||
"""Handle entity references, no need to override."""
|
||||
replacement = self.convert_entityref(name)
|
||||
if replacement is None:
|
||||
self.unknown_entityref(name)
|
||||
else:
|
||||
self.handle_data(replacement)
|
||||
|
||||
# Example -- handle data, should be overridden
|
||||
def handle_data(self, data):
|
||||
pass
|
||||
|
||||
# Example -- handle comment, could be overridden
|
||||
def handle_comment(self, data):
|
||||
pass
|
||||
|
||||
# Example -- handle declaration, could be overridden
|
||||
def handle_decl(self, decl):
|
||||
pass
|
||||
|
||||
# Example -- handle processing instruction, could be overridden
|
||||
def handle_pi(self, data):
|
||||
pass
|
||||
|
||||
# To be overridden -- handlers for unknown objects
|
||||
def unknown_starttag(self, tag, attrs): pass
|
||||
def unknown_endtag(self, tag): pass
|
||||
def unknown_charref(self, ref): pass
|
||||
def unknown_entityref(self, ref): pass
|
||||
|
||||
|
||||
class TestSGMLParser(SGMLParser):
|
||||
|
||||
def __init__(self, verbose=0):
|
||||
self.testdata = ""
|
||||
SGMLParser.__init__(self, verbose)
|
||||
|
||||
def handle_data(self, data):
|
||||
self.testdata = self.testdata + data
|
||||
if len(repr(self.testdata)) >= 70:
|
||||
self.flush()
|
||||
|
||||
def flush(self):
|
||||
data = self.testdata
|
||||
if data:
|
||||
self.testdata = ""
|
||||
print 'data:', repr(data)
|
||||
|
||||
def handle_comment(self, data):
|
||||
self.flush()
|
||||
r = repr(data)
|
||||
if len(r) > 68:
|
||||
r = r[:32] + '...' + r[-32:]
|
||||
print 'comment:', r
|
||||
|
||||
def unknown_starttag(self, tag, attrs):
|
||||
self.flush()
|
||||
if not attrs:
|
||||
print 'start tag: <' + tag + '>'
|
||||
else:
|
||||
print 'start tag: <' + tag,
|
||||
for name, value in attrs:
|
||||
print name + '=' + '"' + value + '"',
|
||||
print '>'
|
||||
|
||||
def unknown_endtag(self, tag):
|
||||
self.flush()
|
||||
print 'end tag: </' + tag + '>'
|
||||
|
||||
def unknown_entityref(self, ref):
|
||||
self.flush()
|
||||
print '*** unknown entity ref: &' + ref + ';'
|
||||
|
||||
def unknown_charref(self, ref):
|
||||
self.flush()
|
||||
print '*** unknown char ref: &#' + ref + ';'
|
||||
|
||||
def unknown_decl(self, data):
|
||||
self.flush()
|
||||
print '*** unknown decl: [' + data + ']'
|
||||
|
||||
def close(self):
|
||||
SGMLParser.close(self)
|
||||
self.flush()
|
||||
|
||||
|
||||
def test(args = None):
|
||||
import sys
|
||||
|
||||
if args is None:
|
||||
args = sys.argv[1:]
|
||||
|
||||
if args and args[0] == '-s':
|
||||
args = args[1:]
|
||||
klass = SGMLParser
|
||||
else:
|
||||
klass = TestSGMLParser
|
||||
|
||||
if args:
|
||||
file = args[0]
|
||||
else:
|
||||
file = 'test.html'
|
||||
|
||||
if file == '-':
|
||||
f = sys.stdin
|
||||
else:
|
||||
try:
|
||||
f = open(file, 'r')
|
||||
except IOError, msg:
|
||||
print file, ":", msg
|
||||
sys.exit(1)
|
||||
|
||||
data = f.read()
|
||||
if f is not sys.stdin:
|
||||
f.close()
|
||||
|
||||
x = klass()
|
||||
for c in data:
|
||||
x.feed(c)
|
||||
x.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
test()
|
||||
@@ -1,6 +0,0 @@
|
||||
import socket
|
||||
|
||||
try:
|
||||
_GLOBAL_DEFAULT_TIMEOUT = socket._GLOBAL_DEFAULT_TIMEOUT
|
||||
except AttributeError:
|
||||
_GLOBAL_DEFAULT_TIMEOUT = object()
|
||||
@@ -1,162 +0,0 @@
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
import unittest
|
||||
|
||||
|
||||
class SetupStack(object):
|
||||
|
||||
def __init__(self):
|
||||
self._on_teardown = []
|
||||
|
||||
def add_teardown(self, teardown):
|
||||
self._on_teardown.append(teardown)
|
||||
|
||||
def tear_down(self):
|
||||
for func in reversed(self._on_teardown):
|
||||
func()
|
||||
|
||||
|
||||
class TearDownConvenience(object):
|
||||
|
||||
def __init__(self, setup_stack=None):
|
||||
self._own_setup_stack = setup_stack is None
|
||||
if setup_stack is None:
|
||||
setup_stack = SetupStack()
|
||||
self._setup_stack = setup_stack
|
||||
|
||||
# only call this convenience method if no setup_stack was supplied to c'tor
|
||||
def tear_down(self):
|
||||
assert self._own_setup_stack
|
||||
self._setup_stack.tear_down()
|
||||
|
||||
|
||||
class TempDirMaker(TearDownConvenience):
|
||||
|
||||
def make_temp_dir(self, dir_=None):
|
||||
temp_dir = tempfile.mkdtemp(prefix="tmp-%s-" % self.__class__.__name__,
|
||||
dir=dir_)
|
||||
def tear_down():
|
||||
shutil.rmtree(temp_dir)
|
||||
self._setup_stack.add_teardown(tear_down)
|
||||
return temp_dir
|
||||
|
||||
|
||||
class MonkeyPatcher(TearDownConvenience):
|
||||
|
||||
Unset = object()
|
||||
|
||||
def monkey_patch(self, obj, name, value):
|
||||
orig_value = getattr(obj, name)
|
||||
setattr(obj, name, value)
|
||||
def reverse_patch():
|
||||
setattr(obj, name, orig_value)
|
||||
self._setup_stack.add_teardown(reverse_patch)
|
||||
|
||||
def _set_environ(self, env, name, value):
|
||||
if value is self.Unset:
|
||||
try:
|
||||
del env[name]
|
||||
except KeyError:
|
||||
pass
|
||||
else:
|
||||
env[name] = value
|
||||
|
||||
def monkey_patch_environ(self, name, value, env=os.environ):
|
||||
orig_value = env.get(name, self.Unset)
|
||||
self._set_environ(env, name, value)
|
||||
def reverse_patch():
|
||||
self._set_environ(env, name, orig_value)
|
||||
self._setup_stack.add_teardown(reverse_patch)
|
||||
|
||||
|
||||
class FixtureFactory(object):
|
||||
|
||||
def __init__(self):
|
||||
self._setup_stack = SetupStack()
|
||||
self._context_managers = {}
|
||||
self._fixtures = {}
|
||||
|
||||
def register_context_manager(self, name, context_manager):
|
||||
self._context_managers[name] = context_manager
|
||||
|
||||
def get_fixture(self, name, add_teardown):
|
||||
context_manager = self._context_managers[name]
|
||||
fixture = context_manager.__enter__()
|
||||
add_teardown(lambda: context_manager.__exit__(None, None, None))
|
||||
return fixture
|
||||
|
||||
def get_cached_fixture(self, name):
|
||||
fixture = self._fixtures.get(name)
|
||||
if fixture is None:
|
||||
fixture = self.get_fixture(name, self._setup_stack.add_teardown)
|
||||
self._fixtures[name] = fixture
|
||||
return fixture
|
||||
|
||||
def tear_down(self):
|
||||
self._setup_stack.tear_down()
|
||||
|
||||
|
||||
class TestCase(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self._setup_stack = SetupStack()
|
||||
self._monkey_patcher = MonkeyPatcher(self._setup_stack)
|
||||
|
||||
def tearDown(self):
|
||||
self._setup_stack.tear_down()
|
||||
|
||||
def register_context_manager(self, name, context_manager):
|
||||
return self.fixture_factory.register_context_manager(
|
||||
name, context_manager)
|
||||
|
||||
def get_fixture(self, name):
|
||||
return self.fixture_factory.get_fixture(name, self.add_teardown)
|
||||
|
||||
def get_cached_fixture(self, name):
|
||||
return self.fixture_factory.get_cached_fixture(name)
|
||||
|
||||
def add_teardown(self, *args, **kwds):
|
||||
self._setup_stack.add_teardown(*args, **kwds)
|
||||
|
||||
def make_temp_dir(self, *args, **kwds):
|
||||
return TempDirMaker(self._setup_stack).make_temp_dir(*args, **kwds)
|
||||
|
||||
def monkey_patch(self, *args, **kwds):
|
||||
return self._monkey_patcher.monkey_patch(*args, **kwds)
|
||||
|
||||
def monkey_patch_environ(self, *args, **kwds):
|
||||
return self._monkey_patcher.monkey_patch_environ(*args, **kwds)
|
||||
|
||||
def assert_contains(self, container, containee):
|
||||
self.assertTrue(containee in container, "%r not in %r" %
|
||||
(containee, container))
|
||||
|
||||
def assert_less_than(self, got, expected):
|
||||
self.assertTrue(got < expected, "%r >= %r" %
|
||||
(got, expected))
|
||||
|
||||
|
||||
# http://lackingrhoticity.blogspot.com/2009/01/testing-using-golden-files-in-python.html
|
||||
|
||||
class GoldenTestCase(TestCase):
|
||||
|
||||
run_meld = False
|
||||
|
||||
def assert_golden(self, dir_got, dir_expect):
|
||||
assert os.path.exists(dir_expect), dir_expect
|
||||
proc = subprocess.Popen(["diff", "--recursive", "-u", "-N",
|
||||
"--exclude=.*", dir_expect, dir_got],
|
||||
stdout=subprocess.PIPE)
|
||||
stdout, stderr = proc.communicate()
|
||||
if len(stdout) > 0:
|
||||
if self.run_meld:
|
||||
# Put expected output on the right because that is the
|
||||
# side we usually edit.
|
||||
subprocess.call(["meld", dir_got, dir_expect])
|
||||
raise AssertionError(
|
||||
"Differences from golden files found.\n"
|
||||
"Try running with --meld to update golden files.\n"
|
||||
"%s" % stdout)
|
||||
self.assertEquals(proc.wait(), 0)
|
||||
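# Illustrative sketch (not part of the original module): how a test might use
# GoldenTestCase -- generate output into a temp dir and diff it against a
# checked-in "golden" copy; write_output_files() is a hypothetical function
# under test.
#
#     class MyGoldenTest(GoldenTestCase):
#         def test_output_matches_golden(self):
#             got_dir = self.make_temp_dir()
#             write_output_files(got_dir)
#             self.assert_golden(got_dir, "golden/my_test")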
@@ -1,50 +0,0 @@
|
||||
# urllib2 work-alike interface
|
||||
# ...from urllib2...
|
||||
from urllib2 import \
|
||||
URLError, \
|
||||
HTTPError
|
||||
# ...and from mechanize
|
||||
from _auth import \
|
||||
HTTPProxyPasswordMgr, \
|
||||
HTTPSClientCertMgr
|
||||
from _debug import \
|
||||
HTTPResponseDebugProcessor, \
|
||||
HTTPRedirectDebugProcessor
|
||||
# crap ATM
|
||||
## from _gzip import \
|
||||
## HTTPGzipProcessor
|
||||
from _urllib2_fork import \
|
||||
AbstractBasicAuthHandler, \
|
||||
AbstractDigestAuthHandler, \
|
||||
BaseHandler, \
|
||||
CacheFTPHandler, \
|
||||
FileHandler, \
|
||||
FTPHandler, \
|
||||
HTTPBasicAuthHandler, \
|
||||
HTTPCookieProcessor, \
|
||||
HTTPDefaultErrorHandler, \
|
||||
HTTPDigestAuthHandler, \
|
||||
HTTPErrorProcessor, \
|
||||
HTTPHandler, \
|
||||
HTTPPasswordMgr, \
|
||||
HTTPPasswordMgrWithDefaultRealm, \
|
||||
HTTPRedirectHandler, \
|
||||
ProxyBasicAuthHandler, \
|
||||
ProxyDigestAuthHandler, \
|
||||
ProxyHandler, \
|
||||
UnknownHandler
|
||||
from _http import \
|
||||
HTTPEquivProcessor, \
|
||||
HTTPRefererProcessor, \
|
||||
HTTPRefreshProcessor, \
|
||||
HTTPRobotRulesProcessor, \
|
||||
RobotExclusionError
|
||||
import httplib
|
||||
if hasattr(httplib, 'HTTPS'):
|
||||
from _urllib2_fork import HTTPSHandler
|
||||
del httplib
|
||||
from _opener import OpenerDirector, \
|
||||
SeekableResponseOpener, \
|
||||
build_opener, install_opener, urlopen
|
||||
from _request import \
|
||||
Request
|
||||
File diff suppressed because it is too large
@@ -1,367 +0,0 @@
|
||||
"""Convenient HTTP UserAgent class.
|
||||
|
||||
This is a subclass of urllib2.OpenerDirector.
|
||||
|
||||
|
||||
Copyright 2003-2006 John J. Lee <jjl@pobox.com>
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it under
|
||||
the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
|
||||
included with the distribution).
|
||||
|
||||
"""
|
||||
|
||||
import warnings
|
||||
|
||||
import _auth
|
||||
import _gzip
|
||||
import _opener
|
||||
import _response
|
||||
import _sockettimeout
|
||||
import _urllib2
|
||||
|
||||
|
||||
class UserAgentBase(_opener.OpenerDirector):
|
||||
"""Convenient user-agent class.
|
||||
|
||||
Do not use .add_handler() to add a handler for something already dealt with
|
||||
by this code.
|
||||
|
||||
The only reason at present for the distinction between UserAgent and
|
||||
UserAgentBase is so that classes that depend on .seek()able responses
|
||||
(e.g. mechanize.Browser) can inherit from UserAgentBase. The subclass
|
||||
UserAgent exposes a .set_seekable_responses() method that allows switching
|
||||
off the adding of a .seek() method to responses.
|
||||
|
||||
Public attributes:
|
||||
|
||||
addheaders: list of (name, value) pairs specifying headers to send with
|
||||
every request, unless they are overridden in the Request instance.
|
||||
|
||||
>>> ua = UserAgentBase()
|
||||
>>> ua.addheaders = [
|
||||
... ("User-agent", "Mozilla/5.0 (compatible)"),
|
||||
... ("From", "responsible.person@example.com")]
|
||||
|
||||
"""
|
||||
|
||||
handler_classes = {
|
||||
# scheme handlers
|
||||
"http": _urllib2.HTTPHandler,
|
||||
# CacheFTPHandler is buggy, at least in 2.3, so we don't use it
|
||||
"ftp": _urllib2.FTPHandler,
|
||||
"file": _urllib2.FileHandler,
|
||||
|
||||
# other handlers
|
||||
"_unknown": _urllib2.UnknownHandler,
|
||||
# HTTP{S,}Handler depend on HTTPErrorProcessor too
|
||||
"_http_error": _urllib2.HTTPErrorProcessor,
|
||||
"_http_default_error": _urllib2.HTTPDefaultErrorHandler,
|
||||
|
||||
# feature handlers
|
||||
"_basicauth": _urllib2.HTTPBasicAuthHandler,
|
||||
"_digestauth": _urllib2.HTTPDigestAuthHandler,
|
||||
"_redirect": _urllib2.HTTPRedirectHandler,
|
||||
"_cookies": _urllib2.HTTPCookieProcessor,
|
||||
"_refresh": _urllib2.HTTPRefreshProcessor,
|
||||
"_equiv": _urllib2.HTTPEquivProcessor,
|
||||
"_proxy": _urllib2.ProxyHandler,
|
||||
"_proxy_basicauth": _urllib2.ProxyBasicAuthHandler,
|
||||
"_proxy_digestauth": _urllib2.ProxyDigestAuthHandler,
|
||||
"_robots": _urllib2.HTTPRobotRulesProcessor,
|
||||
"_gzip": _gzip.HTTPGzipProcessor, # experimental!
|
||||
|
||||
# debug handlers
|
||||
"_debug_redirect": _urllib2.HTTPRedirectDebugProcessor,
|
||||
"_debug_response_body": _urllib2.HTTPResponseDebugProcessor,
|
||||
}
|
||||
|
||||
default_schemes = ["http", "ftp", "file"]
|
||||
default_others = ["_unknown", "_http_error", "_http_default_error"]
|
||||
default_features = ["_redirect", "_cookies",
|
||||
"_refresh", "_equiv",
|
||||
"_basicauth", "_digestauth",
|
||||
"_proxy", "_proxy_basicauth", "_proxy_digestauth",
|
||||
"_robots",
|
||||
]
|
||||
if hasattr(_urllib2, 'HTTPSHandler'):
|
||||
handler_classes["https"] = _urllib2.HTTPSHandler
|
||||
default_schemes.append("https")
|
||||
|
||||
def __init__(self):
|
||||
_opener.OpenerDirector.__init__(self)
|
||||
|
||||
ua_handlers = self._ua_handlers = {}
|
||||
for scheme in (self.default_schemes+
|
||||
self.default_others+
|
||||
self.default_features):
|
||||
klass = self.handler_classes[scheme]
|
||||
ua_handlers[scheme] = klass()
|
||||
for handler in ua_handlers.itervalues():
|
||||
self.add_handler(handler)
|
||||
|
||||
# Yuck.
|
||||
# Ensure correct default constructor args were passed to
|
||||
# HTTPRefreshProcessor and HTTPEquivProcessor.
|
||||
if "_refresh" in ua_handlers:
|
||||
self.set_handle_refresh(True)
|
||||
if "_equiv" in ua_handlers:
|
||||
self.set_handle_equiv(True)
|
||||
# Ensure default password managers are installed.
|
||||
pm = ppm = None
|
||||
if "_basicauth" in ua_handlers or "_digestauth" in ua_handlers:
|
||||
pm = _urllib2.HTTPPasswordMgrWithDefaultRealm()
|
||||
if ("_proxy_basicauth" in ua_handlers or
|
||||
"_proxy_digestauth" in ua_handlers):
|
||||
ppm = _auth.HTTPProxyPasswordMgr()
|
||||
self.set_password_manager(pm)
|
||||
self.set_proxy_password_manager(ppm)
|
||||
# set default certificate manager
|
||||
if "https" in ua_handlers:
|
||||
cm = _urllib2.HTTPSClientCertMgr()
|
||||
self.set_client_cert_manager(cm)
|
||||
|
||||
def close(self):
|
||||
_opener.OpenerDirector.close(self)
|
||||
self._ua_handlers = None
|
||||
|
||||
# XXX
|
||||
## def set_timeout(self, timeout):
|
||||
## self._timeout = timeout
|
||||
## def set_http_connection_cache(self, conn_cache):
|
||||
## self._http_conn_cache = conn_cache
|
||||
## def set_ftp_connection_cache(self, conn_cache):
|
||||
## # XXX ATM, FTP has cache as part of handler; should it be separate?
|
||||
## self._ftp_conn_cache = conn_cache
|
||||
|
||||
def set_handled_schemes(self, schemes):
|
||||
"""Set sequence of URL scheme (protocol) strings.
|
||||
|
||||
For example: ua.set_handled_schemes(["http", "ftp"])
|
||||
|
||||
If this fails (with ValueError) because you've passed an unknown
|
||||
scheme, the set of handled schemes will not be changed.
|
||||
|
||||
"""
|
||||
want = {}
|
||||
for scheme in schemes:
|
||||
if scheme.startswith("_"):
|
||||
raise ValueError("not a scheme '%s'" % scheme)
|
||||
if scheme not in self.handler_classes:
|
||||
raise ValueError("unknown scheme '%s'")
|
||||
want[scheme] = None
|
||||
|
||||
# get rid of scheme handlers we don't want
|
||||
for scheme, oldhandler in self._ua_handlers.items():
|
||||
if scheme.startswith("_"): continue # not a scheme handler
|
||||
if scheme not in want:
|
||||
self._replace_handler(scheme, None)
|
||||
else:
|
||||
del want[scheme] # already got it
|
||||
# add the scheme handlers that are missing
|
||||
for scheme in want.keys():
|
||||
self._set_handler(scheme, True)
|
||||
|
||||
def set_cookiejar(self, cookiejar):
|
||||
"""Set a mechanize.CookieJar, or None."""
|
||||
self._set_handler("_cookies", obj=cookiejar)
|
||||
|
||||
# XXX could use Greg Stein's httpx for some of this instead?
|
||||
# or httplib2??
|
||||
def set_proxies(self, proxies=None, proxy_bypass=None):
|
||||
"""Configure proxy settings.
|
||||
|
||||
proxies: dictionary mapping URL scheme to proxy specification. None
|
||||
means use the default system-specific settings.
|
||||
proxy_bypass: function taking hostname, returning whether proxy should
|
||||
be used. None means use the default system-specific settings.
|
||||
|
||||
The default is to try to obtain proxy settings from the system (see the
|
||||
documentation for urllib.urlopen for information about the
|
||||
system-specific methods used -- note that's urllib, not urllib2).
|
||||
|
||||
To avoid all use of proxies, pass an empty proxies dict.
|
||||
|
||||
>>> ua = UserAgentBase()
|
||||
>>> def proxy_bypass(hostname):
|
||||
... return hostname == "noproxy.com"
|
||||
>>> ua.set_proxies(
|
||||
... {"http": "joe:password@myproxy.example.com:3128",
|
||||
... "ftp": "proxy.example.com"},
|
||||
... proxy_bypass)
|
||||
|
||||
"""
|
||||
self._set_handler("_proxy", True,
|
||||
constructor_kwds=dict(proxies=proxies,
|
||||
proxy_bypass=proxy_bypass))
|
||||
|
||||
def add_password(self, url, user, password, realm=None):
|
||||
self._password_manager.add_password(realm, url, user, password)
|
||||
def add_proxy_password(self, user, password, hostport=None, realm=None):
|
||||
self._proxy_password_manager.add_password(
|
||||
realm, hostport, user, password)
|
||||
|
||||
def add_client_certificate(self, url, key_file, cert_file):
|
||||
"""Add an SSL client certificate, for HTTPS client auth.
|
||||
|
||||
key_file and cert_file must be filenames of the key and certificate
|
||||
files, in PEM format. You can use e.g. OpenSSL to convert a p12 (PKCS
|
||||
12) file to PEM format:
|
||||
|
||||
openssl pkcs12 -clcerts -nokeys -in cert.p12 -out cert.pem
|
||||
openssl pkcs12 -nocerts -in cert.p12 -out key.pem
|
||||
|
||||
|
||||
Note that client certificate password input is very inflexible ATM. At
|
||||
the moment this seems to be console only, which is presumably the
|
||||
default behaviour of libopenssl. In future mechanize may support
|
||||
third-party libraries that (I assume) allow more options here.
|
||||
|
||||
"""
|
||||
self._client_cert_manager.add_key_cert(url, key_file, cert_file)
|
||||
|
||||
# the following are rarely useful -- use add_password / add_proxy_password
|
||||
# instead
|
||||
def set_password_manager(self, password_manager):
|
||||
"""Set a mechanize.HTTPPasswordMgrWithDefaultRealm, or None."""
|
||||
self._password_manager = password_manager
|
||||
self._set_handler("_basicauth", obj=password_manager)
|
||||
self._set_handler("_digestauth", obj=password_manager)
|
||||
def set_proxy_password_manager(self, password_manager):
|
||||
"""Set a mechanize.HTTPProxyPasswordMgr, or None."""
|
||||
self._proxy_password_manager = password_manager
|
||||
self._set_handler("_proxy_basicauth", obj=password_manager)
|
||||
self._set_handler("_proxy_digestauth", obj=password_manager)
|
||||
def set_client_cert_manager(self, cert_manager):
|
||||
"""Set a mechanize.HTTPClientCertMgr, or None."""
|
||||
self._client_cert_manager = cert_manager
|
||||
handler = self._ua_handlers["https"]
|
||||
handler.client_cert_manager = cert_manager
|
||||
|
||||
# these methods all take a boolean parameter
|
||||
def set_handle_robots(self, handle):
|
||||
"""Set whether to observe rules from robots.txt."""
|
||||
self._set_handler("_robots", handle)
|
||||
def set_handle_redirect(self, handle):
|
||||
"""Set whether to handle HTTP 30x redirections."""
|
||||
self._set_handler("_redirect", handle)
|
||||
def set_handle_refresh(self, handle, max_time=None, honor_time=True):
|
||||
"""Set whether to handle HTTP Refresh headers."""
|
||||
self._set_handler("_refresh", handle, constructor_kwds=
|
||||
{"max_time": max_time, "honor_time": honor_time})
|
||||
def set_handle_equiv(self, handle, head_parser_class=None):
|
||||
"""Set whether to treat HTML http-equiv headers like HTTP headers.
|
||||
|
||||
Response objects may be .seek()able if this is set (currently returned
|
||||
responses are, raised HTTPError exception responses are not).
|
||||
|
||||
"""
|
||||
if head_parser_class is not None:
|
||||
constructor_kwds = {"head_parser_class": head_parser_class}
|
||||
else:
|
||||
constructor_kwds={}
|
||||
self._set_handler("_equiv", handle, constructor_kwds=constructor_kwds)
|
||||
def set_handle_gzip(self, handle):
|
||||
"""Handle gzip transfer encoding.
|
||||
|
||||
"""
|
||||
if handle:
|
||||
warnings.warn(
|
||||
"gzip transfer encoding is experimental!", stacklevel=2)
|
||||
self._set_handler("_gzip", handle)
|
||||
def set_debug_redirects(self, handle):
|
||||
"""Log information about HTTP redirects (including refreshes).
|
||||
|
||||
Logging is performed using module logging. The logger name is
|
||||
"mechanize.http_redirects". To actually print some debug output,
|
||||
eg:
|
||||
|
||||
import sys, logging
|
||||
logger = logging.getLogger("mechanize.http_redirects")
|
||||
logger.addHandler(logging.StreamHandler(sys.stdout))
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
Other logger names relevant to this module:
|
||||
|
||||
"mechanize.http_responses"
|
||||
"mechanize.cookies"
|
||||
|
||||
To turn on everything:
|
||||
|
||||
import sys, logging
|
||||
logger = logging.getLogger("mechanize")
|
||||
logger.addHandler(logging.StreamHandler(sys.stdout))
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
"""
|
||||
self._set_handler("_debug_redirect", handle)
|
||||
def set_debug_responses(self, handle):
|
||||
"""Log HTTP response bodies.
|
||||
|
||||
See docstring for .set_debug_redirects() for details of logging.
|
||||
|
||||
Response objects may be .seek()able if this is set (currently returned
|
||||
responses are, raised HTTPError exception responses are not).
|
||||
|
||||
"""
|
||||
self._set_handler("_debug_response_body", handle)
|
||||
def set_debug_http(self, handle):
|
||||
"""Print HTTP headers to sys.stdout."""
|
||||
level = int(bool(handle))
|
||||
for scheme in "http", "https":
|
||||
h = self._ua_handlers.get(scheme)
|
||||
if h is not None:
|
||||
h.set_http_debuglevel(level)
|
||||
|
||||
def _set_handler(self, name, handle=None, obj=None,
|
||||
constructor_args=(), constructor_kwds={}):
|
||||
if handle is None:
|
||||
handle = obj is not None
|
||||
if handle:
|
||||
handler_class = self.handler_classes[name]
|
||||
if obj is not None:
|
||||
newhandler = handler_class(obj)
|
||||
else:
|
||||
newhandler = handler_class(
|
||||
*constructor_args, **constructor_kwds)
|
||||
else:
|
||||
newhandler = None
|
||||
self._replace_handler(name, newhandler)
|
||||
|
||||
def _replace_handler(self, name, newhandler=None):
|
||||
# first, if handler was previously added, remove it
|
||||
if name is not None:
|
||||
handler = self._ua_handlers.get(name)
|
||||
if handler:
|
||||
try:
|
||||
self.handlers.remove(handler)
|
||||
except ValueError:
|
||||
pass
|
||||
# then add the replacement, if any
|
||||
if newhandler is not None:
|
||||
self.add_handler(newhandler)
|
||||
self._ua_handlers[name] = newhandler
|
||||
|
||||
|
||||
class UserAgent(UserAgentBase):
|
||||
|
||||
def __init__(self):
|
||||
UserAgentBase.__init__(self)
|
||||
self._seekable = False
|
||||
|
||||
def set_seekable_responses(self, handle):
|
||||
"""Make response objects .seek()able."""
|
||||
self._seekable = bool(handle)
|
||||
|
||||
def open(self, fullurl, data=None,
|
||||
timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
|
||||
if self._seekable:
|
||||
def bound_open(fullurl, data=None,
|
||||
timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
|
||||
return UserAgentBase.open(self, fullurl, data, timeout)
|
||||
response = _opener.wrapped_open(
|
||||
bound_open, _response.seek_wrapped_response, fullurl, data,
|
||||
timeout)
|
||||
else:
|
||||
response = UserAgentBase.open(self, fullurl, data)
|
||||
return response
|
||||
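# Illustrative sketch (not part of the original module): typical use of the
# UserAgent class above, restricted to HTTP(S) with robots.txt handling turned
# off.  Assumes the standalone mechanize package (version 0.2.5, which this
# bundled copy mirrors) is importable; example.com is a placeholder URL.
#
#     import mechanize
#     ua = mechanize.UserAgent()
#     ua.set_handled_schemes(["http", "https"])
#     ua.set_handle_robots(False)
#     ua.addheaders = [("User-agent", "Mozilla/5.0 (compatible)")]
#     response = ua.open("http://example.com/")
#     print response.code, len(response.read())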
@@ -1,305 +0,0 @@
|
||||
"""Utility functions and date/time routines.
|
||||
|
||||
Copyright 2002-2006 John J Lee <jjl@pobox.com>
|
||||
|
||||
This code is free software; you can redistribute it and/or modify it
|
||||
under the terms of the BSD or ZPL 2.1 licenses (see the file
|
||||
COPYING.txt included with the distribution).
|
||||
"""
|
||||
|
||||
import re
|
||||
import time
|
||||
import warnings
|
||||
|
||||
|
||||
class ExperimentalWarning(UserWarning):
|
||||
pass
|
||||
|
||||
def experimental(message):
|
||||
warnings.warn(message, ExperimentalWarning, stacklevel=3)
|
||||
def hide_experimental_warnings():
|
||||
warnings.filterwarnings("ignore", category=ExperimentalWarning)
|
||||
def reset_experimental_warnings():
|
||||
warnings.filterwarnings("default", category=ExperimentalWarning)
|
||||
|
||||
def deprecation(message):
|
||||
warnings.warn(message, DeprecationWarning, stacklevel=3)
|
||||
def hide_deprecations():
|
||||
warnings.filterwarnings("ignore", category=DeprecationWarning)
|
||||
def reset_deprecations():
|
||||
warnings.filterwarnings("default", category=DeprecationWarning)
|
||||
|
||||
|
||||
def write_file(filename, data):
|
||||
f = open(filename, "wb")
|
||||
try:
|
||||
f.write(data)
|
||||
finally:
|
||||
f.close()
|
||||
|
||||
|
||||
def get1(sequence):
|
||||
assert len(sequence) == 1
|
||||
return sequence[0]
|
||||
|
||||
|
||||
def isstringlike(x):
|
||||
try: x+""
|
||||
except: return False
|
||||
else: return True
|
||||
|
||||
## def caller():
|
||||
## try:
|
||||
## raise SyntaxError
|
||||
## except:
|
||||
## import sys
|
||||
## return sys.exc_traceback.tb_frame.f_back.f_back.f_code.co_name
|
||||
|
||||
|
||||
from calendar import timegm
|
||||
|
||||
# Date/time conversion routines for formats used by the HTTP protocol.
|
||||
|
||||
EPOCH = 1970
|
||||
def my_timegm(tt):
|
||||
year, month, mday, hour, min, sec = tt[:6]
|
||||
if ((year >= EPOCH) and (1 <= month <= 12) and (1 <= mday <= 31) and
|
||||
(0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)):
|
||||
return timegm(tt)
|
||||
else:
|
||||
return None
|
||||
|
||||
days = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
|
||||
months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
|
||||
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
|
||||
months_lower = []
|
||||
for month in months: months_lower.append(month.lower())
|
||||
|
||||
|
||||
def time2isoz(t=None):
|
||||
"""Return a string representing time in seconds since epoch, t.
|
||||
|
||||
If the function is called without an argument, it will use the current
|
||||
time.
|
||||
|
||||
The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
|
||||
representing Universal Time (UTC, aka GMT). An example of this format is:
|
||||
|
||||
1994-11-24 08:49:37Z
|
||||
|
||||
"""
|
||||
if t is None: t = time.time()
|
||||
year, mon, mday, hour, min, sec = time.gmtime(t)[:6]
|
||||
return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
|
||||
year, mon, mday, hour, min, sec)
|
||||
|
||||
def time2netscape(t=None):
|
||||
"""Return a string representing time in seconds since epoch, t.
|
||||
|
||||
If the function is called without an argument, it will use the current
|
||||
time.
|
||||
|
||||
The format of the returned string is like this:
|
||||
|
||||
Wed, DD-Mon-YYYY HH:MM:SS GMT
|
||||
|
||||
"""
|
||||
if t is None: t = time.time()
|
||||
year, mon, mday, hour, min, sec, wday = time.gmtime(t)[:7]
|
||||
return "%s %02d-%s-%04d %02d:%02d:%02d GMT" % (
|
||||
days[wday], mday, months[mon-1], year, hour, min, sec)
|
||||
|
||||
|
||||
UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}
|
||||
|
||||
timezone_re = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")
|
||||
def offset_from_tz_string(tz):
|
||||
offset = None
|
||||
if UTC_ZONES.has_key(tz):
|
||||
offset = 0
|
||||
else:
|
||||
m = timezone_re.search(tz)
|
||||
if m:
|
||||
offset = 3600 * int(m.group(2))
|
||||
if m.group(3):
|
||||
offset = offset + 60 * int(m.group(3))
|
||||
if m.group(1) == '-':
|
||||
offset = -offset
|
||||
return offset
|
||||
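# Worked examples for offset_from_tz_string() (illustrative comment only):
#     "GMT"   -> 0
#     "+0130" -> 3600*1 + 60*30  =  5400
#     "-0800" -> -(3600*8 + 60*0) = -28800
#     "EST"   -> None (only UTC-equivalent zone names are recognised)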
|
||||
def _str2time(day, mon, yr, hr, min, sec, tz):
|
||||
# translate month name to number
|
||||
# month numbers start with 1 (January)
|
||||
try:
|
||||
mon = months_lower.index(mon.lower())+1
|
||||
except ValueError:
|
||||
# maybe it's already a number
|
||||
try:
|
||||
imon = int(mon)
|
||||
except ValueError:
|
||||
return None
|
||||
if 1 <= imon <= 12:
|
||||
mon = imon
|
||||
else:
|
||||
return None
|
||||
|
||||
# make sure clock elements are defined
|
||||
if hr is None: hr = 0
|
||||
if min is None: min = 0
|
||||
if sec is None: sec = 0
|
||||
|
||||
yr = int(yr)
|
||||
day = int(day)
|
||||
hr = int(hr)
|
||||
min = int(min)
|
||||
sec = int(sec)
|
||||
|
||||
if yr < 1000:
|
||||
# find "obvious" year
|
||||
cur_yr = time.localtime(time.time())[0]
|
||||
m = cur_yr % 100
|
||||
tmp = yr
|
||||
yr = yr + cur_yr - m
|
||||
m = m - tmp
|
||||
if abs(m) > 50:
|
||||
if m > 0: yr = yr + 100
|
||||
else: yr = yr - 100
|
||||
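# Worked example of the century window above (illustrative comment only):
# with cur_yr = 2018 (so m = 18) and yr = 94:  yr -> 94 + 2018 - 18 = 2094,
# m -> 18 - 94 = -76, abs(m) > 50 and m < 0, so yr -> 1994.
# With yr = 2:  yr -> 2 + 2018 - 18 = 2002, m -> 16, within 50, kept as 2002.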
|
||||
# convert UTC time tuple to seconds since epoch (not timezone-adjusted)
|
||||
t = my_timegm((yr, mon, day, hr, min, sec, tz))
|
||||
|
||||
if t is not None:
|
||||
# adjust time using timezone string, to get absolute time since epoch
|
||||
if tz is None:
|
||||
tz = "UTC"
|
||||
tz = tz.upper()
|
||||
offset = offset_from_tz_string(tz)
|
||||
if offset is None:
|
||||
return None
|
||||
t = t - offset
|
||||
|
||||
return t
|
||||
|
||||
|
||||
strict_re = re.compile(r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
|
||||
r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
|
||||
wkday_re = re.compile(
|
||||
r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
|
||||
loose_http_re = re.compile(
|
||||
r"""^
|
||||
(\d\d?) # day
|
||||
(?:\s+|[-\/])
|
||||
(\w+) # month
|
||||
(?:\s+|[-\/])
|
||||
(\d+) # year
|
||||
(?:
|
||||
(?:\s+|:) # separator before clock
|
||||
(\d\d?):(\d\d) # hour:min
|
||||
(?::(\d\d))? # optional seconds
|
||||
)? # optional clock
|
||||
\s*
|
||||
([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
|
||||
\s*
|
||||
(?:\(\w+\))? # ASCII representation of timezone in parens.
|
||||
\s*$""", re.X)
|
||||
def http2time(text):
|
||||
"""Returns time in seconds since epoch of time represented by a string.
|
||||
|
||||
Return value is an integer.
|
||||
|
||||
None is returned if the format of str is unrecognized, the time is outside
|
||||
the representable range, or the timezone string is not recognized. If the
|
||||
string contains no timezone, UTC is assumed.
|
||||
|
||||
The timezone in the string may be numerical (like "-0800" or "+0100") or a
|
||||
string timezone (like "UTC", "GMT", "BST" or "EST"). Currently, only the
|
||||
timezone strings equivalent to UTC (zero offset) are known to the function.
|
||||
|
||||
The function loosely parses the following formats:
|
||||
|
||||
Wed, 09 Feb 1994 22:23:32 GMT -- HTTP format
|
||||
Tuesday, 08-Feb-94 14:15:29 GMT -- old rfc850 HTTP format
|
||||
Tuesday, 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP format
|
||||
09 Feb 1994 22:23:32 GMT -- HTTP format (no weekday)
|
||||
08-Feb-94 14:15:29 GMT -- rfc850 format (no weekday)
|
||||
08-Feb-1994 14:15:29 GMT -- broken rfc850 format (no weekday)
|
||||
|
||||
The parser ignores leading and trailing whitespace. The time may be
|
||||
absent.
|
||||
|
||||
If the year is given with only 2 digits, the function will select the
|
||||
century that makes the year closest to the current date.
|
||||
|
||||
"""
|
||||
# fast exit for strictly conforming string
|
||||
m = strict_re.search(text)
|
||||
if m:
|
||||
g = m.groups()
|
||||
mon = months_lower.index(g[1].lower()) + 1
|
||||
tt = (int(g[2]), mon, int(g[0]),
|
||||
int(g[3]), int(g[4]), float(g[5]))
|
||||
return my_timegm(tt)
|
||||
|
||||
# No, we need some messy parsing...
|
||||
|
||||
# clean up
|
||||
text = text.lstrip()
|
||||
text = wkday_re.sub("", text, 1) # Useless weekday
|
||||
|
||||
# tz is time zone specifier string
|
||||
day, mon, yr, hr, min, sec, tz = [None]*7
|
||||
|
||||
# loose regexp parse
|
||||
m = loose_http_re.search(text)
|
||||
if m is not None:
|
||||
day, mon, yr, hr, min, sec, tz = m.groups()
|
||||
else:
|
||||
return None # bad format
|
||||
|
||||
return _str2time(day, mon, yr, hr, min, sec, tz)
|
||||
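# Illustrative sketch (not part of the original module): the strict and loose
# paths through http2time() above, in Python doctest style:
#
#     >>> int(http2time("Wed, 09 Feb 1994 22:23:32 GMT"))   # strict RFC 1123 path
#     760832612
#     >>> http2time("08-Feb-94 14:15:29 GMT") is not None   # loose rfc850 path
#     True
#     >>> http2time("not a date") is None
#     True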
|
||||
|
||||
iso_re = re.compile(
|
||||
"""^
|
||||
(\d{4}) # year
|
||||
[-\/]?
|
||||
(\d\d?) # numerical month
|
||||
[-\/]?
|
||||
(\d\d?) # day
|
||||
(?:
|
||||
(?:\s+|[-:Tt]) # separator before clock
|
||||
(\d\d?):?(\d\d) # hour:min
|
||||
(?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional)
|
||||
)? # optional clock
|
||||
\s*
|
||||
([-+]?\d\d?:?(:?\d\d)?
|
||||
|Z|z)? # timezone (Z is "zero meridian", i.e. GMT)
|
||||
\s*$""", re.X)
|
||||
def iso2time(text):
|
||||
"""
|
||||
As for http2time, but parses the ISO 8601 formats:
|
||||
|
||||
1994-02-03 14:15:29 -0100 -- ISO 8601 format
|
||||
1994-02-03 14:15:29 -- zone is optional
|
||||
1994-02-03 -- only date
|
||||
1994-02-03T14:15:29 -- Use T as separator
|
||||
19940203T141529Z -- ISO 8601 compact format
|
||||
19940203 -- only date
|
||||
|
||||
"""
|
||||
# clean up
|
||||
text = text.lstrip()
|
||||
|
||||
# tz is time zone specifier string
|
||||
day, mon, yr, hr, min, sec, tz = [None]*7
|
||||
|
||||
# loose regexp parse
|
||||
m = iso_re.search(text)
|
||||
if m is not None:
|
||||
# XXX there's an extra bit of the timezone I'm ignoring here: is
|
||||
# this the right thing to do?
|
||||
yr, mon, day, hr, min, sec, tz, _ = m.groups()
|
||||
else:
|
||||
return None # bad format
|
||||
|
||||
return _str2time(day, mon, yr, hr, min, sec, tz)
|
||||
@@ -1,2 +0,0 @@
|
||||
"0.2.5"
|
||||
__version__ = (0, 2, 5, None, None)
|
||||
@@ -516,7 +516,7 @@ def set_context_commands(item, parent_item):
|
||||
from_action=item.action).tourl())))
|
||||
# Add to Alfavoritos (My links)
|
||||
if item.channel not in ["favorites", "videolibrary", "help", ""] and parent_item.channel != "favorites":
|
||||
context_commands.append(('[COLOR blue]Guardar enlace[/COLOR]', "XBMC.RunPlugin(%s?%s)" %
|
||||
context_commands.append(('[COLOR blue]%s[/COLOR]' % config.get_localized_string(70557), "XBMC.RunPlugin(%s?%s)" %
|
||||
(sys.argv[0], item.clone(channel="alfavorites", action="addFavourite",
|
||||
from_channel=item.channel,
|
||||
from_action=item.action).tourl())))
|
||||
@@ -538,7 +538,7 @@ def set_context_commands(item, parent_item):
|
||||
mediatype = 'tv'
|
||||
else:
|
||||
mediatype = item.contentType
|
||||
context_commands.append(("[COLOR yellow]Buscar Similares[/COLOR]", "XBMC.Container.Update (%s?%s)" % (
|
||||
context_commands.append(("[COLOR yellow]%s[/COLOR]" % config.get_localized_string(70561), "XBMC.Container.Update (%s?%s)" % (
|
||||
sys.argv[0], item.clone(channel='search', action='discover_list', search_type='list', page='1',
|
||||
list_type='%s/%s/similar' % (mediatype,item.infoLabels['tmdb_id'])).tourl())))
|
||||
|
||||
@@ -1044,6 +1044,8 @@ def torrent_client_installed(show_tuple=False):
|
||||
|
||||
def play_torrent(item, xlistitem, mediaurl):
|
||||
logger.info()
|
||||
import time
|
||||
|
||||
# Available options for playing torrents
|
||||
torrent_options = list()
|
||||
torrent_options.append(["Cliente interno (necesario libtorrent)"])
|
||||
@@ -1066,28 +1068,32 @@ def play_torrent(item, xlistitem, mediaurl):
|
||||
|
||||
# External plugins
|
||||
if seleccion > 1:
|
||||
|
||||
#### Kodi 18 compatibility: avoids hangs/cancellations when the .torrent is launched from a conventional screen
|
||||
if xbmc.getCondVisibility('Window.IsMedia'):
|
||||
xbmcplugin.setResolvedUrl(int(sys.argv[1]), False, xlistitem) # Prepare the environment to avoid the Kodi 18 error
|
||||
time.sleep(1) # Give it time to run
|
||||
|
||||
mediaurl = urllib.quote_plus(item.url)
|
||||
if ("quasar" in torrent_options[seleccion][1] or "elementum" in torrent_options[seleccion][1]) and item.infoLabels['tmdb_id']: #Llamada con más parámetros para completar el título
|
||||
if item.contentType == 'episode' and "elementum" not in torrent_options[seleccion][1]:
|
||||
mediaurl += "&episode=%s&library=&season=%s&show=%s&tmdb=%s&type=episode" % (item.infoLabels['episode'], item.infoLabels['season'], item.infoLabels['tmdb_id'], item.infoLabels['tmdb_id'])
|
||||
elif item.contentType == 'movie':
|
||||
mediaurl += "&library=&tmdb=%s&type=movie" % (item.infoLabels['tmdb_id'])
|
||||
xbmc.executebuiltin("PlayMedia(" + torrent_options[seleccion][1] % mediaurl + ")")
|
||||
|
||||
if "quasar" in torrent_options[seleccion][1] or "elementum" in torrent_options[seleccion][1]: #Seleccionamos que clientes torrent soportamos
|
||||
if item.strm_path: # Only if it comes from the video library
|
||||
import time
|
||||
time_limit = time.time() + 150 # Set the maximum buffering time
|
||||
while not is_playing() and time.time() < time_limit: # Wait while it buffers
|
||||
time.sleep(5) # Check again at each interval
|
||||
#logger.debug(str(time_limit))
|
||||
|
||||
if is_playing(): # Buffering has finished or the user cancelled
|
||||
from platformcode import xbmc_videolibrary
|
||||
xbmc_videolibrary.mark_auto_as_watched(item) # Mark as watched when it finishes
|
||||
#logger.debug("Llamado el marcado")
|
||||
#else:
|
||||
#logger.debug("Video cancelado o timeout")
|
||||
xbmc.executebuiltin("PlayMedia(" + torrent_options[seleccion][1] % mediaurl + ")")
|
||||
|
||||
# Select which torrent clients we support for marking videos as watched
|
||||
if "quasar" in torrent_options[seleccion][1] or "elementum" in torrent_options[seleccion][1]:
|
||||
time_limit = time.time() + 150 # Set the maximum buffering time
|
||||
while not is_playing() and time.time() < time_limit: # Wait while it buffers
|
||||
time.sleep(5) # Check again at each interval
|
||||
#logger.debug(str(time_limit))
|
||||
|
||||
if item.strm_path and is_playing(): # Only if it comes from the video library
|
||||
from platformcode import xbmc_videolibrary
|
||||
xbmc_videolibrary.mark_auto_as_watched(item) # Mark as watched when it finishes
|
||||
#logger.debug("Llamado el marcado")
|
||||
|
||||
if seleccion == 1:
|
||||
from platformcode import mct
|
||||
|
||||
@@ -1734,7 +1734,7 @@ msgid "[COLOR %s]Filter configuration for TV series...[/COLOR]"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#60430"
|
||||
msgid "FILTRO: Delete '%s'"
|
||||
msgid "FILTER: Delete '%s'"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#60431"
|
||||
@@ -4804,14 +4804,142 @@ msgid "Verification of counters of videos seen / not seen (uncheck to verify)"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70527"
|
||||
msgid "Now in Theatres "
|
||||
msgid "My links"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70528"
|
||||
msgid "Movies by Genre"
|
||||
msgid "Default folder"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70529"
|
||||
msgid "tv show"
|
||||
msgid "Repeated link"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70530"
|
||||
msgid "You already have this link in the folder"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70531"
|
||||
msgid "Saved link"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70532"
|
||||
msgid "Folder: %s"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70533"
|
||||
msgid "Rename folder"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70534"
|
||||
msgid "Delete folder"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70535"
|
||||
msgid "Move up all"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70536"
|
||||
msgid "Move up"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70537"
|
||||
msgid "Move down"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70538"
|
||||
msgid "Move down all"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70539"
|
||||
msgid "* Create different folders to store your favorite links within Icarus. [CR]"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70540"
|
||||
msgid "* To add links to folders, access the context menu from any point in Icarus.[CR]"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70541"
|
||||
msgid "* The links can be channels, sections within the channels, searches, and even movies and series although for the latter it is preferable to use the video library."
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70542"
|
||||
msgid "Create new folder ..."
|
||||
msgstr "Creaa nuova cartella ..."
|
||||
|
||||
msgctxt "#70543"
|
||||
msgid "Move to another folder"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70544"
|
||||
msgid "Change title"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70545"
|
||||
msgid "Change color"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70546"
|
||||
msgid "Save link in:"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70547"
|
||||
msgid "Change thumbnail"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70548"
|
||||
msgid "Delete link"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70549"
|
||||
msgid "Select folder"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70550"
|
||||
msgid "Create new folder"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70551"
|
||||
msgid "Folder name"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70552"
|
||||
msgid "Delete the folder and links it contains?"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70553"
|
||||
msgid "Change link title"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70554"
|
||||
msgid "Select thumbnail:"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70555"
|
||||
msgid "Move link to:"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70556"
|
||||
msgid "%d links in folder"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70557"
|
||||
msgid "Save link"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70558"
|
||||
msgid "Select color:"
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70559"
|
||||
msgid "Now in Theatres "
|
||||
msgstr ""
|
||||
|
||||
msgctxt "#70560"
|
||||
msgid "Movies by Genre"
|
||||
msgstr "
|
||||
|
||||
msgctxt "#70561"
|
||||
msgid "Search Similar
|
||||
msgstr ""
|
||||
|
||||
|
||||
@@ -4792,14 +4792,143 @@ msgid "Verification of counters of videos seen / not seen (uncheck to verify)"
|
||||
msgstr "Verifica dei contatori di video visti/non visti (deselezionare per verificare)"
|
||||
|
||||
msgctxt "#70527"
|
||||
msgid "My links"
|
||||
msgstr "I Miei Link"
|
||||
|
||||
msgctxt "#70528"
|
||||
msgid "Default folder"
|
||||
msgstr "Cartella di Default"
|
||||
|
||||
msgctxt "#70529"
|
||||
msgid "Repeated link"
|
||||
msgstr "Link ripetuto"
|
||||
|
||||
msgctxt "#70530"
|
||||
msgid "You already have this link in the folder"
|
||||
msgstr "C'è già un link nella cartella"
|
||||
|
||||
msgctxt "#70531"
|
||||
msgid "Saved link"
|
||||
msgstr "Link salvato"
|
||||
|
||||
msgctxt "#70532"
|
||||
msgid "Folder: %s"
|
||||
msgstr "Cartella: %s"
|
||||
|
||||
msgctxt "#70533"
|
||||
msgid "Rename folder"
|
||||
msgstr "Cambia nome alla cartella"
|
||||
|
||||
msgctxt "#70534"
|
||||
msgid "Delete folder"
|
||||
msgstr "Elimina la cartella"
|
||||
|
||||
msgctxt "#70535"
|
||||
msgid "Move up all"
|
||||
msgstr "Sposta tutto in alto"
|
||||
|
||||
msgctxt "#70536"
|
||||
msgid "Move up"
|
||||
msgstr "Sposta in su"
|
||||
|
||||
msgctxt "#70537"
|
||||
msgid "Move down"
|
||||
msgstr "Sposta in giù"
|
||||
|
||||
msgctxt "#70538"
|
||||
msgid "Move down all"
|
||||
msgstr "Sposta tutto in basso"
|
||||
|
||||
msgctxt "#70539"
|
||||
msgid "* Create different folders to store your favorite links within Icarus. [CR]"
|
||||
msgstr "* Crea diverse cartelle per memorizzare i tuoi collegamenti preferiti all'interno di Icarus."
|
||||
|
||||
msgctxt "#70540"
|
||||
msgid "* To add links to folders, access the context menu from any point in Icarus.[CR]"
|
||||
msgstr "* Per aggiungere collegamenti alle cartelle accedi al menu contestuale da qualsiasi punto di Icarus."
|
||||
|
||||
msgctxt "#70541"
|
||||
msgid "* The links can be channels, sections within the channels, searches, and even movies and series although for the latter it is preferable to use the video library."
|
||||
msgstr "* I collegamenti possono essere canali, sezioni all'interno dei canali, ricerche e persino film e serie, sebbene per quest'ultimo sia preferibile utilizzare la videoteca."
|
||||
|
||||
msgctxt "#70542"
|
||||
msgid "Create new folder ..."
|
||||
msgstr "Crea nuova cartella ..."
|
||||
|
||||
msgctxt "#70543"
|
||||
msgid "Move to another folder"
|
||||
msgstr "Sposta in altra cartella"
|
||||
|
||||
msgctxt "#70544"
|
||||
msgid "Change title"
|
||||
msgstr "Cambia titolo"
|
||||
|
||||
msgctxt "#70545"
|
||||
msgid "Change color"
|
||||
msgstr "Cambia colore"
|
||||
|
||||
msgctxt "#70546"
|
||||
msgid "Save link in:"
|
||||
msgstr "Salva link in:"
|
||||
|
||||
msgctxt "#70547"
|
||||
msgid "Change thumbnail"
|
||||
msgstr "Cambia thumbnail"
|
||||
|
||||
msgctxt "#70548"
|
||||
msgid "Delete link"
|
||||
msgstr "Elimina link"
|
||||
|
||||
msgctxt "#70549"
|
||||
msgid "Select folder"
|
||||
msgstr "Seleziona cartella"
|
||||
|
||||
msgctxt "#70550"
|
||||
msgid "Create new folder"
|
||||
msgstr "Crea nuova cartella"
|
||||
|
||||
msgctxt "#70551"
|
||||
msgid "Folder name"
|
||||
msgstr "Nome della cartella"
|
||||
|
||||
msgctxt "#70552"
|
||||
msgid "Delete the folder and links it contains?"
|
||||
msgstr "Eliminare la cartella con tutti i link?"
|
||||
|
||||
msgctxt "#70553"
|
||||
msgid "Change link title"
|
||||
msgstr "Cambia titolo del link"
|
||||
|
||||
msgctxt "#70554"
|
||||
msgid "Select thumbnail:"
|
||||
msgstr "Seleziona thumbnail:"
|
||||
|
||||
msgctxt "#70555"
|
||||
msgid "Move link to:"
|
||||
msgstr "Sposta link in:"
|
||||
|
||||
msgctxt "#70556"
|
||||
msgid "%d links in folder"
|
||||
msgstr "%d link nella cartella"
|
||||
|
||||
msgctxt "#70557"
|
||||
msgid "Save link"
|
||||
msgstr "Salva link"
|
||||
|
||||
msgctxt "#70558"
|
||||
msgid "Select color:"
|
||||
msgstr "Seleziona colore:"
|
||||
|
||||
msgctxt "#70559"
|
||||
msgid "Now in Theatres "
|
||||
msgstr "Oggi in Sala"
|
||||
|
||||
msgctxt "#70528"
|
||||
msgctxt "#70560"
|
||||
msgid "Movies by Genre"
|
||||
msgstr "Per genere"
|
||||
|
||||
msgctxt "#70529"
|
||||
msgid "tv show"
|
||||
msgstr "serie"
|
||||
msgctxt "#70561"
|
||||
msgid "Search Similar
|
||||
msgstr "Cerca Simili"
@@ -1734,7 +1734,7 @@ msgid "[COLOR %s]Filter configuration for TV series...[/COLOR]"
|
||||
msgstr "[COLOR %s]Configurar filtro para series...[/COLOR]"
|
||||
|
||||
msgctxt "#60430"
|
||||
msgid "FILTRO: Delete '%s'"
|
||||
msgid "FILTER: Delete '%s'"
|
||||
msgstr "FILTRO: Borrar '%s'"
|
||||
|
||||
msgctxt "#60431"
|
||||
@@ -4792,13 +4792,150 @@ msgid "Verification of counters of videos seen / not seen (uncheck to verify)"
|
||||
msgstr "Verificación de los contadores de vídeos vistos/no vistos (desmarcar para verificar)"
|
||||
|
||||
msgctxt "#70527"
|
||||
msgid "My links"
|
||||
msgstr "Mis enlaces"
|
||||
|
||||
msgctxt "#70528"
|
||||
msgid "Default folder"
|
||||
msgstr "Carpeta por defecto"
|
||||
|
||||
msgctxt "#70529"
|
||||
msgid "Repeated link"
|
||||
msgstr "Enlace repetido"
|
||||
|
||||
msgctxt "#70530"
|
||||
msgid "You already have this link in the folder"
|
||||
msgstr "Ya tienes este enlace en la carpeta"
|
||||
|
||||
msgctxt "#70531"
|
||||
msgid "Saved link"
|
||||
msgstr "Guardado enlace"
|
||||
|
||||
msgctxt "#70532"
|
||||
msgid "Folder: %s"
|
||||
msgstr "Carpeta: %s"
|
||||
|
||||
msgctxt "#70533"
|
||||
msgid "Rename folder"
|
||||
msgstr "Cambiar nombre de la carpeta"
|
||||
|
||||
msgctxt "#70534"
|
||||
msgid "Delete folder"
|
||||
msgstr "Eliminar la carpeta"
|
||||
|
||||
msgctxt "#70535"
|
||||
msgid "Move up all"
|
||||
msgstr "Mover arriba del todo"
|
||||
|
||||
msgctxt "#70536"
|
||||
msgid "Move up"
|
||||
msgstr "Mover hacia arriba"
|
||||
|
||||
msgctxt "#70537"
|
||||
msgid "Move down"
|
||||
msgstr "Mover hacia abajo"
|
||||
|
||||
msgctxt "#70538"
|
||||
msgid "Move down all"
|
||||
msgstr "Mover abajo del todo"
|
||||
|
||||
msgctxt "#70539"
|
||||
msgid "* Create different folders to store your favorite links within Icarus. [CR]"
|
||||
msgstr "* Crea diferentes carpetas para guardar tus enlaces favoritos dentro de Icarus.[CR]]"
|
||||
|
||||
msgctxt "#70540"
|
||||
msgid "* To add links to folders, access the context menu from any point in Icarus.[CR]"
|
||||
msgstr "* Para añadir enlaces a las carpetas accede al menú contextual desde cualquier punto de Icarus.[CR]"
|
||||
|
||||
msgctxt "#70541"
|
||||
msgid "* The links can be channels, sections within the channels, searches, and even movies and series although for the latter it is preferable to use the video library."
|
||||
msgstr "* Los enlaces pueden ser canales, secciones dentro de los canales, búsquedas, e incluso películas y series aunque para esto último es preferible utilizar la videoteca."
|
||||
|
||||
msgctxt "#70542"
|
||||
msgid "Create new folder ..."
|
||||
msgstr "Crear nueva carpeta ..."
|
||||
|
||||
msgctxt "#70543"
|
||||
msgid "Move to another folder"
|
||||
msgstr "Mover a otra carpeta"
|
||||
|
||||
msgctxt "#70544"
|
||||
msgid "Change title"
|
||||
msgstr "Cambiar título"
|
||||
|
||||
msgctxt "#70545"
|
||||
msgid "Change color"
|
||||
msgstr "Cambiar color"
|
||||
|
||||
msgctxt "#70546"
|
||||
msgid "Save link in:"
|
||||
msgstr "Guardar enlace en:"
|
||||
|
||||
msgctxt "#70547"
|
||||
msgid "Change thumbnail"
|
||||
msgstr "Cambiar thumbnail"
|
||||
|
||||
msgctxt "#70548"
|
||||
msgid "Delete link"
|
||||
msgstr "Eliminar enlace"
|
||||
|
||||
msgctxt "#70549"
|
||||
msgid "Select folder"
|
||||
msgstr "Seleccionar carpeta"
|
||||
|
||||
msgctxt "#70550"
|
||||
msgid "Create new folder"
|
||||
msgstr "Crear nueva carpeta"
|
||||
|
||||
msgctxt "#70551"
|
||||
msgid "Folder name"
|
||||
msgstr "Nombre de la carpeta"
|
||||
|
||||
msgctxt "#70552"
|
||||
msgid "Delete the folder and links it contains?"
|
||||
msgstr "¿Borrar la carpeta y los enlaces que contiene?"
|
||||
|
||||
msgctxt "#70553"
|
||||
msgid "Change link title"
|
||||
msgstr "Cambiar título del enlace"
|
||||
|
||||
msgctxt "#70554"
|
||||
msgid "Select thumbnail:"
|
||||
msgstr "Seleccionar thumbnail:"
|
||||
|
||||
msgctxt "#70555"
|
||||
msgid "Move link to:"
|
||||
msgstr "Mover enlace a:"
|
||||
|
||||
msgctxt "#70556"
|
||||
msgid "%d links in folder"
|
||||
msgstr "%d enlaces en la carpeta"
|
||||
|
||||
msgctxt "#70557"
|
||||
msgid "Save link"
|
||||
msgstr "Guardar enlace"
|
||||
|
||||
msgctxt "#70558"
|
||||
msgid "Select color:"
|
||||
msgstr "Seleccionar color:"
|
||||
|
||||
msgctxt "#70559"
|
||||
msgid "Now in Theatres "
|
||||
msgstr "Ahora en cines"
|
||||
|
||||
msgctxt "#70528"
|
||||
msgctxt "#70560"
|
||||
msgid "Movies by Genre"
|
||||
msgstr "Por generos"
|
||||
|
||||
msgctxt "#70529"
|
||||
msgid "tv show"
|
||||
msgstr "serie"
|
||||
msgctxt "#70561"
|
||||
msgid "Search Similar
|
||||
msgstr "Buscar Similares"
@@ -1734,7 +1734,7 @@ msgid "[COLOR %s]Filter configuration for TV series...[/COLOR]"
|
||||
msgstr "[COLOR %s]Configurar filtro para series...[/COLOR]"
|
||||
|
||||
msgctxt "#60430"
|
||||
msgid "FILTRO: Delete '%s'"
|
||||
msgid "FILTER: Delete '%s'"
|
||||
msgstr "FILTRO: Borrar '%s'"
|
||||
|
||||
msgctxt "#60431"
|
||||
@@ -4792,13 +4792,150 @@ msgid "Verification of counters of videos seen / not seen (uncheck to verify)"
|
||||
msgstr "Verificación de los contadores de vídeos vistos/no vistos (desmarcar para verificar)"
|
||||
|
||||
msgctxt "#70527"
|
||||
msgid "My links"
|
||||
msgstr "Mis enlaces"
|
||||
|
||||
msgctxt "#70528"
|
||||
msgid "Default folder"
|
||||
msgstr "Carpeta por defecto"
|
||||
|
||||
msgctxt "#70529"
|
||||
msgid "Repeated link"
|
||||
msgstr "Enlace repetido"
|
||||
|
||||
msgctxt "#70530"
|
||||
msgid "You already have this link in the folder"
|
||||
msgstr "Ya tienes este enlace en la carpeta"
|
||||
|
||||
msgctxt "#70531"
|
||||
msgid "Saved link"
|
||||
msgstr "Guardado enlace"
|
||||
|
||||
msgctxt "#70532"
|
||||
msgid "Folder: %s"
|
||||
msgstr "Carpeta: %s"
|
||||
|
||||
msgctxt "#70533"
|
||||
msgid "Rename folder"
|
||||
msgstr "Cambiar nombre de la carpeta"
|
||||
|
||||
msgctxt "#70534"
|
||||
msgid "Delete folder"
|
||||
msgstr "Eliminar la carpeta"
|
||||
|
||||
msgctxt "#70535"
|
||||
msgid "Move up all"
|
||||
msgstr "Mover arriba del todo"
|
||||
|
||||
msgctxt "#70536"
|
||||
msgid "Move up"
|
||||
msgstr "Mover hacia arriba"
|
||||
|
||||
msgctxt "#70537"
|
||||
msgid "Move down"
|
||||
msgstr "Mover hacia abajo"
|
||||
|
||||
msgctxt "#70538"
|
||||
msgid "Move down all"
|
||||
msgstr "Mover abajo del todo"
|
||||
|
||||
msgctxt "#70539"
|
||||
msgid "* Create different folders to store your favorite links within Icarus. [CR]"
|
||||
msgstr "* Crea diferentes carpetas para guardar tus enlaces favoritos dentro de Icarus.[CR]]"
|
||||
|
||||
msgctxt "#70540"
|
||||
msgid "* To add links to folders, access the context menu from any point in Icarus.[CR]"
|
||||
msgstr "* Para añadir enlaces a las carpetas accede al menú contextual desde cualquier punto de Icarus.[CR]"
|
||||
|
||||
msgctxt "#70541"
|
||||
msgid "* The links can be channels, sections within the channels, searches, and even movies and series although for the latter it is preferable to use the video library."
|
||||
msgstr "* Los enlaces pueden ser canales, secciones dentro de los canales, búsquedas, e incluso películas y series aunque para esto último es preferible utilizar la videoteca."
|
||||
|
||||
msgctxt "#70542"
|
||||
msgid "Create new folder ..."
|
||||
msgstr "Crear nueva carpeta ..."
|
||||
|
||||
msgctxt "#70543"
|
||||
msgid "Move to another folder"
|
||||
msgstr "Mover a otra carpeta"
|
||||
|
||||
msgctxt "#70544"
|
||||
msgid "Change title"
|
||||
msgstr "Cambiar título"
|
||||
|
||||
msgctxt "#70545"
|
||||
msgid "Change color"
|
||||
msgstr "Cambiar color"
|
||||
|
||||
msgctxt "#70546"
|
||||
msgid "Save link in:"
|
||||
msgstr "Guardar enlace en:"
|
||||
|
||||
msgctxt "#70547"
|
||||
msgid "Change thumbnail"
|
||||
msgstr "Cambiar thumbnail"
|
||||
|
||||
msgctxt "#70548"
|
||||
msgid "Delete link"
|
||||
msgstr "Eliminar enlace"
|
||||
|
||||
msgctxt "#70549"
|
||||
msgid "Select folder"
|
||||
msgstr "Seleccionar carpeta"
|
||||
|
||||
msgctxt "#70550"
|
||||
msgid "Create new folder"
|
||||
msgstr "Crear nueva carpeta"
|
||||
|
||||
msgctxt "#70551"
|
||||
msgid "Folder name"
|
||||
msgstr "Nombre de la carpeta"
|
||||
|
||||
msgctxt "#70552"
|
||||
msgid "Delete the folder and links it contains?"
|
||||
msgstr "¿Borrar la carpeta y los enlaces que contiene?"
|
||||
|
||||
msgctxt "#70553"
|
||||
msgid "Change link title"
|
||||
msgstr "Cambiar título del enlace"
|
||||
|
||||
msgctxt "#70554"
|
||||
msgid "Select thumbnail:"
|
||||
msgstr "Seleccionar thumbnail:"
|
||||
|
||||
msgctxt "#70555"
|
||||
msgid "Move link to:"
|
||||
msgstr "Mover enlace a:"
|
||||
|
||||
msgctxt "#70556"
|
||||
msgid "%d links in folder"
|
||||
msgstr "%d enlaces en la carpeta"
|
||||
|
||||
msgctxt "#70557"
|
||||
msgid "Save link"
|
||||
msgstr "Guardar enlace"
|
||||
|
||||
msgctxt "#70558"
|
||||
msgid "Select color:"
|
||||
msgstr "Seleccionar color:"
|
||||
|
||||
msgctxt "#70559"
|
||||
msgid "Now in Theatres "
|
||||
msgstr "Ahora en cines"
|
||||
|
||||
msgctxt "#70528"
|
||||
msgctxt "#70560"
|
||||
msgid "Movies by Genre"
|
||||
msgstr "Por generos"
|
||||
|
||||
msgctxt "#70529"
|
||||
msgid "tv show"
|
||||
msgstr "serie"
|
||||
msgctxt "#70561"
|
||||
msgid "Search Similar
|
||||
msgstr "Buscar Similares"
@@ -1734,7 +1734,7 @@ msgid "[COLOR %s]Filter configuration for TV series...[/COLOR]"
|
||||
msgstr "[COLOR %s]Configurar filtro para series...[/COLOR]"
|
||||
|
||||
msgctxt "#60430"
|
||||
msgid "FILTRO: Delete '%s'"
|
||||
msgid "FILTER: Delete '%s'"
|
||||
msgstr "FILTRO: Borrar '%s'"
|
||||
|
||||
msgctxt "#60431"
|
||||
@@ -4792,13 +4792,150 @@ msgid "Verification of counters of videos seen / not seen (uncheck to verify)"
|
||||
msgstr "Verificación de los contadores de vídeos vistos/no vistos (desmarcar para verificar)"
|
||||
|
||||
msgctxt "#70527"
|
||||
msgid "My links"
|
||||
msgstr "Mis enlaces"
|
||||
|
||||
msgctxt "#70528"
|
||||
msgid "Default folder"
|
||||
msgstr "Carpeta por defecto"
|
||||
|
||||
msgctxt "#70529"
|
||||
msgid "Repeated link"
|
||||
msgstr "Enlace repetido"
|
||||
|
||||
msgctxt "#70530"
|
||||
msgid "You already have this link in the folder"
|
||||
msgstr "Ya tienes este enlace en la carpeta"
|
||||
|
||||
msgctxt "#70531"
|
||||
msgid "Saved link"
|
||||
msgstr "Guardado enlace"
|
||||
|
||||
msgctxt "#70532"
|
||||
msgid "Folder: %s"
|
||||
msgstr "Carpeta: %s"
|
||||
|
||||
msgctxt "#70533"
|
||||
msgid "Rename folder"
|
||||
msgstr "Cambiar nombre de la carpeta"
|
||||
|
||||
msgctxt "#70534"
|
||||
msgid "Delete folder"
|
||||
msgstr "Eliminar la carpeta"
|
||||
|
||||
msgctxt "#70535"
|
||||
msgid "Move up all"
|
||||
msgstr "Mover arriba del todo"
|
||||
|
||||
msgctxt "#70536"
|
||||
msgid "Move up"
|
||||
msgstr "Mover hacia arriba"
|
||||
|
||||
msgctxt "#70537"
|
||||
msgid "Move down"
|
||||
msgstr "Mover hacia abajo"
|
||||
|
||||
msgctxt "#70538"
|
||||
msgid "Move down all"
|
||||
msgstr "Mover abajo del todo"
|
||||
|
||||
msgctxt "#70539"
|
||||
msgid "* Create different folders to store your favorite links within Icarus. [CR]"
|
||||
msgstr "* Crea diferentes carpetas para guardar tus enlaces favoritos dentro de Icarus.[CR]]"
|
||||
|
||||
msgctxt "#70540"
|
||||
msgid "* To add links to folders, access the context menu from any point in Icarus.[CR]"
|
||||
msgstr "* Para añadir enlaces a las carpetas accede al menú contextual desde cualquier punto de Icarus.[CR]"
|
||||
|
||||
msgctxt "#70541"
|
||||
msgid "* The links can be channels, sections within the channels, searches, and even movies and series although for the latter it is preferable to use the video library."
|
||||
msgstr "* Los enlaces pueden ser canales, secciones dentro de los canales, búsquedas, e incluso películas y series aunque para esto último es preferible utilizar la videoteca."
|
||||
|
||||
msgctxt "#70542"
|
||||
msgid "Create new folder ..."
|
||||
msgstr "Crear nueva carpeta ..."
|
||||
|
||||
msgctxt "#70543"
|
||||
msgid "Move to another folder"
|
||||
msgstr "Mover a otra carpeta"
|
||||
|
||||
msgctxt "#70544"
|
||||
msgid "Change title"
|
||||
msgstr "Cambiar título"
|
||||
|
||||
msgctxt "#70545"
|
||||
msgid "Change color"
|
||||
msgstr "Cambiar color"
|
||||
|
||||
msgctxt "#70546"
|
||||
msgid "Save link in:"
|
||||
msgstr "Guardar enlace en:"
|
||||
|
||||
msgctxt "#70547"
|
||||
msgid "Change thumbnail"
|
||||
msgstr "Cambiar thumbnail"
|
||||
|
||||
msgctxt "#70548"
|
||||
msgid "Delete link"
|
||||
msgstr "Eliminar enlace"
|
||||
|
||||
msgctxt "#70549"
|
||||
msgid "Select folder"
|
||||
msgstr "Seleccionar carpeta"
|
||||
|
||||
msgctxt "#70550"
|
||||
msgid "Create new folder"
|
||||
msgstr "Crear nueva carpeta"
|
||||
|
||||
msgctxt "#70551"
|
||||
msgid "Folder name"
|
||||
msgstr "Nombre de la carpeta"
|
||||
|
||||
msgctxt "#70552"
|
||||
msgid "Delete the folder and links it contains?"
|
||||
msgstr "¿Borrar la carpeta y los enlaces que contiene?"
|
||||
|
||||
msgctxt "#70553"
|
||||
msgid "Change link title"
|
||||
msgstr "Cambiar título del enlace"
|
||||
|
||||
msgctxt "#70554"
|
||||
msgid "Select thumbnail:"
|
||||
msgstr "Seleccionar thumbnail:"
|
||||
|
||||
msgctxt "#70555"
|
||||
msgid "Move link to:"
|
||||
msgstr "Mover enlace a:"
|
||||
|
||||
msgctxt "#70556"
|
||||
msgid "%d links in folder"
|
||||
msgstr "%d enlaces en la carpeta"
|
||||
|
||||
msgctxt "#70557"
|
||||
msgid "Save link"
|
||||
msgstr "Guardar enlace"
|
||||
|
||||
msgctxt "#70558"
|
||||
msgid "Select color:"
|
||||
msgstr "Seleccionar color:"
|
||||
|
||||
msgctxt "#70559"
|
||||
msgid "Now in Theatres "
|
||||
msgstr "Ahora en cines"
|
||||
|
||||
msgctxt "#70528"
|
||||
msgctxt "#70560"
|
||||
msgid "Movies by Genre"
|
||||
msgstr "Por generos"
|
||||
|
||||
msgctxt "#70529"
|
||||
msgid "tv show"
|
||||
msgstr "serie"
|
||||
msgctxt "#70561"
|
||||
msgid "Search Similar
|
||||
msgstr "Buscar Similares"
BIN  plugin.video.alfa/resources/media/channels/thumb/maxipelis24.png  (Normal file)
Binary file not shown.  After: Width | Height | Size: 26 KiB
BIN  plugin.video.alfa/resources/media/channels/thumb/zonatorrent.png  (Normal file)
Binary file not shown.  After: Width | Height | Size: 35 KiB
Binary file not shown.  After: Width | Height | Size: 36 KiB
@@ -9,7 +9,7 @@ from platformcode import logger, config

def test_video_exists(page_url):
    logger.info("(page_url='%s')" % page_url)
    data = httptools.downloadpage(page_url).data
    if "File Not Found" in data:
    if "File Not Found" in data or "File was deleted" in data:
        return False, config.get_localized_string(70292) % "ClipWatching"
    return True, ""

17  plugin.video.alfa/servers/tusfiles.json → plugin.video.alfa/servers/megadrive.json  (Executable file → Normal file)

@@ -4,18 +4,14 @@
        "ignore_urls": [],
        "patterns": [
            {
                "pattern": "http://tusfiles.org/\\?([A-z0-9]+)",
                "url": "http://tusfiles.org/?\\1/"
            },
            {
                "pattern": "tusfiles.net/(?:embed-|)([A-z0-9]+)",
                "url": "http://tusfiles.net/\\1"
                "pattern": "megadrive.co/embed/([A-z0-9]+)",
                "url": "https://megadrive.co/embed/\\1"
            }
        ]
    },
    "free": true,
    "id": "tusfiles",
    "name": "tusfiles",
    "id": "megadrive",
    "name": "megadrive",
    "settings": [
        {
            "default": false,

@@ -41,5 +37,6 @@
            "type": "list",
            "visible": false
        }
    ]
}
    ],
    "thumbnail": "https://s8.postimg.cc/kr5olxmad/megadrive1.png"
}

27  plugin.video.alfa/servers/megadrive.py  (Normal file)

@@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-

from core import httptools
from core import scrapertools
from platformcode import logger


def test_video_exists(page_url):
    logger.info("(page_url='%s')" % page_url)
    data = httptools.downloadpage(page_url).data
    # Known error markers returned by the hoster's embed page
    if "no longer exists" in data or "to copyright issues" in data:
        return False, "[Megadrive] El video ha sido borrado"
    if "please+try+again+later." in data:
        return False, "[Megadrive] Error de Megadrive, no se puede generar el enlace al video"
    if "File has been removed due to inactivity" in data:
        return False, "[Megadrive] El archivo ha sido removido por inactividad"
    return True, ""


def get_video_url(page_url, user="", password="", video_password=""):
    logger.info("(page_url='%s')" % page_url)
    data = httptools.downloadpage(page_url).data
    video_urls = []
    # Direct MP4 URL taken from the <source> tag of the embedded player
    videourl = scrapertools.find_single_match(data, "<source.*?src='([^']+)")
    video_urls.append([".MP4 [megadrive]", videourl])

    return video_urls
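Editor's note: for reference, a small self-contained sketch of how the find_videos pattern added in megadrive.json is intended to match an embed URL before it reaches the resolver above. The sample URL is invented, not a real link.

import re

# Same pattern as in megadrive.json; the sample embed URL is hypothetical.
pattern = r"megadrive.co/embed/([A-z0-9]+)"
sample = "https://megadrive.co/embed/AbC123xyz"
match = re.search(pattern, sample)
if match:
    # Canonical URL the server module would receive as page_url
    print("https://megadrive.co/embed/" + match.group(1))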
42
plugin.video.alfa/servers/thevid.json
Normal file
42
plugin.video.alfa/servers/thevid.json
Normal file
@@ -0,0 +1,42 @@
|
||||
{
|
||||
"active": true,
|
||||
"find_videos": {
|
||||
"ignore_urls": [],
|
||||
"patterns": [
|
||||
{
|
||||
"pattern": "(thevid.net/e/\\w+)",
|
||||
"url": "https://\\1"
|
||||
}
|
||||
]
|
||||
},
|
||||
"free": true,
|
||||
"id": "thevid",
|
||||
"name": "thevid",
|
||||
"settings": [
|
||||
{
|
||||
"default": false,
|
||||
"enabled": true,
|
||||
"id": "black_list",
|
||||
"label": "@60654",
|
||||
"type": "bool",
|
||||
"visible": true
|
||||
},
|
||||
{
|
||||
"default": 0,
|
||||
"enabled": true,
|
||||
"id": "favorites_servers_list",
|
||||
"label": "@60655",
|
||||
"lvalues": [
|
||||
"No",
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
"5"
|
||||
],
|
||||
"type": "list",
|
||||
"visible": false
|
||||
}
|
||||
],
|
||||
"thumbnail": ""
|
||||
}
|
||||
30  plugin.video.alfa/servers/thevid.py  (Normal file)

@@ -0,0 +1,30 @@
# -*- coding: utf-8 -*-

from core import httptools
from core import scrapertools
from lib import jsunpack
from platformcode import logger, config


def test_video_exists(page_url):
    logger.info("(page_url='%s')" % page_url)
    data = httptools.downloadpage(page_url).data
    if "Video not found..." in data:
        return False, config.get_localized_string(70292) % "Thevid"
    return True, ""


def get_video_url(page_url, user="", password="", video_password=""):
    logger.info("(page_url='%s')" % page_url)
    data = httptools.downloadpage(page_url).data
    # The stream URLs are hidden inside packed eval() scripts; unpack each one
    packed = scrapertools.find_multiple_matches(data, "(?s)<script>\s*eval(.*?)\s*</script>")
    video_urls = []  # initialised up front so an empty list is returned when nothing unpacks
    for pack in packed:
        unpacked = jsunpack.unpack(pack)
        if "file" in unpacked:
            videos = scrapertools.find_multiple_matches(unpacked, 'file.="(//[^"]+)')
            for video in videos:
                video = "https:" + video
                video_urls.append(["mp4 [Thevid]", video])
            logger.info("Url: %s" % videos)
    return video_urls
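Editor's note: to illustrate the extraction step in get_video_url above, here is a minimal sketch run against a hypothetical unpacked player script. The CDN path is invented; only the capture logic mirrors the code in this commit.

import re

# Invented sample of what jsunpack.unpack() might return for this hoster.
unpacked = 'var file="";file+="//cdn.example.net/v/abc123.mp4";player.load(file);'
# Same capture as scrapertools.find_multiple_matches(unpacked, 'file.="(//[^"]+)')
for path in re.findall(r'file.="(//[^"]+)', unpacked):
    print("https:" + path)  # -> https://cdn.example.net/v/abc123.mp4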
@@ -4,7 +4,7 @@
        "ignore_urls": [],
        "patterns": [
            {
                "pattern": "(?:thevideo.me|tvad.me|thevid.net|thevideo.ch|thevideo.us)/(?:embed-|)([A-z0-9]+)",
                "pattern": "(?:thevideo.me|tvad.me|thevideo.ch|thevideo.us)/(?:embed-|)([A-z0-9]+)",
                "url": "https://thevideo.me/embed-\\1.html"
            }
        ]

@@ -1,53 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from core import httptools
|
||||
from core import scrapertools
|
||||
from platformcode import logger
|
||||
|
||||
|
||||
def test_video_exists(page_url):
|
||||
logger.info("(page_url='%s')" % page_url)
|
||||
|
||||
if "tusfiles.net" in page_url:
|
||||
data = httptools.downloadpage(page_url).data
|
||||
|
||||
if "File Not Found" in data:
|
||||
return False, "[Tusfiles] El archivo no existe o ha sido borrado"
|
||||
if "download is no longer available" in data:
|
||||
return False, "[Tusfiles] El archivo ya no está disponible"
|
||||
|
||||
return True, ""
|
||||
|
||||
|
||||
def get_video_url(page_url, premium=False, user="", password="", video_password=""):
|
||||
logger.info("page_url='%s'" % page_url)
|
||||
|
||||
# Saca el código del vídeo
|
||||
data = httptools.downloadpage(page_url).data.replace("\\", "")
|
||||
video_urls = []
|
||||
|
||||
if "tusfiles.org" in page_url:
|
||||
matches = scrapertools.find_multiple_matches(data,
|
||||
'"label"\s*:\s*(.*?),"type"\s*:\s*"([^"]+)","file"\s*:\s*"([^"]+)"')
|
||||
for calidad, tipo, video_url in matches:
|
||||
tipo = tipo.replace("video/", "")
|
||||
video_urls.append([".%s %sp [tusfiles]" % (tipo, calidad), video_url])
|
||||
|
||||
video_urls.sort(key=lambda it: int(it[0].split("p ", 1)[0].rsplit(" ")[1]))
|
||||
else:
|
||||
matches = scrapertools.find_multiple_matches(data, '<source src="([^"]+)" type="([^"]+)"')
|
||||
for video_url, tipo in matches:
|
||||
tipo = tipo.replace("video/", "")
|
||||
video_urls.append([".%s [tusfiles]" % tipo, video_url])
|
||||
|
||||
id = scrapertools.find_single_match(data, 'name="id" value="([^"]+)"')
|
||||
rand = scrapertools.find_single_match(data, 'name="rand" value="([^"]+)"')
|
||||
if id and rand:
|
||||
post = "op=download2&id=%s&rand=%s&referer=&method_free=&method_premium=" % (id, rand)
|
||||
location = httptools.downloadpage(page_url, post, follow_redirects=False, only_headers=True).headers.get(
|
||||
"location")
|
||||
if location:
|
||||
ext = location[-4:]
|
||||
video_urls.append(["%s [tusfiles]" % ext, location])
|
||||
|
||||
return video_urls
|
||||
42
plugin.video.alfa/servers/vevio.json
Normal file
42
plugin.video.alfa/servers/vevio.json
Normal file
@@ -0,0 +1,42 @@
|
||||
{
|
||||
"active": true,
|
||||
"find_videos": {
|
||||
"ignore_urls": [],
|
||||
"patterns": [
|
||||
{
|
||||
"pattern": "(vev.io/embed/[A-z0-9]+)",
|
||||
"url": "https://\\1"
|
||||
}
|
||||
]
|
||||
},
|
||||
"free": true,
|
||||
"id": "vevio",
|
||||
"name": "vevio",
|
||||
"settings": [
|
||||
{
|
||||
"default": false,
|
||||
"enabled": true,
|
||||
"id": "black_list",
|
||||
"label": "@60654",
|
||||
"type": "bool",
|
||||
"visible": true
|
||||
},
|
||||
{
|
||||
"default": 0,
|
||||
"enabled": true,
|
||||
"id": "favorites_servers_list",
|
||||
"label": "@60655",
|
||||
"lvalues": [
|
||||
"No",
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
"5"
|
||||
],
|
||||
"type": "list",
|
||||
"visible": false
|
||||
}
|
||||
],
|
||||
"thumbnail": "https://s8.postimg.cc/opp2c3p6d/vevio1.png"
|
||||
}
|
||||
29  plugin.video.alfa/servers/vevio.py  (Normal file)

@@ -0,0 +1,29 @@
# -*- coding: utf-8 -*-

import urllib

from core import httptools
from core import scrapertools
from platformcode import logger, config


def test_video_exists(page_url):
    logger.info("(page_url='%s')" % page_url)
    data = httptools.downloadpage(page_url).data
    if "File was deleted" in data or "Page Cannot Be Found" in data or "<title>Video not found" in data:
        return False, "[vevio] El archivo ha sido eliminado o no existe"
    return True, ""


def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    logger.info("url=" + page_url)
    video_urls = []
    post = urllib.urlencode({})  # empty POST body, so the API call is sent as a POST request
    video_id = scrapertools.find_single_match(page_url, "embed/([A-z0-9]+)")
    data = httptools.downloadpage("https://vev.io/api/serve/video/" + video_id, post=post).data
    # The response lists the available qualities as "resolution": "url" pairs
    bloque = scrapertools.find_single_match(data, 'qualities":\{(.*?)\}')
    matches = scrapertools.find_multiple_matches(bloque, '"([^"]+)":"([^"]+)')
    for res, media_url in matches:
        video_urls.append(
            [scrapertools.get_filename_from_url(media_url)[-4:] + " (" + res + ") [vevio.me]", media_url])
    return video_urls
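Editor's note: the two regexes in get_video_url assume the vev.io API answers with a JSON object keyed by resolution. The payload below is invented to make that assumed shape explicit; json parsing is shown only for illustration, the commit itself relies on the regex extraction.

import json

# Invented sample payload; the real API response shape is assumed, not captured.
sample = '{"qualities":{"1080":"https://cdn.example.net/v.mp4?q=1080","720":"https://cdn.example.net/v.mp4?q=720"}}'
for res, media_url in json.loads(sample)["qualities"].items():
    print(res + " -> " + media_url)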
@@ -1,73 +0,0 @@
|
||||
{
|
||||
"active": true,
|
||||
"find_videos": {
|
||||
"ignore_urls": [
|
||||
"http://vidspot.net/embed-theme.html",
|
||||
"http://vidspot.net/embed-jquery.html",
|
||||
"http://vidspot.net/embed-s.html",
|
||||
"http://vidspot.net/embed-images.html",
|
||||
"http://vidspot.net/embed-faq.html",
|
||||
"http://vidspot.net/embed-embed.html",
|
||||
"http://vidspot.net/embed-ri.html",
|
||||
"http://vidspot.net/embed-d.html",
|
||||
"http://vidspot.net/embed-css.html",
|
||||
"http://vidspot.net/embed-js.html",
|
||||
"http://vidspot.net/embed-player.html",
|
||||
"http://vidspot.net/embed-cgi.html",
|
||||
"http://vidspot.net/embed-i.html",
|
||||
"http://vidspot.net/images",
|
||||
"http://vidspot.net/theme",
|
||||
"http://vidspot.net/xupload",
|
||||
"http://vidspot.net/s",
|
||||
"http://vidspot.net/js",
|
||||
"http://vidspot.net/jquery",
|
||||
"http://vidspot.net/login",
|
||||
"http://vidspot.net/make",
|
||||
"http://vidspot.net/i",
|
||||
"http://vidspot.net/faq",
|
||||
"http://vidspot.net/tos",
|
||||
"http://vidspot.net/premium",
|
||||
"http://vidspot.net/checkfiles",
|
||||
"http://vidspot.net/privacy",
|
||||
"http://vidspot.net/refund",
|
||||
"http://vidspot.net/links",
|
||||
"http://vidspot.net/contact"
|
||||
],
|
||||
"patterns": [
|
||||
{
|
||||
"pattern": "vidspot.(?:net/|php\\?id=)(?:embed-)?([a-z0-9]+)",
|
||||
"url": "http://vidspot.net/\\1"
|
||||
}
|
||||
]
|
||||
},
|
||||
"free": true,
|
||||
"id": "vidspot",
|
||||
"name": "vidspot",
|
||||
"settings": [
|
||||
{
|
||||
"default": false,
|
||||
"enabled": true,
|
||||
"id": "black_list",
|
||||
"label": "@60654",
|
||||
"type": "bool",
|
||||
"visible": true
|
||||
},
|
||||
{
|
||||
"default": 0,
|
||||
"enabled": true,
|
||||
"id": "favorites_servers_list",
|
||||
"label": "@60655",
|
||||
"lvalues": [
|
||||
"No",
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
"5"
|
||||
],
|
||||
"type": "list",
|
||||
"visible": false
|
||||
}
|
||||
],
|
||||
"thumbnail": "server_vidspot.png"
|
||||
}
|
||||
@@ -1,57 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from core import scrapertools
|
||||
from platformcode import logger
|
||||
|
||||
|
||||
def test_video_exists(page_url):
|
||||
logger.info("(page_url='%s')" % page_url)
|
||||
|
||||
# No existe / borrado: http://vidspot.net/8jcgbrzhujri
|
||||
data = scrapertools.cache_page("http://anonymouse.org/cgi-bin/anon-www.cgi/" + page_url)
|
||||
if "File Not Found" in data or "Archivo no encontrado" in data or '<b class="err">Deleted' in data \
|
||||
or '<b class="err">Removed' in data or '<font class="err">No such' in data:
|
||||
return False, "No existe o ha sido borrado de vidspot"
|
||||
|
||||
return True, ""
|
||||
|
||||
|
||||
def get_video_url(page_url, premium=False, user="", password="", video_password=""):
|
||||
logger.info("url=%s" % page_url)
|
||||
|
||||
# Normaliza la URL
|
||||
videoid = scrapertools.get_match(page_url, "http://vidspot.net/([a-z0-9A-Z]+)")
|
||||
page_url = "http://vidspot.net/embed-%s-728x400.html" % videoid
|
||||
data = scrapertools.cachePage(page_url)
|
||||
if "Access denied" in data:
|
||||
geobloqueo = True
|
||||
else:
|
||||
geobloqueo = False
|
||||
|
||||
if geobloqueo:
|
||||
url = "http://www.videoproxy.co/hide.php"
|
||||
post = "go=%s" % page_url
|
||||
location = scrapertools.get_header_from_response(url, post=post, header_to_get="location")
|
||||
url = "http://www.videoproxy.co/%s" % location
|
||||
data = scrapertools.cachePage(url)
|
||||
|
||||
# Extrae la URL
|
||||
media_url = scrapertools.find_single_match(data, '"file" : "([^"]+)",')
|
||||
|
||||
video_urls = []
|
||||
|
||||
if media_url != "":
|
||||
if geobloqueo:
|
||||
url = "http://www.videoproxy.co/hide.php"
|
||||
post = "go=%s" % media_url
|
||||
location = scrapertools.get_header_from_response(url, post=post, header_to_get="location")
|
||||
media_url = "http://www.videoproxy.co/%s&direct=false" % location
|
||||
else:
|
||||
media_url += "&direct=false"
|
||||
|
||||
video_urls.append([scrapertools.get_filename_from_url(media_url)[-4:] + " [vidspot]", media_url])
|
||||
|
||||
for video_url in video_urls:
|
||||
logger.info("%s - %s" % (video_url[0], video_url[1]))
|
||||
|
||||
return video_urls
|
||||