Merge remote-tracking branch 'alfa-addon/master'

Author: unknown
Date: 2018-09-19 15:08:39 -03:00
85 changed files with 2593 additions and 16757 deletions

View File

@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<addon id="plugin.video.alfa" name="Alfa" version="2.7.3" provider-name="Alfa Addon">
<addon id="plugin.video.alfa" name="Alfa" version="2.7.4" provider-name="Alfa Addon">
<requires>
<import addon="xbmc.python" version="2.1.0"/>
<import addon="script.module.libtorrent" optional="true"/>
@@ -19,17 +19,17 @@
</assets>
<news>[B]These are the changes in this version:[/B]
[COLOR green][B]Added channels and fixes[/B][/COLOR]
¤ allcalidad ¤ cinecalidad
¤ repelis ¤ cumlouder
¤ porntrex ¤ crunchyroll
¤ pedropolis ¤ pepecine
¤ repelis ¤ thevid
¤ vevio ¤ danimados
¤ sipeliculas ¤ cinecalidad
¤ locopelis ¤ pelisipad
¤ divxtotal ¤ elitetorrent
¤ estrenosgo ¤ grantorrent
¤ mejortorrent1 ¤ newpct1
¤ danimados ¤ fanpelis
¤ repelis
¤ tvvip ¤ zonatorrent
¤ maxipelis24 ¤ wikiseries
¤ internal fixes
¤ Thanks to @angedam, @chivmalev, @alaquepasa for contributing to this version
¤ Thanks to @angedam and @chivmalev for contributing to this version
</news>
<description lang="es">Navega con Kodi por páginas web para ver sus videos de manera fácil.</description>

View File

@@ -161,6 +161,7 @@ def findvideos(item):
itemlist.extend(servertools.find_video_items(data=data))
for videoitem in itemlist:
videoitem.channel = item.channel
videoitem.title = '[%s]' % videoitem.server.capitalize()
return itemlist
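The hunk above stamps every extracted link with its channel and a title built from the server name. A minimal sketch of that retitling pass, assuming only that items expose server, channel and title attributes as Alfa's Item does (VideoItem here is a hypothetical stand-in):

class VideoItem(object):
    def __init__(self, server):
        self.server = server
        self.channel = ''
        self.title = ''

def tag_with_server(itemlist, channel):
    # Mirror of the loop above: claim the item for the channel and
    # title it after its server.
    for videoitem in itemlist:
        videoitem.channel = channel
        videoitem.title = '[%s]' % videoitem.server.capitalize()
    return itemlist

items = tag_with_server([VideoItem('openload'), VideoItem('streamango')], 'allcalidad')
print([i.title for i in items])  # ['[Openload]', '[Streamango]']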

View File

@@ -1,61 +0,0 @@
{
"id": "cineasiaenlinea",
"name": "CineAsiaEnLinea",
"active": true,
"adult": false,
"language": ["cast", "lat"],
"thumbnail": "http://i.imgur.com/5KOU8uy.png?3",
"banner": "cineasiaenlinea.png",
"categories": [
"movie",
"vos"
],
"settings": [
{
"id": "modo_grafico",
"type": "bool",
"label": "Buscar información extra",
"default": true,
"enabled": true,
"visible": true
},
{
"id": "include_in_global_search",
"type": "bool",
"label": "Incluir en búsqueda global",
"default": true,
"enabled": true,
"visible": true
},
{
"id": "include_in_newest_peliculas",
"type": "bool",
"label": "Incluir en Novedades - Películas",
"default": true,
"enabled": true,
"visible": true
},
{
"id": "include_in_newest_terror",
"type": "bool",
"label": "Incluir en Novedades - terror",
"default": true,
"enabled": true,
"visible": true
},
{
"id": "perfil",
"type": "list",
"label": "Perfil de color",
"default": 3,
"enabled": true,
"visible": true,
"lvalues": [
"Sin color",
"Perfil 3",
"Perfil 2",
"Perfil 1"
]
}
]
}

View File

@@ -1,177 +0,0 @@
# -*- coding: utf-8 -*-
import re
from core import httptools
from core import scrapertools
from core import servertools
from core import tmdb
from core.item import Item
from platformcode import config, logger
from channelselector import get_thumb
host = "http://www.cineasiaenlinea.com/"
__channel__='cineasiaenlinea'
try:
__modo_grafico__ = config.get_setting('modo_grafico', __channel__)
except:
__modo_grafico__ = True
# Channel configuration
__perfil__ = int(config.get_setting('perfil', 'cineasiaenlinea'))
# Set the color profile
perfil = [['0xFFFFE6CC', '0xFFFFCE9C', '0xFF994D00'],
['0xFFA5F6AF', '0xFF5FDA6D', '0xFF11811E'],
['0xFF58D3F7', '0xFF2E9AFE', '0xFF2E64FE']]
if __perfil__ - 1 >= 0:
color1, color2, color3 = perfil[__perfil__ - 1]
else:
color1 = color2 = color3 = ""
def mainlist(item):
logger.info()
itemlist = []
itemlist.append(item.clone(action="peliculas", title="Novedades", url=host + "archivos/peliculas",
thumbnail=get_thumb('newest', auto=True), text_color=color1,))
itemlist.append(item.clone(action="peliculas", title="Estrenos", url=host + "archivos/estrenos",
thumbnail=get_thumb('premieres', auto=True), text_color=color1))
itemlist.append(item.clone(action="indices", title="Por géneros", url=host,
thumbnail=get_thumb('genres', auto=True), text_color=color1))
itemlist.append(item.clone(action="indices", title="Por país", url=host, text_color=color1,
thumbnail=get_thumb('country', auto=True)))
itemlist.append(item.clone(action="indices", title="Por año", url=host, text_color=color1,
thumbnail=get_thumb('year', auto=True)))
itemlist.append(item.clone(title="", action=""))
itemlist.append(item.clone(action="search", title="Buscar...", text_color=color3,
thumbnail=get_thumb('search', auto=True)))
itemlist.append(item.clone(action="configuracion", title="Configurar canal...", text_color="gold", folder=False))
return itemlist
def configuracion(item):
from platformcode import platformtools
ret = platformtools.show_channel_settings()
platformtools.itemlist_refresh()
return ret
def search(item, texto):
logger.info()
item.url = "%s?s=%s" % (host, texto.replace(" ", "+"))
try:
return peliculas(item)
# Catch the exception so a failing channel does not break the global search
except:
import sys
for line in sys.exc_info():
logger.error("%s" % line)
return []
def newest(categoria):
logger.info()
itemlist = []
item = Item()
try:
if categoria == 'peliculas':
item.url = host + "archivos/peliculas"
elif categoria == 'terror':
item.url = host + "genero/terror"
item.action = "peliculas"
itemlist = peliculas(item)
if itemlist[-1].action == "peliculas":
itemlist.pop()
# Catch the exception so a failing channel does not break the "newest" listing
except:
import sys
for line in sys.exc_info():
logger.error("{0}".format(line))
return []
return itemlist
def peliculas(item):
logger.info()
itemlist = []
item.text_color = color2
# Download the page
data = httptools.downloadpage(item.url).data
patron = '<h3><a href="([^"]+)">([^<]+)<.*?src="([^"]+)".*?<a rel="tag">([^<]+)<' \
'.*?<a rel="tag">([^<]+)<'
matches = scrapertools.find_multiple_matches(data, patron)
for scrapedurl, scrapedtitle, scrapedthumbnail, year, calidad in matches:
title = re.sub(r' \((\d+)\)', '', scrapedtitle)
scrapedtitle += " [%s]" % calidad
infolab = {'year': year}
itemlist.append(item.clone(action="findvideos", title=scrapedtitle, url=scrapedurl,
thumbnail=scrapedthumbnail, infoLabels=infolab,
contentTitle=title, contentType="movie", quality=calidad))
tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)
next_page = scrapertools.find_single_match(data, '<a class="nextpostslink" rel="next" href="([^"]+)"')
if next_page:
itemlist.append(item.clone(title=">> Página Siguiente", url=next_page))
return itemlist
def indices(item):
logger.info()
itemlist = []
# Download the page
data = httptools.downloadpage(item.url).data
logger.info(data)
if "géneros" in item.title:
bloque = scrapertools.find_single_match(data, '(?i)<h4>Peliculas por genero</h4>(.*?)</ul>')
matches = scrapertools.find_multiple_matches(bloque, '<a href="([^"]+)".*?>([^<]+)<')
elif "año" in item.title:
bloque = scrapertools.find_single_match(data, '(?i)<h4>Peliculas por Año</h4>(.*?)</select>')
matches = scrapertools.find_multiple_matches(bloque, '<option value="([^"]+)">([^<]+)<')
else:
bloque = scrapertools.find_single_match(data, '(?i)<h4>Peliculas por Pais</h4>(.*?)</ul>')
matches = scrapertools.find_multiple_matches(bloque, '<a href="([^"]+)".*?>([^<]+)<')
for scrapedurl, scrapedtitle in matches:
if "año" in item.title:
scrapedurl = "%sfecha-estreno/%s" % (host, scrapedurl)
itemlist.append(Item(channel=item.channel, action="peliculas", title=scrapedtitle, url=scrapedurl,
thumbnail=item.thumbnail, text_color=color3))
return itemlist
def findvideos(item):
logger.info()
data = httptools.downloadpage(item.url).data
item.infoLabels["plot"] = scrapertools.find_single_match(data, '(?i)<h2>SINOPSIS.*?<p>(.*?)</p>')
item.infoLabels["trailer"] = scrapertools.find_single_match(data, 'src="(http://www.youtube.com/embed/[^"]+)"')
itemlist = servertools.find_video_items(item=item, data=data)
for it in itemlist:
it.thumbnail = item.thumbnail
it.text_color = color2
itemlist.append(item.clone(action="add_pelicula_to_library", title="Añadir película a la videoteca"))
if item.infoLabels["trailer"]:
folder = True
if config.is_xbmc():
folder = False
itemlist.append(item.clone(channel="trailertools", action="buscartrailer", title="Ver Trailer", folder=folder,
contextual=not folder))
return itemlist
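The deleted channel above also illustrates the color-profile convention shared by several channels: setting value 0 means "Sin color", values 1..3 index a palette row. A standalone sketch of that lookup (palette values copied from the file above):

perfil = [['0xFFFFE6CC', '0xFFFFCE9C', '0xFF994D00'],
          ['0xFFA5F6AF', '0xFF5FDA6D', '0xFF11811E'],
          ['0xFF58D3F7', '0xFF2E9AFE', '0xFF2E64FE']]

def pick_colors(setting):
    # setting is the 1-based "perfil" value; 0 disables coloring.
    if setting - 1 >= 0:
        return perfil[setting - 1]
    return ['', '', '']

print(pick_colors(1))  # first palette row
print(pick_colors(0))  # ['', '', ''] -> "Sin color"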

View File

@@ -324,7 +324,7 @@ def findvideos(item):
url = server_url[server_id] + video_id + '.html'
elif server_id == 'BitTorrent':
import urllib
base_url = '%sprotect/v.php' % host
base_url = '%s/protect/v.php' % host
post = {'i':video_id, 'title':item.title}
post = urllib.urlencode(post)
headers = {'Referer':item.url}
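The one-character fix above restores the slash between the host and protect/v.php. A sketch of the POST this hunk then performs, using Python 2's urllib as the codebase does (host and video_id values are made up; the commented call assumes httptools.downloadpage accepts post and headers as seen elsewhere in this diff):

import urllib

host = 'https://example-host.com'      # hypothetical host value
video_id = 'abc123'                    # hypothetical id scraped earlier
base_url = '%s/protect/v.php' % host   # the fixed URL, slash included
post = urllib.urlencode({'i': video_id, 'title': 'Some movie'})
headers = {'Referer': '%s/some-movie/' % host}
print(base_url)  # https://example-host.com/protect/v.php
# data = httptools.downloadpage(base_url, post=post, headers=headers).data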

View File

@@ -8,5 +8,15 @@
"banner": "https://imgur.com/xG5xqBq.png",
"categories": [
"tvshow"
],
"settings": [
{
"id": "include_in_global_search",
"type": "bool",
"label": "Incluir en busqueda global",
"default": true,
"enabled": true,
"visible": true
}
]
}

View File

@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
import re
import base64
from channelselector import get_thumb
from core import httptools
@@ -22,48 +23,64 @@ list_quality = ['default']
def mainlist(item):
logger.info()
thumb_series = get_thumb("channels_tvshow.png")
autoplay.init(item.channel, list_servers, list_quality)
itemlist = list()
itemlist.append(Item(channel=item.channel, action="mainpage", title="Categorías", url=host,
thumbnail=thumb_series))
itemlist.append(Item(channel=item.channel, action="mainpage", title="Más Populares", url=host,
thumbnail=thumb_series))
itemlist.append(Item(channel=item.channel, action="lista", title="Peliculas Animadas", url=host+"peliculas/",
thumbnail=thumb_series))
itemlist.append(Item(channel=item.channel, action="search", title="Buscar", url=host + "?s=",
thumbnail=thumb_series))
autoplay.show_option(item.channel, itemlist)
return itemlist
"""
def search(item, texto):
logger.info()
texto = texto.replace(" ","+")
item.url = item.url+texto
item.url = host + "?s=" + texto
if texto!='':
return lista(item)
"""
return sub_search(item)
def sub_search(item):
logger.info()
itemlist = []
data = httptools.downloadpage(item.url).data
patron = 'class="thumbnail animation-.*?href="([^"]+).*?'
patron += 'img src="([^"]+).*?'
patron += 'alt="([^"]+).*?'
patron += 'class="year">(\d{4})'
matches = scrapertools.find_multiple_matches(data, patron)
for scrapedurl, scrapedthumbnail, scrapedtitle, scrapedyear in matches:
item.action = "findvideos"
item.contentTitle = scrapedtitle
item.contentSerieName = ""
if "serie" in scrapedurl:
item.action = "episodios"
item.contentTitle = ""
item.contentSerieName = scrapedtitle
title = scrapedtitle
if scrapedyear:
item.infoLabels['year'] = int(scrapedyear)
title += " (%s)" %item.infoLabels['year']
itemlist.append(item.clone(thumbnail = scrapedthumbnail,
title = title,
url = scrapedurl
))
tmdb.set_infoLabels(itemlist)
return itemlist
def mainpage(item):
logger.info()
itemlist = []
data1 = httptools.downloadpage(item.url).data
data1 = re.sub(r"\n|\r|\t|\s{2}|&nbsp;", "", data1)
if item.title=="Más Populares":
patron_sec='<a class="lglossary" data-type.+?>(.+?)<\/ul>'
patron='<img .+? src="([^"]+)".+?<a href="([^"]+)".+?>([^"]+)<\/a>' #scrapedthumbnail, #scrapedurl, #scrapedtitle
if item.title=="Categorías":
patron_sec='<ul id="main_header".+?>(.+?)<\/ul><\/div>'
patron='<a href="([^"]+)">([^"]+)<\/a>'#scrapedurl, #scrapedtitle
patron_sec='<ul id="main_header".+?>(.+?)<\/ul><\/div>'
patron='<a href="([^"]+)">([^"]+)<\/a>'#scrapedurl, #scrapedtitle
data = scrapertools.find_single_match(data1, patron_sec)
matches = scrapertools.find_multiple_matches(data, patron)
if item.title=="Géneros" or item.title=="Categorías":
for scrapedurl, scrapedtitle in matches:
@@ -82,11 +99,10 @@ def mainpage(item):
return itemlist
return itemlist
def lista(item):
logger.info()
itemlist = []
data = httptools.downloadpage(item.url).data
data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;", "", data)
if item.title=="Peliculas Animadas":
@@ -114,8 +130,8 @@ def lista(item):
def episodios(item):
logger.info()
itemlist = []
infoLabels = {}
data = httptools.downloadpage(item.url).data
data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;", "", data)
data_lista = scrapertools.find_single_match(data,
@@ -123,51 +139,52 @@ def episodios(item):
show = item.title
patron_caps = '<img alt=".+?" src="([^"]+)"><\/a><\/div><div class=".+?">([^"]+)<\/div>.+?'
patron_caps += '<a .+? href="([^"]+)">([^"]+)<\/a>'
#scrapedthumbnail,#scrapedtempepi, #scrapedurl, #scrapedtitle
matches = scrapertools.find_multiple_matches(data_lista, patron_caps)
for scrapedthumbnail, scrapedtempepi, scrapedurl, scrapedtitle in matches:
tempepi=scrapedtempepi.split(" - ")
if tempepi[0]=='Pel':
tempepi[0]=0
title="{0}x{1} - ({2})".format(tempepi[0], tempepi[1].zfill(2), scrapedtitle)
itemlist.append(Item(channel=item.channel, thumbnail=scrapedthumbnail,
action="findvideos", title=title, url=scrapedurl, show=show))
item.infoLabels["season"] = tempepi[0]
item.infoLabels["episode"] = tempepi[1]
itemlist.append(item.clone(thumbnail=scrapedthumbnail,
action="findvideos", title=title, url=scrapedurl))
if config.get_videolibrary_support() and len(itemlist) > 0:
itemlist.append(Item(channel=item.channel, title="[COLOR yellow]Añadir " + show + " a la videoteca[/COLOR]", url=item.url,
action="add_serie_to_library", extra="episodios", show=show))
return itemlist
def findvideos(item):
logger.info()
import base64
itemlist = []
data = httptools.downloadpage(item.url).data
data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;", "", data)
data1 = scrapertools.find_single_match(data,
'<div id="playex" .+?>(.+?)<\/nav>?\s<\/div><\/div>')
patron = "changeLink\('([^']+)'\)"
matches = re.compile(patron, re.DOTALL).findall(data1)
for url64 in matches:
url =base64.b64decode(url64)
if 'danimados' in url:
new_data = httptools.downloadpage('https:'+url.replace('stream', 'stream_iframe')).data
url = scrapertools.find_single_match(new_data, '<source src="([^"]+)"')
itemlist.append(item.clone(title='%s',url=url, action="play"))
url1 =base64.b64decode(url64)
if 'danimados' in url1:
new_data = httptools.downloadpage('https:'+url1.replace('stream', 'stream_iframe')).data
logger.info("Intel33 %s" %new_data)
url = scrapertools.find_single_match(new_data, "sources: \[\{file:'([^']+)")
if "zkstream" in url:
url1 = httptools.downloadpage(url, follow_redirects=False, only_headers=True).headers.get("location", "")
else:
url1 = url
itemlist.append(item.clone(title='%s',url=url1, action="play"))
tmdb.set_infoLabels(itemlist)
itemlist = servertools.get_servers_itemlist(itemlist, lambda i: i.title % i.server.capitalize())
if config.get_videolibrary_support() and len(itemlist) > 0 and item.contentType=="movie" and item.contentChannel!='videolibrary':
itemlist.append(
item.clone(channel=item.channel, title='[COLOR yellow]Añadir esta pelicula a la videoteca[/COLOR]', url=item.url,
action="add_pelicula_to_library", contentTitle=item.show))
action="add_pelicula_to_library"))
autoplay.start(itemlist, item)
return itemlist
def play(item):
item.thumbnail = item.contentThumbnail
return [item]
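The reworked findvideos above base64-decodes each changeLink() argument and, when the decoded URL points at zkstream, resolves the real location from the redirect headers. A condensed sketch of that flow with the network call stubbed out (fetch_headers stands in for httptools.downloadpage(url, follow_redirects=False, only_headers=True).headers):

import base64

def resolve_link(url64, fetch_headers):
    url = base64.b64decode(url64)
    if 'zkstream' in url:
        # Follow the redirect by hand, as the hunk above does.
        return fetch_headers(url).get('location', '')
    return url

stub = lambda url: {'location': 'https://cdn.example/video.mp4'}  # fake response
encoded = base64.b64encode('https://zkstream.example/v/1')
print(resolve_link(encoded, stub))  # https://cdn.example/video.mp4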

View File

@@ -519,17 +519,35 @@ def findvideos(item):
item, itemlist = generictools.post_tmdb_findvideos(item, itemlist)
#Now handle the .torrent links
for scrapedurl in matches: #read the torrents with the different qualities
for scrapedurl in matches: #read the torrents with the different qualities
#Make a copy of the Item to work on
item_local = item.clone()
#Check whether it already carries a size; if not, look it up in the .torrent file
size = scrapertools.find_single_match(item_local.quality, '\s\[(\d+,?\d*?\s\w\s?[b|B])\]')
if not size:
size = generictools.get_torrent_size(item_local.url) #Look up the size in the .torrent
if size:
item_local.title = re.sub(r'\s\[\d+,?\d*?\s\w[b|B]\]', '', item_local.title) #Strip the size from the title, if present
item_local.title = '%s [%s]' % (item_local.title, size) #Append the size to the end of the title
size = size.replace('GB', 'G B').replace('Gb', 'G b').replace('MB', 'M B').replace('Mb', 'M b')
item_local.quality = re.sub(r'\s\[\d+,?\d*?\s\w\s?[b|B]\]', '', item_local.quality) #Strip the size from the quality, if present
item_local.quality = '%s [%s]' % (item_local.quality, size) #Append the size to the end of the quality
#Now render the Torrent link
item_local.url = scrapedurl
if host not in item_local.url and host.replace('https', 'http') not in item_local.url :
item_local.url = host + item_local.url
item_local.title = '[COLOR yellow][?][/COLOR] [COLOR yellow][Torrent][/COLOR] [COLOR limegreen][%s][/COLOR] [COLOR red]%s[/COLOR]' % (item_local.quality, str(item_local.language)) #Build the Torrent title
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\[?\]?\]\[\/COLOR\]', '', item_local.title) #Strip empty tags
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\/COLOR\]', '', item_local.title) #Strip empty color tags
item_local.title = '[COLOR yellow][?][/COLOR] [COLOR yellow][Torrent][/COLOR] [COLOR limegreen][%s][/COLOR] [COLOR red]%s[/COLOR]' % (item_local.quality, str(item_local.language))
#Prepare the title and quality, stripping empty tags
item_local.title = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', item_local.title)
item_local.title = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', item_local.title)
item_local.title = item_local.title.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
item_local.quality = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', item_local.quality)
item_local.quality = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', item_local.quality)
item_local.quality = item_local.quality.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
item_local.alive = "??" #Link quality not verified
item_local.action = "play" #Play video
item_local.server = "torrent" #Torrent server
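This size-tagging block repeats across most torrent channels in the commit: strip any existing [size] suffix, then append the current one. A self-contained sketch of the normalization (sample title made up):

import re

def retag_size(title, size):
    # Drop a previous "[1,4 GB]"-style tag, then append the new size.
    title = re.sub(r'\s\[\d+,?\d*?\s\w\s?[bB]\]', '', title)
    return '%s [%s]' % (title, size)

print(retag_size('Movie (2018) [1,4 GB]', '1,4 G B'))
# Movie (2018) [1,4 G B]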

View File

@@ -171,8 +171,11 @@ def listado(item):
#Strip the unnecessary junk from the title
title = title.replace("Dual", "").replace("dual", "").replace("Subtitulada", "").replace("subtitulada", "").replace("Subt", "").replace("subt", "").replace("Sub", "").replace("sub", "").replace("(Proper)", "").replace("(proper)", "").replace("Proper", "").replace("proper", "").replace("#", "").replace("(Latino)", "").replace("Latino", "")
title = title.replace("- HDRip", "").replace("(HDRip)", "").replace("- Hdrip", "").replace("(microHD)", "").replace("(DVDRip)", "").replace("(HDRip)", "").replace("(BR-LINE)", "").replace("(HDTS-SCREENER)", "").replace("(BDRip)", "").replace("(BR-Screener)", "").replace("(DVDScreener)", "").replace("TS-Screener", "").replace(" TS", "").replace(" Ts", "")
title = title.replace("- HDRip", "").replace("(HDRip)", "").replace("- Hdrip", "").replace("(microHD)", "").replace("(DVDRip)", "").replace("(HDRip)", "").replace("(BR-LINE)", "").replace("(HDTS-SCREENER)", "").replace("(BDRip)", "").replace("(BR-Screener)", "").replace("(DVDScreener)", "").replace("TS-Screener", "").replace(" TS", "").replace(" Ts", "").replace("temporada", "").replace("Temporada", "").replace("capitulo", "").replace("Capitulo", "")
title = re.sub(r'(?:\d+)?x.?\s?\d+', '', title)
title = re.sub(r'\??\s?\d*?\&.*', '', title).title().strip()
item_local.from_title = title #Keep this tag for possible title disambiguation
if item_local.extra == "peliculas": #prepare the Item for movies
@@ -190,16 +193,17 @@ def listado(item):
item_local.contentType = "episode"
item_local.extra = "series"
epi_mult = scrapertools.find_single_match(item_local.url, r'cap.*?-\d+-al-(\d+)')
item_local.contentSeason = scrapertools.find_single_match(item_local.url, r'temp.*?-(\d+)')
item_local.contentSeason = scrapertools.find_single_match(item_local.url, r'temporada-(\d+)')
item_local.contentEpisodeNumber = scrapertools.find_single_match(item_local.url, r'cap.*?-(\d+)')
if not item_local.contentSeason:
item_local.contentSeason = scrapertools.find_single_match(item_local.url, r'-(\d+)[x|X]\d+')
if not item_local.contentEpisodeNumber:
item_local.contentEpisodeNumber = scrapertools.find_single_match(item_local.url, r'-\d+[x|X](\d+)')
if item_local.contentSeason < 1:
item_local.contentSeason = 1
if not item_local.contentSeason or item_local.contentSeason < 1:
item_local.contentSeason = 0
if item_local.contentEpisodeNumber < 1:
item_local.contentEpisodeNumber = 1
item_local.contentSerieName = title
if epi_mult:
title = "%sx%s al %s -" % (item_local.contentSeason, str(item_local.contentEpisodeNumber).zfill(2), str(epi_mult).zfill(2)) #Build a title with the episode range
@@ -269,11 +273,11 @@ def findvideos(item):
#data = unicode(data, "utf-8", errors="replace")
#Add the size for everything
size = scrapertools.find_single_match(item.quality, '\s\[(\d+,?\d*?\s\w[b|B]s)\]')
size = scrapertools.find_single_match(item.quality, '\s\[(\d+,?\d*?\s\w\s?[b|B]s)\]')
if size:
item.title = re.sub('\s\[\d+,?\d*?\s\w[b|B]s\]', '', item.title) #Strip the size from the title, if present
item.title = '%s [%s]' % (item.title, size) #Append the size to the end of the title
item.quality = re.sub('\s\[\d+,?\d*?\s\w[b|B]s\]', '', item.quality) #Strip the size from the quality, if present
item.quality = re.sub('\s\[\d+,?\d*?\s\w\s?[b|B]s\]', '', item.quality) #Strip the size from the quality, if present
patron_t = '<div class="enlace_descarga".*?<a href="(.*?\.torrent)"'
link_torrent = scrapertools.find_single_match(data, patron_t)
@@ -299,9 +303,11 @@ def findvideos(item):
#Call the method that builds the video's general title with all the information obtained from TMDB
item, itemlist = generictools.post_tmdb_findvideos(item, itemlist)
if not size:
size = generictools.get_torrent_size(link_torrent) #Look up the size in the .torrent
if size:
item.quality = '%s [%s]' % (item.quality, size) #Append the size to the end of the quality
item.quality = item.quality.replace("G", "G ").replace("M", "M ") #Avoid the word reserved by Unify
item.quality = item.quality.replace("GB", "G B").replace("MB", "M B") #Avoid the word reserved by Unify
#Make a copy of the Item to work on
item_local = item.clone()
@@ -313,8 +319,15 @@ def findvideos(item):
item_local.quality += "[Torrent]"
item_local.url = link_torrent
item_local.title = '[COLOR yellow][?][/COLOR] [COLOR yellow][Torrent][/COLOR] [COLOR limegreen][%s][/COLOR] [COLOR red]%s[/COLOR]' % (item_local.quality, str(item_local.language)) #Build the Torrent title
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\[?\]?\]\[\/COLOR\]', '', item_local.title) #Strip empty tags
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\/COLOR\]', '', item_local.title) #Strip empty color tags
#Prepare the title and quality, stripping empty tags
item_local.title = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', item_local.title)
item_local.title = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', item_local.title)
item_local.title = item_local.title.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
item_local.quality = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', item_local.quality)
item_local.quality = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', item_local.quality)
item_local.quality = item_local.quality.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
item_local.alive = "??" #Link quality not verified
item_local.action = "play" #Play video
item_local.server = "torrent" #Torrent server
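The listado() changes above make season/episode detection two-step: try the explicit temporada-NN / cap...-NN forms first, then fall back to the NxMM form. A sketch of that extraction with find_single_match-like semantics (first group, or an empty string on no match):

import re

def first_match(text, pattern):
    m = re.search(pattern, text)
    return m.group(1) if m else ''

def season_episode(url):
    season = first_match(url, r'temporada-(\d+)') or first_match(url, r'-(\d+)[xX]\d+')
    episode = first_match(url, r'cap.*?-(\d+)') or first_match(url, r'-\d+[xX](\d+)')
    return season, episode

print(season_episode('/serie-temporada-2-capitulo-5/'))  # ('2', '5')
print(season_episode('/serie-3x07/'))                    # ('3', '07')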

View File

@@ -682,7 +682,7 @@ def findvideos(item):
#Now handle the .torrent links
itemlist_alt = [] #Use an intermediate list so the episodes can be sorted
if matches_torrent:
for scrapedurl, scrapedquality, scrapedlang in matches_torrent: #read the torrents with the different qualities
for scrapedurl, scrapedquality, scrapedlang in matches_torrent: #read the torrents with the different qualities
#Make a copy of the Item to work on
item_local = item.clone()
@@ -756,9 +756,19 @@ def findvideos(item):
#Now render the Torrent link
item_local.url = host + scrapedtorrent
item_local.title = '[COLOR yellow][?][/COLOR] [COLOR yellow][Torrent][/COLOR] [COLOR limegreen][%s][/COLOR] [COLOR red]%s[/COLOR]' % (quality, str(item_local.language)) #Build the Torrent title
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\[?\]?\]\[\/COLOR\]', '', item_local.title) #Strip empty tags
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\/COLOR\]', '', item_local.title) #Strip empty color tags
size = generictools.get_torrent_size(item_local.url) #Look up the size in the .torrent
if size:
quality += ' [%s]' % size
item_local.title = '[COLOR yellow][?][/COLOR] [COLOR yellow][Torrent][/COLOR] [COLOR limegreen][%s][/COLOR] [COLOR red]%s[/COLOR]' % (quality, str(item_local.language))
#Prepare the title and quality, stripping empty tags
item_local.title = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', item_local.title)
item_local.title = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', item_local.title)
item_local.title = item_local.title.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
quality = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', quality)
quality = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', quality)
quality = quality.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
item_local.alive = "??" #Link quality not verified
item_local.action = "play" #Play video
item_local.server = "torrent" #Torrent server
@@ -896,8 +906,15 @@ def findvideos(item):
#Now render the Direct link
item_local.url = enlace
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\[?\]?\]\[\/COLOR\]', '', item_local.title) #Strip empty tags
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\/COLOR\]', '', item_local.title) #Strip empty color tags
#Prepare the title and quality, stripping empty tags
item_local.title = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', item_local.title)
item_local.title = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', item_local.title)
item_local.title = item_local.title.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
quality = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', quality)
quality = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', quality)
quality = quality.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
item_local.action = "play" #Play video
item_local.server = servidor #Direct server

View File

@@ -474,14 +474,17 @@ def findvideos(item):
#Add the duration, which will be in item.quality
if scrapertools.find_single_match(item.quality, '(\[\d+:\d+)') and not scrapertools.find_single_match(item_local.quality, '(\[\d+:\d+)'):
item_local.quality = '%s [/COLOR][COLOR white][%s h]' % (item_local.quality, scrapertools.find_single_match(item.quality, '(\d+:\d+)'))
#if size and item_local.contentType != "episode":
if not size:
size = generictools.get_torrent_size(scrapedurl) #Look up the size in the .torrent
if size:
size = size.replace(".", ",").replace("B,", " B").replace("b,", " b")
if '[/COLOR][COLOR white]' in item_local.quality:
item_local.quality = '%s [%s]' % (item_local.quality, size)
else:
item_local.quality = '%s [/COLOR][COLOR white][%s]' % (item_local.quality, size)
if item_local.action == 'show_result': #Comes from a global search
if item_local.action == 'show_result': #Comes from a global search
channel = item_local.channel.capitalize()
if item_local.from_channel:
channel = item_local.from_channel.capitalize()
@@ -491,8 +494,15 @@ def findvideos(item):
if scrapedurl:
item_local.url = scrapedurl
item_local.title = '[COLOR yellow][?][/COLOR] [COLOR yellow][Torrent][/COLOR] [COLOR limegreen][%s][/COLOR] [COLOR red]%s[/COLOR]' % (item_local.quality, str(item_local.language)) #Build the Torrent title
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\[?\]?\]\[\/COLOR\]', '', item_local.title).strip() #Strip empty tags
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\/COLOR\]', '', item_local.title).strip() #Strip empty color tags
#Prepare the title and quality, stripping empty tags
item_local.title = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', item_local.title)
item_local.title = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', item_local.title)
item_local.title = item_local.title.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
item_local.quality = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', item_local.quality)
item_local.quality = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', item_local.quality)
item_local.quality = item_local.quality.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
item_local.alive = "??" #Link quality not verified
item_local.action = "play" #Play video
item_local.server = "torrent" #Torrent server
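The same cleanup block is stamped into every findvideos in this commit: two regexes drop empty [COLOR ...][...][/COLOR] shells, then literal replaces sweep up stray bracket pairs. A sketch of it as one helper:

import re

def clean_labels(text):
    # Remove color-wrapped empty tags, then leftover bracket debris.
    text = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', text)
    text = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', text)
    for junk in ('--', '[]', '()', '(/)', '[/]'):
        text = text.replace(junk, '')
    return text.strip()

print(clean_labels('[COLOR yellow][][/COLOR] [COLOR limegreen][720p][/COLOR] []'))
# [COLOR limegreen][720p][/COLOR]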

plugin.video.alfa/channels/locopelis.py Executable file → Normal file
View File

@@ -355,7 +355,7 @@ def findvideos(item):
new_url = get_link(get_source(item.url))
new_url = get_link(get_source(new_url))
video_id = scrapertools.find_single_match(new_url, 'http.*?h=(\w+)')
new_url = '%s%s' % (host, 'playeropstream/api.php')
new_url = '%s%s' % (host.replace('.com','.tv'), 'playeropstream/api.php')
post = {'h': video_id}
post = urllib.urlencode(post)
data = httptools.downloadpage(new_url, post=post).data

View File

@@ -0,0 +1,12 @@
{
"id": "maxipelis24",
"name": "Maxipelis24",
"active": true,
"adult": false,
"language": ["lat"],
"thumbnail": "maxipelis24.png",
"banner": "",
"categories": [
"movie"
]
}

View File

@@ -0,0 +1,125 @@
# -*- coding: utf-8 -*-
import re
import urlparse
import urllib
from core import servertools
from core import httptools
from core import scrapertools
from core.item import Item
from platformcode import config, logger
from channelselector import get_thumb
host="http://maxipelis24.com"
def mainlist(item):
logger.info()
itemlist = []
itemlist.append(Item(channel=item.channel, title="peliculas", action="movies", url=host, thumbnail=get_thumb('movies', auto=True)))
itemlist.append(Item(channel=item.channel, action="category", title="Año de Estreno", url=host, cat='year', thumbnail=get_thumb('year', auto=True)))
itemlist.append(Item(channel=item.channel, action="category", title="Géneros", url=host, cat='genre', thumbnail=get_thumb('genres', auto=True)))
itemlist.append(Item(channel=item.channel, action="category", title="Calidad", url=host, cat='quality', thumbnail=get_thumb("quality", auto=True)))
itemlist.append(Item(channel=item.channel, title="Buscar", action="search", url=host+"?s=", thumbnail=get_thumb("search", auto=True)))
return itemlist
def search(item, texto):
logger.info()
texto = texto.replace(" ", "+")
item.url = host + "?s=" + texto
if texto != '':
return movies(item)
def category(item):
logger.info()
itemlist = []
data = httptools.downloadpage(item.url).data
data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;","", data)
if item.cat == 'genre':
data = scrapertools.find_single_match(data, '<h3>Géneros.*?</div>')
patron = '<a href="([^"]+)">([^<]+)<'
elif item.cat == 'year':
data = scrapertools.find_single_match(data, '<h3>Año de estreno.*?</div>')
patron = 'li><a href="([^"]+)">([^<]+).*?<'
elif item.cat == 'quality':
data = scrapertools.find_single_match(data, '<h3>Calidad.*?</div>')
patron = 'li><a href="([^"]+)">([^<]+)<'
matches = re.compile(patron, re.DOTALL).findall(data)
for scrapedurl , scrapedtitle in matches:
itemlist.append(Item(channel=item.channel, action='movies', title=scrapedtitle, url=scrapedurl, type='cat', first=0))
return itemlist
def movies(item):
logger.info()
itemlist = []
data = httptools.downloadpage(item.url).data
data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;","", data)
patron = '<div id="mt.+?href="([^"]+)".+?'
patron += '<img src="([^"]+)" alt="([^"]+)".+?'
patron += '<span class="imdb">.*?>([^<]+)<.*?'
patron += '<span class="ttx">([^<]+).*?'
patron += 'class="year">([^<]+).+?class="calidad2">([^<]+)<'
matches = re.compile(patron, re.DOTALL).findall(data)
for scrapedurl, img, scrapedtitle, ranking, resto, year, quality in matches:
plot = scrapertools.htmlclean(resto).strip()
title = '%s [COLOR yellow](%s)[/COLOR] [COLOR red][%s][/COLOR]'% (scrapedtitle, ranking, quality)
itemlist.append(Item(channel=item.channel,
title=title,
url=scrapedurl,
action="findvideos",
plot=plot,
thumbnail=img,
contentTitle = scrapedtitle,
contentType = "movie",
quality=quality))
#Pagination
next_page = '<div class="pag_.*?href="([^"]+)">Siguiente<'
matches = re.compile(next_page, re.DOTALL).findall(data)
if matches:
url = urlparse.urljoin(item.url, matches[0])
itemlist.append(Item(channel=item.channel, action = "movies", title = "Página siguiente >>",url = url))
return itemlist
def findvideos(item):
logger.info()
itemlist=[]
data = httptools.downloadpage(item.url).data
data = scrapertools.get_match(data, '<div id="contenedor">(.*?)</div></div></div>')
# Find the links to the videos
listavideos = servertools.findvideos(data)
for video in listavideos:
videotitle = scrapertools.unescape(video[0])
url = video[1]
server = video[2]
itemlist.append(Item(channel=item.channel, action="play", server=server, title=videotitle, url=url,
thumbnail=item.thumbnail, plot=item.plot, fulltitle=item.title, folder=False))
# Option "Add this movie to the KODI library"
if config.get_videolibrary_support() and len(itemlist) > 0 and item.extra != 'findvideos':
itemlist.append(
Item(channel=item.channel,
title='[COLOR yellow]Añadir esta pelicula a la videoteca[/COLOR]',
url=item.url,
action="add_pelicula_to_library",
extra="findvideos",
contentTitle=item.contentTitle,
thumbnail=item.thumbnail
))
return itemlist
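The new channel ends with Alfa's usual pagination tail: scrape the "Siguiente" link and queue another movies() pass. A sketch with canned HTML (urlparse is Python 2, as in the channel above):

import re
import urlparse

data = '<div class="pag_x"><a href="/page/2">Siguiente</a></div>'
next_page = '<div class="pag_.*?href="([^"]+)">Siguiente<'
matches = re.compile(next_page, re.DOTALL).findall(data)
if matches:
    url = urlparse.urljoin('http://maxipelis24.com/page/1', matches[0])
    print(url)  # http://maxipelis24.com/page/2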

View File

@@ -845,18 +845,21 @@ def findvideos(item):
# Set the quality, if needed
if not item_local.quality:
item_local.quality = ''
if scrapertools.find_single_match(data, '<b>Formato:<\/b>&\w+;\s?([^<]+)<br>'):
item_local.quality = scrapertools.find_single_match(data, '<b>Formato:<\/b>&\w+;\s?([^<]+)<br>')
elif "hdtv" in item_local.url.lower() or "720p" in item_local.url.lower() or "1080p" in item_local.url.lower() or "4k" in item_local.url.lower():
item_local.quality = scrapertools.find_single_match(item_local.url, '.*?_([H|7|1|4].*?)\.torrent')
item_local.quality = item_local.quality.replace("_", " ")
# Extract the video size
if scrapertools.find_single_match(data, '<b>Tama.*?:<\/b>&\w+;\s?([^<]+B)<?'):
size = scrapertools.find_single_match(data, '<b>Tama.*?:<\/b>&\w+;\s?([^<]+B)<?')
else:
size = scrapertools.find_single_match(item_local.url, '(\d{1,3},\d{1,2}?\w+)\.torrent')
size = size.upper().replace(".", ",").replace("G", " G ").replace("M", " M ") #replace . with , because Unify deletes it
if not size:
size = generictools.get_torrent_size(item_local.url) #Look up the size in the .torrent
if size:
item_local.title = re.sub('\s\[\d+,?\d*?\s\w[b|B]\]', '', item_local.title) #Strip the size from the title, if present
item_local.title = '%s [%s]' % (item_local.title, size) #Append the size to the end of the title
@@ -866,8 +869,15 @@ def findvideos(item):
#Now render the Torrent link, if there is one
if item_local.url: # Is there a Torrent?
item_local.title = '[COLOR yellow][?][/COLOR] [COLOR yellow][Torrent][/COLOR] [COLOR limegreen][%s][/COLOR] [COLOR red]%s[/COLOR]' % (item_local.quality, str(item_local.language)) #Build the Torrent title
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\[?\]?\]\[\/COLOR\]', '', item_local.title) #Strip empty tags
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\/COLOR\]', '', item_local.title) #Strip empty color tags
#Prepare the title and quality, stripping empty tags
item_local.title = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', item_local.title)
item_local.title = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', item_local.title)
item_local.title = item_local.title.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
item_local.quality = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', item_local.quality)
item_local.quality = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', item_local.quality)
item_local.quality = item_local.quality.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
item_local.alive = "??" #Link quality not verified
item_local.action = "play" #Play video
item_local.server = "torrent" #Torrent server

View File

@@ -1368,12 +1368,14 @@ def findvideos(item):
size = scrapertools.find_single_match(data, '<div class="fichas-box"><div class="entry-right"><div style="[^"]+"><span class="[^"]+"><strong>Size:<\/strong>?\s(\d+?\.?\d*?\s\w[b|B])<\/span>')
size = size.replace(".", ",") #replace . with , because Unify deletes it
if not size:
size = scrapertools.find_single_match(item.quality, '\s\[(\d+,?\d*?\s\w[b|B])\]')
size = scrapertools.find_single_match(item.quality, '\s\[(\d+,?\d*?\s\w\s?[b|B])\]')
if not size:
size = generictools.get_torrent_size(item.url) #Look up the size in the .torrent
if size:
item.title = re.sub(r'\s\[\d+,?\d*?\s\w[b|B]\]', '', item.title) #Strip the size from the title, if present
item.title = '%s [%s]' % (item.title, size) #Append the size to the end of the title
size = size.replace('GB', 'G B').replace('Gb', 'G b').replace('MB', 'M B').replace('Mb', 'M b')
item.quality = re.sub(r'\s\[\d+,?\d*?\s\w[b|B]\]', '', item.quality) #Strip the size from the quality, if present
item.quality = re.sub(r'\s\[\d+,?\d*?\s\w\s?[b|B]\]', '', item.quality) #Strip the size from the quality, if present
#Call the method that builds the video's general title with all the information obtained from TMDB
item, itemlist = generictools.post_tmdb_findvideos(item, itemlist)
@@ -1399,8 +1401,15 @@ def findvideos(item):
else:
quality = item_local.quality
item_local.title = '[COLOR yellow][?][/COLOR] [COLOR yellow][Torrent][/COLOR] [COLOR limegreen][%s][/COLOR] [COLOR red]%s[/COLOR]' % (quality, str(item_local.language)) #Build the Torrent title
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\[?\]?\]\[\/COLOR\]', '', item_local.title).strip() #Strip empty tags
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\/COLOR\]', '', item_local.title).strip() #Strip empty color tags
#Prepare the title and quality, stripping empty tags
item_local.title = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', item_local.title)
item_local.title = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', item_local.title)
item_local.title = item_local.title.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
quality = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', quality)
quality = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', quality)
quality = quality.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
item_local.alive = "??" #Link quality not verified
item_local.action = "play" #Play video
item_local.server = "torrent" #Server
@@ -1485,9 +1494,15 @@ def findvideos(item):
item_local.action = "play"
item_local.server = servidor
item_local.url = enlace
item_local.title = item_local.title.replace("[]", "").strip()
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\[?\]?\]\[\/COLOR\]', '', item_local.title).strip()
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\/COLOR\]', '', item_local.title).strip()
#Prepare the title and quality, stripping empty tags
item_local.title = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', item_local.title)
item_local.title = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', item_local.title)
item_local.title = item_local.title.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
item_local.quality = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', item_local.quality)
item_local.quality = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', item_local.quality)
item_local.quality = item_local.quality.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
itemlist.append(item_local.clone())
except:
@@ -1582,9 +1597,16 @@ def findvideos(item):
item_local.action = "play"
item_local.server = servidor
item_local.url = enlace
item_local.title = parte_title.replace("[]", "").strip()
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\[?\]?\]\[\/COLOR\]', '', item_local.title).strip()
item_local.title = re.sub(r'\[COLOR \w+\]-\[\/COLOR\]', '', item_local.title).strip()
item_local.title = parte_title.strip()
#Prepare the title and quality, stripping empty tags
item_local.title = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', item_local.title)
item_local.title = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', item_local.title)
item_local.title = item_local.title.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
item_local.quality = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', item_local.quality)
item_local.quality = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', item_local.quality)
item_local.quality = item_local.quality.replace("--", "").replace("[]", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
itemlist.append(item_local.clone())
except:

View File

@@ -89,6 +89,7 @@ def search(item, texto):
logger.info()
texto = texto.replace(" ", "+")
item.url = host + "/search/%s" % texto
if item.contentType == '': item.contentType = 'movie'
try:
return scraper(item)
# Catch the exception so a failing channel does not break the global search

plugin.video.alfa/channels/pelisipad.py Executable file → Normal file
View File

@@ -519,6 +519,7 @@ def findvideos(item):
if item.video_urls:
import random
import base64
item.video_urls.sort(key=lambda it: (it[1], random.random()), reverse=True)
i = 0
actual_quality = ""
@@ -534,6 +535,7 @@ def findvideos(item):
title += " [COLOR green]Mirror %s[/COLOR] - %s" % (str(i + 1), item.fulltitle)
url = vid % "%s" % base64.b64decode("dHQ9MTQ4MDE5MDQ1MSZtbT1NRzZkclhFand6QmVzbmxSMHNZYXhBJmJiPUUwb1dVVVgx"
"WTBCQTdhWENpeU9paUE=")
url += '|User-Agent=%s' % httptools.get_user_agent
itemlist.append(item.clone(title=title, action="play", url=url, video_urls=""))
i += 1
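The added line relies on Kodi's convention of passing request headers after a pipe in the play URL; here the addon appends its User-Agent so the CDN accepts the request. A sketch of the convention (URL and agent values made up; note the diff references httptools.get_user_agent without calling it, so whether it is an attribute or a function is left to the httptools module):

video_url = 'https://cdn.example/stream.mp4'  # hypothetical media URL
user_agent = 'Mozilla/5.0'
play_url = '%s|User-Agent=%s' % (video_url, user_agent)
print(play_url)  # https://cdn.example/stream.mp4|User-Agent=Mozilla/5.0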

View File

@@ -356,7 +356,7 @@ def get_links_by_language(item, data):
patron = 'data-source=(.*?)data.*?srt=(.*?)data-iframe.*?Opci.*?<.*?hidden>[^\(]\((.*?)\)'
matches = re.compile(patron, re.DOTALL).findall(data)
if language in IDIOMAS:
language == IDIOMAS[language]
language = IDIOMAS[language]
for url, sub, quality in matches:
if 'http' not in url:
@@ -403,7 +403,7 @@ def findvideos(item):
i.quality) )
# Required by FilterTools
itemlist = filtertools.get_links(video_list, item, list_language)
video_list = filtertools.get_links(video_list, item, list_language)
# Required by AutoPlay
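Both fixes in this hunk repair the same class of bug: a value computed on the right-hand side was never stored. language == IDIOMAS[language] compares and discards, and the filtered list previously landed in itemlist instead of video_list. A minimal illustration:

IDIOMAS = {'la': 'LAT'}
language = 'la'
language == IDIOMAS[language]  # no-op: evaluates to False and is discarded
print(language)                # la
language = IDIOMAS[language]   # actually rebinds the name
print(language)                # LAT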

View File

@@ -9,16 +9,16 @@ from channelselector import get_thumb
from channels import autoplay
from channels import filtertools
from core import httptools
from core import jsontools
from core import scrapertools
from core import servertools
from core import tmdb
from core.item import Item
from platformcode import config, logger
from lib import jsunpack
from platformcode import config, logger, platformtools
idio = {'es-mx': 'LAT','es-es': 'ESP','en': 'VO'}
cali = {'poor': 'SD','low': 'SD','high': 'HD'}
cali = {'poor': 'SD','low': 'SD','medium': 'HD','high': 'HD'}
list_language = idio.values()
list_quality = ["SD","HD"]
@@ -44,9 +44,17 @@ def mainlist(item):
itemlist.append(Item(channel = item.channel, title = "Por género", action = "generos", url = host, extra = "Genero", thumbnail = get_thumb("genres", auto = True) ))
itemlist.append(Item(channel = item.channel, title = ""))
itemlist.append(Item(channel = item.channel, title = "Buscar", action = "search", url = host + "/search?term=", thumbnail = get_thumb("search", auto = True)))
itemlist.append(item.clone(title="Configurar canal...", text_color="gold", action="configuracion", folder=False))
autoplay.show_option(item.channel, itemlist)
return itemlist
def configuracion(item):
ret = platformtools.show_channel_settings()
platformtools.itemlist_refresh()
return ret
def destacadas(item):
logger.info()
itemlist = []
@@ -178,12 +186,10 @@ def findvideos(item):
dict = jsontools.load(bloque)
urlx = httptools.downloadpage(host + dict[0]["url"]) #The full page must be downloaded so Cloudflare can be bypassed
for datos in dict:
url1 = httptools.downloadpage(host + datos["url"], follow_redirects=False, only_headers=True).headers.get("location", "")
titulo = "Ver en: %s (" + cali[datos["quality"]] + ") (" + idio[datos["audio"]] + ")"
text_color = "white"
if "youtube" in url1:
titulo = "Ver trailer: %s"
text_color = "yellow"
url1 = datos["url"]
hostname = scrapertools.find_single_match(datos["hostname"].replace("www.",""), "(.*?)\.")
if hostname == "my": hostname = "mailru"
titulo = "Ver en: " + hostname.capitalize() + " (" + cali[datos["quality"]] + ") (" + idio[datos["audio"]] + ")"
itemlist.append(
item.clone(channel = item.channel,
action = "play",
@@ -192,7 +198,6 @@ def findvideos(item):
title = titulo,
url = url1
))
itemlist = servertools.get_servers_itemlist(itemlist, lambda i: i.title % i.server.capitalize())
itemlist.sort(key=lambda it: (it.language, it.server))
tmdb.set_infoLabels(itemlist, __modo_grafico__)
# Required by FilterTools
@@ -217,5 +222,11 @@ def findvideos(item):
def play(item):
item.thumbnail = item.contentThumbnail
return [item]
itemlist = []
url1 = httptools.downloadpage(host + item.url, follow_redirects=False, only_headers=True).headers.get("location", "")
if "storage" in url1:
url1 = scrapertools.find_single_match(url1, "src=(.*mp4)").replace("%3A",":").replace("%2F","/")
itemlist.append(item.clone(url=url1))
itemlist = servertools.get_servers_itemlist(itemlist)
itemlist[0].thumbnail = item.contentThumbnail
return itemlist

View File

@@ -143,6 +143,85 @@ def settings(item):
def setting_channel(item):
if config.get_platform(True)['num_version'] >= 17.0: # From Kodi 16 multiselect can be used, and from 17 with preselect
return setting_channel_new(item)
else:
return setting_channel_old(item)
def setting_channel_new(item):
import channelselector, xbmcgui
from core import channeltools
# Load the list of options (the user's active channels that allow global search)
# ------------------------
lista = []; ids = []; lista_lang = []
channels_list = channelselector.filterchannels('all')
for channel in channels_list:
channel_parameters = channeltools.get_channel_parameters(channel.channel)
# Do not include the channel if "include_in_global_search" is missing from its configuration
if not channel_parameters['include_in_global_search']:
continue
lbl = '%s' % channel_parameters['language']
lbl += ' %s' % ', '.join(config.get_localized_category(categ) for categ in channel_parameters['categories'])
it = xbmcgui.ListItem(channel.title, lbl)
it.setArt({ 'thumb': channel.thumbnail, 'fanart': channel.fanart })
lista.append(it)
ids.append(channel.channel)
lista_lang.append(channel_parameters['language'])
# Pre-selection dialog
# ----------------------------
preselecciones_std = ['Modificar selección actual', 'Modificar partiendo de Todos', 'Modificar partiendo de Ninguno', 'Modificar partiendo de Castellano', 'Modificar partiendo de Latino']
if item.action == 'setting_channel':
# Configuration of the channels included in the search
preselecciones = preselecciones_std
presel_values = [1, 2, 3, 4, 5]
else:
# Called from "search in other channels" (the selection can be skipped to go straight to the search)
preselecciones = ['Buscar con la selección actual'] + preselecciones_std
presel_values = [0, 1, 2, 3, 4, 5]
ret = platformtools.dialog_select(config.get_localized_string(59994), preselecciones)
if ret == -1: return False # cancel requested
if presel_values[ret] == 0: return True # continue without changes
elif presel_values[ret] == 3: preselect = []
elif presel_values[ret] == 2: preselect = range(len(ids))
elif presel_values[ret] in [4, 5]:
busca = 'cast' if presel_values[ret] == 4 else 'lat'
preselect = []
for i, lg in enumerate(lista_lang):
if busca in lg or '*' in lg:
preselect.append(i)
else:
preselect = []
for i, canal in enumerate(ids):
channel_status = config.get_setting('include_in_global_search', canal)
if channel_status:
preselect.append(i)
# Selection dialog
# ------------------------
ret = xbmcgui.Dialog().multiselect(config.get_localized_string(59994), lista, preselect=preselect, useDetails=True)
if ret == None: return False # cancel requested
seleccionados = [ids[i] for i in ret]
# Save the channel changes for the search
# -------------------------------------------
for canal in ids:
channel_status = config.get_setting('include_in_global_search', canal)
if channel_status is None: channel_status = True
if channel_status and canal not in seleccionados:
config.set_setting('include_in_global_search', False, canal)
elif not channel_status and canal in seleccionados:
config.set_setting('include_in_global_search', True, canal)
return True
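setting_channel_new depends on xbmcgui.Dialog().multiselect with preselect and useDetails, so it only runs inside Kodi 17+. The preselect computation itself is plain Python, though; a sketch of the language-based branch (sample data made up):

lista_lang = [['cast'], ['lat'], ['cast', 'lat'], ['*']]  # one entry per channel
busca = 'cast'  # chosen when the user picks "Modificar partiendo de Castellano"
preselect = [i for i, lg in enumerate(lista_lang) if busca in lg or '*' in lg]
print(preselect)  # [0, 2, 3] -> indexes pre-ticked in the dialog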
def setting_channel_old(item):
channels_path = os.path.join(config.get_runtime_path(), "channels", '*.json')
channel_language = config.get_setting("channel_language", default="all")
@@ -204,6 +283,7 @@ def save_settings(item, dict_values):
config.set_setting("include_in_global_search", dict_values[v], v)
progreso.close()
return True
def cb_custom_button(item, dict_values):
@@ -354,8 +434,8 @@ def do_search(item, categories=None):
categories = ["Películas"]
setting_item = Item(channel=item.channel, title=config.get_localized_string(59994), folder=False,
thumbnail=get_thumb("search.png"))
setting_channel(setting_item)
if not setting_channel(setting_item):
return False
if categories is None:
categories = []
@@ -474,8 +554,8 @@ def do_search(item, categories=None):
# compatible with both old and new Python versions
if multithread:
pendent = [a for a in threads if a.isAlive()]
t = float(100) / len(pendent)
while pendent:
if len(pendent) > 0: t = float(100) / len(pendent)
while len(pendent) > 0:
index = (len(threads) - len(pendent)) + 1
percentage = int(math.ceil(index * t))

View File

@@ -1,7 +1,7 @@
{
"id": "seriecanal",
"name": "Seriecanal",
"active": true,
"active": false,
"adult": false,
"language": ["cast"],
"thumbnail": "http://i.imgur.com/EwMK8Yd.png",

View File

@@ -4,12 +4,14 @@ import re
import urllib
import urlparse
from core import httptools
from core import scrapertools
from core import servertools
from core import tmdb
from platformcode import config, logger
__modo_grafico__ = config.get_setting('modo_grafico', "seriecanal")
__perfil__ = config.get_setting('perfil', "descargasmix")
__perfil__ = config.get_setting('perfil', "seriecanal")
# Set the color profile
perfil = [['0xFFFFE6CC', '0xFFFFCE9C', '0xFF994D00'],
@@ -17,23 +19,21 @@ perfil = [['0xFFFFE6CC', '0xFFFFCE9C', '0xFF994D00'],
['0xFF58D3F7', '0xFF2E9AFE', '0xFF2E64FE']]
color1, color2, color3 = perfil[__perfil__]
URL_BASE = "http://www.seriecanal.com/"
host = "https://www.seriecanal.com/"
def login():
logger.info()
data = scrapertools.downloadpage(URL_BASE)
data = httptools.downloadpage(host).data
if "Cerrar Sesion" in data:
return True, ""
usuario = config.get_setting("user", "seriecanal")
password = config.get_setting("password", "seriecanal")
if usuario == "" or password == "":
return False, 'Regístrate en www.seriecanal.com e introduce tus datos en "Configurar Canal"'
else:
post = urllib.urlencode({'username': usuario, 'password': password})
data = scrapertools.downloadpage("http://www.seriecanal.com/index.php?page=member&do=login&tarea=acceder",
post=post)
data = httptools.downloadpage(host + "index.php?page=member&do=login&tarea=acceder", post=post).data
if "Bienvenid@, se ha identificado correctamente en nuestro sistema" in data:
return True, ""
else:
@@ -44,18 +44,15 @@ def mainlist(item):
logger.info()
itemlist = []
item.text_color = color1
result, message = login()
if result:
itemlist.append(item.clone(action="series", title="Últimos episodios", url=URL_BASE))
itemlist.append(item.clone(action="series", title="Últimos episodios", url=host))
itemlist.append(item.clone(action="genero", title="Series por género"))
itemlist.append(item.clone(action="alfabetico", title="Series por orden alfabético"))
itemlist.append(item.clone(action="search", title="Buscar..."))
else:
itemlist.append(item.clone(action="", title=message, text_color="red"))
itemlist.append(item.clone(action="configuracion", title="Configurar canal...", text_color="gold", folder=False))
return itemlist
@@ -68,7 +65,7 @@ def configuracion(item):
def search(item, texto):
logger.info()
item.url = "http://www.seriecanal.com/index.php?page=portada&do=category&method=post&category_id=0&order=" \
item.url = host + "index.php?page=portada&do=category&method=post&category_id=0&order=" \
"C_Create&view=thumb&pgs=1&p2=1"
try:
post = "keyserie=" + texto
@@ -85,27 +82,24 @@ def search(item, texto):
def genero(item):
logger.info()
itemlist = []
data = scrapertools.downloadpage(URL_BASE)
data = httptools.downloadpage(host).data
data = scrapertools.find_single_match(data, '<ul class="tag-cloud">(.*?)</ul>')
matches = scrapertools.find_multiple_matches(data, '<a.*?href="([^"]+)">([^"]+)</a>')
for scrapedurl, scrapedtitle in matches:
scrapedtitle = scrapedtitle.capitalize()
url = urlparse.urljoin(URL_BASE, scrapedurl)
url = urlparse.urljoin(host, scrapedurl)
itemlist.append(item.clone(action="series", title=scrapedtitle, url=url))
return itemlist
def alfabetico(item):
logger.info()
itemlist = []
data = scrapertools.downloadpage(URL_BASE)
data = httptools.downloadpage(host).data
data = scrapertools.find_single_match(data, '<ul class="pagination pagination-sm" style="margin:5px 0;">(.*?)</ul>')
matches = scrapertools.find_multiple_matches(data, '<a.*?href="([^"]+)">([^"]+)</a>')
for scrapedurl, scrapedtitle in matches:
url = urlparse.urljoin(URL_BASE, scrapedurl)
url = urlparse.urljoin(host, scrapedurl)
itemlist.append(item.clone(action="series", title=scrapedtitle, url=url))
return itemlist
@@ -115,45 +109,38 @@ def series(item):
itemlist = []
item.infoLabels = {}
item.text_color = color2
if item.extra != "":
data = scrapertools.downloadpage(item.url, post=item.extra)
data = httptools.downloadpage(item.url, post=item.extra).data
else:
data = scrapertools.downloadpage(item.url)
data = httptools.downloadpage(item.url).data
data = re.sub(r"\n|\r|\t|\s{2}|&nbsp;", "", data)
patron = '<div class="item-inner" style="margin: 0 20px 0px 0\;"><img src="([^"]+)".*?' \
'href="([^"]+)" title="Click para Acceder a la Ficha(?:\|([^"]+)|)".*?' \
'<strong>([^"]+)</strong></a>.*?<strong>([^"]+)</strong></p>.*?' \
'<p class="text-warning".*?\;">(.*?)</p>'
matches = scrapertools.find_multiple_matches(data, patron)
for scrapedthumbnail, scrapedurl, scrapedplot, scrapedtitle, scrapedtemp, scrapedepi in matches:
title = scrapedtitle + " - " + scrapedtemp + " - " + scrapedepi
url = urlparse.urljoin(URL_BASE, scrapedurl)
temporada = scrapertools.find_single_match(scrapedtemp, "(\d+)")
new_item = item.clone()
new_item.contentType = "tvshow"
url = urlparse.urljoin(host, scrapedurl)
temporada = scrapertools.find_single_match(scrapedtemp, "\d+")
episode = scrapertools.find_single_match(scrapedepi, "\d+")
#item.contentType = "tvshow"
if temporada != "":
new_item.infoLabels['season'] = temporada
new_item.contentType = "season"
logger.debug("title=[" + title + "], url=[" + url + "], thumbnail=[" + scrapedthumbnail + "]")
itemlist.append(new_item.clone(action="findvideos", title=title, fulltitle=scrapedtitle, url=url,
thumbnail=scrapedthumbnail, plot=scrapedplot, contentTitle=scrapedtitle,
context=["buscar_trailer"], show=scrapedtitle))
try:
from core import tmdb
tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)
except:
pass
item.infoLabels['season'] = temporada
#item.contentType = "season"
if episode != "":
item.infoLabels['episode'] = episode
#item.contentType = "episode"
itemlist.append(item.clone(action="findvideos", title=title, url=url,
contentSerieName=scrapedtitle,
context=["buscar_trailer"]))
tmdb.set_infoLabels(itemlist)
# Extra marks the next page
next_page = scrapertools.find_single_match(data, '<a href="([^"]+)" (?:onclick="return false;" |)title='
'"Página Siguiente"')
if next_page != "/":
url = urlparse.urljoin(URL_BASE, next_page)
url = urlparse.urljoin(host, next_page)
itemlist.append(item.clone(action="series", title=">> Siguiente", url=url, text_color=color3))
return itemlist
@@ -163,10 +150,8 @@ def findvideos(item):
logger.info()
itemlist = []
item.text_color = color3
data = scrapertools.downloadpage(item.url)
data = httptools.downloadpage(item.url).data
data = scrapertools.decodeHtmlentities(data)
# Search the download/torrent section
data_download = scrapertools.find_single_match(data, '<th>Episodio - Enlaces de Descarga</th>(.*?)</table>')
patron = '<p class="item_name".*?<a href="([^"]+)".*?>([^"]+)</a>'
@@ -178,18 +163,15 @@ def findvideos(item):
else:
scrapedtitle = "[Torrent] " + scrapedepi
scrapedtitle = scrapertools.htmlclean(scrapedtitle)
new_item.infoLabels['episode'] = scrapertools.find_single_match(scrapedtitle, "Episodio (\d+)")
logger.debug("title=[" + scrapedtitle + "], url=[" + scrapedurl + "]")
itemlist.append(new_item.clone(action="play", title=scrapedtitle, url=scrapedurl, server="torrent",
contentType="episode"))
# Search the online-viewing section
data_online = scrapertools.find_single_match(data, "<th>Enlaces de Visionado Online</th>(.*?)</table>")
patron = '<a href="([^"]+)\\n.*?src="([^"]+)".*?' \
'title="Enlace de Visionado Online">([^"]+)</a>'
matches = scrapertools.find_multiple_matches(data_online, patron)
for scrapedurl, scrapedthumb, scrapedtitle in matches:
# Discard trailer links
scrapedtitle = scrapertools.htmlclean(scrapedtitle)
@@ -200,7 +182,6 @@ def findvideos(item):
new_item.infoLabels['episode'] = scrapertools.find_single_match(scrapedtitle, "Episodio (\d+)")
itemlist.append(new_item.clone(action="play", title=title, url=scrapedurl, contentType="episode"))
# Check whether additional seasons are available
if not "No hay disponible ninguna Temporada adicional" in data:
data_temp = scrapertools.find_single_match(data, '<div class="panel panel-success">(.*?)</table>')
@@ -210,7 +191,7 @@ def findvideos(item):
matches = scrapertools.find_multiple_matches(data_temp, patron)
for scrapedurl, scrapedtitle in matches:
new_item = item.clone()
url = urlparse.urljoin(URL_BASE, scrapedurl)
url = urlparse.urljoin(host, scrapedurl)
scrapedtitle = scrapedtitle.capitalize()
temporada = scrapertools.find_single_match(scrapedtitle, "Temporada (\d+)")
if temporada != "":
@@ -218,13 +199,7 @@ def findvideos(item):
new_item.infoLabels['episode'] = ""
itemlist.append(new_item.clone(action="findvideos", title=scrapedtitle, url=url, text_color="red",
contentType="season"))
try:
from core import tmdb
tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)
except:
pass
tmdb.set_infoLabels_itemlist(itemlist, __modo_grafico__)
new_item = item.clone()
if config.is_xbmc():
new_item.contextual = True
@@ -236,7 +211,6 @@ def findvideos(item):
def play(item):
logger.info()
itemlist = []
if item.extra == "torrent":
itemlist.append(item.clone())
else:

View File

@@ -1,8 +1,5 @@
# -*- coding: utf-8 -*-
import re
import urlparse
from core import httptools
from core import scrapertools
from core import servertools
@@ -12,10 +9,8 @@ from platformcode import logger
host = 'http://www.sipeliculas.com'
def mainlist(item):
logger.info()
itemlist = []
itemlist.append(item.clone(title="Novedades", action="lista", url=host + "/cartelera/"))
itemlist.append(item.clone(title="Actualizadas", action="lista", url=host + "/peliculas-actualizadas/"))
@@ -24,7 +19,6 @@ def mainlist(item):
itemlist.append(item.clone(title="Año", action="menuseccion", url=host, extra="/estrenos-gratis/"))
itemlist.append(item.clone(title="Alfabetico", action="alfabetica", url=host + '/mirar/'))
itemlist.append(item.clone(title="Buscar", action="search", url=host + "/ver/"))
return itemlist
@@ -33,7 +27,6 @@ def alfabetica(item):
itemlist = []
for letra in "1abcdefghijklmnopqrstuvwxyz":
itemlist.append(item.clone(title=letra.upper(), url=item.url + letra, action="lista"))
return itemlist
@@ -42,7 +35,6 @@ def menuseccion(item):
itemlist = []
seccion = item.extra
data = httptools.downloadpage(item.url).data
if seccion == '/online/':
data = scrapertools.find_single_match(data,
'<h2 class="[^"]+"><i class="[^"]+"></i>Películas por géneros<u class="[^"]+"></u></h2>(.*?)<ul class="abc">')
@@ -50,8 +42,7 @@ def menuseccion(item):
elif seccion == '/estrenos-gratis/':
data = scrapertools.find_single_match(data, '<ul class="lista-anio" id="lista-anio">(.*?)</ul>')
patron = '<li ><a href="([^"]+)" title="[^"]+">([^<]+)</a></li>'
matches = re.compile(patron, re.DOTALL).findall(data)
matches = scrapertools.find_multiple_matches(data, patron)
for scrapedurl, extra in matches:
itemlist.append(Item(channel=item.channel, action='lista', title=extra, url=scrapedurl))
return itemlist
@@ -64,22 +55,19 @@ def lista(item):
listado = scrapertools.find_single_match(data,
'<div id="sipeliculas" class="borde"><div class="izquierda">(.*?)<div class="derecha"><h2')
patron = '<a class="i" href="(.*?)".*?src="(.*?)".*?title=.*?>(.*?)<.*?span>(.*?)<.*?<p><span>(.*?)<'
matches = re.compile(patron, re.DOTALL).findall(listado)
matches = scrapertools.find_multiple_matches(listado, patron)
for scrapedurl, scrapedthumbnail, scrapedtitle, year, plot in matches:
itemlist.append(Item(channel=item.channel, action='findvideos', title=scrapedtitle, url=scrapedurl,
thumbnail=scrapedthumbnail, plot=plot, contentTitle=scrapedtitle, extra=item.extra,
itemlist.append(Item(channel=item.channel, action='findvideos', title=scrapedtitle + " (%s)" %year, url=scrapedurl,
thumbnail=scrapedthumbnail, contentTitle=scrapedtitle, extra=item.extra,
infoLabels ={'year':year}))
tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
# Pagination
if itemlist != []:
patron = '<li[^<]+<a href="([^"]+)" title="[^"]+">Siguiente[^<]+</a></li>'
matches = re.compile(patron, re.DOTALL).findall(data)
matches = scrapertools.find_multiple_matches(data, patron)
if matches:
itemlist.append(
item.clone(title="Pagina Siguiente", action='lista', url=urlparse.urljoin(host, matches[0])))
item.clone(title="Pagina Siguiente", action='lista', url=host + "/" + matches[0]))
return itemlist
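A note on the pagination change above: `urlparse.urljoin` resolves both relative and absolute `href` values against the base, while plain concatenation only works when the captured path is relative and carries no leading slash. A minimal comparison sketch (the CDN URL is made up for illustration):

import urlparse

host = 'http://www.sipeliculas.com'
# urljoin normalizes leading slashes and passes absolute URLs through untouched
print urlparse.urljoin(host, '/cartelera/2')            # http://www.sipeliculas.com/cartelera/2
print urlparse.urljoin(host, 'http://cdn.example/p/2')  # http://cdn.example/p/2
# plain concatenation assumes a relative path with no leading slash
print host + "/" + 'cartelera/2'                        # http://www.sipeliculas.com/cartelera/2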
@@ -97,11 +85,10 @@ def findvideos(item):
logger.info()
itemlist = []
data = httptools.downloadpage(item.url).data
listado1 = scrapertools.find_single_match(data,
'<div class="links" id="ver-mas-opciones"><h2 class="h2"><i class="[^"]+"></i>[^<]+</h2><ul class="opciones">(.*?)</ul>')
patron1 = '<li ><a id="([^"]+)" rel="nofollow" href="([^"]+)" title="[^"]+" alt="([^"]+)"><span class="opcion"><i class="[^"]+"></i><u>[^<]+</u>[^<]+</span><span class="ico"><img src="[^"]+" alt="[^"]+"/>[^<]+</span><span>([^"]+)</span><span>([^"]+)</span></a></li>'
matches = re.compile(patron1, re.DOTALL).findall(listado1)
matches = scrapertools.find_multiple_matches(listado1, patron1)
for vidId, vidUrl, vidServer, language, quality in matches:
server = servertools.get_server_name(vidServer)
if 'Sub' in language:
@@ -109,39 +96,32 @@ def findvideos(item):
itemlist.append(Item(channel=item.channel, action='play', url=vidUrl, extra=vidId,
title='Ver en ' + vidServer + ' | ' + language + ' | ' + quality,
thumbnail=item.thumbnail, server=server, language=language, quality=quality ))
listado2 = scrapertools.find_single_match(data, '<ul class="opciones-tab">(.*?)</ul>')
patron2 = '<li ><a id="([^"]+)" rel="nofollow" href="([^"]+)" title="[^"]+" alt="([^"]+)"><img src="[^"]+" alt="[^"]+"/>[^<]+</a></li>'
matches = re.compile(patron2, re.DOTALL).findall(listado2)
matches = scrapertools.find_multiple_matches(listado2, patron2)
for vidId, vidUrl, vidServer in matches:
server = servertools.get_server_name(vidServer)
itemlist.append(Item(channel=item.channel, action='play', url=vidUrl, extra=vidId, title='Ver en ' + vidServer,
thumbnail=item.thumbnail, server=server))
for videoitem in itemlist:
videoitem.fulltitle = item.title
videoitem.folder = False
return itemlist
def play(item):
logger.info()
itemlist = []
video = httptools.downloadpage(host + '/ajax.public.php', 'acc=ver_opc&f=' + item.extra).data
logger.info("video=" + video)
enlaces = servertools.findvideos(video)
if enlaces:
logger.info("server=" + enlaces[0][2])
thumbnail = servertools.guess_server_thumbnail(video)
# Add to the XBMC listing
data = httptools.downloadpage(item.url).data
video = scrapertools.find_single_match(data, '</div><iframe src="([^"]+)')
if video:
itemlist.append(
Item(channel=item.channel, action="play", title=item.title, fulltitle=item.fulltitle, url=enlaces[0][1],
server=enlaces[0][2], thumbnail=thumbnail, folder=False))
item.clone(action="play", url=video, folder=False, server=""))
itemlist = servertools.get_servers_itemlist(itemlist)
itemlist[0].thumbnail = item.contentThumbnail
return itemlist
def newest(categoria):
logger.info()
itemlist = []
@@ -155,16 +135,13 @@ def newest(categoria):
item.url = host + "/online/terror/"
else:
return []
itemlist = lista(item)
if itemlist[-1].title == "Pagina Siguiente":
itemlist.pop()
# Catch the exception so a failing channel does not break the global "newest" listing
except:
import sys
for line in sys.exc_info():
logger.error("{0}".format(line))
return []
return itemlist

View File

@@ -620,7 +620,7 @@ def play(item):
data['a']['tt']) + \
"&mm=" + data['a']['mm'] + "&bb=" + data['a']['bb']
url += "|User-Agent=Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Mobile Safari/537.36"
url += "|User-Agent=%s" % httptools.get_user_agent
itemlist.append(item.clone(action="play", server="directo", url=url, folder=False))

plugin.video.alfa/channels/ultrapeliculashd.json Executable file → Normal file
View File

@@ -19,6 +19,20 @@
"enabled": true,
"visible": true
},
{
"id": "filter_languages",
"type": "list",
"label": "Mostrar enlaces en idioma...",
"default": 0,
"enabled": true,
"visible": true,
"lvalues": [
"No filtrar",
"LAT",
"CAST",
"VOSE"
]
},
{
"id": "include_in_newest_latino",
"type": "bool",

View File

@@ -8,6 +8,7 @@ from core import servertools
from core import jsontools
from core import tmdb
from core.item import Item
from channels import filtertools, autoplay
from platformcode import config, logger
host = 'http://www.ultrapeliculashd.com'
@@ -63,39 +64,51 @@ tcalidad = {'1080P': 'https://s21.postimg.cc/4h1s0t1wn/hd1080.png',
'720P': 'https://s12.postimg.cc/lthu7v4q5/hd720.png', "HD": "https://s27.postimg.cc/m2dhhkrur/image.png"}
IDIOMAS = {'Latino': 'LAT', 'Español': 'CAST', 'SUB':'VOSE'}
list_language = IDIOMAS.values()
list_quality = ['default', '1080p']
list_servers = ['openload','directo']
__comprueba_enlaces__ = config.get_setting('comprueba_enlaces', 'ultrapeliculashd')
__comprueba_enlaces_num__ = config.get_setting('comprueba_enlaces_num', 'ultrapeliculashd')
def mainlist(item):
logger.info()
autoplay.init(item.channel, list_servers, list_quality)
itemlist = []
itemlist.append(item.clone(title="Todas",
action="lista",
thumbnail='https://s18.postimg.cc/fwvaeo6qh/todas.png',
fanart='https://s18.postimg.cc/fwvaeo6qh/todas.png',
url=host + '/movies/'
))
itemlist.append(Item(channel=item.channel, title="Todas",
action="lista",
thumbnail='https://s18.postimg.cc/fwvaeo6qh/todas.png',
fanart='https://s18.postimg.cc/fwvaeo6qh/todas.png',
url=host + '/movies/'
))
itemlist.append(item.clone(title="Generos",
action="generos",
url=host,
thumbnail='https://s3.postimg.cc/5s9jg2wtf/generos.png',
fanart='https://s3.postimg.cc/5s9jg2wtf/generos.png'
))
itemlist.append(Item(channel=item.channel, title="Generos",
action="generos",
url=host,
thumbnail='https://s3.postimg.cc/5s9jg2wtf/generos.png',
fanart='https://s3.postimg.cc/5s9jg2wtf/generos.png'
))
itemlist.append(item.clone(title="Alfabetico",
action="seccion",
url=host,
thumbnail='https://s17.postimg.cc/fwi1y99en/a-z.png',
fanart='https://s17.postimg.cc/fwi1y99en/a-z.png',
extra='alfabetico'
))
itemlist.append(Item(channel=item.channel, title="Alfabetico",
action="seccion",
url=host,
thumbnail='https://s17.postimg.cc/fwi1y99en/a-z.png',
fanart='https://s17.postimg.cc/fwi1y99en/a-z.png',
extra='alfabetico'
))
itemlist.append(item.clone(title="Buscar",
action="search",
url=host + '/?s=',
thumbnail='https://s30.postimg.cc/pei7txpa9/buscar.png',
fanart='https://s30.postimg.cc/pei7txpa9/buscar.png'
))
itemlist.append(Item(channel=item.channel, title="Buscar",
action="search",
url=host + '/?s=',
thumbnail='https://s30.postimg.cc/pei7txpa9/buscar.png',
fanart='https://s30.postimg.cc/pei7txpa9/buscar.png'
))
autoplay.show_option(item.channel, itemlist)
return itemlist
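The autoplay wiring added to this channel follows a fixed three-call pattern: `autoplay.init()` registers the channel's servers and qualities when the menu is built, `autoplay.show_option()` appends the configuration entry to the menu, and `autoplay.start()` (called from `findvideos`, further below) plays the preferred link automatically. A condensed sketch of that pattern; `build_links` is a hypothetical stand-in for the channel's own scraping:

from channels import autoplay
from core.item import Item

list_servers = ['openload', 'directo']  # servers this channel can resolve
list_quality = ['default', '1080p']     # qualities it advertises

def mainlist(item):
    autoplay.init(item.channel, list_servers, list_quality)  # register settings
    itemlist = [Item(channel=item.channel, title="Todas", action="lista")]
    autoplay.show_option(item.channel, itemlist)  # append the AutoPlay menu entry
    return itemlist

def findvideos(item):
    itemlist = build_links(item)    # hypothetical helper: scrape playable links
    autoplay.start(itemlist, item)  # honour the user's AutoPlay preference
    return itemlist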
@@ -160,13 +173,13 @@ def generos(item):
title = scrapedtitle
url = scrapedurl
if scrapedtitle not in ['PRÓXIMAMENTE', 'EN CINE']:
itemlist.append(item.clone(action="lista",
title=title,
fulltitle=item.title,
url=url,
thumbnail=thumbnail,
fanart=fanart
))
itemlist.append(Item(channel=item.channel, action="lista",
title=title,
fulltitle=item.title,
url=url,
thumbnail=thumbnail,
fanart=fanart
))
return itemlist
@@ -209,15 +222,33 @@ def alpha(item):
def findvideos(item):
from lib import jsunpack
logger.info()
itemlist = []
data = httptools.downloadpage(item.url).data
data = re.sub(r'"|\n|\r|\t|&nbsp;|<br>|\s{2,}', "", data)
patron = '<iframe.*?rptss src=(.*?) (?:width.*?|frameborder.*?) allowfullscreen><\/iframe>'
patron = '<div id=(option.*?) class=play.*?<iframe.*?'
patron += 'rptss src=(.*?) (?:width.*?|frameborder.*?) allowfullscreen><\/iframe>'
matches = re.compile(patron, re.DOTALL).findall(data)
for video_url in matches:
if 'stream' in video_url and 'streamango' not in video_url:
for option, video_url in matches:
language = scrapertools.find_single_match(data, '#%s>.*?-->(.*?)(?:\s|<)' % option)
if 'sub' in language.lower():
language = 'SUB'
language = IDIOMAS[language]
if 'ultrapeliculashd' in video_url:
new_data = httptools.downloadpage(video_url).data
new_data = re.sub(r'"|\n|\r|\t|&nbsp;|<br>|\s{2,}', "", new_data)
if 'drive' not in video_url:
quality= '1080p'
packed = scrapertools.find_single_match(new_data, '<script>(eval\(.*?)eval')
unpacked = jsunpack.unpack(packed)
url = scrapertools.find_single_match(unpacked, 'file:(http.?:.*?)\}')
else:
quality= '1080p'
url = scrapertools.find_single_match(new_data, '</div><iframe src=([^\s]+) webkitallowfullscreen')
elif 'stream' in video_url and 'streamango' not in video_url:
data = httptools.downloadpage('https:'+video_url).data
if not 'iframe' in video_url:
new_url=scrapertools.find_single_match(data, 'iframe src="(.*?)"')
@@ -233,26 +264,42 @@ def findvideos(item):
url = url.replace('download', 'preview')+headers_string
sub = scrapertools.find_single_match(new_data, 'file:.*?"(.*?srt)"')
new_item = (Item(title=item.title, url=url, quality=quality, subtitle=sub, server='directo'))
new_item = (Item(title=item.title, url=url, quality=quality, subtitle=sub, server='directo',
language = language))
itemlist.append(new_item)
else:
itemlist.extend(servertools.find_video_items(data=video_url))
url = video_url
quality = 'default'
for videoitem in itemlist:
videoitem.channel = item.channel
videoitem.action = 'play'
videoitem.thumbnail = item.thumbnail
videoitem.infoLabels = item.infoLabels
videoitem.title = item.contentTitle + ' (' + videoitem.server + ')'
if 'youtube' in videoitem.url:
videoitem.title = '[COLOR orange]Trailer en Youtube[/COLOR]'
if not config.get_setting("unify"):
title = ' [%s] [%s]' % (quality, language)
else:
title = ''
itemlist = servertools.get_servers_itemlist(itemlist)
new_item = (Item(channel=item.channel, title='%s'+title, url=url, action='play', quality=quality,
language=language, infoLabels=item.infoLabels))
itemlist.append(new_item)
itemlist = servertools.get_servers_itemlist(itemlist, lambda i: i.title % i.server.capitalize())
if __comprueba_enlaces__:
itemlist = servertools.check_list_links(itemlist, __comprueba_enlaces_num__)
# Required by FilterTools
itemlist = filtertools.get_links(itemlist, item, list_language)
# Required by AutoPlay
autoplay.start(itemlist, item)
if config.get_videolibrary_support() and len(itemlist) > 0 and item.extra != 'findvideos':
itemlist.append(
Item(channel=item.channel, title='[COLOR yellow]Añadir esta pelicula a la videoteca[/COLOR]', url=item.url,
action="add_pelicula_to_library", extra="findvideos", contentTitle=item.contentTitle))
return itemlist
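One detail worth spelling out from `findvideos` above: titles are built with a literal `'%s'` placeholder because the server is unknown until `servertools.get_servers_itemlist` identifies it; the lambda passed as second argument then fills the placeholder for every item in a single pass. A small sketch of the same pattern (the embed URL is made up):

from core import servertools
from core.item import Item

# The server name is not known yet, so the title keeps a '%s' slot for it.
pending = [Item(title='%s [1080p] [LAT]', action='play',
                url='https://openload.co/embed/abc123')]

# After server detection the formatter turns '%s [1080p] [LAT]'
# into e.g. 'Openload [1080p] [LAT]'.
resolved = servertools.get_servers_itemlist(pending,
                                            lambda i: i.title % i.server.capitalize())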

View File

@@ -221,7 +221,7 @@ def findvideos(item):
language = ''
if 'latino' in link.lower():
language='Latino'
elif 'español' in link.lower():
elif 'espaÑol' in link.lower():
language = 'Español'
elif 'subtitulado' in link.lower():
language = 'VOSE'

View File

@@ -1,24 +0,0 @@
{
"id": "zentorrents",
"name": "Zentorrent",
"active": false,
"adult": false,
"language": ["cast"],
"banner": "zentorrents.png",
"thumbnail": "http://s6.postimg.cc/9zv90yjip/zentorrentlogo.jpg",
"categories": [
"torrent",
"movie",
"tvshow"
],
"settings": [
{
"id": "include_in_global_search",
"type": "bool",
"label": "Incluir en busqueda global",
"default": true,
"enabled": true,
"visible": true
}
]
}

File diff suppressed because it is too large

View File

@@ -5,28 +5,59 @@
"adult": false,
"language": ["cast", "lat"],
"banner": "",
"thumbnail": "https://zonatorrent.org/wp-content/uploads/2017/04/zonatorrent-New-Logo.png",
"thumbnail": "zonatorrent.png",
"version": 1,
"categories": [
"torrent",
"movie"
"torrent",
"movie",
"tvshow",
"vos"
],
"settings": [
{
"id": "include_in_global_search",
"type": "bool",
"label": "Incluir en busqueda global",
"default": true,
"enabled": true,
"visible": true
},
{
"id": "modo_grafico",
"type": "bool",
"label": "Buscar información extra",
"default": true,
"enabled": true,
"visible": true
{
"id": "include_in_global_search",
"type": "bool",
"label": "Incluir en busqueda global",
"default": true,
"enabled": true,
"visible": true
},
{
"id": "modo_grafico",
"type": "bool",
"label": "Buscar información extra",
"default": true,
"enabled": true,
"visible": true
},
{
"id": "timeout_downloadpage",
"type": "list",
"label": "Timeout (segs.) en descarga de páginas o verificación de servidores",
"default": 5,
"enabled": true,
"visible": true,
"lvalues": [
"None",
"1",
"2",
"3",
"4",
"5",
"6",
"7",
"8",
"9",
"10"
]
},
{
"id": "seleccionar_ult_temporadda_activa",
"type": "bool",
"label": "Seleccionar para Videoteca si estará activa solo la última Temporada",
"default": true,
"enabled": true,
"visible": true
},
{
"id": "include_in_newest_peliculas",

File diff suppressed because it is too large

View File

@@ -24,9 +24,9 @@ def getmainlist(view="thumb_"):
thumbnail=get_thumb("channels.png", view), view=view,
category=config.get_localized_string(30119), viewmode="thumbnails"))
itemlist.append(Item(title='Mis enlaces', channel="alfavorites", action="mainlist",
thumbnail=get_thumb("favorites.png", view), view=view,
category='Mis enlaces', viewmode="thumbnails"))
itemlist.append(Item(title=config.get_localized_string(70527), channel="alfavorites", action="mainlist",
thumbnail=get_thumb("mylink.png", view), view=view,
category=config.get_localized_string(70527), viewmode="thumbnails"))
itemlist.append(Item(title=config.get_localized_string(30103), channel="search", action="mainlist",
thumbnail=get_thumb("search.png", view),
@@ -197,7 +197,7 @@ def filterchannels(category, view="thumb_"):
thumbnail=channel_parameters["thumbnail"], type="generic", viewmode="list"))
if category in ['movie', 'tvshow']:
titles = [config.get_localized_string(70028), config.get_localized_string(30985), config.get_localized_string(70527), config.get_localized_string(60264), config.get_localized_string(70528)]
titles = [config.get_localized_string(70028), config.get_localized_string(30985), config.get_localized_string(70559), config.get_localized_string(60264), config.get_localized_string(70560)]
ids = ['popular', 'top_rated', 'now_playing', 'on_the_air']
for x in range(0,3):
if x == 2 and category != 'movie':
@@ -267,4 +267,4 @@ def set_channel_info(parameters):
content = config.get_localized_category(cat)
info = '[COLOR yellow]Tipo de contenido:[/COLOR] %s\n\n[COLOR yellow]Idiomas:[/COLOR] %s' % (content, language)
return info
return info

plugin.video.alfa/core/httptools.py Executable file → Normal file
View File

@@ -56,6 +56,9 @@ default_headers["Accept-Encoding"] = "gzip"
HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT = config.get_setting('httptools_timeout', default=15)
if HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT == 0: HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT = None
def get_user_agent():
# Return the global user agent, for use when a URL needs it.
return default_headers["User-Agent"]
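A short usage note for the accessor above: it lets channel code append the same user agent that `downloadpage` sends instead of hardcoding one. Kodi reads extra HTTP headers appended to a media URL after a `|`, encoded like a query string; a minimal sketch with an illustrative URL:

import urllib
from core import httptools

headers = {"User-Agent": httptools.get_user_agent(),
           "Referer": "http://example.com/"}
# Kodi convention: media_url|Header1=value1&Header2=value2
play_url = "http://example.com/video.mp4|" + urllib.urlencode(headers)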
def get_url_headers(url):
domain_cookies = cj._cookies.get("." + urlparse.urlparse(url)[1], {}).get("/", {})

View File

@@ -319,7 +319,7 @@ def set_infoLabels_item(item, seekTmdb=True, idioma_busqueda='es', lock=None):
__leer_datos(otmdb_global)
if lock:
if lock and lock.locked():
lock.release()
if item.infoLabels['episode']:

View File

@@ -8,6 +8,7 @@
# ------------------------------------------------------------
import re
import os
import sys
import urllib
import urlparse
@@ -236,8 +237,7 @@ def post_tmdb_listado(item, itemlist):
del item.channel_alt
if item.url_alt:
del item.url_alt
if item.extra2:
del item.extra2
#Adjust the category name
if not item.category_new:
item.category_new = ''
@@ -389,8 +389,8 @@ def post_tmdb_listado(item, itemlist):
if item_local.infoLabels['episodio_titulo']:
item_local.infoLabels['episodio_titulo'] = item_local.infoLabels['episodio_titulo'].replace(" []", "").strip()
title = title.replace("--", "").replace(" []", "").replace("()", "").replace("(/)", "").replace("[/]", "").strip()
title = re.sub(r'\s\[COLOR \w+\]\[\[?\]?\]\[\/COLOR\]', '', title).strip()
title = re.sub(r'\s\[COLOR \w+\]\[\/COLOR\]', '', title).strip()
title = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', title).strip()
title = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', title).strip()
if item.category_new == "newest": #Coming from Newest: tag the title with the channel name
title += ' -%s-' % scrapertools.find_single_match(item_local.url, 'http.?\:\/\/(?:www.)?(\w+)\.\w+\/').capitalize()
@@ -766,6 +766,7 @@ def post_tmdb_episodios(item, itemlist):
#If the episode title is missing but "title" carries it, recover it from there
if not item_local.infoLabels['episodio_titulo'] and item_local.infoLabels['title'].lower() != item_local.infoLabels['tvshowtitle'].lower():
item_local.infoLabels['episodio_titulo'] = item_local.infoLabels['title']
item_local.infoLabels['episodio_titulo'] = item_local.infoLabels['episodio_titulo'].replace('GB', 'G B').replace('MB', 'M B')
#Prepare the title so it is compatible with "Add Series to Videolibrary"
if "Temporada" in item_local.title: #Make "Temporada" compatible with Unify
@@ -792,8 +793,8 @@ def post_tmdb_episodios(item, itemlist):
item_local.infoLabels['episodio_titulo'] = item_local.infoLabels['episodio_titulo'].replace(" []", "").strip()
item_local.infoLabels['title'] = item_local.infoLabels['title'].replace(" []", "").strip()
item_local.title = item_local.title.replace(" []", "").strip()
item_local.title = re.sub(r'\s\[COLOR \w+\]\[\[?\]?\]\[\/COLOR\]', '', item_local.title).strip()
item_local.title = re.sub(r'\s\[COLOR \w+\]-\[\/COLOR\]', '', item_local.title).strip()
item_local.title = re.sub(r'\s?\[COLOR \w+\]\[\[?-?\s?\]?\]\[\/COLOR\]', '', item_local.title).strip()
item_local.title = re.sub(r'\s?\[COLOR \w+\]-?\s?\[\/COLOR\]', '', item_local.title).strip()
#If TMDB's total-episode count is wrong, try to compute it ourselves
if num_episodios < item_local.contentEpisodeNumber:
@@ -1054,8 +1055,8 @@ def post_tmdb_findvideos(item, itemlist):
title_gen = item.title
#Clean empty tags
title_gen = re.sub(r'\s\[COLOR \w+\]\[\[?\]?\]\[\/COLOR\]', '', title_gen).strip() #Remove empty tags
title_gen = re.sub(r'\s\[COLOR \w+\]\[\/COLOR\]', '', title_gen).strip() #Remove empty colors
title_gen = re.sub(r'\s?\[COLOR \w+\]\[\[?\s?\]?\]\[\/COLOR\]', '', title_gen).strip() #Remove empty tags
title_gen = re.sub(r'\s?\[COLOR \w+\]\s?\[\/COLOR\]', '', title_gen).strip() #Remove empty colors
title_gen = title_gen.replace(" []", "").strip() #Remove empty tags
title_videoteca = title_gen #Save the title for the videolibrary
@@ -1103,7 +1104,131 @@ def post_tmdb_findvideos(item, itemlist):
return (item, itemlist)
def get_torrent_size(url):
logger.info()
"""
Module extracted from the old ZenTorrent channel
Computes the size of the files contained in a .torrent. It downloads the .torrent file to a folder,
reads it and decodes it. If it contains multiple files, it adds up the sizes of all of them
Call: generictools.get_torrent_size(url)
Input: url: url of the .torrent file
Output: size: str with the size and unit type (MB, GB, etc.)
"""
def convert_size(size):
import math
if (size == 0):
return '0B'
size_name = ("B", "KB", "M B", "G B", "TB", "PB", "EB", "ZB", "YB")
i = int(math.floor(math.log(size, 1024)))
p = math.pow(1024, i)
s = round(size / p, 2)
return '%s %s' % (s, size_name[i])
def decode(text):
try:
src = tokenize(text)
data = decode_item(src.next, src.next())
for token in src: # look for more tokens
raise SyntaxError("trailing junk")
except (AttributeError, ValueError, StopIteration):
try:
data = data
except:
data = src
return data
def tokenize(text, match=re.compile("([idel])|(\d+):|(-?\d+)").match):
i = 0
while i < len(text):
m = match(text, i)
s = m.group(m.lastindex)
i = m.end()
if m.lastindex == 2:
yield "s"
yield text[i:i + int(s)]
i = i + int(s)
else:
yield s
def decode_item(next, token):
if token == "i":
# integer: "i" value "e"
data = int(next())
if next() != "e":
raise ValueError
elif token == "s":
# string: "s" value (virtual tokens)
data = next()
elif token == "l" or token == "d":
# container: "l" (or "d") values "e"
data = []
tok = next()
while tok != "e":
data.append(decode_item(next, tok))
tok = next()
if token == "d":
data = dict(zip(data[0::2], data[1::2]))
else:
raise ValueError
return data
#Main body
size = ""
try:
torrents_path = config.get_videolibrary_path() + '/torrents' #path where the .torrent is left
if not os.path.exists(torrents_path):
os.mkdir(torrents_path) #create the folder if it does not exist
urllib.URLopener.version = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36 SE 2.X MetaSr 1.0'
urllib.urlretrieve(url, torrents_path + "/generictools.torrent") #download the .torrent to the folder
torrent_file = open(torrents_path + "/generictools.torrent", "rb").read() #read the .torrent
if "used CloudFlare" in torrent_file: #If it sits behind CloudFlare, use this workaround
try:
urllib.urlretrieve("http://anonymouse.org/cgi-bin/anon-www.cgi/" + url.strip(),
torrents_path + "/generictools.torrent")
torrent_file = open(torrents_path + "/generictools.torrent", "rb").read()
except:
torrent_file = ""
torrent = decode(torrent_file) #decode the .torrent
#if it holds a single file, take its length and convert it to a readable unit; otherwise this raises an error
try:
sizet = torrent["info"]['length']
size = convert_size(sizet)
except:
pass
#if it holds multiple files, add up the lengths of all of them
if not size:
check_video = scrapertools.find_multiple_matches(str(torrent["info"]["files"]), "'length': (\d+)}")
sizet = sum([int(i) for i in check_video])
size = convert_size(sizet)
except:
logger.error('ERROR al buscar el tamaño de un .Torrent: ' + url)
try:
os.remove(torrents_path + "/generictools.torrent") #delete the .torrent
except:
pass
#logger.debug(url + ' / ' + size)
return size
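A usage sketch for the helper above, assuming a reachable .torrent URL (the one shown is illustrative). Note that `convert_size` deliberately spells the units 'M B'/'G B', mirroring the spacing applied to episode titles elsewhere in this module:

from core import generictools

size = generictools.get_torrent_size('http://example.com/movie.torrent')
if size:
    print 'Torrent payload size: %s' % size  # e.g. '1.37 G B'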
def get_field_from_kodi_DB(item, from_fields='*', files='file'):
logger.info()
"""

View File

@@ -1,211 +0,0 @@
__all__ = [
'AbstractBasicAuthHandler',
'AbstractDigestAuthHandler',
'BaseHandler',
'Browser',
'BrowserStateError',
'CacheFTPHandler',
'ContentTooShortError',
'Cookie',
'CookieJar',
'CookiePolicy',
'DefaultCookiePolicy',
'DefaultFactory',
'FTPHandler',
'Factory',
'FileCookieJar',
'FileHandler',
'FormNotFoundError',
'FormsFactory',
'HTTPBasicAuthHandler',
'HTTPCookieProcessor',
'HTTPDefaultErrorHandler',
'HTTPDigestAuthHandler',
'HTTPEquivProcessor',
'HTTPError',
'HTTPErrorProcessor',
'HTTPHandler',
'HTTPPasswordMgr',
'HTTPPasswordMgrWithDefaultRealm',
'HTTPProxyPasswordMgr',
'HTTPRedirectDebugProcessor',
'HTTPRedirectHandler',
'HTTPRefererProcessor',
'HTTPRefreshProcessor',
'HTTPResponseDebugProcessor',
'HTTPRobotRulesProcessor',
'HTTPSClientCertMgr',
'HeadParser',
'History',
'LWPCookieJar',
'Link',
'LinkNotFoundError',
'LinksFactory',
'LoadError',
'MSIECookieJar',
'MozillaCookieJar',
'OpenerDirector',
'OpenerFactory',
'ParseError',
'ProxyBasicAuthHandler',
'ProxyDigestAuthHandler',
'ProxyHandler',
'Request',
'RobotExclusionError',
'RobustFactory',
'RobustFormsFactory',
'RobustLinksFactory',
'RobustTitleFactory',
'SeekableResponseOpener',
'TitleFactory',
'URLError',
'USE_BARE_EXCEPT',
'UnknownHandler',
'UserAgent',
'UserAgentBase',
'XHTMLCompatibleHeadParser',
'__version__',
'build_opener',
'install_opener',
'lwp_cookie_str',
'make_response',
'request_host',
'response_seek_wrapper', # XXX deprecate in public interface?
'seek_wrapped_response', # XXX should probably use this internally in place of response_seek_wrapper()
'str2time',
'urlopen',
'urlretrieve',
'urljoin',
# ClientForm API
'AmbiguityError',
'ControlNotFoundError',
'FormParser',
'ItemCountError',
'ItemNotFoundError',
'LocateError',
'Missing',
'ParseFile',
'ParseFileEx',
'ParseResponse',
'ParseResponseEx',
'ParseString',
'XHTMLCompatibleFormParser',
# deprecated
'CheckboxControl',
'Control',
'FileControl',
'HTMLForm',
'HiddenControl',
'IgnoreControl',
'ImageControl',
'IsindexControl',
'Item',
'Label',
'ListControl',
'PasswordControl',
'RadioControl',
'ScalarControl',
'SelectControl',
'SubmitButtonControl',
'SubmitControl',
'TextControl',
'TextareaControl',
]
import logging
import sys
from _version import __version__
# high-level stateful browser-style interface
from _mechanize import \
Browser, History, \
BrowserStateError, LinkNotFoundError, FormNotFoundError
# configurable URL-opener interface
from _useragent import UserAgentBase, UserAgent
from _html import \
Link, \
Factory, DefaultFactory, RobustFactory, \
FormsFactory, LinksFactory, TitleFactory, \
RobustFormsFactory, RobustLinksFactory, RobustTitleFactory
# urllib2 work-alike interface. This is a superset of the urllib2 interface.
from _urllib2 import *
import _urllib2
if hasattr(_urllib2, "HTTPSHandler"):
__all__.append("HTTPSHandler")
del _urllib2
# misc
from _http import HeadParser
from _http import XHTMLCompatibleHeadParser
from _opener import ContentTooShortError, OpenerFactory, urlretrieve
from _response import \
response_seek_wrapper, seek_wrapped_response, make_response
from _rfc3986 import urljoin
from _util import http2time as str2time
# cookies
from _clientcookie import Cookie, CookiePolicy, DefaultCookiePolicy, \
CookieJar, FileCookieJar, LoadError, request_host_lc as request_host, \
effective_request_host
from _lwpcookiejar import LWPCookieJar, lwp_cookie_str
# 2.4 raises SyntaxError due to generator / try/finally use
if sys.version_info[:2] > (2,4):
try:
import sqlite3
except ImportError:
pass
else:
from _firefox3cookiejar import Firefox3CookieJar
from _mozillacookiejar import MozillaCookieJar
from _msiecookiejar import MSIECookieJar
# forms
from _form import (
AmbiguityError,
ControlNotFoundError,
FormParser,
ItemCountError,
ItemNotFoundError,
LocateError,
Missing,
ParseError,
ParseFile,
ParseFileEx,
ParseResponse,
ParseResponseEx,
ParseString,
XHTMLCompatibleFormParser,
# deprecated
CheckboxControl,
Control,
FileControl,
HTMLForm,
HiddenControl,
IgnoreControl,
ImageControl,
IsindexControl,
Item,
Label,
ListControl,
PasswordControl,
RadioControl,
ScalarControl,
SelectControl,
SubmitButtonControl,
SubmitControl,
TextControl,
TextareaControl,
)
# If you hate the idea of turning bugs into warnings, do:
# import mechanize; mechanize.USE_BARE_EXCEPT = False
USE_BARE_EXCEPT = True
logger = logging.getLogger("mechanize")
if logger.level is logging.NOTSET:
logger.setLevel(logging.CRITICAL)
del logger

View File

@@ -1,68 +0,0 @@
"""HTTP Authentication and Proxy support.
Copyright 2006 John J. Lee <jjl@pobox.com>
This code is free software; you can redistribute it and/or modify it under
the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
included with the distribution).
"""
from _urllib2_fork import HTTPPasswordMgr
# TODO: stop deriving from HTTPPasswordMgr
class HTTPProxyPasswordMgr(HTTPPasswordMgr):
# has default realm and host/port
def add_password(self, realm, uri, user, passwd):
# uri could be a single URI or a sequence
if uri is None or isinstance(uri, basestring):
uris = [uri]
else:
uris = uri
passwd_by_domain = self.passwd.setdefault(realm, {})
for uri in uris:
for default_port in True, False:
reduced_uri = self.reduce_uri(uri, default_port)
passwd_by_domain[reduced_uri] = (user, passwd)
def find_user_password(self, realm, authuri):
attempts = [(realm, authuri), (None, authuri)]
# bleh, want default realm to take precedence over default
# URI/authority, hence this outer loop
for default_uri in False, True:
for realm, authuri in attempts:
authinfo_by_domain = self.passwd.get(realm, {})
for default_port in True, False:
reduced_authuri = self.reduce_uri(authuri, default_port)
for uri, authinfo in authinfo_by_domain.iteritems():
if uri is None and not default_uri:
continue
if self.is_suburi(uri, reduced_authuri):
return authinfo
user, password = None, None
if user is not None:
break
return user, password
def reduce_uri(self, uri, default_port=True):
if uri is None:
return None
return HTTPPasswordMgr.reduce_uri(self, uri, default_port)
def is_suburi(self, base, test):
if base is None:
# default to the proxy's host/port
hostport, path = test
base = (hostport, "/")
return HTTPPasswordMgr.is_suburi(self, base, test)
class HTTPSClientCertMgr(HTTPPasswordMgr):
# implementation inheritance: this is not a proper subclass
def add_key_cert(self, uri, key_file, cert_file):
self.add_password(None, uri, key_file, cert_file)
def find_key_cert(self, authuri):
return HTTPPasswordMgr.find_user_password(self, None, authuri)

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -1,28 +0,0 @@
import logging
from _response import response_seek_wrapper
from _urllib2_fork import BaseHandler
class HTTPResponseDebugProcessor(BaseHandler):
handler_order = 900 # before redirections, after everything else
def http_response(self, request, response):
if not hasattr(response, "seek"):
response = response_seek_wrapper(response)
info = logging.getLogger("mechanize.http_responses").info
try:
info(response.read())
finally:
response.seek(0)
info("*****************************************************")
return response
https_response = http_response
class HTTPRedirectDebugProcessor(BaseHandler):
def http_request(self, request):
if hasattr(request, "redirect_dict"):
info = logging.getLogger("mechanize.http_redirects").info
info("redirecting to %s", request.get_full_url())
return request

View File

@@ -1,248 +0,0 @@
"""Firefox 3 "cookies.sqlite" cookie persistence.
Copyright 2008 John J Lee <jjl@pobox.com>
This code is free software; you can redistribute it and/or modify it
under the terms of the BSD or ZPL 2.1 licenses (see the file
COPYING.txt included with the distribution).
"""
import logging
import time
from _clientcookie import CookieJar, Cookie, MappingIterator
from _util import isstringlike, experimental
debug = logging.getLogger("mechanize.cookies").debug
class Firefox3CookieJar(CookieJar):
"""Firefox 3 cookie jar.
The cookies are stored in Firefox 3's "cookies.sqlite" format.
Constructor arguments:
filename: filename of cookies.sqlite (typically found at the top level
of a firefox profile directory)
autoconnect: as a convenience, connect to the SQLite cookies database at
Firefox3CookieJar construction time (default True)
policy: an object satisfying the mechanize.CookiePolicy interface
Note that this is NOT a FileCookieJar, and there are no .load(),
.save() or .restore() methods. The database is in sync with the
cookiejar object's state after each public method call.
Following Firefox's own behaviour, session cookies are never saved to
the database.
The file is created, and an sqlite database written to it, if it does
not already exist. The moz_cookies database table is created if it does
not already exist.
"""
# XXX
# handle DatabaseError exceptions
# add a FileCookieJar (explicit .save() / .revert() / .load() methods)
def __init__(self, filename, autoconnect=True, policy=None):
experimental("Firefox3CookieJar is experimental code")
CookieJar.__init__(self, policy)
if filename is not None and not isstringlike(filename):
raise ValueError("filename must be string-like")
self.filename = filename
self._conn = None
if autoconnect:
self.connect()
def connect(self):
import sqlite3 # not available in Python 2.4 stdlib
self._conn = sqlite3.connect(self.filename)
self._conn.isolation_level = "DEFERRED"
self._create_table_if_necessary()
def close(self):
self._conn.close()
def _transaction(self, func):
try:
cur = self._conn.cursor()
try:
result = func(cur)
finally:
cur.close()
except:
self._conn.rollback()
raise
else:
self._conn.commit()
return result
def _execute(self, query, params=()):
return self._transaction(lambda cur: cur.execute(query, params))
def _query(self, query, params=()):
# XXX should we bother with a transaction?
cur = self._conn.cursor()
try:
cur.execute(query, params)
return cur.fetchall()
finally:
cur.close()
def _create_table_if_necessary(self):
self._execute("""\
CREATE TABLE IF NOT EXISTS moz_cookies (id INTEGER PRIMARY KEY, name TEXT,
value TEXT, host TEXT, path TEXT,expiry INTEGER,
lastAccessed INTEGER, isSecure INTEGER, isHttpOnly INTEGER)""")
def _cookie_from_row(self, row):
(pk, name, value, domain, path, expires,
last_accessed, secure, http_only) = row
version = 0
domain = domain.encode("ascii", "ignore")
path = path.encode("ascii", "ignore")
name = name.encode("ascii", "ignore")
value = value.encode("ascii", "ignore")
secure = bool(secure)
# last_accessed isn't a cookie attribute, so isn't added to rest
rest = {}
if http_only:
rest["HttpOnly"] = None
if name == "":
name = value
value = None
initial_dot = domain.startswith(".")
domain_specified = initial_dot
discard = False
if expires == "":
expires = None
discard = True
return Cookie(version, name, value,
None, False,
domain, domain_specified, initial_dot,
path, False,
secure,
expires,
discard,
None,
None,
rest)
def clear(self, domain=None, path=None, name=None):
CookieJar.clear(self, domain, path, name)
where_parts = []
sql_params = []
if domain is not None:
where_parts.append("host = ?")
sql_params.append(domain)
if path is not None:
where_parts.append("path = ?")
sql_params.append(path)
if name is not None:
where_parts.append("name = ?")
sql_params.append(name)
where = " AND ".join(where_parts)
if where:
where = " WHERE " + where
def clear(cur):
cur.execute("DELETE FROM moz_cookies%s" % where,
tuple(sql_params))
self._transaction(clear)
def _row_from_cookie(self, cookie, cur):
expires = cookie.expires
if cookie.discard:
expires = ""
domain = unicode(cookie.domain)
path = unicode(cookie.path)
name = unicode(cookie.name)
value = unicode(cookie.value)
secure = bool(int(cookie.secure))
if value is None:
value = name
name = ""
last_accessed = int(time.time())
http_only = cookie.has_nonstandard_attr("HttpOnly")
query = cur.execute("""SELECT MAX(id) + 1 from moz_cookies""")
pk = query.fetchone()[0]
if pk is None:
pk = 1
return (pk, name, value, domain, path, expires,
last_accessed, secure, http_only)
def set_cookie(self, cookie):
if cookie.discard:
CookieJar.set_cookie(self, cookie)
return
def set_cookie(cur):
# XXX
# is this RFC 2965-correct?
# could this do an UPDATE instead?
row = self._row_from_cookie(cookie, cur)
name, unused, domain, path = row[1:5]
cur.execute("""\
DELETE FROM moz_cookies WHERE host = ? AND path = ? AND name = ?""",
(domain, path, name))
cur.execute("""\
INSERT INTO moz_cookies VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
""", row)
self._transaction(set_cookie)
def __iter__(self):
# session (non-persistent) cookies
for cookie in MappingIterator(self._cookies):
yield cookie
# persistent cookies
for row in self._query("""\
SELECT * FROM moz_cookies ORDER BY name, path, host"""):
yield self._cookie_from_row(row)
def _cookies_for_request(self, request):
session_cookies = CookieJar._cookies_for_request(self, request)
def get_cookies(cur):
query = cur.execute("SELECT host from moz_cookies")
domains = [row[0] for row in query.fetchall()]
cookies = []
for domain in domains:
cookies += self._persistent_cookies_for_domain(domain,
request, cur)
return cookies
persistent_coookies = self._transaction(get_cookies)
return session_cookies + persistent_coookies
def _persistent_cookies_for_domain(self, domain, request, cur):
cookies = []
if not self._policy.domain_return_ok(domain, request):
return []
debug("Checking %s for cookies to return", domain)
query = cur.execute("""\
SELECT * from moz_cookies WHERE host = ? ORDER BY path""",
(domain,))
cookies = [self._cookie_from_row(row) for row in query.fetchall()]
last_path = None
r = []
for cookie in cookies:
if (cookie.path != last_path and
not self._policy.path_return_ok(cookie.path, request)):
last_path = cookie.path
continue
if not self._policy.return_ok(cookie, request):
debug(" not returning cookie")
continue
debug(" it's a match")
r.append(cookie)
return r

File diff suppressed because it is too large

View File

@@ -1,105 +0,0 @@
from cStringIO import StringIO
import _response
import _urllib2_fork
# GzipConsumer was taken from Fredrik Lundh's effbot.org-0.1-20041009 library
class GzipConsumer:
def __init__(self, consumer):
self.__consumer = consumer
self.__decoder = None
self.__data = ""
def __getattr__(self, key):
return getattr(self.__consumer, key)
def feed(self, data):
if self.__decoder is None:
# check if we have a full gzip header
data = self.__data + data
try:
i = 10
flag = ord(data[3])
if flag & 4: # extra
x = ord(data[i]) + 256*ord(data[i+1])
i = i + 2 + x
if flag & 8: # filename
while ord(data[i]):
i = i + 1
i = i + 1
if flag & 16: # comment
while ord(data[i]):
i = i + 1
i = i + 1
if flag & 2: # crc
i = i + 2
if len(data) < i:
raise IndexError("not enough data")
if data[:3] != "\x1f\x8b\x08":
raise IOError("invalid gzip data")
data = data[i:]
except IndexError:
self.__data = data
return # need more data
import zlib
self.__data = ""
self.__decoder = zlib.decompressobj(-zlib.MAX_WBITS)
data = self.__decoder.decompress(data)
if data:
self.__consumer.feed(data)
def close(self):
if self.__decoder:
data = self.__decoder.flush()
if data:
self.__consumer.feed(data)
self.__consumer.close()
# --------------------------------------------------------------------
# the rest of this module is John Lee's stupid code, not
# Fredrik's nice code :-)
class stupid_gzip_consumer:
def __init__(self): self.data = []
def feed(self, data): self.data.append(data)
class stupid_gzip_wrapper(_response.closeable_response):
def __init__(self, response):
self._response = response
c = stupid_gzip_consumer()
gzc = GzipConsumer(c)
gzc.feed(response.read())
self.__data = StringIO("".join(c.data))
def read(self, size=-1):
return self.__data.read(size)
def readline(self, size=-1):
return self.__data.readline(size)
def readlines(self, sizehint=-1):
return self.__data.readlines(sizehint)
def __getattr__(self, name):
# delegate unknown methods/attributes
return getattr(self._response, name)
class HTTPGzipProcessor(_urllib2_fork.BaseHandler):
handler_order = 200 # response processing before HTTPEquivProcessor
def http_request(self, request):
request.add_header("Accept-Encoding", "gzip")
return request
def http_response(self, request, response):
# post-process response
enc_hdrs = response.info().getheaders("Content-encoding")
for enc_hdr in enc_hdrs:
if ("gzip" in enc_hdr) or ("compress" in enc_hdr):
return stupid_gzip_wrapper(response)
return response
https_response = http_response

View File

@@ -1,241 +0,0 @@
"""Utility functions for HTTP header value parsing and construction.
Copyright 1997-1998, Gisle Aas
Copyright 2002-2006, John J. Lee
This code is free software; you can redistribute it and/or modify it
under the terms of the BSD or ZPL 2.1 licenses (see the file
COPYING.txt included with the distribution).
"""
import os, re
from types import StringType
from types import UnicodeType
STRING_TYPES = StringType, UnicodeType
from _util import http2time
import _rfc3986
def is_html_file_extension(url, allow_xhtml):
ext = os.path.splitext(_rfc3986.urlsplit(url)[2])[1]
html_exts = [".htm", ".html"]
if allow_xhtml:
html_exts += [".xhtml"]
return ext in html_exts
def is_html(ct_headers, url, allow_xhtml=False):
"""
ct_headers: Sequence of Content-Type headers
url: Response URL
"""
if not ct_headers:
return is_html_file_extension(url, allow_xhtml)
headers = split_header_words(ct_headers)
if len(headers) < 1:
return is_html_file_extension(url, allow_xhtml)
first_header = headers[0]
first_parameter = first_header[0]
ct = first_parameter[0]
html_types = ["text/html"]
if allow_xhtml:
html_types += [
"text/xhtml", "text/xml",
"application/xml", "application/xhtml+xml",
]
return ct in html_types
def unmatched(match):
"""Return unmatched part of re.Match object."""
start, end = match.span(0)
return match.string[:start]+match.string[end:]
token_re = re.compile(r"^\s*([^=\s;,]+)")
quoted_value_re = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
value_re = re.compile(r"^\s*=\s*([^\s;,]*)")
escape_re = re.compile(r"\\(.)")
def split_header_words(header_values):
r"""Parse header values into a list of lists containing key,value pairs.
The function knows how to deal with ",", ";" and "=" as well as quoted
values after "=". A list of space separated tokens are parsed as if they
were separated by ";".
If the header_values passed as argument contains multiple values, then they
are treated as if they were a single value separated by comma ",".
This means that this function is useful for parsing header fields that
follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
the requirement for tokens).
headers = #header
header = (token | parameter) *( [";"] (token | parameter))
token = 1*<any CHAR except CTLs or separators>
separators = "(" | ")" | "<" | ">" | "@"
| "," | ";" | ":" | "\" | <">
| "/" | "[" | "]" | "?" | "="
| "{" | "}" | SP | HT
quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
qdtext = <any TEXT except <">>
quoted-pair = "\" CHAR
parameter = attribute "=" value
attribute = token
value = token | quoted-string
Each header is represented by a list of key/value pairs. The value for a
simple token (not part of a parameter) is None. Syntactically incorrect
headers will not necessarily be parsed as you would want.
This is easier to describe with some examples:
>>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
[[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
>>> split_header_words(['text/html; charset="iso-8859-1"'])
[[('text/html', None), ('charset', 'iso-8859-1')]]
>>> split_header_words([r'Basic realm="\"foo\bar\""'])
[[('Basic', None), ('realm', '"foobar"')]]
"""
assert type(header_values) not in STRING_TYPES
result = []
for text in header_values:
orig_text = text
pairs = []
while text:
m = token_re.search(text)
if m:
text = unmatched(m)
name = m.group(1)
m = quoted_value_re.search(text)
if m: # quoted value
text = unmatched(m)
value = m.group(1)
value = escape_re.sub(r"\1", value)
else:
m = value_re.search(text)
if m: # unquoted value
text = unmatched(m)
value = m.group(1)
value = value.rstrip()
else:
# no value, a lone token
value = None
pairs.append((name, value))
elif text.lstrip().startswith(","):
# concatenated headers, as per RFC 2616 section 4.2
text = text.lstrip()[1:]
if pairs: result.append(pairs)
pairs = []
else:
# skip junk
non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text)
assert nr_junk_chars > 0, (
"split_header_words bug: '%s', '%s', %s" %
(orig_text, text, pairs))
text = non_junk
if pairs: result.append(pairs)
return result
join_escape_re = re.compile(r"([\"\\])")
def join_header_words(lists):
"""Do the inverse of the conversion done by split_header_words.
Takes a list of lists of (key, value) pairs and produces a single header
value. Attribute values are quoted if needed.
>>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
'text/plain; charset="iso-8859/1"'
>>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
'text/plain, charset="iso-8859/1"'
"""
headers = []
for pairs in lists:
attr = []
for k, v in pairs:
if v is not None:
if not re.search(r"^\w+$", v):
v = join_escape_re.sub(r"\\\1", v) # escape " and \
v = '"%s"' % v
if k is None: # Netscape cookies may have no name
k = v
else:
k = "%s=%s" % (k, v)
attr.append(k)
if attr: headers.append("; ".join(attr))
return ", ".join(headers)
def strip_quotes(text):
if text.startswith('"'):
text = text[1:]
if text.endswith('"'):
text = text[:-1]
return text
def parse_ns_headers(ns_headers):
"""Ad-hoc parser for Netscape protocol cookie-attributes.
The old Netscape cookie format for Set-Cookie can for instance contain
an unquoted "," in the expires field, so we have to use this ad-hoc
parser instead of split_header_words.
XXX This may not make the best possible effort to parse all the crap
that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient
parser is probably better, so could do worse than following that if
this ever gives any trouble.
Currently, this is also used for parsing RFC 2109 cookies.
"""
known_attrs = ("expires", "domain", "path", "secure",
# RFC 2109 attrs (may turn up in Netscape cookies, too)
"version", "port", "max-age")
result = []
for ns_header in ns_headers:
pairs = []
version_set = False
params = re.split(r";\s*", ns_header)
for ii in range(len(params)):
param = params[ii]
param = param.rstrip()
if param == "": continue
if "=" not in param:
k, v = param, None
else:
k, v = re.split(r"\s*=\s*", param, 1)
k = k.lstrip()
if ii != 0:
lc = k.lower()
if lc in known_attrs:
k = lc
if k == "version":
# This is an RFC 2109 cookie.
v = strip_quotes(v)
version_set = True
if k == "expires":
# convert expires date to seconds since epoch
v = http2time(strip_quotes(v)) # None if invalid
pairs.append((k, v))
if pairs:
if not version_set:
pairs.append(("version", "0"))
result.append(pairs)
return result
def _test():
import doctest, _headersutil
return doctest.testmod(_headersutil)
if __name__ == "__main__":
_test()

View File

@@ -1,629 +0,0 @@
"""HTML handling.
Copyright 2003-2006 John J. Lee <jjl@pobox.com>
This code is free software; you can redistribute it and/or modify it under
the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
included with the distribution).
"""
import codecs
import copy
import htmlentitydefs
import re
import _sgmllib_copy as sgmllib
import _beautifulsoup
import _form
from _headersutil import split_header_words, is_html as _is_html
import _request
import _rfc3986
DEFAULT_ENCODING = "latin-1"
COMPRESS_RE = re.compile(r"\s+")
class CachingGeneratorFunction(object):
"""Caching wrapper around a no-arguments iterable."""
def __init__(self, iterable):
self._cache = []
# wrap iterable to make it non-restartable (otherwise, repeated
# __call__ would give incorrect results)
self._iterator = iter(iterable)
def __call__(self):
cache = self._cache
for item in cache:
yield item
for item in self._iterator:
cache.append(item)
yield item
class EncodingFinder:
def __init__(self, default_encoding):
self._default_encoding = default_encoding
def encoding(self, response):
# HTTPEquivProcessor may be in use, so both HTTP and HTTP-EQUIV
# headers may be in the response. HTTP-EQUIV headers come last,
# so try in order from first to last.
for ct in response.info().getheaders("content-type"):
for k, v in split_header_words([ct])[0]:
if k == "charset":
encoding = v
try:
codecs.lookup(v)
except LookupError:
continue
else:
return encoding
return self._default_encoding
class ResponseTypeFinder:
def __init__(self, allow_xhtml):
self._allow_xhtml = allow_xhtml
def is_html(self, response, encoding):
ct_hdrs = response.info().getheaders("content-type")
url = response.geturl()
# XXX encoding
return _is_html(ct_hdrs, url, self._allow_xhtml)
class Args(object):
# idea for this argument-processing trick is from Peter Otten
def __init__(self, args_map):
self.__dict__["dictionary"] = dict(args_map)
def __getattr__(self, key):
try:
return self.dictionary[key]
except KeyError:
return getattr(self.__class__, key)
def __setattr__(self, key, value):
if key == "dictionary":
raise AttributeError()
self.dictionary[key] = value
def form_parser_args(
select_default=False,
form_parser_class=None,
request_class=None,
backwards_compat=False,
):
return Args(locals())
class Link:
def __init__(self, base_url, url, text, tag, attrs):
assert None not in [url, tag, attrs]
self.base_url = base_url
self.absolute_url = _rfc3986.urljoin(base_url, url)
self.url, self.text, self.tag, self.attrs = url, text, tag, attrs
def __cmp__(self, other):
try:
for name in "url", "text", "tag", "attrs":
if getattr(self, name) != getattr(other, name):
return -1
except AttributeError:
return -1
return 0
def __repr__(self):
return "Link(base_url=%r, url=%r, text=%r, tag=%r, attrs=%r)" % (
self.base_url, self.url, self.text, self.tag, self.attrs)
class LinksFactory:
def __init__(self,
link_parser_class=None,
link_class=Link,
urltags=None,
):
import _pullparser
if link_parser_class is None:
link_parser_class = _pullparser.TolerantPullParser
self.link_parser_class = link_parser_class
self.link_class = link_class
if urltags is None:
urltags = {
"a": "href",
"area": "href",
"frame": "src",
"iframe": "src",
}
self.urltags = urltags
self._response = None
self._encoding = None
def set_response(self, response, base_url, encoding):
self._response = response
self._encoding = encoding
self._base_url = base_url
def links(self):
"""Return an iterator that provides links of the document."""
response = self._response
encoding = self._encoding
base_url = self._base_url
p = self.link_parser_class(response, encoding=encoding)
try:
for token in p.tags(*(self.urltags.keys()+["base"])):
if token.type == "endtag":
continue
if token.data == "base":
base_href = dict(token.attrs).get("href")
if base_href is not None:
base_url = base_href
continue
attrs = dict(token.attrs)
tag = token.data
text = None
# XXX use attr_encoding for ref'd doc if that doc does not
# provide one by other means
#attr_encoding = attrs.get("charset")
url = attrs.get(self.urltags[tag]) # XXX is "" a valid URL?
if not url:
# Probably an <A NAME="blah"> link or <AREA NOHREF...>.
# For our purposes a link is something with a URL, so
# ignore this.
continue
url = _rfc3986.clean_url(url, encoding)
if tag == "a":
if token.type != "startendtag":
# hmm, this'd break if end tag is missing
text = p.get_compressed_text(("endtag", tag))
# but this doesn't work for e.g.
# <a href="blah"><b>Andy</b></a>
#text = p.get_compressed_text()
yield Link(base_url, url, text, tag, token.attrs)
except sgmllib.SGMLParseError, exc:
raise _form.ParseError(exc)
class FormsFactory:
"""Makes a sequence of objects satisfying HTMLForm interface.
After calling .forms(), the .global_form attribute is a form object
containing all controls not a descendant of any FORM element.
For constructor argument docs, see ParseResponse argument docs.
"""
def __init__(self,
select_default=False,
form_parser_class=None,
request_class=None,
backwards_compat=False,
):
self.select_default = select_default
if form_parser_class is None:
form_parser_class = _form.FormParser
self.form_parser_class = form_parser_class
if request_class is None:
request_class = _request.Request
self.request_class = request_class
self.backwards_compat = backwards_compat
self._response = None
self.encoding = None
self.global_form = None
def set_response(self, response, encoding):
self._response = response
self.encoding = encoding
self.global_form = None
def forms(self):
encoding = self.encoding
forms = _form.ParseResponseEx(
self._response,
select_default=self.select_default,
form_parser_class=self.form_parser_class,
request_class=self.request_class,
encoding=encoding,
_urljoin=_rfc3986.urljoin,
_urlparse=_rfc3986.urlsplit,
_urlunparse=_rfc3986.urlunsplit,
)
self.global_form = forms[0]
return forms[1:]
class TitleFactory:
def __init__(self):
self._response = self._encoding = None
def set_response(self, response, encoding):
self._response = response
self._encoding = encoding
def _get_title_text(self, parser):
import _pullparser
text = []
tok = None
while 1:
try:
tok = parser.get_token()
except _pullparser.NoMoreTokensError:
break
if tok.type == "data":
text.append(str(tok))
elif tok.type == "entityref":
t = unescape("&%s;" % tok.data,
parser._entitydefs, parser.encoding)
text.append(t)
elif tok.type == "charref":
t = unescape_charref(tok.data, parser.encoding)
text.append(t)
elif tok.type in ["starttag", "endtag", "startendtag"]:
tag_name = tok.data
if tok.type == "endtag" and tag_name == "title":
break
text.append(str(tok))
return COMPRESS_RE.sub(" ", "".join(text).strip())
def title(self):
import _pullparser
p = _pullparser.TolerantPullParser(
self._response, encoding=self._encoding)
try:
try:
p.get_tag("title")
except _pullparser.NoMoreTokensError:
return None
else:
return self._get_title_text(p)
except sgmllib.SGMLParseError, exc:
raise _form.ParseError(exc)
def unescape(data, entities, encoding):
if data is None or "&" not in data:
return data
def replace_entities(match):
ent = match.group()
if ent[1] == "#":
return unescape_charref(ent[2:-1], encoding)
repl = entities.get(ent[1:-1])
if repl is not None:
repl = unichr(repl)
if type(repl) != type(""):
try:
repl = repl.encode(encoding)
except UnicodeError:
repl = ent
else:
repl = ent
return repl
return re.sub(r"&#?[A-Za-z0-9]+?;", replace_entities, data)
def unescape_charref(data, encoding):
name, base = data, 10
if name.startswith("x"):
name, base = name[1:], 16
uc = unichr(int(name, base))
if encoding is None:
return uc
else:
try:
repl = uc.encode(encoding)
except UnicodeError:
repl = "&#%s;" % data
return repl
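Two hypothetical calls showing the fallback behaviour of these helpers (the entity table is the same name2codepoint mapping used by MechanizeBs below); this sketch is not part of the deleted file:
    # Illustrative sketch only -- not part of the original module.
    import htmlentitydefs
    print unescape("Tom &amp; Jerry", htmlentitydefs.name2codepoint, "latin-1")
    # -> 'Tom & Jerry'
    print unescape_charref("x20AC", "latin-1")
    # -> '&#x20AC;' (the euro sign cannot be encoded in latin-1, so the
    #    literal character reference is returned instead)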
class MechanizeBs(_beautifulsoup.BeautifulSoup):
_entitydefs = htmlentitydefs.name2codepoint
# don't want the magic Microsoft-char workaround
PARSER_MASSAGE = [(re.compile('(<[^<>]*)/>'),
lambda(x):x.group(1) + ' />'),
(re.compile('<!\s+([^<>]*)>'),
lambda(x):'<!' + x.group(1) + '>')
]
def __init__(self, encoding, text=None, avoidParserProblems=True,
initialTextIsEverything=True):
self._encoding = encoding
_beautifulsoup.BeautifulSoup.__init__(
self, text, avoidParserProblems, initialTextIsEverything)
def handle_charref(self, ref):
t = unescape("&#%s;"%ref, self._entitydefs, self._encoding)
self.handle_data(t)
def handle_entityref(self, ref):
t = unescape("&%s;"%ref, self._entitydefs, self._encoding)
self.handle_data(t)
def unescape_attrs(self, attrs):
escaped_attrs = []
for key, val in attrs:
val = unescape(val, self._entitydefs, self._encoding)
escaped_attrs.append((key, val))
return escaped_attrs
class RobustLinksFactory:
compress_re = COMPRESS_RE
def __init__(self,
link_parser_class=None,
link_class=Link,
urltags=None,
):
if link_parser_class is None:
link_parser_class = MechanizeBs
self.link_parser_class = link_parser_class
self.link_class = link_class
if urltags is None:
urltags = {
"a": "href",
"area": "href",
"frame": "src",
"iframe": "src",
}
self.urltags = urltags
self._bs = None
self._encoding = None
self._base_url = None
def set_soup(self, soup, base_url, encoding):
self._bs = soup
self._base_url = base_url
self._encoding = encoding
def links(self):
bs = self._bs
base_url = self._base_url
encoding = self._encoding
for ch in bs.recursiveChildGenerator():
if (isinstance(ch, _beautifulsoup.Tag) and
ch.name in self.urltags.keys()+["base"]):
link = ch
attrs = bs.unescape_attrs(link.attrs)
attrs_dict = dict(attrs)
if link.name == "base":
base_href = attrs_dict.get("href")
if base_href is not None:
base_url = base_href
continue
url_attr = self.urltags[link.name]
url = attrs_dict.get(url_attr)
if not url:
continue
url = _rfc3986.clean_url(url, encoding)
text = link.fetchText(lambda t: True)
if not text:
# follow _pullparser's weird behaviour rigidly
if link.name == "a":
text = ""
else:
text = None
else:
text = self.compress_re.sub(" ", " ".join(text).strip())
yield Link(base_url, url, text, link.name, attrs)
class RobustFormsFactory(FormsFactory):
def __init__(self, *args, **kwds):
args = form_parser_args(*args, **kwds)
if args.form_parser_class is None:
args.form_parser_class = _form.RobustFormParser
FormsFactory.__init__(self, **args.dictionary)
def set_response(self, response, encoding):
self._response = response
self.encoding = encoding
class RobustTitleFactory:
def __init__(self):
self._bs = self._encoding = None
def set_soup(self, soup, encoding):
self._bs = soup
self._encoding = encoding
def title(self):
title = self._bs.first("title")
if title == _beautifulsoup.Null:
return None
else:
inner_html = "".join([str(node) for node in title.contents])
return COMPRESS_RE.sub(" ", inner_html.strip())
class Factory:
"""Factory for forms, links, etc.
This interface may expand in future.
Public methods:
set_request_class(request_class)
set_response(response)
forms()
links()
Public attributes:
Note that accessing these attributes may raise ParseError.
encoding: string specifying the encoding of response if it contains a text
document (this value is left unspecified for documents that do not have
an encoding, e.g. an image file)
is_html: true if response contains an HTML document (XHTML may be
regarded as HTML too)
title: page title, or None if no title or not HTML
global_form: form object containing all controls that are not descendants
of any FORM element, or None if the forms_factory does not support
supplying a global form
"""
LAZY_ATTRS = ["encoding", "is_html", "title", "global_form"]
def __init__(self, forms_factory, links_factory, title_factory,
encoding_finder=EncodingFinder(DEFAULT_ENCODING),
response_type_finder=ResponseTypeFinder(allow_xhtml=False),
):
"""
Pass keyword arguments only.
default_encoding: character encoding to use if encoding cannot be
determined (or guessed) from the response. You should turn on
HTTP-EQUIV handling if you want the best chance of getting this right
without resorting to this default. The default value of this
parameter (currently latin-1) may change in future.
"""
self._forms_factory = forms_factory
self._links_factory = links_factory
self._title_factory = title_factory
self._encoding_finder = encoding_finder
self._response_type_finder = response_type_finder
self.set_response(None)
def set_request_class(self, request_class):
"""Set request class (mechanize.Request by default).
HTMLForm instances returned by .forms() will return instances of this
class when .click()ed.
"""
self._forms_factory.request_class = request_class
def set_response(self, response):
"""Set response.
The response must either be None or implement the same interface as
objects returned by mechanize.urlopen().
"""
self._response = response
self._forms_genf = self._links_genf = None
self._get_title = None
for name in self.LAZY_ATTRS:
try:
delattr(self, name)
except AttributeError:
pass
def __getattr__(self, name):
if name not in self.LAZY_ATTRS:
return getattr(self.__class__, name)
if name == "encoding":
self.encoding = self._encoding_finder.encoding(
copy.copy(self._response))
return self.encoding
elif name == "is_html":
self.is_html = self._response_type_finder.is_html(
copy.copy(self._response), self.encoding)
return self.is_html
elif name == "title":
if self.is_html:
self.title = self._title_factory.title()
else:
self.title = None
return self.title
elif name == "global_form":
self.forms()
return self.global_form
def forms(self):
"""Return iterable over HTMLForm-like objects.
Raises mechanize.ParseError on failure.
"""
# this implementation sets .global_form as a side-effect, for benefit
# of __getattr__ impl
if self._forms_genf is None:
try:
self._forms_genf = CachingGeneratorFunction(
self._forms_factory.forms())
except: # XXXX define exception!
self.set_response(self._response)
raise
self.global_form = getattr(
self._forms_factory, "global_form", None)
return self._forms_genf()
def links(self):
"""Return iterable over mechanize.Link-like objects.
Raises mechanize.ParseError on failure.
"""
if self._links_genf is None:
try:
self._links_genf = CachingGeneratorFunction(
self._links_factory.links())
except: # XXXX define exception!
self.set_response(self._response)
raise
return self._links_genf()
class DefaultFactory(Factory):
"""Based on sgmllib."""
def __init__(self, i_want_broken_xhtml_support=False):
Factory.__init__(
self,
forms_factory=FormsFactory(),
links_factory=LinksFactory(),
title_factory=TitleFactory(),
response_type_finder=ResponseTypeFinder(
allow_xhtml=i_want_broken_xhtml_support),
)
def set_response(self, response):
Factory.set_response(self, response)
if response is not None:
self._forms_factory.set_response(
copy.copy(response), self.encoding)
self._links_factory.set_response(
copy.copy(response), response.geturl(), self.encoding)
self._title_factory.set_response(
copy.copy(response), self.encoding)
class RobustFactory(Factory):
"""Based on BeautifulSoup, hopefully a bit more robust to bad HTML than is
DefaultFactory.
"""
def __init__(self, i_want_broken_xhtml_support=False,
soup_class=None):
Factory.__init__(
self,
forms_factory=RobustFormsFactory(),
links_factory=RobustLinksFactory(),
title_factory=RobustTitleFactory(),
response_type_finder=ResponseTypeFinder(
allow_xhtml=i_want_broken_xhtml_support),
)
if soup_class is None:
soup_class = MechanizeBs
self._soup_class = soup_class
def set_response(self, response):
Factory.set_response(self, response)
if response is not None:
data = response.read()
soup = self._soup_class(self.encoding, data)
self._forms_factory.set_response(
copy.copy(response), self.encoding)
self._links_factory.set_soup(
soup, response.geturl(), self.encoding)
self._title_factory.set_soup(soup, self.encoding)
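A hedged sketch of using DefaultFactory directly (mechanize.Browser normally drives it internally); response is again a hypothetical object implementing the mechanize response interface, and is not part of the deleted file:
    # Illustrative sketch only -- not part of the original module.
    factory = DefaultFactory()
    factory.set_response(response)
    if factory.is_html:
        print factory.title
        for form in factory.forms():
            print form.name
        for link in factory.links():
            print link.absolute_url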

View File

@@ -1,447 +0,0 @@
"""HTTP related handlers.
Note that some other HTTP handlers live in more specific modules: _auth.py,
_gzip.py, etc.
Copyright 2002-2006 John J Lee <jjl@pobox.com>
This code is free software; you can redistribute it and/or modify it
under the terms of the BSD or ZPL 2.1 licenses (see the file
COPYING.txt included with the distribution).
"""
import HTMLParser
from cStringIO import StringIO
import htmlentitydefs
import logging
import robotparser
import socket
import time
import _sgmllib_copy as sgmllib
from _urllib2_fork import HTTPError, BaseHandler
from _headersutil import is_html
from _html import unescape, unescape_charref
from _request import Request
from _response import response_seek_wrapper
import _rfc3986
import _sockettimeout
debug = logging.getLogger("mechanize").debug
debug_robots = logging.getLogger("mechanize.robots").debug
# monkeypatch urllib2.HTTPError to show URL
## import urllib2
## def urllib2_str(self):
## return 'HTTP Error %s: %s (%s)' % (
## self.code, self.msg, self.geturl())
## urllib2.HTTPError.__str__ = urllib2_str
CHUNK = 1024 # size of chunks fed to HTML HEAD parser, in bytes
DEFAULT_ENCODING = 'latin-1'
# XXX would self.reset() work, instead of raising this exception?
class EndOfHeadError(Exception): pass
class AbstractHeadParser:
# only these elements are allowed in or before HEAD of document
head_elems = ("html", "head",
"title", "base",
"script", "style", "meta", "link", "object")
_entitydefs = htmlentitydefs.name2codepoint
_encoding = DEFAULT_ENCODING
def __init__(self):
self.http_equiv = []
def start_meta(self, attrs):
http_equiv = content = None
for key, value in attrs:
if key == "http-equiv":
http_equiv = self.unescape_attr_if_required(value)
elif key == "content":
content = self.unescape_attr_if_required(value)
if http_equiv is not None and content is not None:
self.http_equiv.append((http_equiv, content))
def end_head(self):
raise EndOfHeadError()
def handle_entityref(self, name):
#debug("%s", name)
self.handle_data(unescape(
'&%s;' % name, self._entitydefs, self._encoding))
def handle_charref(self, name):
#debug("%s", name)
self.handle_data(unescape_charref(name, self._encoding))
def unescape_attr(self, name):
#debug("%s", name)
return unescape(name, self._entitydefs, self._encoding)
def unescape_attrs(self, attrs):
#debug("%s", attrs)
escaped_attrs = {}
for key, val in attrs.items():
escaped_attrs[key] = self.unescape_attr(val)
return escaped_attrs
def unknown_entityref(self, ref):
self.handle_data("&%s;" % ref)
def unknown_charref(self, ref):
self.handle_data("&#%s;" % ref)
class XHTMLCompatibleHeadParser(AbstractHeadParser,
HTMLParser.HTMLParser):
def __init__(self):
HTMLParser.HTMLParser.__init__(self)
AbstractHeadParser.__init__(self)
def handle_starttag(self, tag, attrs):
if tag not in self.head_elems:
raise EndOfHeadError()
try:
method = getattr(self, 'start_' + tag)
except AttributeError:
try:
method = getattr(self, 'do_' + tag)
except AttributeError:
pass # unknown tag
else:
method(attrs)
else:
method(attrs)
def handle_endtag(self, tag):
if tag not in self.head_elems:
raise EndOfHeadError()
try:
method = getattr(self, 'end_' + tag)
except AttributeError:
pass # unknown tag
else:
method()
def unescape(self, name):
# Use the entitydefs passed into constructor, not
# HTMLParser.HTMLParser's entitydefs.
return self.unescape_attr(name)
def unescape_attr_if_required(self, name):
return name # HTMLParser.HTMLParser already did it
class HeadParser(AbstractHeadParser, sgmllib.SGMLParser):
def _not_called(self):
assert False
def __init__(self):
sgmllib.SGMLParser.__init__(self)
AbstractHeadParser.__init__(self)
def handle_starttag(self, tag, method, attrs):
if tag not in self.head_elems:
raise EndOfHeadError()
if tag == "meta":
method(attrs)
def unknown_starttag(self, tag, attrs):
self.handle_starttag(tag, self._not_called, attrs)
def handle_endtag(self, tag, method):
if tag in self.head_elems:
method()
else:
raise EndOfHeadError()
def unescape_attr_if_required(self, name):
return self.unescape_attr(name)
def parse_head(fileobj, parser):
"""Return a list of key, value pairs."""
while 1:
data = fileobj.read(CHUNK)
try:
parser.feed(data)
except EndOfHeadError:
break
if len(data) != CHUNK:
# this should only happen if there is no HTML body, or if
# CHUNK is big
break
return parser.http_equiv
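As a rough illustration (not part of the deleted file), feeding an in-memory document to parse_head with the sgmllib-based HeadParser defined below extracts the http-equiv pairs; the HTML snippet is invented:
    # Illustrative sketch only -- not part of the original module.
    from cStringIO import StringIO
    html = ('<html><head><meta http-equiv="Content-Type" '
            'content="text/html; charset=utf-8"></head><body></body></html>')
    print parse_head(StringIO(html), HeadParser())
    # -> [('Content-Type', 'text/html; charset=utf-8')]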
class HTTPEquivProcessor(BaseHandler):
"""Append META HTTP-EQUIV headers to regular HTTP headers."""
handler_order = 300 # before handlers that look at HTTP headers
def __init__(self, head_parser_class=HeadParser,
i_want_broken_xhtml_support=False,
):
self.head_parser_class = head_parser_class
self._allow_xhtml = i_want_broken_xhtml_support
def http_response(self, request, response):
if not hasattr(response, "seek"):
response = response_seek_wrapper(response)
http_message = response.info()
url = response.geturl()
ct_hdrs = http_message.getheaders("content-type")
if is_html(ct_hdrs, url, self._allow_xhtml):
try:
try:
html_headers = parse_head(response,
self.head_parser_class())
finally:
response.seek(0)
except (HTMLParser.HTMLParseError,
sgmllib.SGMLParseError):
pass
else:
for hdr, val in html_headers:
# add a header
http_message.dict[hdr.lower()] = val
text = hdr + ": " + val
for line in text.split("\n"):
http_message.headers.append(line + "\n")
return response
https_response = http_response
class MechanizeRobotFileParser(robotparser.RobotFileParser):
def __init__(self, url='', opener=None):
robotparser.RobotFileParser.__init__(self, url)
self._opener = opener
self._timeout = _sockettimeout._GLOBAL_DEFAULT_TIMEOUT
def set_opener(self, opener=None):
import _opener
if opener is None:
opener = _opener.OpenerDirector()
self._opener = opener
def set_timeout(self, timeout):
self._timeout = timeout
def read(self):
"""Reads the robots.txt URL and feeds it to the parser."""
if self._opener is None:
self.set_opener()
req = Request(self.url, unverifiable=True, visit=False,
timeout=self._timeout)
try:
f = self._opener.open(req)
except HTTPError, f:
pass
except (IOError, socket.error, OSError), exc:
debug_robots("ignoring error opening %r: %s" %
(self.url, exc))
return
lines = []
line = f.readline()
while line:
lines.append(line.strip())
line = f.readline()
status = f.code
if status == 401 or status == 403:
self.disallow_all = True
debug_robots("disallow all")
elif status >= 400:
self.allow_all = True
debug_robots("allow all")
elif status == 200 and lines:
debug_robots("parse lines")
self.parse(lines)
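A hypothetical stand-alone use of the parser (when no opener is supplied, read() falls back to a plain OpenerDirector); can_fetch() is inherited from robotparser.RobotFileParser. Not part of the deleted file:
    # Illustrative sketch only -- not part of the original module.
    rfp = MechanizeRobotFileParser("http://example.com/robots.txt")
    rfp.read()
    print rfp.can_fetch("my-agent/1.0", "http://example.com/private/page")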
class RobotExclusionError(HTTPError):
def __init__(self, request, *args):
apply(HTTPError.__init__, (self,)+args)
self.request = request
class HTTPRobotRulesProcessor(BaseHandler):
# before redirections, after everything else
handler_order = 800
try:
from httplib import HTTPMessage
except:
from mimetools import Message
http_response_class = Message
else:
http_response_class = HTTPMessage
def __init__(self, rfp_class=MechanizeRobotFileParser):
self.rfp_class = rfp_class
self.rfp = None
self._host = None
def http_request(self, request):
scheme = request.get_type()
if scheme not in ["http", "https"]:
# robots exclusion only applies to HTTP
return request
if request.get_selector() == "/robots.txt":
# /robots.txt is always OK to fetch
return request
host = request.get_host()
# robots.txt requests don't need to be allowed by robots.txt :-)
origin_req = getattr(request, "_origin_req", None)
if (origin_req is not None and
origin_req.get_selector() == "/robots.txt" and
origin_req.get_host() == host
):
return request
if host != self._host:
self.rfp = self.rfp_class()
try:
self.rfp.set_opener(self.parent)
except AttributeError:
debug("%r instance does not support set_opener" %
self.rfp.__class__)
self.rfp.set_url(scheme+"://"+host+"/robots.txt")
self.rfp.set_timeout(request.timeout)
self.rfp.read()
self._host = host
ua = request.get_header("User-agent", "")
if self.rfp.can_fetch(ua, request.get_full_url()):
return request
else:
# XXX This should really have raised URLError. Too late now...
msg = "request disallowed by robots.txt"
raise RobotExclusionError(
request,
request.get_full_url(),
403, msg,
self.http_response_class(StringIO()), StringIO(msg))
https_request = http_request
class HTTPRefererProcessor(BaseHandler):
"""Add Referer header to requests.
This only makes sense if you use each RefererProcessor for a single
chain of requests only (so, for example, if you use a single
HTTPRefererProcessor to fetch a series of URLs extracted from a single
page, this will break).
There's a proper implementation of this in mechanize.Browser.
"""
def __init__(self):
self.referer = None
def http_request(self, request):
if ((self.referer is not None) and
not request.has_header("Referer")):
request.add_unredirected_header("Referer", self.referer)
return request
def http_response(self, request, response):
self.referer = response.geturl()
return response
https_request = http_request
https_response = http_response
def clean_refresh_url(url):
# e.g. Firefox 1.5 does (something like) this
if ((url.startswith('"') and url.endswith('"')) or
(url.startswith("'") and url.endswith("'"))):
url = url[1:-1]
return _rfc3986.clean_url(url, "latin-1") # XXX encoding
def parse_refresh_header(refresh):
"""
>>> parse_refresh_header("1; url=http://example.com/")
(1.0, 'http://example.com/')
>>> parse_refresh_header("1; url='http://example.com/'")
(1.0, 'http://example.com/')
>>> parse_refresh_header("1")
(1.0, None)
>>> parse_refresh_header("blah") # doctest: +IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
ValueError: invalid literal for float(): blah
"""
ii = refresh.find(";")
if ii != -1:
pause, newurl_spec = float(refresh[:ii]), refresh[ii+1:]
jj = newurl_spec.find("=")
key = None
if jj != -1:
key, newurl = newurl_spec[:jj], newurl_spec[jj+1:]
newurl = clean_refresh_url(newurl)
if key is None or key.strip().lower() != "url":
raise ValueError()
else:
pause, newurl = float(refresh), None
return pause, newurl
class HTTPRefreshProcessor(BaseHandler):
"""Perform HTTP Refresh redirections.
Note that if a non-200 HTTP code has occurred (for example, a 30x
redirect), this processor will do nothing.
By default, only zero-time Refresh headers are redirected. Use the
max_time attribute / constructor argument to allow Refresh with longer
pauses. Use the honor_time attribute / constructor argument to control
whether the requested pause is honoured (with a time.sleep()) or
skipped in favour of immediate redirection.
Public attributes:
max_time: see above
honor_time: see above
"""
handler_order = 1000
def __init__(self, max_time=0, honor_time=True):
self.max_time = max_time
self.honor_time = honor_time
self._sleep = time.sleep
def http_response(self, request, response):
code, msg, hdrs = response.code, response.msg, response.info()
if code == 200 and hdrs.has_key("refresh"):
refresh = hdrs.getheaders("refresh")[0]
try:
pause, newurl = parse_refresh_header(refresh)
except ValueError:
debug("bad Refresh header: %r" % refresh)
return response
if newurl is None:
newurl = response.geturl()
if (self.max_time is None) or (pause <= self.max_time):
if pause > 1E-3 and self.honor_time:
self._sleep(pause)
hdrs["location"] = newurl
# hardcoded http is NOT a bug
response = self.parent.error(
"http", request, response,
"refresh", msg, hdrs)
else:
debug("Refresh header ignored: %r" % refresh)
return response
https_response = http_response
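A sketch of installing the processor in an opener; mechanize.build_opener is assumed to accept handler instances the same way urllib2.build_opener does, and the URL is invented. Not part of the deleted file:
    # Illustrative sketch only -- not part of the original module.
    import mechanize
    opener = mechanize.build_opener(
        HTTPRefreshProcessor(max_time=5, honor_time=False))
    response = opener.open("http://example.com/page-with-refresh-header")
With honor_time=False, a Refresh within max_time redirects immediately instead of sleeping for the requested pause.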

View File

@@ -1,185 +0,0 @@
"""Load / save to libwww-perl (LWP) format files.
Actually, the format is slightly extended from that used by LWP's
(libwww-perl's) HTTP::Cookies, to avoid losing some RFC 2965 information
not recorded by LWP.
It uses the version string "2.0", though really there isn't an LWP Cookies
2.0 format. This indicates that there is extra information in here
(domain_dot and port_spec) while still being compatible with libwww-perl,
I hope.
Copyright 2002-2006 John J Lee <jjl@pobox.com>
Copyright 1997-1999 Gisle Aas (original libwww-perl code)
This code is free software; you can redistribute it and/or modify it
under the terms of the BSD or ZPL 2.1 licenses (see the file
COPYING.txt included with the distribution).
"""
import time, re, logging
from _clientcookie import reraise_unmasked_exceptions, FileCookieJar, Cookie, \
MISSING_FILENAME_TEXT, LoadError
from _headersutil import join_header_words, split_header_words
from _util import iso2time, time2isoz
debug = logging.getLogger("mechanize").debug
def lwp_cookie_str(cookie):
"""Return string representation of Cookie in an the LWP cookie file format.
Actually, the format is extended a bit -- see module docstring.
"""
h = [(cookie.name, cookie.value),
("path", cookie.path),
("domain", cookie.domain)]
if cookie.port is not None: h.append(("port", cookie.port))
if cookie.path_specified: h.append(("path_spec", None))
if cookie.port_specified: h.append(("port_spec", None))
if cookie.domain_initial_dot: h.append(("domain_dot", None))
if cookie.secure: h.append(("secure", None))
if cookie.expires: h.append(("expires",
time2isoz(float(cookie.expires))))
if cookie.discard: h.append(("discard", None))
if cookie.comment: h.append(("comment", cookie.comment))
if cookie.comment_url: h.append(("commenturl", cookie.comment_url))
if cookie.rfc2109: h.append(("rfc2109", None))
keys = cookie.nonstandard_attr_keys()
keys.sort()
for k in keys:
h.append((k, str(cookie.get_nonstandard_attr(k))))
h.append(("version", str(cookie.version)))
return join_header_words([h])
class LWPCookieJar(FileCookieJar):
"""
The LWPCookieJar saves a sequence of "Set-Cookie3" lines.
"Set-Cookie3" is the format used by the libwww-perl library, not known
to be compatible with any browser, but which is easy to read and
doesn't lose information about RFC 2965 cookies.
Additional methods
as_lwp_str(ignore_discard=True, ignore_expires=True)
"""
magic_re = r"^\#LWP-Cookies-(\d+\.\d+)"
def as_lwp_str(self, ignore_discard=True, ignore_expires=True):
"""Return cookies as a string of "\n"-separated "Set-Cookie3" headers.
ignore_discard and ignore_expires: see docstring for FileCookieJar.save
"""
now = time.time()
r = []
for cookie in self:
if not ignore_discard and cookie.discard:
debug(" Not saving %s: marked for discard", cookie.name)
continue
if not ignore_expires and cookie.is_expired(now):
debug(" Not saving %s: expired", cookie.name)
continue
r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie))
return "\n".join(r+[""])
def save(self, filename=None, ignore_discard=False, ignore_expires=False):
if filename is None:
if self.filename is not None: filename = self.filename
else: raise ValueError(MISSING_FILENAME_TEXT)
f = open(filename, "w")
try:
debug("Saving LWP cookies file")
# There really isn't an LWP Cookies 2.0 format, but this indicates
# that there is extra information in here (domain_dot and
# port_spec) while still being compatible with libwww-perl, I hope.
f.write("#LWP-Cookies-2.0\n")
f.write(self.as_lwp_str(ignore_discard, ignore_expires))
finally:
f.close()
def _really_load(self, f, filename, ignore_discard, ignore_expires):
magic = f.readline()
if not re.search(self.magic_re, magic):
msg = "%s does not seem to contain cookies" % filename
raise LoadError(msg)
now = time.time()
header = "Set-Cookie3:"
boolean_attrs = ("port_spec", "path_spec", "domain_dot",
"secure", "discard", "rfc2109")
value_attrs = ("version",
"port", "path", "domain",
"expires",
"comment", "commenturl")
try:
while 1:
line = f.readline()
if line == "": break
if not line.startswith(header):
continue
line = line[len(header):].strip()
for data in split_header_words([line]):
name, value = data[0]
standard = {}
rest = {}
for k in boolean_attrs:
standard[k] = False
for k, v in data[1:]:
if k is not None:
lc = k.lower()
else:
lc = None
# don't lose case distinction for unknown fields
if (lc in value_attrs) or (lc in boolean_attrs):
k = lc
if k in boolean_attrs:
if v is None: v = True
standard[k] = v
elif k in value_attrs:
standard[k] = v
else:
rest[k] = v
h = standard.get
expires = h("expires")
discard = h("discard")
if expires is not None:
expires = iso2time(expires)
if expires is None:
discard = True
domain = h("domain")
domain_specified = domain.startswith(".")
c = Cookie(h("version"), name, value,
h("port"), h("port_spec"),
domain, domain_specified, h("domain_dot"),
h("path"), h("path_spec"),
h("secure"),
expires,
discard,
h("comment"),
h("commenturl"),
rest,
h("rfc2109"),
)
if not ignore_discard and c.discard:
continue
if not ignore_expires and c.is_expired(now):
continue
self.set_cookie(c)
except:
reraise_unmasked_exceptions((IOError,))
raise LoadError("invalid Set-Cookie3 format file %s" % filename)

View File

@@ -1,393 +0,0 @@
# Taken from Python 2.6.4 for use by _sgmllib.py
"""Shared support for scanning document type declarations in HTML and XHTML.
This module is used as a foundation for the HTMLParser and sgmllib
modules (indirectly, for htmllib as well). It has no documented
public API and should not be used directly.
"""
import re
_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*').match
_declstringlit_match = re.compile(r'(\'[^\']*\'|"[^"]*")\s*').match
_commentclose = re.compile(r'--\s*>')
_markedsectionclose = re.compile(r']\s*]\s*>')
# An analysis of the MS-Word extensions is available at
# http://www.planetpublish.com/xmlarena/xap/Thursday/WordtoXML.pdf
_msmarkedsectionclose = re.compile(r']\s*>')
del re
class ParserBase:
"""Parser base class which provides some common support methods used
by the SGML/HTML and XHTML parsers."""
def __init__(self):
if self.__class__ is ParserBase:
raise RuntimeError(
"markupbase.ParserBase must be subclassed")
def error(self, message):
raise NotImplementedError(
"subclasses of ParserBase must override error()")
def reset(self):
self.lineno = 1
self.offset = 0
def getpos(self):
"""Return current line number and offset."""
return self.lineno, self.offset
# Internal -- update line number and offset. This should be
# called for each piece of data exactly once, in order -- in other
# words the concatenation of all the input strings to this
# function should be exactly the entire input.
def updatepos(self, i, j):
if i >= j:
return j
rawdata = self.rawdata
nlines = rawdata.count("\n", i, j)
if nlines:
self.lineno = self.lineno + nlines
pos = rawdata.rindex("\n", i, j) # Should not fail
self.offset = j-(pos+1)
else:
self.offset = self.offset + j-i
return j
_decl_otherchars = ''
# Internal -- parse declaration (for use by subclasses).
def parse_declaration(self, i):
# This is some sort of declaration; in "HTML as
# deployed," this should only be the document type
# declaration ("<!DOCTYPE html...>").
# ISO 8879:1986, however, has more complex
# declaration syntax for elements in <!...>, including:
# --comment--
# [marked section]
# name in the following list: ENTITY, DOCTYPE, ELEMENT,
# ATTLIST, NOTATION, SHORTREF, USEMAP,
# LINKTYPE, LINK, IDLINK, USELINK, SYSTEM
rawdata = self.rawdata
j = i + 2
assert rawdata[i:j] == "<!", "unexpected call to parse_declaration"
if rawdata[j:j+1] == ">":
# the empty comment <!>
return j + 1
if rawdata[j:j+1] in ("-", ""):
# Start of comment followed by buffer boundary,
# or just a buffer boundary.
return -1
# A simple, practical version could look like: ((name|stringlit) S*) + '>'
n = len(rawdata)
if rawdata[j:j+2] == '--': #comment
# Locate --.*-- as the body of the comment
return self.parse_comment(i)
elif rawdata[j] == '[': #marked section
# Locate [statusWord [...arbitrary SGML...]] as the body of the marked section
# Where statusWord is one of TEMP, CDATA, IGNORE, INCLUDE, RCDATA
# Note that this is extended by Microsoft Office "Save as Web" function
# to include [if...] and [endif].
return self.parse_marked_section(i)
else: #all other declaration elements
decltype, j = self._scan_name(j, i)
if j < 0:
return j
if decltype == "doctype":
self._decl_otherchars = ''
while j < n:
c = rawdata[j]
if c == ">":
# end of declaration syntax
data = rawdata[i+2:j]
if decltype == "doctype":
self.handle_decl(data)
else:
self.unknown_decl(data)
return j + 1
if c in "\"'":
m = _declstringlit_match(rawdata, j)
if not m:
return -1 # incomplete
j = m.end()
elif c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
name, j = self._scan_name(j, i)
elif c in self._decl_otherchars:
j = j + 1
elif c == "[":
# this could be handled in a separate doctype parser
if decltype == "doctype":
j = self._parse_doctype_subset(j + 1, i)
elif decltype in ("attlist", "linktype", "link", "element"):
# must tolerate []'d groups in a content model in an element declaration
# also in data attribute specifications of attlist declaration
# also link type declaration subsets in linktype declarations
# also link attribute specification lists in link declarations
self.error("unsupported '[' char in %s declaration" % decltype)
else:
self.error("unexpected '[' char in declaration")
else:
self.error(
"unexpected %r char in declaration" % rawdata[j])
if j < 0:
return j
return -1 # incomplete
# Internal -- parse a marked section
# Override this to handle MS-word extension syntax <![if word]>content<![endif]>
def parse_marked_section(self, i, report=1):
rawdata= self.rawdata
assert rawdata[i:i+3] == '<![', "unexpected call to parse_marked_section()"
sectName, j = self._scan_name( i+3, i )
if j < 0:
return j
if sectName in ("temp", "cdata", "ignore", "include", "rcdata"):
# look for standard ]]> ending
match= _markedsectionclose.search(rawdata, i+3)
elif sectName in ("if", "else", "endif"):
# look for MS Office ]> ending
match= _msmarkedsectionclose.search(rawdata, i+3)
else:
self.error('unknown status keyword %r in marked section' % rawdata[i+3:j])
if not match:
return -1
if report:
j = match.start(0)
self.unknown_decl(rawdata[i+3: j])
return match.end(0)
# Internal -- parse comment, return length or -1 if not terminated
def parse_comment(self, i, report=1):
rawdata = self.rawdata
if rawdata[i:i+4] != '<!--':
self.error('unexpected call to parse_comment()')
match = _commentclose.search(rawdata, i+4)
if not match:
return -1
if report:
j = match.start(0)
self.handle_comment(rawdata[i+4: j])
return match.end(0)
# Internal -- scan past the internal subset in a <!DOCTYPE declaration,
# returning the index just past any whitespace following the trailing ']'.
def _parse_doctype_subset(self, i, declstartpos):
rawdata = self.rawdata
n = len(rawdata)
j = i
while j < n:
c = rawdata[j]
if c == "<":
s = rawdata[j:j+2]
if s == "<":
# end of buffer; incomplete
return -1
if s != "<!":
self.updatepos(declstartpos, j + 1)
self.error("unexpected char in internal subset (in %r)" % s)
if (j + 2) == n:
# end of buffer; incomplete
return -1
if (j + 4) > n:
# end of buffer; incomplete
return -1
if rawdata[j:j+4] == "<!--":
j = self.parse_comment(j, report=0)
if j < 0:
return j
continue
name, j = self._scan_name(j + 2, declstartpos)
if j == -1:
return -1
if name not in ("attlist", "element", "entity", "notation"):
self.updatepos(declstartpos, j + 2)
self.error(
"unknown declaration %r in internal subset" % name)
# handle the individual names
meth = getattr(self, "_parse_doctype_" + name)
j = meth(j, declstartpos)
if j < 0:
return j
elif c == "%":
# parameter entity reference
if (j + 1) == n:
# end of buffer; incomplete
return -1
s, j = self._scan_name(j + 1, declstartpos)
if j < 0:
return j
if rawdata[j] == ";":
j = j + 1
elif c == "]":
j = j + 1
while j < n and rawdata[j].isspace():
j = j + 1
if j < n:
if rawdata[j] == ">":
return j
self.updatepos(declstartpos, j)
self.error("unexpected char after internal subset")
else:
return -1
elif c.isspace():
j = j + 1
else:
self.updatepos(declstartpos, j)
self.error("unexpected char %r in internal subset" % c)
# end of buffer reached
return -1
# Internal -- scan past <!ELEMENT declarations
def _parse_doctype_element(self, i, declstartpos):
name, j = self._scan_name(i, declstartpos)
if j == -1:
return -1
# style content model; just skip until '>'
rawdata = self.rawdata
if '>' in rawdata[j:]:
return rawdata.find(">", j) + 1
return -1
# Internal -- scan past <!ATTLIST declarations
def _parse_doctype_attlist(self, i, declstartpos):
rawdata = self.rawdata
name, j = self._scan_name(i, declstartpos)
c = rawdata[j:j+1]
if c == "":
return -1
if c == ">":
return j + 1
while 1:
# scan a series of attribute descriptions; simplified:
# name type [value] [#constraint]
name, j = self._scan_name(j, declstartpos)
if j < 0:
return j
c = rawdata[j:j+1]
if c == "":
return -1
if c == "(":
# an enumerated type; look for ')'
if ")" in rawdata[j:]:
j = rawdata.find(")", j) + 1
else:
return -1
while rawdata[j:j+1].isspace():
j = j + 1
if not rawdata[j:]:
# end of buffer, incomplete
return -1
else:
name, j = self._scan_name(j, declstartpos)
c = rawdata[j:j+1]
if not c:
return -1
if c in "'\"":
m = _declstringlit_match(rawdata, j)
if m:
j = m.end()
else:
return -1
c = rawdata[j:j+1]
if not c:
return -1
if c == "#":
if rawdata[j:] == "#":
# end of buffer
return -1
name, j = self._scan_name(j + 1, declstartpos)
if j < 0:
return j
c = rawdata[j:j+1]
if not c:
return -1
if c == '>':
# all done
return j + 1
# Internal -- scan past <!NOTATION declarations
def _parse_doctype_notation(self, i, declstartpos):
name, j = self._scan_name(i, declstartpos)
if j < 0:
return j
rawdata = self.rawdata
while 1:
c = rawdata[j:j+1]
if not c:
# end of buffer; incomplete
return -1
if c == '>':
return j + 1
if c in "'\"":
m = _declstringlit_match(rawdata, j)
if not m:
return -1
j = m.end()
else:
name, j = self._scan_name(j, declstartpos)
if j < 0:
return j
# Internal -- scan past <!ENTITY declarations
def _parse_doctype_entity(self, i, declstartpos):
rawdata = self.rawdata
if rawdata[i:i+1] == "%":
j = i + 1
while 1:
c = rawdata[j:j+1]
if not c:
return -1
if c.isspace():
j = j + 1
else:
break
else:
j = i
name, j = self._scan_name(j, declstartpos)
if j < 0:
return j
while 1:
c = self.rawdata[j:j+1]
if not c:
return -1
if c in "'\"":
m = _declstringlit_match(rawdata, j)
if m:
j = m.end()
else:
return -1 # incomplete
elif c == ">":
return j + 1
else:
name, j = self._scan_name(j, declstartpos)
if j < 0:
return j
# Internal -- scan a name token; return the name and the new position, or
# (None, -1) if we've reached the end of the buffer
def _scan_name(self, i, declstartpos):
rawdata = self.rawdata
n = len(rawdata)
if i == n:
return None, -1
m = _declname_match(rawdata, i)
if m:
s = m.group()
name = s.strip()
if (i + len(s)) == n:
return None, -1 # end of buffer
return name.lower(), m.end()
else:
self.updatepos(declstartpos, i)
self.error("expected name token at %r"
% rawdata[declstartpos:declstartpos+20])
# To be overridden -- handlers for unknown objects
def unknown_decl(self, data):
pass

View File

@@ -1,669 +0,0 @@
"""Stateful programmatic WWW navigation, after Perl's WWW::Mechanize.
Copyright 2003-2006 John J. Lee <jjl@pobox.com>
Copyright 2003 Andy Lester (original Perl code)
This code is free software; you can redistribute it and/or modify it
under the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
included with the distribution).
"""
import copy, re, os, urllib, urllib2
from _html import DefaultFactory
import _response
import _request
import _rfc3986
import _sockettimeout
import _urllib2_fork
from _useragent import UserAgentBase
class BrowserStateError(Exception): pass
class LinkNotFoundError(Exception): pass
class FormNotFoundError(Exception): pass
def sanepathname2url(path):
urlpath = urllib.pathname2url(path)
if os.name == "nt" and urlpath.startswith("///"):
urlpath = urlpath[2:]
# XXX don't ask me about the mac...
return urlpath
class History:
"""
Though this will become public, the implied interface is not yet stable.
"""
def __init__(self):
self._history = [] # LIFO
def add(self, request, response):
self._history.append((request, response))
def back(self, n, _response):
response = _response # XXX move Browser._response into this class?
while n > 0 or response is None:
try:
request, response = self._history.pop()
except IndexError:
raise BrowserStateError("already at start of history")
n -= 1
return request, response
def clear(self):
del self._history[:]
def close(self):
for request, response in self._history:
if response is not None:
response.close()
del self._history[:]
class HTTPRefererProcessor(_urllib2_fork.BaseHandler):
def http_request(self, request):
# See RFC 2616 14.36. The only times we know the source of the
# request URI has a URI associated with it are redirect, and
# Browser.click() / Browser.submit() / Browser.follow_link().
# Otherwise, it's the user's job to add any Referer header before
# .open()ing.
if hasattr(request, "redirect_dict"):
request = self.parent._add_referer_header(
request, origin_request=False)
return request
https_request = http_request
class Browser(UserAgentBase):
"""Browser-like class with support for history, forms and links.
BrowserStateError is raised whenever the browser is in the wrong state to
complete the requested operation - e.g., when .back() is called when the
browser history is empty, or when .follow_link() is called when the current
response does not contain HTML data.
Public attributes:
request: current request (mechanize.Request)
form: currently selected form (see .select_form())
"""
handler_classes = copy.copy(UserAgentBase.handler_classes)
handler_classes["_referer"] = HTTPRefererProcessor
default_features = copy.copy(UserAgentBase.default_features)
default_features.append("_referer")
def __init__(self,
factory=None,
history=None,
request_class=None,
):
"""
Only named arguments should be passed to this constructor.
factory: object implementing the mechanize.Factory interface.
history: object implementing the mechanize.History interface. Note
this interface is still experimental and may change in future.
request_class: Request class to use. Defaults to mechanize.Request
The Factory and History objects passed in are 'owned' by the Browser,
so they should not be shared across Browsers. In particular,
factory.set_response() should not be called except by the owning
Browser itself.
Note that the supplied factory's request_class is overridden by this
constructor, to ensure only one Request class is used.
"""
self._handle_referer = True
if history is None:
history = History()
self._history = history
if request_class is None:
request_class = _request.Request
if factory is None:
factory = DefaultFactory()
factory.set_request_class(request_class)
self._factory = factory
self.request_class = request_class
self.request = None
self._set_response(None, False)
# do this last to avoid __getattr__ problems
UserAgentBase.__init__(self)
def close(self):
UserAgentBase.close(self)
if self._response is not None:
self._response.close()
if self._history is not None:
self._history.close()
self._history = None
# make use after .close easy to spot
self.form = None
self.request = self._response = None
self.request = self.response = self.set_response = None
self.geturl = self.reload = self.back = None
self.clear_history = self.set_cookie = self.links = self.forms = None
self.viewing_html = self.encoding = self.title = None
self.select_form = self.click = self.submit = self.click_link = None
self.follow_link = self.find_link = None
def set_handle_referer(self, handle):
"""Set whether to add Referer header to each request."""
self._set_handler("_referer", handle)
self._handle_referer = bool(handle)
def _add_referer_header(self, request, origin_request=True):
if self.request is None:
return request
scheme = request.get_type()
original_scheme = self.request.get_type()
if scheme not in ["http", "https"]:
return request
if not origin_request and not self.request.has_header("Referer"):
return request
if (self._handle_referer and
original_scheme in ["http", "https"] and
not (original_scheme == "https" and scheme != "https")):
# strip URL fragment (RFC 2616 14.36)
parts = _rfc3986.urlsplit(self.request.get_full_url())
parts = parts[:-1]+(None,)
referer = _rfc3986.urlunsplit(parts)
request.add_unredirected_header("Referer", referer)
return request
def open_novisit(self, url, data=None,
timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
"""Open a URL without visiting it.
Browser state (including request, response, history, forms and links)
is left unchanged by calling this function.
The interface is the same as for .open().
This is useful for things like fetching images.
See also .retrieve().
"""
return self._mech_open(url, data, visit=False, timeout=timeout)
def open(self, url, data=None,
timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
return self._mech_open(url, data, timeout=timeout)
def _mech_open(self, url, data=None, update_history=True, visit=None,
timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
try:
url.get_full_url
except AttributeError:
# string URL -- convert to absolute URL if required
scheme, authority = _rfc3986.urlsplit(url)[:2]
if scheme is None:
# relative URL
if self._response is None:
raise BrowserStateError(
"can't fetch relative reference: "
"not viewing any document")
url = _rfc3986.urljoin(self._response.geturl(), url)
request = self._request(url, data, visit, timeout)
visit = request.visit
if visit is None:
visit = True
if visit:
self._visit_request(request, update_history)
success = True
try:
response = UserAgentBase.open(self, request, data)
except urllib2.HTTPError, error:
success = False
if error.fp is None: # not a response
raise
response = error
## except (IOError, socket.error, OSError), error:
## # Yes, urllib2 really does raise all these :-((
## # See test_urllib2.py for examples of socket.gaierror and OSError,
## # plus note that FTPHandler raises IOError.
## # XXX I don't seem to have an example of exactly socket.error being
## # raised, only socket.gaierror...
## # I don't want to start fixing these here, though, since this is a
## # subclass of OpenerDirector, and it would break old code. Even in
## # Python core, a fix would need some backwards-compat. hack to be
## # acceptable.
## raise
if visit:
self._set_response(response, False)
response = copy.copy(self._response)
elif response is not None:
response = _response.upgrade_response(response)
if not success:
raise response
return response
def __str__(self):
text = []
text.append("<%s " % self.__class__.__name__)
if self._response:
text.append("visiting %s" % self._response.geturl())
else:
text.append("(not visiting a URL)")
if self.form:
text.append("\n selected form:\n %s\n" % str(self.form))
text.append(">")
return "".join(text)
def response(self):
"""Return a copy of the current response.
The returned object has the same interface as the object returned by
.open() (or mechanize.urlopen()).
"""
return copy.copy(self._response)
def open_local_file(self, filename):
path = sanepathname2url(os.path.abspath(filename))
url = 'file://'+path
return self.open(url)
def set_response(self, response):
"""Replace current response with (a copy of) response.
response may be None.
This is intended mostly for HTML-preprocessing.
"""
self._set_response(response, True)
def _set_response(self, response, close_current):
# sanity check, necessary but far from sufficient
if not (response is None or
(hasattr(response, "info") and hasattr(response, "geturl") and
hasattr(response, "read")
)
):
raise ValueError("not a response object")
self.form = None
if response is not None:
response = _response.upgrade_response(response)
if close_current and self._response is not None:
self._response.close()
self._response = response
self._factory.set_response(response)
def visit_response(self, response, request=None):
"""Visit the response, as if it had been .open()ed.
Unlike .set_response(), this updates history rather than replacing the
current response.
"""
if request is None:
request = _request.Request(response.geturl())
self._visit_request(request, True)
self._set_response(response, False)
def _visit_request(self, request, update_history):
if self._response is not None:
self._response.close()
if self.request is not None and update_history:
self._history.add(self.request, self._response)
self._response = None
# we want self.request to be assigned even if UserAgentBase.open
# fails
self.request = request
def geturl(self):
"""Get URL of current document."""
if self._response is None:
raise BrowserStateError("not viewing any document")
return self._response.geturl()
def reload(self):
"""Reload current document, and return response object."""
if self.request is None:
raise BrowserStateError("no URL has yet been .open()ed")
if self._response is not None:
self._response.close()
return self._mech_open(self.request, update_history=False)
def back(self, n=1):
"""Go back n steps in history, and return response object.
n: go back this number of steps (default 1 step)
"""
if self._response is not None:
self._response.close()
self.request, response = self._history.back(n, self._response)
self.set_response(response)
if not response.read_complete:
return self.reload()
return copy.copy(response)
def clear_history(self):
self._history.clear()
def set_cookie(self, cookie_string):
"""Request to set a cookie.
Note that it is NOT necessary to call this method under ordinary
circumstances: cookie handling is normally entirely automatic. The
intended use case is rather to simulate the setting of a cookie by
client script in a web page (e.g. JavaScript). In that case, use of
this method is necessary because mechanize currently does not support
JavaScript, VBScript, etc.
The cookie is added in the same way as if it had arrived with the
current response, as a result of the current request. This means that,
for example, if it is not appropriate to set the cookie based on the
current request, no cookie will be set.
The cookie will be returned automatically with subsequent responses
made by the Browser instance whenever that's appropriate.
cookie_string should be a valid value of the Set-Cookie header.
For example:
browser.set_cookie(
"sid=abcdef; expires=Wednesday, 09-Nov-06 23:12:40 GMT")
Currently, this method does not allow for adding RFC 2965 cookies.
This limitation will be lifted if anybody requests it.
"""
if self._response is None:
raise BrowserStateError("not viewing any document")
if self.request.get_type() not in ["http", "https"]:
raise BrowserStateError("can't set cookie for non-HTTP/HTTPS "
"transactions")
cookiejar = self._ua_handlers["_cookies"].cookiejar
response = self.response() # copy
headers = response.info()
headers["Set-cookie"] = cookie_string
cookiejar.extract_cookies(response, self.request)
def links(self, **kwds):
"""Return iterable over links (mechanize.Link objects)."""
if not self.viewing_html():
raise BrowserStateError("not viewing HTML")
links = self._factory.links()
if kwds:
return self._filter_links(links, **kwds)
else:
return links
def forms(self):
"""Return iterable over forms.
The returned form objects implement the mechanize.HTMLForm interface.
"""
if not self.viewing_html():
raise BrowserStateError("not viewing HTML")
return self._factory.forms()
def global_form(self):
"""Return the global form object, or None if the factory implementation
did not supply one.
The "global" form object contains all controls that are not descendants
of any FORM element.
The returned form object implements the mechanize.HTMLForm interface.
This is a separate method since the global form is not regarded as part
of the sequence of forms in the document -- mostly for
backwards-compatibility.
"""
if not self.viewing_html():
raise BrowserStateError("not viewing HTML")
return self._factory.global_form
def viewing_html(self):
"""Return whether the current response contains HTML data."""
if self._response is None:
raise BrowserStateError("not viewing any document")
return self._factory.is_html
def encoding(self):
if self._response is None:
raise BrowserStateError("not viewing any document")
return self._factory.encoding
def title(self):
r"""Return title, or None if there is no title element in the document.
Treatment of any tag children of <title> attempts to follow Firefox and IE
(currently, tags are preserved).
"""
if not self.viewing_html():
raise BrowserStateError("not viewing HTML")
return self._factory.title
def select_form(self, name=None, predicate=None, nr=None):
"""Select an HTML form for input.
This is a bit like giving a form the "input focus" in a browser.
If a form is selected, the Browser object supports the HTMLForm
interface, so you can call methods like .set_value(), .set(), and
.click().
Another way to select a form is to assign to the .form attribute. The
form assigned should be one of the objects returned by the .forms()
method.
At least one of the name, predicate and nr arguments must be supplied.
If no matching form is found, mechanize.FormNotFoundError is raised.
If name is specified, then the form must have the indicated name.
If predicate is specified, then the form must match that function. The
predicate function is passed the HTMLForm as its single argument, and
should return a boolean value indicating whether the form matched.
nr, if supplied, is the sequence number of the form (where 0 is the
first). Note that form 0 is the first form matching all the other
arguments (if supplied); it is not necessarily the first form in the
document. The "global form" (consisting of all form controls not contained
in any FORM element) is considered not to be part of this sequence and
to have no name, so will not be matched unless both name and nr are
None.
"""
if not self.viewing_html():
raise BrowserStateError("not viewing HTML")
if (name is None) and (predicate is None) and (nr is None):
raise ValueError(
"at least one argument must be supplied to specify form")
global_form = self._factory.global_form
if nr is None and name is None and \
predicate is not None and predicate(global_form):
self.form = global_form
return
orig_nr = nr
for form in self.forms():
if name is not None and name != form.name:
continue
if predicate is not None and not predicate(form):
continue
if nr:
nr -= 1
continue
self.form = form
break # success
else:
# failure
description = []
if name is not None: description.append("name '%s'" % name)
if predicate is not None:
description.append("predicate %s" % predicate)
if orig_nr is not None: description.append("nr %d" % orig_nr)
description = ", ".join(description)
raise FormNotFoundError("no form matching "+description)
def click(self, *args, **kwds):
"""See mechanize.HTMLForm.click for documentation."""
if not self.viewing_html():
raise BrowserStateError("not viewing HTML")
request = self.form.click(*args, **kwds)
return self._add_referer_header(request)
def submit(self, *args, **kwds):
"""Submit current form.
Arguments are as for mechanize.HTMLForm.click().
Return value is same as for Browser.open().
"""
return self.open(self.click(*args, **kwds))
def click_link(self, link=None, **kwds):
"""Find a link and return a Request object for it.
Arguments are as for .find_link(), except that a link may be supplied
as the first argument.
"""
if not self.viewing_html():
raise BrowserStateError("not viewing HTML")
if not link:
link = self.find_link(**kwds)
else:
if kwds:
raise ValueError(
"either pass a Link, or keyword arguments, not both")
request = self.request_class(link.absolute_url)
return self._add_referer_header(request)
def follow_link(self, link=None, **kwds):
"""Find a link and .open() it.
Arguments are as for .click_link().
Return value is same as for Browser.open().
"""
return self.open(self.click_link(link, **kwds))
def find_link(self, **kwds):
"""Find a link in current page.
Links are returned as mechanize.Link objects.
# Return third link that .search()-matches the regexp "python"
# (by ".search()-matches", I mean that the regular expression method
# .search() is used, rather than .match()).
find_link(text_regex=re.compile("python"), nr=2)
# Return first http link in the current page that points to somewhere
# on python.org whose link text (after tags have been removed) is
# exactly "monty python".
find_link(text="monty python",
url_regex=re.compile("http.*python.org"))
# Return first link with exactly three HTML attributes.
find_link(predicate=lambda link: len(link.attrs) == 3)
Links include anchors (<a>), image maps (<area>), and frames (<frame>,
<iframe>).
All arguments must be passed by keyword, not position. Zero or more
arguments may be supplied. In order to find a link, all arguments
supplied must match.
If a matching link is not found, mechanize.LinkNotFoundError is raised.
text: link text between link tags: e.g. <a href="blah">this bit</a> (as
returned by pullparser.get_compressed_text(), ie. without tags but
with opening tags "textified" as per the pullparser docs) must compare
equal to this argument, if supplied
text_regex: link text between tag (as defined above) must match the
regular expression object or regular expression string passed as this
argument, if supplied
name, name_regex: as for text and text_regex, but matched against the
name HTML attribute of the link tag
url, url_regex: as for text and text_regex, but matched against the
URL of the link tag (note this matches against Link.url, which is a
relative or absolute URL according to how it was written in the HTML)
tag: element name of opening tag, e.g. "a"
predicate: a function taking a Link object as its single argument,
returning a boolean result, indicating whether the link matched
nr: matches the nth link that matches all other criteria (default 0)
"""
try:
return self._filter_links(self._factory.links(), **kwds).next()
except StopIteration:
raise LinkNotFoundError()
def __getattr__(self, name):
# pass through _form.HTMLForm methods and attributes
form = self.__dict__.get("form")
if form is None:
raise AttributeError(
"%s instance has no attribute %s (perhaps you forgot to "
".select_form()?)" % (self.__class__, name))
return getattr(form, name)
def _filter_links(self, links,
text=None, text_regex=None,
name=None, name_regex=None,
url=None, url_regex=None,
tag=None,
predicate=None,
nr=0
):
if not self.viewing_html():
raise BrowserStateError("not viewing HTML")
orig_nr = nr
for link in links:
if url is not None and url != link.url:
continue
if url_regex is not None and not re.search(url_regex, link.url):
continue
if (text is not None and
(link.text is None or text != link.text)):
continue
if (text_regex is not None and
(link.text is None or not re.search(text_regex, link.text))):
continue
if name is not None and name != dict(link.attrs).get("name"):
continue
if name_regex is not None:
link_name = dict(link.attrs).get("name")
if link_name is None or not re.search(name_regex, link_name):
continue
if tag is not None and tag != link.tag:
continue
if predicate is not None and not predicate(link):
continue
if nr:
nr -= 1
continue
yield link
nr = orig_nr
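A hedged end-to-end sketch of the Browser class above; the URL, form index, and control name are invented for illustration, and control access is proxied to the selected HTMLForm via __getattr__. Not part of the deleted file:
    # Illustrative sketch only -- not part of the original module.
    br = Browser()
    br.open("http://example.com/login")
    br.select_form(nr=0)               # first form in the page
    br["user"] = "alice"               # passed through to the HTMLForm
    response = br.submit()
    print br.title()
    br.follow_link(text_regex="next")  # find_link() keywords work here too
    br.back()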

View File

@@ -1,161 +0,0 @@
"""Mozilla / Netscape cookie loading / saving.
Copyright 2002-2006 John J Lee <jjl@pobox.com>
Copyright 1997-1999 Gisle Aas (original libwww-perl code)
This code is free software; you can redistribute it and/or modify it
under the terms of the BSD or ZPL 2.1 licenses (see the file
COPYING.txt included with the distribution).
"""
import re, time, logging
from _clientcookie import reraise_unmasked_exceptions, FileCookieJar, Cookie, \
MISSING_FILENAME_TEXT, LoadError
debug = logging.getLogger("ClientCookie").debug
class MozillaCookieJar(FileCookieJar):
"""
WARNING: you may want to backup your browser's cookies file if you use
this class to save cookies. I *think* it works, but there have been
bugs in the past!
This class differs from CookieJar only in the format it uses to save and
load cookies to and from a file. This class uses the Mozilla/Netscape
`cookies.txt' format. lynx uses this file format, too.
Don't expect cookies saved while the browser is running to be noticed by
the browser (in fact, Mozilla on unix will overwrite your saved cookies if
you change them on disk while it's running; on Windows, you probably can't
save at all while the browser is running).
Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to
Netscape cookies on saving.
In particular, the cookie version and port number information is lost,
together with information about whether or not Path, Port and Discard were
specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the
domain as set in the HTTP header started with a dot (yes, I'm aware some
domains in Netscape files start with a dot and some don't -- trust me, you
really don't want to know any more about this).
Note that though Mozilla and Netscape use the same format, they use
slightly different headers. The class saves cookies using the Netscape
header by default (Mozilla can cope with that).
"""
magic_re = "#( Netscape)? HTTP Cookie File"
header = """\
# Netscape HTTP Cookie File
# http://www.netscape.com/newsref/std/cookie_spec.html
# This is a generated file! Do not edit.
"""
def _really_load(self, f, filename, ignore_discard, ignore_expires):
now = time.time()
magic = f.readline()
if not re.search(self.magic_re, magic):
f.close()
raise LoadError(
"%s does not look like a Netscape format cookies file" %
filename)
try:
while 1:
line = f.readline()
if line == "": break
# last field may be absent, so keep any trailing tab
if line.endswith("\n"): line = line[:-1]
# skip comments and blank lines XXX what is $ for?
if (line.strip().startswith("#") or
line.strip().startswith("$") or
line.strip() == ""):
continue
domain, domain_specified, path, secure, expires, name, value = \
line.split("\t", 6)
secure = (secure == "TRUE")
domain_specified = (domain_specified == "TRUE")
if name == "":
name = value
value = None
initial_dot = domain.startswith(".")
if domain_specified != initial_dot:
raise LoadError("domain and domain specified flag don't "
"match in %s: %s" % (filename, line))
discard = False
if expires == "":
expires = None
discard = True
# assume path_specified is false
c = Cookie(0, name, value,
None, False,
domain, domain_specified, initial_dot,
path, False,
secure,
expires,
discard,
None,
None,
{})
if not ignore_discard and c.discard:
continue
if not ignore_expires and c.is_expired(now):
continue
self.set_cookie(c)
except:
reraise_unmasked_exceptions((IOError, LoadError))
raise LoadError("invalid Netscape format file %s: %s" %
(filename, line))
def save(self, filename=None, ignore_discard=False, ignore_expires=False):
if filename is None:
if self.filename is not None: filename = self.filename
else: raise ValueError(MISSING_FILENAME_TEXT)
f = open(filename, "w")
try:
debug("Saving Netscape cookies.txt file")
f.write(self.header)
now = time.time()
for cookie in self:
if not ignore_discard and cookie.discard:
debug(" Not saving %s: marked for discard", cookie.name)
continue
if not ignore_expires and cookie.is_expired(now):
debug(" Not saving %s: expired", cookie.name)
continue
if cookie.secure: secure = "TRUE"
else: secure = "FALSE"
if cookie.domain.startswith("."): initial_dot = "TRUE"
else: initial_dot = "FALSE"
if cookie.expires is not None:
expires = str(cookie.expires)
else:
expires = ""
if cookie.value is None:
# cookies.txt regards 'Set-Cookie: foo' as a cookie
# with no name, whereas cookielib regards it as a
# cookie with no value.
name = ""
value = cookie.name
else:
name = cookie.name
value = cookie.value
f.write(
"\t".join([cookie.domain, initial_dot, cookie.path,
secure, expires, name, value])+
"\n")
finally:
f.close()
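For reference, a hedged round-trip sketch for the class removed above; the filename is illustrative, and the file must already start with the Netscape magic header for load() to succeed.

import mechanize

cj = mechanize.MozillaCookieJar("cookies.txt")
cj.load(ignore_discard=True, ignore_expires=True)  # raises LoadError on a bad magic line
opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cj))
opener.open("http://example.com/")
cj.save()  # writes the Netscape header shown above, one tab-separated cookie per line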

View File

@@ -1,388 +0,0 @@
"""Microsoft Internet Explorer cookie loading on Windows.
Copyright 2002-2003 Johnny Lee <typo_pl@hotmail.com> (MSIE Perl code)
Copyright 2002-2006 John J Lee <jjl@pobox.com> (The Python port)
This code is free software; you can redistribute it and/or modify it
under the terms of the BSD or ZPL 2.1 licenses (see the file
COPYING.txt included with the distribution).
"""
# XXX names and comments are not great here
import os, re, time, struct, logging
if os.name == "nt":
import _winreg
from _clientcookie import FileCookieJar, CookieJar, Cookie, \
MISSING_FILENAME_TEXT, LoadError
debug = logging.getLogger("mechanize").debug
def regload(path, leaf):
key = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, path, 0,
_winreg.KEY_ALL_ACCESS)
try:
value = _winreg.QueryValueEx(key, leaf)[0]
except WindowsError:
value = None
return value
WIN32_EPOCH = 0x019db1ded53e8000L # 1970 Jan 01 00:00:00 in Win32 FILETIME
def epoch_time_offset_from_win32_filetime(filetime):
"""Convert from win32 filetime to seconds-since-epoch value.
MSIE stores create and expire times as Win32 FILETIME, which is 64
bits of 100 nanosecond intervals since Jan 01 1601.
mechanize expects time in 32-bit value expressed in seconds since the
epoch (Jan 01 1970).
"""
if filetime < WIN32_EPOCH:
raise ValueError("filetime (%d) is before epoch (%d)" %
(filetime, WIN32_EPOCH))
return divmod((filetime - WIN32_EPOCH), 10000000L)[0]
def binary_to_char(c): return "%02X" % ord(c)
def binary_to_str(d): return "".join(map(binary_to_char, list(d)))
class MSIEBase:
magic_re = re.compile(r"Client UrlCache MMF Ver \d\.\d.*")
padding = "\x0d\xf0\xad\x0b"
msie_domain_re = re.compile(r"^([^/]+)(/.*)$")
cookie_re = re.compile("Cookie\:.+\@([\x21-\xFF]+).*?"
"(.+\@[\x21-\xFF]+\.txt)")
# path under HKEY_CURRENT_USER from which to get location of index.dat
reg_path = r"software\microsoft\windows" \
r"\currentversion\explorer\shell folders"
reg_key = "Cookies"
def __init__(self):
self._delayload_domains = {}
def _delayload_domain(self, domain):
# if necessary, lazily load cookies for this domain
delayload_info = self._delayload_domains.get(domain)
if delayload_info is not None:
cookie_file, ignore_discard, ignore_expires = delayload_info
try:
self.load_cookie_data(cookie_file,
ignore_discard, ignore_expires)
except (LoadError, IOError):
debug("error reading cookie file, skipping: %s", cookie_file)
else:
del self._delayload_domains[domain]
def _load_cookies_from_file(self, filename):
debug("Loading MSIE cookies file: %s", filename)
cookies = []
cookies_fh = open(filename)
try:
while 1:
key = cookies_fh.readline()
if key == "": break
rl = cookies_fh.readline
def getlong(rl=rl): return long(rl().rstrip())
def getstr(rl=rl): return rl().rstrip()
key = key.rstrip()
value = getstr()
domain_path = getstr()
flags = getlong() # 0x2000 bit is for secure I think
lo_expire = getlong()
hi_expire = getlong()
lo_create = getlong()
hi_create = getlong()
sep = getstr()
if "" in (key, value, domain_path, flags, hi_expire, lo_expire,
hi_create, lo_create, sep) or (sep != "*"):
break
m = self.msie_domain_re.search(domain_path)
if m:
domain = m.group(1)
path = m.group(2)
cookies.append({"KEY": key, "VALUE": value,
"DOMAIN": domain, "PATH": path,
"FLAGS": flags, "HIXP": hi_expire,
"LOXP": lo_expire, "HICREATE": hi_create,
"LOCREATE": lo_create})
finally:
cookies_fh.close()
return cookies
def load_cookie_data(self, filename,
ignore_discard=False, ignore_expires=False):
"""Load cookies from file containing actual cookie data.
Old cookies are kept unless overwritten by newly loaded ones.
You should not call this method if the delayload attribute is set.
I think each of these files contain all cookies for one user, domain,
and path.
filename: file containing cookies -- usually found in a file like
C:\WINNT\Profiles\joe\Cookies\joe@blah[1].txt
"""
now = int(time.time())
cookie_data = self._load_cookies_from_file(filename)
for cookie in cookie_data:
flags = cookie["FLAGS"]
secure = ((flags & 0x2000) != 0)
filetime = (cookie["HIXP"] << 32) + cookie["LOXP"]
expires = epoch_time_offset_from_win32_filetime(filetime)
if expires < now:
discard = True
else:
discard = False
domain = cookie["DOMAIN"]
initial_dot = domain.startswith(".")
if initial_dot:
domain_specified = True
else:
# MSIE 5 does not record whether the domain cookie-attribute
# was specified.
# Assuming it wasn't is conservative, because with strict
# domain matching this will match less frequently; with regular
# Netscape tail-matching, this will match at exactly the same
# times that domain_specified = True would. It also means we
# don't have to prepend a dot to achieve consistency with our
# own & Mozilla's domain-munging scheme.
domain_specified = False
# assume path_specified is false
# XXX is there other stuff in here? -- e.g. comment, commentURL?
c = Cookie(0,
cookie["KEY"], cookie["VALUE"],
None, False,
domain, domain_specified, initial_dot,
cookie["PATH"], False,
secure,
expires,
discard,
None,
None,
{"flags": flags})
if not ignore_discard and c.discard:
continue
if not ignore_expires and c.is_expired(now):
continue
CookieJar.set_cookie(self, c)
def load_from_registry(self, ignore_discard=False, ignore_expires=False,
username=None):
"""
username: only required on win9x
"""
cookies_dir = regload(self.reg_path, self.reg_key)
filename = os.path.normpath(os.path.join(cookies_dir, "INDEX.DAT"))
self.load(filename, ignore_discard, ignore_expires, username)
def _really_load(self, index, filename, ignore_discard, ignore_expires,
username):
now = int(time.time())
if username is None:
username = os.environ['USERNAME'].lower()
cookie_dir = os.path.dirname(filename)
data = index.read(256)
if len(data) != 256:
raise LoadError("%s file is too short" % filename)
# Cookies' index.dat file starts with 32 bytes of signature
# followed by an offset to the first record, stored as a little-
# endian DWORD.
sig, size, data = data[:32], data[32:36], data[36:]
size = struct.unpack("<L", size)[0]
# check that sig is valid
if not self.magic_re.match(sig) or size != 0x4000:
raise LoadError("%s ['%s' %s] does not seem to contain cookies" %
(str(filename), sig, size))
# skip to start of first record
index.seek(size, 0)
sector = 128 # size of sector in bytes
while 1:
data = ""
# Cookies are usually in two contiguous sectors, so read in two
# sectors and adjust if not a Cookie.
to_read = 2 * sector
d = index.read(to_read)
if len(d) != to_read:
break
data = data + d
# Each record starts with a 4-byte signature and a count
# (little-endian DWORD) of sectors for the record.
sig, size, data = data[:4], data[4:8], data[8:]
size = struct.unpack("<L", size)[0]
to_read = (size - 2) * sector
## from urllib import quote
## print "data", quote(data)
## print "sig", quote(sig)
## print "size in sectors", size
## print "size in bytes", size*sector
## print "size in units of 16 bytes", (size*sector) / 16
## print "size to read in bytes", to_read
## print
if sig != "URL ":
assert sig in ("HASH", "LEAK", \
self.padding, "\x00\x00\x00\x00"), \
"unrecognized MSIE index.dat record: %s" % \
binary_to_str(sig)
if sig == "\x00\x00\x00\x00":
# assume we've got all the cookies, and stop
break
if sig == self.padding:
continue
# skip the rest of this record
assert to_read >= 0
if size != 2:
assert to_read != 0
index.seek(to_read, 1)
continue
# read in rest of record if necessary
if size > 2:
more_data = index.read(to_read)
if len(more_data) != to_read: break
data = data + more_data
cookie_re = ("Cookie\:%s\@([\x21-\xFF]+).*?" % username +
"(%s\@[\x21-\xFF]+\.txt)" % username)
m = re.search(cookie_re, data, re.I)
if m:
cookie_file = os.path.join(cookie_dir, m.group(2))
if not self.delayload:
try:
self.load_cookie_data(cookie_file,
ignore_discard, ignore_expires)
except (LoadError, IOError):
debug("error reading cookie file, skipping: %s",
cookie_file)
else:
domain = m.group(1)
i = domain.find("/")
if i != -1:
domain = domain[:i]
self._delayload_domains[domain] = (
cookie_file, ignore_discard, ignore_expires)
class MSIECookieJar(MSIEBase, FileCookieJar):
"""FileCookieJar that reads from the Windows MSIE cookies database.
MSIECookieJar can read the cookie files of Microsoft Internet Explorer
(MSIE) for Windows version 5 on Windows NT and version 6 on Windows XP and
Windows 98. Other configurations may also work, but are untested. Saving
cookies in MSIE format is NOT supported. If you save cookies, they'll be
in the usual Set-Cookie3 format, which you can read back in using an
instance of the plain old CookieJar class. Don't save using the same
filename that you loaded cookies from, because you may succeed in
clobbering your MSIE cookies index file!
You should be able to have LWP share Internet Explorer's cookies like
this (note you need to supply a username to load_from_registry if you're on
Windows 9x or Windows ME):
cj = MSIECookieJar(delayload=1)
# find cookies index file in registry and load cookies from it
cj.load_from_registry()
opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cj))
response = opener.open("http://example.com/")
Iterating over a delayloaded MSIECookieJar instance will not cause any
cookies to be read from disk. To force reading of all cookies from disk,
call read_all_cookies. Note that the following methods iterate over self:
clear_temporary_cookies, clear_expired_cookies, __len__, __repr__, __str__
and as_string.
Additional methods:
load_from_registry(ignore_discard=False, ignore_expires=False,
username=None)
load_cookie_data(filename, ignore_discard=False, ignore_expires=False)
read_all_cookies()
"""
def __init__(self, filename=None, delayload=False, policy=None):
MSIEBase.__init__(self)
FileCookieJar.__init__(self, filename, delayload, policy)
def set_cookie(self, cookie):
if self.delayload:
self._delayload_domain(cookie.domain)
CookieJar.set_cookie(self, cookie)
def _cookies_for_request(self, request):
"""Return a list of cookies to be returned to server."""
domains = self._cookies.copy()
domains.update(self._delayload_domains)
domains = domains.keys()
cookies = []
for domain in domains:
cookies.extend(self._cookies_for_domain(domain, request))
return cookies
def _cookies_for_domain(self, domain, request):
if not self._policy.domain_return_ok(domain, request):
return []
debug("Checking %s for cookies to return", domain)
if self.delayload:
self._delayload_domain(domain)
return CookieJar._cookies_for_domain(self, domain, request)
def read_all_cookies(self):
"""Eagerly read in all cookies."""
if self.delayload:
for domain in self._delayload_domains.keys():
self._delayload_domain(domain)
def load(self, filename, ignore_discard=False, ignore_expires=False,
username=None):
"""Load cookies from an MSIE 'index.dat' cookies index file.
filename: full path to cookie index file
username: only required on win9x
"""
if filename is None:
if self.filename is not None: filename = self.filename
else: raise ValueError(MISSING_FILENAME_TEXT)
index = open(filename, "rb")
try:
self._really_load(index, filename, ignore_discard, ignore_expires,
username)
finally:
index.close()
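As the docstring above notes, iterating a delayloaded jar does not read cookies from disk, so a dump has to call read_all_cookies() first. A Windows-only sketch; the loop body is illustrative.

import mechanize

cj = mechanize.MSIECookieJar(delayload=True)
cj.load_from_registry()  # locate index.dat via the HKCU shell-folders registry key
cj.read_all_cookies()    # force every delayloaded domain to be read from disk
for cookie in cj:
    print cookie.domain, cookie.name, cookie.value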

View File

@@ -1,442 +0,0 @@
"""URL opener.
Copyright 2004-2006 John J Lee <jjl@pobox.com>
This code is free software; you can redistribute it and/or modify it
under the terms of the BSD or ZPL 2.1 licenses (see the file
COPYING.txt included with the distribution).
"""
import os, urllib2, bisect, httplib, types, tempfile
try:
import threading as _threading
except ImportError:
import dummy_threading as _threading
try:
set
except NameError:
import sets
set = sets.Set
from _request import Request
import _response
import _rfc3986
import _sockettimeout
import _urllib2_fork
from _util import isstringlike
open_file = open
class ContentTooShortError(urllib2.URLError):
def __init__(self, reason, result):
urllib2.URLError.__init__(self, reason)
self.result = result
def set_request_attr(req, name, value, default):
try:
getattr(req, name)
except AttributeError:
setattr(req, name, default)
if value is not default:
setattr(req, name, value)
class OpenerDirector(_urllib2_fork.OpenerDirector):
def __init__(self):
_urllib2_fork.OpenerDirector.__init__(self)
# really none of these are (sanely) public -- the lack of initial
# underscore on some is just due to following urllib2
self.process_response = {}
self.process_request = {}
self._any_request = {}
self._any_response = {}
self._handler_index_valid = True
self._tempfiles = []
def add_handler(self, handler):
if not hasattr(handler, "add_parent"):
raise TypeError("expected BaseHandler instance, got %r" %
type(handler))
if handler in self.handlers:
return
# XXX why does self.handlers need to be sorted?
bisect.insort(self.handlers, handler)
handler.add_parent(self)
self._handler_index_valid = False
def _maybe_reindex_handlers(self):
if self._handler_index_valid:
return
handle_error = {}
handle_open = {}
process_request = {}
process_response = {}
any_request = set()
any_response = set()
unwanted = []
for handler in self.handlers:
added = False
for meth in dir(handler):
if meth in ["redirect_request", "do_open", "proxy_open"]:
# oops, coincidental match
continue
if meth == "any_request":
any_request.add(handler)
added = True
continue
elif meth == "any_response":
any_response.add(handler)
added = True
continue
ii = meth.find("_")
scheme = meth[:ii]
condition = meth[ii+1:]
if condition.startswith("error"):
jj = meth[ii+1:].find("_") + ii + 1
kind = meth[jj+1:]
try:
kind = int(kind)
except ValueError:
pass
lookup = handle_error.setdefault(scheme, {})
elif condition == "open":
kind = scheme
lookup = handle_open
elif condition == "request":
kind = scheme
lookup = process_request
elif condition == "response":
kind = scheme
lookup = process_response
else:
continue
lookup.setdefault(kind, set()).add(handler)
added = True
if not added:
unwanted.append(handler)
for handler in unwanted:
self.handlers.remove(handler)
# sort indexed methods
# XXX could be cleaned up
for lookup in [process_request, process_response]:
for scheme, handlers in lookup.iteritems():
lookup[scheme] = handlers
for scheme, lookup in handle_error.iteritems():
for code, handlers in lookup.iteritems():
handlers = list(handlers)
handlers.sort()
lookup[code] = handlers
for scheme, handlers in handle_open.iteritems():
handlers = list(handlers)
handlers.sort()
handle_open[scheme] = handlers
# cache the indexes
self.handle_error = handle_error
self.handle_open = handle_open
self.process_request = process_request
self.process_response = process_response
self._any_request = any_request
self._any_response = any_response
def _request(self, url_or_req, data, visit,
timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
if isstringlike(url_or_req):
req = Request(url_or_req, data, visit=visit, timeout=timeout)
else:
# already a mechanize.Request instance
req = url_or_req
if data is not None:
req.add_data(data)
# XXX yuck
set_request_attr(req, "visit", visit, None)
set_request_attr(req, "timeout", timeout,
_sockettimeout._GLOBAL_DEFAULT_TIMEOUT)
return req
def open(self, fullurl, data=None,
timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
req = self._request(fullurl, data, None, timeout)
req_scheme = req.get_type()
self._maybe_reindex_handlers()
# pre-process request
# XXX should we allow a Processor to change the URL scheme
# of the request?
request_processors = set(self.process_request.get(req_scheme, []))
request_processors.update(self._any_request)
request_processors = list(request_processors)
request_processors.sort()
for processor in request_processors:
for meth_name in ["any_request", req_scheme+"_request"]:
meth = getattr(processor, meth_name, None)
if meth:
req = meth(req)
# In Python >= 2.4, .open() supports processors already, so we must
# call ._open() instead.
urlopen = _urllib2_fork.OpenerDirector._open
response = urlopen(self, req, data)
# post-process response
response_processors = set(self.process_response.get(req_scheme, []))
response_processors.update(self._any_response)
response_processors = list(response_processors)
response_processors.sort()
for processor in response_processors:
for meth_name in ["any_response", req_scheme+"_response"]:
meth = getattr(processor, meth_name, None)
if meth:
response = meth(req, response)
return response
def error(self, proto, *args):
if proto in ['http', 'https']:
# XXX http[s] protocols are special-cased
dict = self.handle_error['http'] # https is no different from http
proto = args[2] # YUCK!
meth_name = 'http_error_%s' % proto
http_err = 1
orig_args = args
else:
dict = self.handle_error
meth_name = proto + '_error'
http_err = 0
args = (dict, proto, meth_name) + args
result = apply(self._call_chain, args)
if result:
return result
if http_err:
args = (dict, 'default', 'http_error_default') + orig_args
return apply(self._call_chain, args)
BLOCK_SIZE = 1024*8
def retrieve(self, fullurl, filename=None, reporthook=None, data=None,
timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT,
open=open_file):
"""Returns (filename, headers).
For remote objects, the default filename will refer to a temporary
file. Temporary files are removed when the OpenerDirector.close()
method is called.
For file: URLs, at present the returned filename is None. This may
change in future.
If the actual number of bytes read is less than indicated by the
Content-Length header, raises ContentTooShortError (a URLError
subclass). The exception's .result attribute contains the (filename,
headers) that would have been returned.
"""
req = self._request(fullurl, data, False, timeout)
scheme = req.get_type()
fp = self.open(req)
try:
headers = fp.info()
if filename is None and scheme == 'file':
# XXX req.get_selector() seems broken here, return None,
# pending sanity :-/
return None, headers
#return urllib.url2pathname(req.get_selector()), headers
if filename:
tfp = open(filename, 'wb')
else:
path = _rfc3986.urlsplit(req.get_full_url())[2]
suffix = os.path.splitext(path)[1]
fd, filename = tempfile.mkstemp(suffix)
self._tempfiles.append(filename)
tfp = os.fdopen(fd, 'wb')
try:
result = filename, headers
bs = self.BLOCK_SIZE
size = -1
read = 0
blocknum = 0
if reporthook:
if "content-length" in headers:
size = int(headers["Content-Length"])
reporthook(blocknum, bs, size)
while 1:
block = fp.read(bs)
if block == "":
break
read += len(block)
tfp.write(block)
blocknum += 1
if reporthook:
reporthook(blocknum, bs, size)
finally:
tfp.close()
finally:
fp.close()
# raise exception if actual size does not match content-length header
if size >= 0 and read < size:
raise ContentTooShortError(
"retrieval incomplete: "
"got only %i out of %i bytes" % (read, size),
result
)
return result
def close(self):
_urllib2_fork.OpenerDirector.close(self)
# make it very obvious this object is no longer supposed to be used
self.open = self.error = self.retrieve = self.add_handler = None
if self._tempfiles:
for filename in self._tempfiles:
try:
os.unlink(filename)
except OSError:
pass
del self._tempfiles[:]
def wrapped_open(urlopen, process_response_object, fullurl, data=None,
timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
success = True
try:
response = urlopen(fullurl, data, timeout)
except urllib2.HTTPError, error:
success = False
if error.fp is None: # not a response
raise
response = error
if response is not None:
response = process_response_object(response)
if not success:
raise response
return response
class ResponseProcessingOpener(OpenerDirector):
def open(self, fullurl, data=None,
timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
def bound_open(fullurl, data=None,
timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
return OpenerDirector.open(self, fullurl, data, timeout)
return wrapped_open(
bound_open, self.process_response_object, fullurl, data, timeout)
def process_response_object(self, response):
return response
class SeekableResponseOpener(ResponseProcessingOpener):
def process_response_object(self, response):
return _response.seek_wrapped_response(response)
def isclass(obj):
return isinstance(obj, (types.ClassType, type))
class OpenerFactory:
"""This class's interface is quite likely to change."""
default_classes = [
# handlers
_urllib2_fork.ProxyHandler,
_urllib2_fork.UnknownHandler,
_urllib2_fork.HTTPHandler,
_urllib2_fork.HTTPDefaultErrorHandler,
_urllib2_fork.HTTPRedirectHandler,
_urllib2_fork.FTPHandler,
_urllib2_fork.FileHandler,
# processors
_urllib2_fork.HTTPCookieProcessor,
_urllib2_fork.HTTPErrorProcessor,
]
if hasattr(httplib, 'HTTPS'):
default_classes.append(_urllib2_fork.HTTPSHandler)
handlers = []
replacement_handlers = []
def __init__(self, klass=OpenerDirector):
self.klass = klass
def build_opener(self, *handlers):
"""Create an opener object from a list of handlers and processors.
The opener will use several default handlers and processors, including
support for HTTP and FTP.
If any of the handlers passed as arguments are subclasses of the
default handlers, the default handlers will not be used.
"""
opener = self.klass()
default_classes = list(self.default_classes)
skip = set()
for klass in default_classes:
for check in handlers:
if isclass(check):
if issubclass(check, klass):
skip.add(klass)
elif isinstance(check, klass):
skip.add(klass)
for klass in skip:
default_classes.remove(klass)
for klass in default_classes:
opener.add_handler(klass())
for h in handlers:
if isclass(h):
h = h()
opener.add_handler(h)
return opener
build_opener = OpenerFactory().build_opener
_opener = None
urlopen_lock = _threading.Lock()
def urlopen(url, data=None, timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
global _opener
if _opener is None:
urlopen_lock.acquire()
try:
if _opener is None:
_opener = build_opener()
finally:
urlopen_lock.release()
return _opener.open(url, data, timeout)
def urlretrieve(url, filename=None, reporthook=None, data=None,
timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
global _opener
if _opener is None:
urlopen_lock.acquire()
try:
if _opener is None:
_opener = build_opener()
finally:
urlopen_lock.release()
return _opener.retrieve(url, filename, reporthook, data, timeout)
def install_opener(opener):
global _opener
_opener = opener
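The module-level helpers above share one lazily built opener; a minimal sketch of both entry points (URLs are illustrative):

import mechanize

response = mechanize.urlopen("http://example.com/")
print response.geturl()

# retrieve() streams to a temporary file and returns (filename, headers);
# it raises ContentTooShortError if Content-Length promised more bytes
filename, headers = mechanize.urlretrieve("http://example.com/robots.txt")
print filename, headers.gettype()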

View File

@@ -1,391 +0,0 @@
"""A simple "pull API" for HTML parsing, after Perl's HTML::TokeParser.
Examples
This program extracts all links from a document. It will print one
line for each link, containing the URL and the textual description
between the <A>...</A> tags:
import pullparser, sys
f = file(sys.argv[1])
p = pullparser.PullParser(f)
for token in p.tags("a"):
if token.type == "endtag": continue
url = dict(token.attrs).get("href", "-")
text = p.get_compressed_text(endat=("endtag", "a"))
print "%s\t%s" % (url, text)
This program extracts the <TITLE> from the document:
import pullparser, sys
f = file(sys.argv[1])
p = pullparser.PullParser(f)
if p.get_tag("title"):
title = p.get_compressed_text()
print "Title: %s" % title
Copyright 2003-2006 John J. Lee <jjl@pobox.com>
Copyright 1998-2001 Gisle Aas (original libwww-perl code)
This code is free software; you can redistribute it and/or modify it
under the terms of the BSD or ZPL 2.1 licenses.
"""
import re, htmlentitydefs
import _sgmllib_copy as sgmllib
import HTMLParser
from xml.sax import saxutils
from _html import unescape, unescape_charref
class NoMoreTokensError(Exception): pass
class Token:
"""Represents an HTML tag, declaration, processing instruction etc.
Behaves as both a tuple-like object (ie. iterable) and has attributes
.type, .data and .attrs.
>>> t = Token("starttag", "a", [("href", "http://www.python.org/")])
>>> t == ("starttag", "a", [("href", "http://www.python.org/")])
True
>>> (t.type, t.data) == ("starttag", "a")
True
>>> t.attrs == [("href", "http://www.python.org/")]
True
Public attributes
type: one of "starttag", "endtag", "startendtag", "charref", "entityref",
"data", "comment", "decl", "pi", after the corresponding methods of
HTMLParser.HTMLParser
data: For a tag, the tag name; otherwise, the relevant data carried by the
tag, as a string
attrs: list of (name, value) pairs representing HTML attributes
(or None if token does not represent an opening tag)
"""
def __init__(self, type, data, attrs=None):
self.type = type
self.data = data
self.attrs = attrs
def __iter__(self):
return iter((self.type, self.data, self.attrs))
def __eq__(self, other):
type, data, attrs = other
if (self.type == type and
self.data == data and
self.attrs == attrs):
return True
else:
return False
def __ne__(self, other): return not self.__eq__(other)
def __repr__(self):
args = ", ".join(map(repr, [self.type, self.data, self.attrs]))
return self.__class__.__name__+"(%s)" % args
def __str__(self):
"""
>>> print Token("starttag", "br")
<br>
>>> print Token("starttag", "a",
... [("href", "http://www.python.org/"), ("alt", '"foo"')])
<a href="http://www.python.org/" alt='"foo"'>
>>> print Token("startendtag", "br")
<br />
>>> print Token("startendtag", "br", [("spam", "eggs")])
<br spam="eggs" />
>>> print Token("endtag", "p")
</p>
>>> print Token("charref", "38")
&#38;
>>> print Token("entityref", "amp")
&amp;
>>> print Token("data", "foo\\nbar")
foo
bar
>>> print Token("comment", "Life is a bowl\\nof cherries.")
<!--Life is a bowl
of cherries.-->
>>> print Token("decl", "decl")
<!decl>
>>> print Token("pi", "pi")
<?pi>
"""
if self.attrs is not None:
attrs = "".join([" %s=%s" % (k, saxutils.quoteattr(v)) for
k, v in self.attrs])
else:
attrs = ""
if self.type == "starttag":
return "<%s%s>" % (self.data, attrs)
elif self.type == "startendtag":
return "<%s%s />" % (self.data, attrs)
elif self.type == "endtag":
return "</%s>" % self.data
elif self.type == "charref":
return "&#%s;" % self.data
elif self.type == "entityref":
return "&%s;" % self.data
elif self.type == "data":
return self.data
elif self.type == "comment":
return "<!--%s-->" % self.data
elif self.type == "decl":
return "<!%s>" % self.data
elif self.type == "pi":
return "<?%s>" % self.data
assert False
def iter_until_exception(fn, exception, *args, **kwds):
while 1:
try:
yield fn(*args, **kwds)
except exception:
raise StopIteration
class _AbstractParser:
chunk = 1024
compress_re = re.compile(r"\s+")
def __init__(self, fh, textify={"img": "alt", "applet": "alt"},
encoding="ascii", entitydefs=None):
"""
fh: file-like object (only a .read() method is required) from which to
read HTML to be parsed
textify: mapping used by .get_text() and .get_compressed_text() methods
to represent opening tags as text
encoding: encoding used to encode numeric character references by
.get_text() and .get_compressed_text() ("ascii" by default)
entitydefs: mapping like {"amp": "&", ...} containing HTML entity
definitions (a sensible default is used). This is used to unescape
entities in .get_text() (and .get_compressed_text()) and attribute
values. If the encoding can not represent the character, the entity
reference is left unescaped. Note that entity references (both
numeric - e.g. &#123; or &#xabc; - and non-numeric - e.g. &amp;) are
unescaped in attribute values and the return value of .get_text(), but
not in data outside of tags. Instead, entity references outside of
tags are represented as tokens. This is a bit odd, it's true :-/
If the element name of an opening tag matches a key in the textify
mapping then that tag is converted to text. The corresponding value is
used to specify which tag attribute to obtain the text from. textify
maps from element names to either:
- an HTML attribute name, in which case the HTML attribute value is
used as its text value along with the element name in square
brackets (e.g. "alt text goes here[IMG]", or, if the alt attribute
were missing, just "[IMG]")
- a callable object (e.g. a function) which takes a Token and returns
the string to be used as its text value
If textify has no key for an element name, nothing is substituted for
the opening tag.
Public attributes:
encoding and textify: see above
"""
self._fh = fh
self._tokenstack = [] # FIFO
self.textify = textify
self.encoding = encoding
if entitydefs is None:
entitydefs = htmlentitydefs.name2codepoint
self._entitydefs = entitydefs
def __iter__(self): return self
def tags(self, *names):
return iter_until_exception(self.get_tag, NoMoreTokensError, *names)
def tokens(self, *tokentypes):
return iter_until_exception(self.get_token, NoMoreTokensError,
*tokentypes)
def next(self):
try:
return self.get_token()
except NoMoreTokensError:
raise StopIteration()
def get_token(self, *tokentypes):
"""Pop the next Token object from the stack of parsed tokens.
If arguments are given, they are taken to be token types in which the
caller is interested: tokens representing other elements will be
skipped. Element names must be given in lower case.
Raises NoMoreTokensError.
"""
while 1:
while self._tokenstack:
token = self._tokenstack.pop(0)
if tokentypes:
if token.type in tokentypes:
return token
else:
return token
data = self._fh.read(self.chunk)
if not data:
raise NoMoreTokensError()
self.feed(data)
def unget_token(self, token):
"""Push a Token back onto the stack."""
self._tokenstack.insert(0, token)
def get_tag(self, *names):
"""Return the next Token that represents an opening or closing tag.
If arguments are given, they are taken to be element names in which the
caller is interested: tags representing other elements will be skipped.
Element names must be given in lower case.
Raises NoMoreTokensError.
"""
while 1:
tok = self.get_token()
if tok.type not in ["starttag", "endtag", "startendtag"]:
continue
if names:
if tok.data in names:
return tok
else:
return tok
def get_text(self, endat=None):
"""Get some text.
endat: stop reading text at this tag (the tag is included in the
returned text); endat is a tuple (type, name) where type is
"starttag", "endtag" or "startendtag", and name is the element name of
the tag (element names must be given in lower case)
If endat is not given, .get_text() will stop at the next opening or
closing tag, or when there are no more tokens (no exception is raised).
Note that .get_text() includes the text representation (if any) of the
opening tag, but pushes the opening tag back onto the stack. As a
result, if you want to call .get_text() again, you need to call
.get_tag() first (unless you want an empty string returned when you
next call .get_text()).
Entity references are translated using the value of the entitydefs
constructor argument (a mapping from names to characters like that
provided by the standard module htmlentitydefs). Named entity
references that are not in this mapping are left unchanged.
The textify attribute is used to translate opening tags into text: see
the class docstring.
"""
text = []
tok = None
while 1:
try:
tok = self.get_token()
except NoMoreTokensError:
# unget last token (not the one we just failed to get)
if tok: self.unget_token(tok)
break
if tok.type == "data":
text.append(tok.data)
elif tok.type == "entityref":
t = unescape("&%s;"%tok.data, self._entitydefs, self.encoding)
text.append(t)
elif tok.type == "charref":
t = unescape_charref(tok.data, self.encoding)
text.append(t)
elif tok.type in ["starttag", "endtag", "startendtag"]:
tag_name = tok.data
if tok.type in ["starttag", "startendtag"]:
alt = self.textify.get(tag_name)
if alt is not None:
if callable(alt):
text.append(alt(tok))
elif tok.attrs is not None:
for k, v in tok.attrs:
if k == alt:
text.append(v)
text.append("[%s]" % tag_name.upper())
if endat is None or endat == (tok.type, tag_name):
self.unget_token(tok)
break
return "".join(text)
def get_compressed_text(self, *args, **kwds):
"""
As .get_text(), but collapses each group of contiguous whitespace to a
single space character, and removes all initial and trailing
whitespace.
"""
text = self.get_text(*args, **kwds)
text = text.strip()
return self.compress_re.sub(" ", text)
def handle_startendtag(self, tag, attrs):
self._tokenstack.append(Token("startendtag", tag, attrs))
def handle_starttag(self, tag, attrs):
self._tokenstack.append(Token("starttag", tag, attrs))
def handle_endtag(self, tag):
self._tokenstack.append(Token("endtag", tag))
def handle_charref(self, name):
self._tokenstack.append(Token("charref", name))
def handle_entityref(self, name):
self._tokenstack.append(Token("entityref", name))
def handle_data(self, data):
self._tokenstack.append(Token("data", data))
def handle_comment(self, data):
self._tokenstack.append(Token("comment", data))
def handle_decl(self, decl):
self._tokenstack.append(Token("decl", decl))
def unknown_decl(self, data):
# XXX should this call self.error instead?
#self.error("unknown declaration: " + `data`)
self._tokenstack.append(Token("decl", data))
def handle_pi(self, data):
self._tokenstack.append(Token("pi", data))
def unescape_attr(self, name):
return unescape(name, self._entitydefs, self.encoding)
def unescape_attrs(self, attrs):
escaped_attrs = []
for key, val in attrs:
escaped_attrs.append((key, self.unescape_attr(val)))
return escaped_attrs
class PullParser(_AbstractParser, HTMLParser.HTMLParser):
def __init__(self, *args, **kwds):
HTMLParser.HTMLParser.__init__(self)
_AbstractParser.__init__(self, *args, **kwds)
def unescape(self, name):
# Use the entitydefs passed into constructor, not
# HTMLParser.HTMLParser's entitydefs.
return self.unescape_attr(name)
class TolerantPullParser(_AbstractParser, sgmllib.SGMLParser):
def __init__(self, *args, **kwds):
sgmllib.SGMLParser.__init__(self)
_AbstractParser.__init__(self, *args, **kwds)
def unknown_starttag(self, tag, attrs):
attrs = self.unescape_attrs(attrs)
self._tokenstack.append(Token("starttag", tag, attrs))
def unknown_endtag(self, tag):
self._tokenstack.append(Token("endtag", tag))
def _test():
import doctest, _pullparser
return doctest.testmod(_pullparser)
if __name__ == "__main__":
_test()
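To complement the PullParser examples in the module docstring, a small sketch of the tolerant variant on slightly malformed markup; it assumes the vendored file is importable as _pullparser.

from StringIO import StringIO
import _pullparser

f = StringIO("<html><title>Demo</title><p>one <b>two</p></html>")
p = _pullparser.TolerantPullParser(f)
p.get_tag("title")             # skip ahead to the <title> start tag
print p.get_compressed_text()  # -> "Demo"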

View File

@@ -1,40 +0,0 @@
"""Integration with Python standard library module urllib2: Request class.
Copyright 2004-2006 John J Lee <jjl@pobox.com>
This code is free software; you can redistribute it and/or modify it
under the terms of the BSD or ZPL 2.1 licenses (see the file
COPYING.txt included with the distribution).
"""
import logging
import _rfc3986
import _sockettimeout
import _urllib2_fork
warn = logging.getLogger("mechanize").warning
class Request(_urllib2_fork.Request):
def __init__(self, url, data=None, headers={},
origin_req_host=None, unverifiable=False, visit=None,
timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
# In mechanize 0.2, the interpretation of a unicode url argument will
# change: A unicode url argument will be interpreted as an IRI, and a
# bytestring as a URI. For now, we accept unicode or bytestring. We
# don't insist that the value is always a URI (specifically, must only
# contain characters which are legal), because that might break working
# code (who knows what bytes some servers want to see, especially with
# browser plugins for internationalised URIs).
if not _rfc3986.is_clean_uri(url):
warn("url argument is not a URI "
"(contains illegal characters) %r" % url)
_urllib2_fork.Request.__init__(self, url, data, headers)
self.selector = None
self.visit = visit
self.timeout = timeout
def __str__(self):
return "<Request for %s>" % self.get_full_url()

View File

@@ -1,525 +0,0 @@
"""Response classes.
The seek_wrapper code is not used if you're using UserAgent with
.set_seekable_responses(False), or if you're using the urllib2-level interface
HTTPEquivProcessor. Class closeable_response is instantiated by some handlers
(AbstractHTTPHandler), but the closeable_response interface is only depended
upon by Browser-level code. Function upgrade_response is only used if you're
using Browser.
Copyright 2006 John J. Lee <jjl@pobox.com>
This code is free software; you can redistribute it and/or modify it
under the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
included with the distribution).
"""
import copy, mimetools, urllib2
from cStringIO import StringIO
def len_of_seekable(file_):
# this function exists because evaluation of len(file_.getvalue()) on every
# .read() from seek_wrapper would be O(N**2) in number of .read()s
pos = file_.tell()
file_.seek(0, 2) # to end
try:
return file_.tell()
finally:
file_.seek(pos)
# XXX Andrew Dalke kindly sent me a similar class in response to my request on
# comp.lang.python, which I then proceeded to lose. I wrote this class
# instead, but I think he's released his code publicly since, could pinch the
# tests from it, at least...
# For testing seek_wrapper invariant (note that
# test_urllib2.HandlerTest.test_seekable is expected to fail when this
# invariant checking is turned on). The invariant checking is done by module
# ipdc, which is available here:
# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/436834
## from ipdbc import ContractBase
## class seek_wrapper(ContractBase):
class seek_wrapper:
"""Adds a seek method to a file object.
This is only designed for seeking on readonly file-like objects.
Wrapped file-like object must have a read method. The readline method is
only supported if that method is present on the wrapped object. The
readlines method is always supported. xreadlines and iteration are
supported only for Python 2.2 and above.
Public attributes:
wrapped: the wrapped file object
is_closed: true iff .close() has been called
WARNING: All other attributes of the wrapped object (ie. those that are not
one of wrapped, read, readline, readlines, xreadlines, __iter__ and next)
are passed through unaltered, which may or may not make sense for your
particular file object.
"""
# General strategy is to check that cache is full enough, then delegate to
# the cache (self.__cache, which is a cStringIO.StringIO instance). A seek
# position (self.__pos) is maintained independently of the cache, in order
# that a single cache may be shared between multiple seek_wrapper objects.
# Copying using module copy shares the cache in this way.
def __init__(self, wrapped):
self.wrapped = wrapped
self.__read_complete_state = [False]
self.__is_closed_state = [False]
self.__have_readline = hasattr(self.wrapped, "readline")
self.__cache = StringIO()
self.__pos = 0 # seek position
def invariant(self):
# The end of the cache is always at the same place as the end of the
# wrapped file (though the .tell() method is not required to be present
# on wrapped file).
return self.wrapped.tell() == len(self.__cache.getvalue())
def close(self):
self.wrapped.close()
self.is_closed = True
def __getattr__(self, name):
if name == "is_closed":
return self.__is_closed_state[0]
elif name == "read_complete":
return self.__read_complete_state[0]
wrapped = self.__dict__.get("wrapped")
if wrapped:
return getattr(wrapped, name)
return getattr(self.__class__, name)
def __setattr__(self, name, value):
if name == "is_closed":
self.__is_closed_state[0] = bool(value)
elif name == "read_complete":
if not self.is_closed:
self.__read_complete_state[0] = bool(value)
else:
self.__dict__[name] = value
def seek(self, offset, whence=0):
assert whence in [0,1,2]
# how much data, if any, do we need to read?
if whence == 2: # 2: relative to end of *wrapped* file
if offset < 0: raise ValueError("negative seek offset")
# since we don't know yet where the end of that file is, we must
# read everything
to_read = None
else:
if whence == 0: # 0: absolute
if offset < 0: raise ValueError("negative seek offset")
dest = offset
else: # 1: relative to current position
pos = self.__pos
if pos < -offset:
raise ValueError("seek to before start of file")
dest = pos + offset
end = len_of_seekable(self.__cache)
to_read = dest - end
if to_read < 0:
to_read = 0
if to_read != 0:
self.__cache.seek(0, 2)
if to_read is None:
assert whence == 2
self.__cache.write(self.wrapped.read())
self.read_complete = True
self.__pos = self.__cache.tell() - offset
else:
data = self.wrapped.read(to_read)
if not data:
self.read_complete = True
else:
self.__cache.write(data)
# Don't raise an exception even if we've seek()ed past the end
# of .wrapped, since fseek() doesn't complain in that case.
# Also like fseek(), pretend we have seek()ed past the end,
# i.e. not:
#self.__pos = self.__cache.tell()
# but rather:
self.__pos = dest
else:
self.__pos = dest
def tell(self):
return self.__pos
def __copy__(self):
cpy = self.__class__(self.wrapped)
cpy.__cache = self.__cache
cpy.__read_complete_state = self.__read_complete_state
cpy.__is_closed_state = self.__is_closed_state
return cpy
def get_data(self):
pos = self.__pos
try:
self.seek(0)
return self.read(-1)
finally:
self.__pos = pos
def read(self, size=-1):
pos = self.__pos
end = len_of_seekable(self.__cache)
available = end - pos
# enough data already cached?
if size <= available and size != -1:
self.__cache.seek(pos)
self.__pos = pos+size
return self.__cache.read(size)
# no, so read sufficient data from wrapped file and cache it
self.__cache.seek(0, 2)
if size == -1:
self.__cache.write(self.wrapped.read())
self.read_complete = True
else:
to_read = size - available
assert to_read > 0
data = self.wrapped.read(to_read)
if not data:
self.read_complete = True
else:
self.__cache.write(data)
self.__cache.seek(pos)
data = self.__cache.read(size)
self.__pos = self.__cache.tell()
assert self.__pos == pos + len(data)
return data
def readline(self, size=-1):
if not self.__have_readline:
raise NotImplementedError("no readline method on wrapped object")
# line we're about to read might not be complete in the cache, so
# read another line first
pos = self.__pos
self.__cache.seek(0, 2)
data = self.wrapped.readline()
if not data:
self.read_complete = True
else:
self.__cache.write(data)
self.__cache.seek(pos)
data = self.__cache.readline()
if size != -1:
r = data[:size]
self.__pos = pos+size
else:
r = data
self.__pos = pos+len(data)
return r
def readlines(self, sizehint=-1):
pos = self.__pos
self.__cache.seek(0, 2)
self.__cache.write(self.wrapped.read())
self.read_complete = True
self.__cache.seek(pos)
data = self.__cache.readlines(sizehint)
self.__pos = self.__cache.tell()
return data
def __iter__(self): return self
def next(self):
line = self.readline()
if line == "": raise StopIteration
return line
xreadlines = __iter__
def __repr__(self):
return ("<%s at %s whose wrapped object = %r>" %
(self.__class__.__name__, hex(abs(id(self))), self.wrapped))
class response_seek_wrapper(seek_wrapper):
"""
Supports copying response objects and setting response body data.
"""
def __init__(self, wrapped):
seek_wrapper.__init__(self, wrapped)
self._headers = self.wrapped.info()
def __copy__(self):
cpy = seek_wrapper.__copy__(self)
# copy headers from delegate
cpy._headers = copy.copy(self.info())
return cpy
# Note that .info() and .geturl() (the only two urllib2 response methods
# that are not implemented by seek_wrapper) must be defined here explicitly
# rather than via seek_wrapper's __getattr__ delegation, so that the nasty
# dynamically-created HTTPError classes in get_seek_wrapper_class() get the
# wrapped object's implementation, and not HTTPError's.
def info(self):
return self._headers
def geturl(self):
return self.wrapped.geturl()
def set_data(self, data):
self.seek(0)
self.read()
self.close()
cache = self._seek_wrapper__cache = StringIO()
cache.write(data)
self.seek(0)
class eoffile:
# file-like object that always claims to be at end-of-file...
def read(self, size=-1): return ""
def readline(self, size=-1): return ""
def __iter__(self): return self
def next(self): return ""
def close(self): pass
class eofresponse(eoffile):
def __init__(self, url, headers, code, msg):
self._url = url
self._headers = headers
self.code = code
self.msg = msg
def geturl(self): return self._url
def info(self): return self._headers
class closeable_response:
"""Avoids unnecessarily clobbering urllib.addinfourl methods on .close().
Only supports responses returned by mechanize.HTTPHandler.
After .close(), the following methods are supported:
.read()
.readline()
.info()
.geturl()
.__iter__()
.next()
.close()
and the following attributes are supported:
.code
.msg
Also supports pickling (but the stdlib currently does something to prevent
it: http://python.org/sf/1144636).
"""
# presence of this attr indicates is useable after .close()
closeable_response = None
def __init__(self, fp, headers, url, code, msg):
self._set_fp(fp)
self._headers = headers
self._url = url
self.code = code
self.msg = msg
def _set_fp(self, fp):
self.fp = fp
self.read = self.fp.read
self.readline = self.fp.readline
if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
if hasattr(self.fp, "fileno"):
self.fileno = self.fp.fileno
else:
self.fileno = lambda: None
self.__iter__ = self.fp.__iter__
self.next = self.fp.next
def __repr__(self):
return '<%s at %s whose fp = %r>' % (
self.__class__.__name__, hex(abs(id(self))), self.fp)
def info(self):
return self._headers
def geturl(self):
return self._url
def close(self):
wrapped = self.fp
wrapped.close()
new_wrapped = eofresponse(
self._url, self._headers, self.code, self.msg)
self._set_fp(new_wrapped)
def __getstate__(self):
# There are three obvious options here:
# 1. truncate
# 2. read to end
# 3. close socket, pickle state including read position, then open
# again on unpickle and use Range header
# XXXX um, 4. refuse to pickle unless .close()d. This is better,
# actually ("errors should never pass silently"). Pickling doesn't
# work anyway ATM, because of http://python.org/sf/1144636 so fix
# this later
# 2 breaks pickle protocol, because one expects the original object
# to be left unscathed by pickling. 3 is too complicated and
# surprising (and too much work ;-) to happen in a sane __getstate__.
# So we do 1.
state = self.__dict__.copy()
new_wrapped = eofresponse(
self._url, self._headers, self.code, self.msg)
state["wrapped"] = new_wrapped
return state
def test_response(data='test data', headers=[],
url="http://example.com/", code=200, msg="OK"):
return make_response(data, headers, url, code, msg)
def test_html_response(data='test data', headers=[],
url="http://example.com/", code=200, msg="OK"):
headers += [("Content-type", "text/html")]
return make_response(data, headers, url, code, msg)
def make_response(data, headers, url, code, msg):
"""Convenient factory for objects implementing response interface.
data: string containing response body data
headers: sequence of (name, value) pairs
url: URL of response
code: integer response code (e.g. 200)
msg: string response code message (e.g. "OK")
"""
mime_headers = make_headers(headers)
r = closeable_response(StringIO(data), mime_headers, url, code, msg)
return response_seek_wrapper(r)
def make_headers(headers):
"""
headers: sequence of (name, value) pairs
"""
hdr_text = []
for name_value in headers:
hdr_text.append("%s: %s" % name_value)
return mimetools.Message(StringIO("\n".join(hdr_text)))
# The rest of this module is especially horrible, but needed, at least until
# we fork urllib2. Even then, we may want to preserve urllib2 compatibility.
def get_seek_wrapper_class(response):
# in order to wrap response objects that are also exceptions, we must
# dynamically subclass the exception :-(((
if (isinstance(response, urllib2.HTTPError) and
not hasattr(response, "seek")):
if response.__class__.__module__ == "__builtin__":
exc_class_name = response.__class__.__name__
else:
exc_class_name = "%s.%s" % (
response.__class__.__module__, response.__class__.__name__)
class httperror_seek_wrapper(response_seek_wrapper, response.__class__):
# this only derives from HTTPError in order to be a subclass --
# the HTTPError behaviour comes from delegation
_exc_class_name = exc_class_name
def __init__(self, wrapped):
response_seek_wrapper.__init__(self, wrapped)
# be compatible with undocumented HTTPError attributes :-(
self.hdrs = wrapped.info()
self.filename = wrapped.geturl()
def __repr__(self):
return (
"<%s (%s instance) at %s "
"whose wrapped object = %r>" % (
self.__class__.__name__, self._exc_class_name,
hex(abs(id(self))), self.wrapped)
)
wrapper_class = httperror_seek_wrapper
else:
wrapper_class = response_seek_wrapper
return wrapper_class
def seek_wrapped_response(response):
"""Return a copy of response that supports seekable response interface.
Accepts responses from both mechanize and urllib2 handlers.
Copes with both ordinary response instances and HTTPError instances (which
can't be simply wrapped due to the requirement of preserving the exception
base class).
"""
if not hasattr(response, "seek"):
wrapper_class = get_seek_wrapper_class(response)
response = wrapper_class(response)
assert hasattr(response, "get_data")
return response
def upgrade_response(response):
"""Return a copy of response that supports Browser response interface.
Browser response interface is that of "seekable responses"
(response_seek_wrapper), plus the requirement that responses must be
useable after .close() (closeable_response).
Accepts responses from both mechanize and urllib2 handlers.
Copes with both ordinary response instances and HTTPError instances (which
can't be simply wrapped due to the requirement of preserving the exception
base class).
"""
wrapper_class = get_seek_wrapper_class(response)
if hasattr(response, "closeable_response"):
if not hasattr(response, "seek"):
response = wrapper_class(response)
assert hasattr(response, "get_data")
return copy.copy(response)
# a urllib2 handler constructed the response, i.e. the response is an
# urllib.addinfourl or a urllib2.HTTPError, instead of a
# _Util.closeable_response as returned by e.g. mechanize.HTTPHandler
try:
code = response.code
except AttributeError:
code = None
try:
msg = response.msg
except AttributeError:
msg = None
# may have already-.read() data from .seek() cache
data = None
get_data = getattr(response, "get_data", None)
if get_data:
data = get_data()
response = closeable_response(
response.fp, response.info(), response.geturl(), code, msg)
response = wrapper_class(response)
if data:
response.set_data(data)
return response
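The factory helpers above make the wrapper behaviour easy to exercise in isolation; a minimal sketch, assuming the vendored file is importable as _response:

import _response

r = _response.test_html_response("<html>hello</html>")
print r.read()       # -> "<html>hello</html>"
r.seek(0)            # legal: response_seek_wrapper caches everything read so far
print r.get_data()   # whole body, without disturbing the seek position
r.close()
print r.code, r.msg  # still usable after close(), per closeable_response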

View File

@@ -1,245 +0,0 @@
"""RFC 3986 URI parsing and relative reference resolution / absolutization.
(aka splitting and joining)
Copyright 2006 John J. Lee <jjl@pobox.com>
This code is free software; you can redistribute it and/or modify it under
the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
included with the distribution).
"""
# XXX Wow, this is ugly. Overly-direct translation of the RFC ATM.
import re, urllib
## def chr_range(a, b):
## return "".join(map(chr, range(ord(a), ord(b)+1)))
## UNRESERVED_URI_CHARS = ("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
## "abcdefghijklmnopqrstuvwxyz"
## "0123456789"
## "-_.~")
## RESERVED_URI_CHARS = "!*'();:@&=+$,/?#[]"
## URI_CHARS = RESERVED_URI_CHARS+UNRESERVED_URI_CHARS+'%'
# this re matches any character that's not in URI_CHARS
BAD_URI_CHARS_RE = re.compile("[^A-Za-z0-9\-_.~!*'();:@&=+$,/?%#[\]]")
def clean_url(url, encoding):
# percent-encode illegal URI characters
# Trying to come up with test cases for this gave me a headache; revisit
# when we do switch to unicode.
# Somebody else's comments (lost the attribution):
## - IE will return you the url in the encoding you send it
## - Mozilla/Firefox will send you latin-1 if there's no non latin-1
## characters in your link. It will send you utf-8 however if there are...
if type(url) == type(""):
url = url.decode(encoding, "replace")
url = url.strip()
# for second param to urllib.quote(), we want URI_CHARS, minus the
# 'always_safe' characters that urllib.quote() never percent-encodes
return urllib.quote(url.encode(encoding), "!*'();:@&=+$,/?%#[]~")
def is_clean_uri(uri):
"""
>>> is_clean_uri("ABC!")
True
>>> is_clean_uri(u"ABC!")
True
>>> is_clean_uri("ABC|")
False
>>> is_clean_uri(u"ABC|")
False
>>> is_clean_uri("http://example.com/0")
True
>>> is_clean_uri(u"http://example.com/0")
True
"""
# note module re treats bytestrings as though they were decoded as latin-1
# so this function accepts both unicode and bytestrings
return not bool(BAD_URI_CHARS_RE.search(uri))
SPLIT_MATCH = re.compile(
r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?").match
def urlsplit(absolute_uri):
"""Return scheme, authority, path, query, fragment."""
match = SPLIT_MATCH(absolute_uri)
if match:
g = match.groups()
return g[1], g[3], g[4], g[6], g[8]
def urlunsplit(parts):
scheme, authority, path, query, fragment = parts
r = []
append = r.append
if scheme is not None:
append(scheme)
append(":")
if authority is not None:
append("//")
append(authority)
append(path)
if query is not None:
append("?")
append(query)
if fragment is not None:
append("#")
append(fragment)
return "".join(r)
def urljoin(base_uri, uri_reference):
"""Join a base URI with a URI reference and return the resulting URI.
See RFC 3986.
"""
return urlunsplit(urljoin_parts(urlsplit(base_uri),
urlsplit(uri_reference)))
# oops, this doesn't do the same thing as the literal translation
# from the RFC below
## import posixpath
## def urljoin_parts(base_parts, reference_parts):
## scheme, authority, path, query, fragment = base_parts
## rscheme, rauthority, rpath, rquery, rfragment = reference_parts
## # compute target URI path
## if rpath == "":
## tpath = path
## else:
## tpath = rpath
## if not tpath.startswith("/"):
## tpath = merge(authority, path, tpath)
## tpath = posixpath.normpath(tpath)
## if rscheme is not None:
## return (rscheme, rauthority, tpath, rquery, rfragment)
## elif rauthority is not None:
## return (scheme, rauthority, tpath, rquery, rfragment)
## elif rpath == "":
## if rquery is not None:
## tquery = rquery
## else:
## tquery = query
## return (scheme, authority, tpath, tquery, rfragment)
## else:
## return (scheme, authority, tpath, rquery, rfragment)
def urljoin_parts(base_parts, reference_parts):
scheme, authority, path, query, fragment = base_parts
rscheme, rauthority, rpath, rquery, rfragment = reference_parts
if rscheme == scheme:
rscheme = None
if rscheme is not None:
tscheme, tauthority, tpath, tquery = (
rscheme, rauthority, remove_dot_segments(rpath), rquery)
else:
if rauthority is not None:
tauthority, tpath, tquery = (
rauthority, remove_dot_segments(rpath), rquery)
else:
if rpath == "":
tpath = path
if rquery is not None:
tquery = rquery
else:
tquery = query
else:
if rpath.startswith("/"):
tpath = remove_dot_segments(rpath)
else:
tpath = merge(authority, path, rpath)
tpath = remove_dot_segments(tpath)
tquery = rquery
tauthority = authority
tscheme = scheme
tfragment = rfragment
return (tscheme, tauthority, tpath, tquery, tfragment)
# um, something *vaguely* like this is what I want, but I have to generate
# lots of test cases first, if only to understand what it is that
# remove_dot_segments really does...
## def remove_dot_segments(path):
## if path == '':
## return ''
## comps = path.split('/')
## new_comps = []
## for comp in comps:
## if comp in ['.', '']:
## if not new_comps or new_comps[-1]:
## new_comps.append('')
## continue
## if comp != '..':
## new_comps.append(comp)
## elif new_comps:
## new_comps.pop()
## return '/'.join(new_comps)
def remove_dot_segments(path):
r = []
while path:
# A
if path.startswith("../"):
path = path[3:]
continue
if path.startswith("./"):
path = path[2:]
continue
# B
if path.startswith("/./"):
path = path[2:]
continue
if path == "/.":
path = "/"
continue
# C
if path.startswith("/../"):
path = path[3:]
if r:
r.pop()
continue
if path == "/..":
path = "/"
if r:
r.pop()
continue
# D
if path == ".":
path = path[1:]
continue
if path == "..":
path = path[2:]
continue
# E
start = 0
if path.startswith("/"):
start = 1
ii = path.find("/", start)
if ii < 0:
ii = None
r.append(path[:ii])
if ii is None:
break
path = path[ii:]
return "".join(r)
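# Worked cases (not in the original module) to make the algorithm above
# concrete; the first two are the examples from RFC 3986 section 5.2.4:
#
#   >>> remove_dot_segments("/a/b/c/./../../g")
#   '/a/g'
#   >>> remove_dot_segments("mid/content=5/../6")
#   'mid/6'
#   >>> urljoin("http://a/b/c/d;p?q", "../g")
#   'http://a/b/g'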
def merge(base_authority, base_path, ref_path):
# XXXX Oddly, the sample Perl implementation of this by Roy Fielding
# doesn't even take base_authority as a parameter, despite the wording in
# the RFC suggesting otherwise. Perhaps I'm missing some obvious identity.
#if base_authority is not None and base_path == "":
if base_path == "":
return "/" + ref_path
ii = base_path.rfind("/")
if ii >= 0:
return base_path[:ii+1] + ref_path
return ref_path
if __name__ == "__main__":
import doctest
doctest.testmod()

View File

@@ -1,559 +0,0 @@
# Taken from Python 2.6.4 and regexp module constants modified
"""A parser for SGML, using the derived class as a static DTD."""
# XXX This only supports those SGML features used by HTML.
# XXX There should be a way to distinguish between PCDATA (parsed
# character data -- the normal case), RCDATA (replaceable character
# data -- only char and entity references and end tags are special)
# and CDATA (character data -- only end tags are special). RCDATA is
# not supported at all.
# from warnings import warnpy3k
# warnpy3k("the sgmllib module has been removed in Python 3.0",
# stacklevel=2)
# del warnpy3k
import markupbase
import re
__all__ = ["SGMLParser", "SGMLParseError"]
# Regular expressions used for parsing
interesting = re.compile('[&<]')
incomplete = re.compile('&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|'
'<([a-zA-Z][^<>]*|'
'/([a-zA-Z][^<>]*)?|'
'![^<>]*)?')
entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
# hack to fix http://bugs.python.org/issue803422
# charref = re.compile('&#([0-9]+)[^0-9]')
charref = re.compile("&#(x?[0-9a-fA-F]+)[^0-9a-fA-F]")
starttagopen = re.compile('<[>a-zA-Z]')
shorttagopen = re.compile('<[a-zA-Z][-.a-zA-Z0-9]*/')
shorttag = re.compile('<([a-zA-Z][-.a-zA-Z0-9]*)/([^/]*)/')
piclose = re.compile('>')
endbracket = re.compile('[<>]')
# hack moved from _beautifulsoup.py (bundled BeautifulSoup version 2)
#This code makes Beautiful Soup able to parse XML with namespaces
# tagfind = re.compile('[a-zA-Z][-_.a-zA-Z0-9]*')
tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*')
attrfind = re.compile(
r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*'
r'(\'[^\']*\'|"[^"]*"|[][\-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?')
class SGMLParseError(RuntimeError):
"""Exception raised for all parse errors."""
pass
# SGML parser base class -- find tags and call handler functions.
# Usage: p = SGMLParser(); p.feed(data); ...; p.close().
# The dtd is defined by deriving a class which defines methods
# with special names to handle tags: start_foo and end_foo to handle
# <foo> and </foo>, respectively, or do_foo to handle <foo> by itself.
# (Tags are converted to lower case for this purpose.) The data
# between tags is passed to the parser by calling self.handle_data()
# with some data as argument (the data may be split up in arbitrary
# chunks). Entity references are passed by calling
# self.handle_entityref() with the entity reference as argument.
class SGMLParser(markupbase.ParserBase):
# Definition of entities -- derived classes may override
entity_or_charref = re.compile('&(?:'
'([a-zA-Z][-.a-zA-Z0-9]*)|#([0-9]+)'
')(;?)')
def __init__(self, verbose=0):
"""Initialize and reset this instance."""
self.verbose = verbose
self.reset()
def reset(self):
"""Reset this instance. Loses all unprocessed data."""
self.__starttag_text = None
self.rawdata = ''
self.stack = []
self.lasttag = '???'
self.nomoretags = 0
self.literal = 0
markupbase.ParserBase.reset(self)
def setnomoretags(self):
"""Enter literal mode (CDATA) till EOF.
Intended for derived classes only.
"""
self.nomoretags = self.literal = 1
def setliteral(self, *args):
"""Enter literal mode (CDATA).
Intended for derived classes only.
"""
self.literal = 1
def feed(self, data):
"""Feed some data to the parser.
Call this as often as you want, with as little or as much text
as you want (may include '\n'). (This just saves the text,
all the processing is done by goahead().)
"""
self.rawdata = self.rawdata + data
self.goahead(0)
def close(self):
"""Handle the remaining data."""
self.goahead(1)
def error(self, message):
raise SGMLParseError(message)
# Internal -- handle data as far as reasonable. May leave state
# and data to be processed by a subsequent call. If 'end' is
# true, force handling all data as if followed by EOF marker.
def goahead(self, end):
rawdata = self.rawdata
i = 0
n = len(rawdata)
while i < n:
if self.nomoretags:
self.handle_data(rawdata[i:n])
i = n
break
match = interesting.search(rawdata, i)
if match: j = match.start()
else: j = n
if i < j:
self.handle_data(rawdata[i:j])
i = j
if i == n: break
if rawdata[i] == '<':
if starttagopen.match(rawdata, i):
if self.literal:
self.handle_data(rawdata[i])
i = i+1
continue
k = self.parse_starttag(i)
if k < 0: break
i = k
continue
if rawdata.startswith("</", i):
k = self.parse_endtag(i)
if k < 0: break
i = k
self.literal = 0
continue
if self.literal:
if n > (i + 1):
self.handle_data("<")
i = i+1
else:
# incomplete
break
continue
if rawdata.startswith("<!--", i):
# Strictly speaking, a comment is --.*--
# within a declaration tag <!...>.
# This should be removed,
# and comments handled only in parse_declaration.
k = self.parse_comment(i)
if k < 0: break
i = k
continue
if rawdata.startswith("<?", i):
k = self.parse_pi(i)
if k < 0: break
i = i+k
continue
if rawdata.startswith("<!", i):
# This is some sort of declaration; in "HTML as
# deployed," this should only be the document type
# declaration ("<!DOCTYPE html...>").
k = self.parse_declaration(i)
if k < 0: break
i = k
continue
elif rawdata[i] == '&':
if self.literal:
self.handle_data(rawdata[i])
i = i+1
continue
match = charref.match(rawdata, i)
if match:
name = match.group(1)
self.handle_charref(name)
i = match.end(0)
if rawdata[i-1] != ';': i = i-1
continue
match = entityref.match(rawdata, i)
if match:
name = match.group(1)
self.handle_entityref(name)
i = match.end(0)
if rawdata[i-1] != ';': i = i-1
continue
else:
self.error('neither < nor & ??')
# We get here only if incomplete matches but
# nothing else
match = incomplete.match(rawdata, i)
if not match:
self.handle_data(rawdata[i])
i = i+1
continue
j = match.end(0)
if j == n:
break # Really incomplete
self.handle_data(rawdata[i:j])
i = j
# end while
if end and i < n:
self.handle_data(rawdata[i:n])
i = n
self.rawdata = rawdata[i:]
# XXX if end: check for empty stack
# Extensions for the DOCTYPE scanner:
_decl_otherchars = '='
# Internal -- parse processing instr, return length or -1 if not terminated
def parse_pi(self, i):
rawdata = self.rawdata
if rawdata[i:i+2] != '<?':
self.error('unexpected call to parse_pi()')
match = piclose.search(rawdata, i+2)
if not match:
return -1
j = match.start(0)
self.handle_pi(rawdata[i+2: j])
j = match.end(0)
return j-i
def get_starttag_text(self):
return self.__starttag_text
# Internal -- handle starttag, return length or -1 if not terminated
def parse_starttag(self, i):
self.__starttag_text = None
start_pos = i
rawdata = self.rawdata
if shorttagopen.match(rawdata, i):
# SGML shorthand: <tag/data/ == <tag>data</tag>
# XXX Can data contain &... (entity or char refs)?
# XXX Can data contain < or > (tag characters)?
# XXX Can there be whitespace before the first /?
match = shorttag.match(rawdata, i)
if not match:
return -1
tag, data = match.group(1, 2)
self.__starttag_text = '<%s/' % tag
tag = tag.lower()
k = match.end(0)
self.finish_shorttag(tag, data)
self.__starttag_text = rawdata[start_pos:match.end(1) + 1]
return k
# XXX The following should skip matching quotes (' or ")
# As a shortcut way to exit, this isn't so bad, but shouldn't
# be used to locate the actual end of the start tag since the
# < or > characters may be embedded in an attribute value.
match = endbracket.search(rawdata, i+1)
if not match:
return -1
j = match.start(0)
# Now parse the data between i+1 and j into a tag and attrs
attrs = []
if rawdata[i:i+2] == '<>':
# SGML shorthand: <> == <last open tag seen>
k = j
tag = self.lasttag
else:
match = tagfind.match(rawdata, i+1)
if not match:
self.error('unexpected call to parse_starttag')
k = match.end(0)
tag = rawdata[i+1:k].lower()
self.lasttag = tag
while k < j:
match = attrfind.match(rawdata, k)
if not match: break
attrname, rest, attrvalue = match.group(1, 2, 3)
if not rest:
attrvalue = attrname
else:
if (attrvalue[:1] == "'" == attrvalue[-1:] or
attrvalue[:1] == '"' == attrvalue[-1:]):
# strip quotes
attrvalue = attrvalue[1:-1]
attrvalue = self.entity_or_charref.sub(
self._convert_ref, attrvalue)
attrs.append((attrname.lower(), attrvalue))
k = match.end(0)
if rawdata[j] == '>':
j = j+1
self.__starttag_text = rawdata[start_pos:j]
self.finish_starttag(tag, attrs)
return j
# Internal -- convert entity or character reference
def _convert_ref(self, match):
if match.group(2):
return self.convert_charref(match.group(2)) or \
'&#%s%s' % match.groups()[1:]
elif match.group(3):
return self.convert_entityref(match.group(1)) or \
'&%s;' % match.group(1)
else:
return '&%s' % match.group(1)
# Internal -- parse endtag
def parse_endtag(self, i):
rawdata = self.rawdata
match = endbracket.search(rawdata, i+1)
if not match:
return -1
j = match.start(0)
tag = rawdata[i+2:j].strip().lower()
if rawdata[j] == '>':
j = j+1
self.finish_endtag(tag)
return j
# Internal -- finish parsing of <tag/data/ (same as <tag>data</tag>)
def finish_shorttag(self, tag, data):
self.finish_starttag(tag, [])
self.handle_data(data)
self.finish_endtag(tag)
# Internal -- finish processing of start tag
# Return -1 for unknown tag, 0 for open-only tag, 1 for balanced tag
def finish_starttag(self, tag, attrs):
try:
method = getattr(self, 'start_' + tag)
except AttributeError:
try:
method = getattr(self, 'do_' + tag)
except AttributeError:
self.unknown_starttag(tag, attrs)
return -1
else:
self.handle_starttag(tag, method, attrs)
return 0
else:
self.stack.append(tag)
self.handle_starttag(tag, method, attrs)
return 1
# Internal -- finish processing of end tag
def finish_endtag(self, tag):
if not tag:
found = len(self.stack) - 1
if found < 0:
self.unknown_endtag(tag)
return
else:
if tag not in self.stack:
try:
method = getattr(self, 'end_' + tag)
except AttributeError:
self.unknown_endtag(tag)
else:
self.report_unbalanced(tag)
return
found = len(self.stack)
for i in range(found):
if self.stack[i] == tag: found = i
while len(self.stack) > found:
tag = self.stack[-1]
try:
method = getattr(self, 'end_' + tag)
except AttributeError:
method = None
if method:
self.handle_endtag(tag, method)
else:
self.unknown_endtag(tag)
del self.stack[-1]
# Overridable -- handle start tag
def handle_starttag(self, tag, method, attrs):
method(attrs)
# Overridable -- handle end tag
def handle_endtag(self, tag, method):
method()
# Example -- report an unbalanced </...> tag.
def report_unbalanced(self, tag):
if self.verbose:
print '*** Unbalanced </' + tag + '>'
print '*** Stack:', self.stack
def convert_charref(self, name):
"""Convert character reference, may be overridden."""
try:
n = int(name)
except ValueError:
return
if not 0 <= n <= 127:
return
return self.convert_codepoint(n)
def convert_codepoint(self, codepoint):
return chr(codepoint)
def handle_charref(self, name):
"""Handle character reference, no need to override."""
replacement = self.convert_charref(name)
if replacement is None:
self.unknown_charref(name)
else:
self.handle_data(replacement)
# Definition of entities -- derived classes may override
entitydefs = \
{'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''}
def convert_entityref(self, name):
"""Convert entity references.
As an alternative to overriding this method; one can tailor the
results by setting up the self.entitydefs mapping appropriately.
"""
table = self.entitydefs
if name in table:
return table[name]
else:
return
def handle_entityref(self, name):
"""Handle entity references, no need to override."""
replacement = self.convert_entityref(name)
if replacement is None:
self.unknown_entityref(name)
else:
self.handle_data(replacement)
# Example -- handle data, should be overridden
def handle_data(self, data):
pass
# Example -- handle comment, could be overridden
def handle_comment(self, data):
pass
# Example -- handle declaration, could be overridden
def handle_decl(self, decl):
pass
# Example -- handle processing instruction, could be overridden
def handle_pi(self, data):
pass
# To be overridden -- handlers for unknown objects
def unknown_starttag(self, tag, attrs): pass
def unknown_endtag(self, tag): pass
def unknown_charref(self, ref): pass
def unknown_entityref(self, ref): pass
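# Illustrative only (not part of the original module): a typical use is to
# derive a class defining start_*/end_* handlers and drive it with feed()
# and close(). A minimal link extractor might look like this:
#
# class LinkExtractor(SGMLParser):
#     def reset(self):
#         SGMLParser.reset(self)
#         self.links = []
#     def start_a(self, attrs):
#         # attrs is a list of (name, value) pairs; names are lower-cased
#         self.links.extend([value for name, value in attrs
#                            if name == "href"])
#
# parser = LinkExtractor()
# parser.feed('<a href="http://example.com/">x</a>')
# parser.close()
# assert parser.links == ["http://example.com/"]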
class TestSGMLParser(SGMLParser):
def __init__(self, verbose=0):
self.testdata = ""
SGMLParser.__init__(self, verbose)
def handle_data(self, data):
self.testdata = self.testdata + data
if len(repr(self.testdata)) >= 70:
self.flush()
def flush(self):
data = self.testdata
if data:
self.testdata = ""
print 'data:', repr(data)
def handle_comment(self, data):
self.flush()
r = repr(data)
if len(r) > 68:
r = r[:32] + '...' + r[-32:]
print 'comment:', r
def unknown_starttag(self, tag, attrs):
self.flush()
if not attrs:
print 'start tag: <' + tag + '>'
else:
print 'start tag: <' + tag,
for name, value in attrs:
print name + '=' + '"' + value + '"',
print '>'
def unknown_endtag(self, tag):
self.flush()
print 'end tag: </' + tag + '>'
def unknown_entityref(self, ref):
self.flush()
print '*** unknown entity ref: &' + ref + ';'
def unknown_charref(self, ref):
self.flush()
print '*** unknown char ref: &#' + ref + ';'
def unknown_decl(self, data):
self.flush()
print '*** unknown decl: [' + data + ']'
def close(self):
SGMLParser.close(self)
self.flush()
def test(args = None):
import sys
if args is None:
args = sys.argv[1:]
if args and args[0] == '-s':
args = args[1:]
klass = SGMLParser
else:
klass = TestSGMLParser
if args:
file = args[0]
else:
file = 'test.html'
if file == '-':
f = sys.stdin
else:
try:
f = open(file, 'r')
except IOError, msg:
print file, ":", msg
sys.exit(1)
data = f.read()
if f is not sys.stdin:
f.close()
x = klass()
for c in data:
x.feed(c)
x.close()
if __name__ == '__main__':
test()

View File

@@ -1,6 +0,0 @@
import socket
try:
_GLOBAL_DEFAULT_TIMEOUT = socket._GLOBAL_DEFAULT_TIMEOUT
except AttributeError:
_GLOBAL_DEFAULT_TIMEOUT = object()
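# Illustrative note (not part of the original file): the sentinel lets a
# caller distinguish "no timeout supplied" from an explicit None, e.g.:
#
# def fetch(url, timeout=_GLOBAL_DEFAULT_TIMEOUT):  # hypothetical helper
#     if timeout is _GLOBAL_DEFAULT_TIMEOUT:
#         timeout = 30.0  # assumed fallback, for illustration only
#     ...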

View File

@@ -1,162 +0,0 @@
import os
import shutil
import subprocess
import tempfile
import unittest
class SetupStack(object):
def __init__(self):
self._on_teardown = []
def add_teardown(self, teardown):
self._on_teardown.append(teardown)
def tear_down(self):
for func in reversed(self._on_teardown):
func()
class TearDownConvenience(object):
def __init__(self, setup_stack=None):
self._own_setup_stack = setup_stack is None
if setup_stack is None:
setup_stack = SetupStack()
self._setup_stack = setup_stack
# only call this convenience method if no setup_stack was supplied to the constructor
def tear_down(self):
assert self._own_setup_stack
self._setup_stack.tear_down()
class TempDirMaker(TearDownConvenience):
def make_temp_dir(self, dir_=None):
temp_dir = tempfile.mkdtemp(prefix="tmp-%s-" % self.__class__.__name__,
dir=dir_)
def tear_down():
shutil.rmtree(temp_dir)
self._setup_stack.add_teardown(tear_down)
return temp_dir
class MonkeyPatcher(TearDownConvenience):
Unset = object()
def monkey_patch(self, obj, name, value):
orig_value = getattr(obj, name)
setattr(obj, name, value)
def reverse_patch():
setattr(obj, name, orig_value)
self._setup_stack.add_teardown(reverse_patch)
def _set_environ(self, env, name, value):
if value is self.Unset:
try:
del env[name]
except KeyError:
pass
else:
env[name] = value
def monkey_patch_environ(self, name, value, env=os.environ):
orig_value = env.get(name, self.Unset)
self._set_environ(env, name, value)
def reverse_patch():
self._set_environ(env, name, orig_value)
self._setup_stack.add_teardown(reverse_patch)
class FixtureFactory(object):
def __init__(self):
self._setup_stack = SetupStack()
self._context_managers = {}
self._fixtures = {}
def register_context_manager(self, name, context_manager):
self._context_managers[name] = context_manager
def get_fixture(self, name, add_teardown):
context_manager = self._context_managers[name]
fixture = context_manager.__enter__()
add_teardown(lambda: context_manager.__exit__(None, None, None))
return fixture
def get_cached_fixture(self, name):
fixture = self._fixtures.get(name)
if fixture is None:
fixture = self.get_fixture(name, self._setup_stack.add_teardown)
self._fixtures[name] = fixture
return fixture
def tear_down(self):
self._setup_stack.tear_down()
class TestCase(unittest.TestCase):
def setUp(self):
self._setup_stack = SetupStack()
self._monkey_patcher = MonkeyPatcher(self._setup_stack)
def tearDown(self):
self._setup_stack.tear_down()
def register_context_manager(self, name, context_manager):
return self.fixture_factory.register_context_manager(
name, context_manager)
def get_fixture(self, name):
return self.fixture_factory.get_fixture(name, self.add_teardown)
def get_cached_fixture(self, name):
return self.fixture_factory.get_cached_fixture(name)
def add_teardown(self, *args, **kwds):
self._setup_stack.add_teardown(*args, **kwds)
def make_temp_dir(self, *args, **kwds):
return TempDirMaker(self._setup_stack).make_temp_dir(*args, **kwds)
def monkey_patch(self, *args, **kwds):
return self._monkey_patcher.monkey_patch(*args, **kwds)
def monkey_patch_environ(self, *args, **kwds):
return self._monkey_patcher.monkey_patch_environ(*args, **kwds)
def assert_contains(self, container, containee):
self.assertTrue(containee in container, "%r not in %r" %
(containee, container))
def assert_less_than(self, got, expected):
self.assertTrue(got < expected, "%r >= %r" %
(got, expected))
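# Illustrative usage sketch (not part of the original module); the test
# class and names below are hypothetical. Teardowns registered on the
# SetupStack run in reverse order during tearDown():
#
# class ExampleTest(TestCase):
#     def test_tempdir_and_env(self):
#         temp_dir = self.make_temp_dir()  # removed again in tearDown()
#         self.monkey_patch_environ("EXAMPLE_VAR", "1")  # restored later
#         self.assert_contains(os.listdir(tempfile.gettempdir()),
#                              os.path.basename(temp_dir))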
# http://lackingrhoticity.blogspot.com/2009/01/testing-using-golden-files-in-python.html
class GoldenTestCase(TestCase):
run_meld = False
def assert_golden(self, dir_got, dir_expect):
assert os.path.exists(dir_expect), dir_expect
proc = subprocess.Popen(["diff", "--recursive", "-u", "-N",
"--exclude=.*", dir_expect, dir_got],
stdout=subprocess.PIPE)
stdout, stderr = proc.communicate()
if len(stdout) > 0:
if self.run_meld:
# Put expected output on the right because that is the
# side we usually edit.
subprocess.call(["meld", dir_got, dir_expect])
raise AssertionError(
"Differences from golden files found.\n"
"Try running with --meld to update golden files.\n"
"%s" % stdout)
self.assertEquals(proc.wait(), 0)

View File

@@ -1,50 +0,0 @@
# urllib2 work-alike interface
# ...from urllib2...
from urllib2 import \
URLError, \
HTTPError
# ...and from mechanize
from _auth import \
HTTPProxyPasswordMgr, \
HTTPSClientCertMgr
from _debug import \
HTTPResponseDebugProcessor, \
HTTPRedirectDebugProcessor
# crap ATM
## from _gzip import \
## HTTPGzipProcessor
from _urllib2_fork import \
AbstractBasicAuthHandler, \
AbstractDigestAuthHandler, \
BaseHandler, \
CacheFTPHandler, \
FileHandler, \
FTPHandler, \
HTTPBasicAuthHandler, \
HTTPCookieProcessor, \
HTTPDefaultErrorHandler, \
HTTPDigestAuthHandler, \
HTTPErrorProcessor, \
HTTPHandler, \
HTTPPasswordMgr, \
HTTPPasswordMgrWithDefaultRealm, \
HTTPRedirectHandler, \
ProxyBasicAuthHandler, \
ProxyDigestAuthHandler, \
ProxyHandler, \
UnknownHandler
from _http import \
HTTPEquivProcessor, \
HTTPRefererProcessor, \
HTTPRefreshProcessor, \
HTTPRobotRulesProcessor, \
RobotExclusionError
import httplib
if hasattr(httplib, 'HTTPS'):
from _urllib2_fork import HTTPSHandler
del httplib
from _opener import OpenerDirector, \
SeekableResponseOpener, \
build_opener, install_opener, urlopen
from _request import \
Request

File diff suppressed because it is too large

View File

@@ -1,367 +0,0 @@
"""Convenient HTTP UserAgent class.
This is a subclass of urllib2.OpenerDirector.
Copyright 2003-2006 John J. Lee <jjl@pobox.com>
This code is free software; you can redistribute it and/or modify it under
the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
included with the distribution).
"""
import warnings
import _auth
import _gzip
import _opener
import _response
import _sockettimeout
import _urllib2
class UserAgentBase(_opener.OpenerDirector):
"""Convenient user-agent class.
Do not use .add_handler() to add a handler for something already dealt with
by this code.
The only reason at present for the distinction between UserAgent and
UserAgentBase is so that classes that depend on .seek()able responses
(e.g. mechanize.Browser) can inherit from UserAgentBase. The subclass
UserAgent exposes a .set_seekable_responses() method that allows switching
off the adding of a .seek() method to responses.
Public attributes:
addheaders: list of (name, value) pairs specifying headers to send with
every request, unless they are overridden in the Request instance.
>>> ua = UserAgentBase()
>>> ua.addheaders = [
... ("User-agent", "Mozilla/5.0 (compatible)"),
... ("From", "responsible.person@example.com")]
"""
handler_classes = {
# scheme handlers
"http": _urllib2.HTTPHandler,
# CacheFTPHandler is buggy, at least in 2.3, so we don't use it
"ftp": _urllib2.FTPHandler,
"file": _urllib2.FileHandler,
# other handlers
"_unknown": _urllib2.UnknownHandler,
# HTTP{S,}Handler depend on HTTPErrorProcessor too
"_http_error": _urllib2.HTTPErrorProcessor,
"_http_default_error": _urllib2.HTTPDefaultErrorHandler,
# feature handlers
"_basicauth": _urllib2.HTTPBasicAuthHandler,
"_digestauth": _urllib2.HTTPDigestAuthHandler,
"_redirect": _urllib2.HTTPRedirectHandler,
"_cookies": _urllib2.HTTPCookieProcessor,
"_refresh": _urllib2.HTTPRefreshProcessor,
"_equiv": _urllib2.HTTPEquivProcessor,
"_proxy": _urllib2.ProxyHandler,
"_proxy_basicauth": _urllib2.ProxyBasicAuthHandler,
"_proxy_digestauth": _urllib2.ProxyDigestAuthHandler,
"_robots": _urllib2.HTTPRobotRulesProcessor,
"_gzip": _gzip.HTTPGzipProcessor, # experimental!
# debug handlers
"_debug_redirect": _urllib2.HTTPRedirectDebugProcessor,
"_debug_response_body": _urllib2.HTTPResponseDebugProcessor,
}
default_schemes = ["http", "ftp", "file"]
default_others = ["_unknown", "_http_error", "_http_default_error"]
default_features = ["_redirect", "_cookies",
"_refresh", "_equiv",
"_basicauth", "_digestauth",
"_proxy", "_proxy_basicauth", "_proxy_digestauth",
"_robots",
]
if hasattr(_urllib2, 'HTTPSHandler'):
handler_classes["https"] = _urllib2.HTTPSHandler
default_schemes.append("https")
def __init__(self):
_opener.OpenerDirector.__init__(self)
ua_handlers = self._ua_handlers = {}
for scheme in (self.default_schemes+
self.default_others+
self.default_features):
klass = self.handler_classes[scheme]
ua_handlers[scheme] = klass()
for handler in ua_handlers.itervalues():
self.add_handler(handler)
# Yuck.
# Ensure correct default constructor args were passed to
# HTTPRefreshProcessor and HTTPEquivProcessor.
if "_refresh" in ua_handlers:
self.set_handle_refresh(True)
if "_equiv" in ua_handlers:
self.set_handle_equiv(True)
# Ensure default password managers are installed.
pm = ppm = None
if "_basicauth" in ua_handlers or "_digestauth" in ua_handlers:
pm = _urllib2.HTTPPasswordMgrWithDefaultRealm()
if ("_proxy_basicauth" in ua_handlers or
"_proxy_digestauth" in ua_handlers):
ppm = _auth.HTTPProxyPasswordMgr()
self.set_password_manager(pm)
self.set_proxy_password_manager(ppm)
# set default certificate manager
if "https" in ua_handlers:
cm = _urllib2.HTTPSClientCertMgr()
self.set_client_cert_manager(cm)
def close(self):
_opener.OpenerDirector.close(self)
self._ua_handlers = None
# XXX
## def set_timeout(self, timeout):
## self._timeout = timeout
## def set_http_connection_cache(self, conn_cache):
## self._http_conn_cache = conn_cache
## def set_ftp_connection_cache(self, conn_cache):
## # XXX ATM, FTP has cache as part of handler; should it be separate?
## self._ftp_conn_cache = conn_cache
def set_handled_schemes(self, schemes):
"""Set sequence of URL scheme (protocol) strings.
For example: ua.set_handled_schemes(["http", "ftp"])
If this fails (with ValueError) because you've passed an unknown
scheme, the set of handled schemes will not be changed.
"""
want = {}
for scheme in schemes:
if scheme.startswith("_"):
raise ValueError("not a scheme '%s'" % scheme)
if scheme not in self.handler_classes:
raise ValueError("unknown scheme '%s'" % scheme)
want[scheme] = None
# get rid of scheme handlers we don't want
for scheme, oldhandler in self._ua_handlers.items():
if scheme.startswith("_"): continue # not a scheme handler
if scheme not in want:
self._replace_handler(scheme, None)
else:
del want[scheme] # already got it
# add the scheme handlers that are missing
for scheme in want.keys():
self._set_handler(scheme, True)
def set_cookiejar(self, cookiejar):
"""Set a mechanize.CookieJar, or None."""
self._set_handler("_cookies", obj=cookiejar)
# XXX could use Greg Stein's httpx for some of this instead?
# or httplib2??
def set_proxies(self, proxies=None, proxy_bypass=None):
"""Configure proxy settings.
proxies: dictionary mapping URL scheme to proxy specification. None
means use the default system-specific settings.
proxy_bypass: function taking hostname, returning whether proxy should
be used. None means use the default system-specific settings.
The default is to try to obtain proxy settings from the system (see the
documentation for urllib.urlopen for information about the
system-specific methods used -- note that's urllib, not urllib2).
To avoid all use of proxies, pass an empty proxies dict.
>>> ua = UserAgentBase()
>>> def proxy_bypass(hostname):
... return hostname == "noproxy.com"
>>> ua.set_proxies(
... {"http": "joe:password@myproxy.example.com:3128",
... "ftp": "proxy.example.com"},
... proxy_bypass)
"""
self._set_handler("_proxy", True,
constructor_kwds=dict(proxies=proxies,
proxy_bypass=proxy_bypass))
def add_password(self, url, user, password, realm=None):
self._password_manager.add_password(realm, url, user, password)
def add_proxy_password(self, user, password, hostport=None, realm=None):
self._proxy_password_manager.add_password(
realm, hostport, user, password)
def add_client_certificate(self, url, key_file, cert_file):
"""Add an SSL client certificate, for HTTPS client auth.
key_file and cert_file must be filenames of the key and certificate
files, in PEM format. You can use e.g. OpenSSL to convert a p12 (PKCS
12) file to PEM format:
openssl pkcs12 -clcerts -nokeys -in cert.p12 -out cert.pem
openssl pkcs12 -nocerts -in cert.p12 -out key.pem
Note that client certificate password input is very inflexible ATM. At
the moment this seems to be console only, which is presumably the
default behaviour of libopenssl. In future mechanize may support
third-party libraries that (I assume) allow more options here.
"""
self._client_cert_manager.add_key_cert(url, key_file, cert_file)
# the following are rarely useful -- use add_password / add_proxy_password
# instead
def set_password_manager(self, password_manager):
"""Set a mechanize.HTTPPasswordMgrWithDefaultRealm, or None."""
self._password_manager = password_manager
self._set_handler("_basicauth", obj=password_manager)
self._set_handler("_digestauth", obj=password_manager)
def set_proxy_password_manager(self, password_manager):
"""Set a mechanize.HTTPProxyPasswordMgr, or None."""
self._proxy_password_manager = password_manager
self._set_handler("_proxy_basicauth", obj=password_manager)
self._set_handler("_proxy_digestauth", obj=password_manager)
def set_client_cert_manager(self, cert_manager):
"""Set a mechanize.HTTPClientCertMgr, or None."""
self._client_cert_manager = cert_manager
handler = self._ua_handlers["https"]
handler.client_cert_manager = cert_manager
# these methods all take a boolean parameter
def set_handle_robots(self, handle):
"""Set whether to observe rules from robots.txt."""
self._set_handler("_robots", handle)
def set_handle_redirect(self, handle):
"""Set whether to handle HTTP 30x redirections."""
self._set_handler("_redirect", handle)
def set_handle_refresh(self, handle, max_time=None, honor_time=True):
"""Set whether to handle HTTP Refresh headers."""
self._set_handler("_refresh", handle, constructor_kwds=
{"max_time": max_time, "honor_time": honor_time})
def set_handle_equiv(self, handle, head_parser_class=None):
"""Set whether to treat HTML http-equiv headers like HTTP headers.
Response objects may be .seek()able if this is set (currently returned
responses are, raised HTTPError exception responses are not).
"""
if head_parser_class is not None:
constructor_kwds = {"head_parser_class": head_parser_class}
else:
constructor_kwds={}
self._set_handler("_equiv", handle, constructor_kwds=constructor_kwds)
def set_handle_gzip(self, handle):
"""Handle gzip transfer encoding.
"""
if handle:
warnings.warn(
"gzip transfer encoding is experimental!", stacklevel=2)
self._set_handler("_gzip", handle)
def set_debug_redirects(self, handle):
"""Log information about HTTP redirects (including refreshes).
Logging is performed using module logging. The logger name is
"mechanize.http_redirects". To actually print some debug output,
eg:
import sys, logging
logger = logging.getLogger("mechanize.http_redirects")
logger.addHandler(logging.StreamHandler(sys.stdout))
logger.setLevel(logging.INFO)
Other logger names relevant to this module:
"mechanize.http_responses"
"mechanize.cookies"
To turn on everything:
import sys, logging
logger = logging.getLogger("mechanize")
logger.addHandler(logging.StreamHandler(sys.stdout))
logger.setLevel(logging.INFO)
"""
self._set_handler("_debug_redirect", handle)
def set_debug_responses(self, handle):
"""Log HTTP response bodies.
See docstring for .set_debug_redirects() for details of logging.
Response objects may be .seek()able if this is set (currently returned
responses are, raised HTTPError exception responses are not).
"""
self._set_handler("_debug_response_body", handle)
def set_debug_http(self, handle):
"""Print HTTP headers to sys.stdout."""
level = int(bool(handle))
for scheme in "http", "https":
h = self._ua_handlers.get(scheme)
if h is not None:
h.set_http_debuglevel(level)
def _set_handler(self, name, handle=None, obj=None,
constructor_args=(), constructor_kwds={}):
if handle is None:
handle = obj is not None
if handle:
handler_class = self.handler_classes[name]
if obj is not None:
newhandler = handler_class(obj)
else:
newhandler = handler_class(
*constructor_args, **constructor_kwds)
else:
newhandler = None
self._replace_handler(name, newhandler)
def _replace_handler(self, name, newhandler=None):
# first, if handler was previously added, remove it
if name is not None:
handler = self._ua_handlers.get(name)
if handler:
try:
self.handlers.remove(handler)
except ValueError:
pass
# then add the replacement, if any
if newhandler is not None:
self.add_handler(newhandler)
self._ua_handlers[name] = newhandler
class UserAgent(UserAgentBase):
def __init__(self):
UserAgentBase.__init__(self)
self._seekable = False
def set_seekable_responses(self, handle):
"""Make response objects .seek()able."""
self._seekable = bool(handle)
def open(self, fullurl, data=None,
timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
if self._seekable:
def bound_open(fullurl, data=None,
timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
return UserAgentBase.open(self, fullurl, data, timeout)
response = _opener.wrapped_open(
bound_open, _response.seek_wrapped_response, fullurl, data,
timeout)
else:
response = UserAgentBase.open(self, fullurl, data, timeout)
return response
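# Illustrative configuration sketch (not part of the original module);
# example.com and the credentials are placeholders, and "https" assumes
# HTTPS support is available in this build:
#
# ua = UserAgent()
# ua.set_handled_schemes(["http", "https"])
# ua.set_handle_robots(False)
# ua.add_password("http://example.com/", "user", "secret")
# response = ua.open("http://example.com/")
# print response.read()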

View File

@@ -1,305 +0,0 @@
"""Utility functions and date/time routines.
Copyright 2002-2006 John J Lee <jjl@pobox.com>
This code is free software; you can redistribute it and/or modify it
under the terms of the BSD or ZPL 2.1 licenses (see the file
COPYING.txt included with the distribution).
"""
import re
import time
import warnings
class ExperimentalWarning(UserWarning):
pass
def experimental(message):
warnings.warn(message, ExperimentalWarning, stacklevel=3)
def hide_experimental_warnings():
warnings.filterwarnings("ignore", category=ExperimentalWarning)
def reset_experimental_warnings():
warnings.filterwarnings("default", category=ExperimentalWarning)
def deprecation(message):
warnings.warn(message, DeprecationWarning, stacklevel=3)
def hide_deprecations():
warnings.filterwarnings("ignore", category=DeprecationWarning)
def reset_deprecations():
warnings.filterwarnings("default", category=DeprecationWarning)
def write_file(filename, data):
f = open(filename, "wb")
try:
f.write(data)
finally:
f.close()
def get1(sequence):
assert len(sequence) == 1
return sequence[0]
def isstringlike(x):
try: x+""
except: return False
else: return True
## def caller():
## try:
## raise SyntaxError
## except:
## import sys
## return sys.exc_traceback.tb_frame.f_back.f_back.f_code.co_name
from calendar import timegm
# Date/time conversion routines for formats used by the HTTP protocol.
EPOCH = 1970
def my_timegm(tt):
year, month, mday, hour, min, sec = tt[:6]
if ((year >= EPOCH) and (1 <= month <= 12) and (1 <= mday <= 31) and
(0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)):
return timegm(tt)
else:
return None
days = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
months_lower = []
for month in months: months_lower.append(month.lower())
def time2isoz(t=None):
"""Return a string representing time in seconds since epoch, t.
If the function is called without an argument, it will use the current
time.
The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
representing Universal Time (UTC, aka GMT). An example of this format is:
1994-11-24 08:49:37Z
"""
if t is None: t = time.time()
year, mon, mday, hour, min, sec = time.gmtime(t)[:6]
return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
year, mon, mday, hour, min, sec)
def time2netscape(t=None):
"""Return a string representing time in seconds since epoch, t.
If the function is called without an argument, it will use the current
time.
The format of the returned string is like this:
Wed, DD-Mon-YYYY HH:MM:SS GMT
"""
if t is None: t = time.time()
year, mon, mday, hour, min, sec, wday = time.gmtime(t)[:7]
return "%s %02d-%s-%04d %02d:%02d:%02d GMT" % (
days[wday], mday, months[mon-1], year, hour, min, sec)
UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}
timezone_re = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")
def offset_from_tz_string(tz):
offset = None
if UTC_ZONES.has_key(tz):
offset = 0
else:
m = timezone_re.search(tz)
if m:
offset = 3600 * int(m.group(2))
if m.group(3):
offset = offset + 60 * int(m.group(3))
if m.group(1) == '-':
offset = -offset
return offset
def _str2time(day, mon, yr, hr, min, sec, tz):
# translate month name to number
# month numbers start with 1 (January)
try:
mon = months_lower.index(mon.lower())+1
except ValueError:
# maybe it's already a number
try:
imon = int(mon)
except ValueError:
return None
if 1 <= imon <= 12:
mon = imon
else:
return None
# make sure clock elements are defined
if hr is None: hr = 0
if min is None: min = 0
if sec is None: sec = 0
yr = int(yr)
day = int(day)
hr = int(hr)
min = int(min)
sec = int(sec)
if yr < 1000:
# find "obvious" year
cur_yr = time.localtime(time.time())[0]
m = cur_yr % 100
tmp = yr
yr = yr + cur_yr - m
m = m - tmp
if abs(m) > 50:
if m > 0: yr = yr + 100
else: yr = yr - 100
# convert UTC time tuple to seconds since epoch (not timezone-adjusted)
t = my_timegm((yr, mon, day, hr, min, sec, tz))
if t is not None:
# adjust time using timezone string, to get absolute time since epoch
if tz is None:
tz = "UTC"
tz = tz.upper()
offset = offset_from_tz_string(tz)
if offset is None:
return None
t = t - offset
return t
strict_re = re.compile(r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
wkday_re = re.compile(
r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
loose_http_re = re.compile(
r"""^
(\d\d?) # day
(?:\s+|[-\/])
(\w+) # month
(?:\s+|[-\/])
(\d+) # year
(?:
(?:\s+|:) # separator before clock
(\d\d?):(\d\d) # hour:min
(?::(\d\d))? # optional seconds
)? # optional clock
\s*
([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
\s*
(?:\(\w+\))? # ASCII representation of timezone in parens.
\s*$""", re.X)
def http2time(text):
"""Returns time in seconds since epoch of time represented by a string.
Return value is an integer.
None is returned if the format of the string is unrecognized, the time is outside
the representable range, or the timezone string is not recognized. If the
string contains no timezone, UTC is assumed.
The timezone in the string may be numerical (like "-0800" or "+0100") or a
string timezone (like "UTC", "GMT", "BST" or "EST"). Currently, only the
timezone strings equivalent to UTC (zero offset) are known to the function.
The function loosely parses the following formats:
Wed, 09 Feb 1994 22:23:32 GMT -- HTTP format
Tuesday, 08-Feb-94 14:15:29 GMT -- old rfc850 HTTP format
Tuesday, 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP format
09 Feb 1994 22:23:32 GMT -- HTTP format (no weekday)
08-Feb-94 14:15:29 GMT -- rfc850 format (no weekday)
08-Feb-1994 14:15:29 GMT -- broken rfc850 format (no weekday)
The parser ignores leading and trailing whitespace. The time may be
absent.
If the year is given with only 2 digits, the function will select the
century that makes the year closest to the current date.
"""
# fast exit for strictly conforming string
m = strict_re.search(text)
if m:
g = m.groups()
mon = months_lower.index(g[1].lower()) + 1
tt = (int(g[2]), mon, int(g[0]),
int(g[3]), int(g[4]), float(g[5]))
return my_timegm(tt)
# No, we need some messy parsing...
# clean up
text = text.lstrip()
text = wkday_re.sub("", text, 1) # Useless weekday
# tz is time zone specifier string
day, mon, yr, hr, min, sec, tz = [None]*7
# loose regexp parse
m = loose_http_re.search(text)
if m is not None:
day, mon, yr, hr, min, sec, tz = m.groups()
else:
return None # bad format
return _str2time(day, mon, yr, hr, min, sec, tz)
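# A worked example (not in the original module): the strictly conforming
# date below hits the fast path and yields 760832612, i.e. the epoch
# seconds for 1994-02-09 22:23:32 UTC:
#
#   http2time("Wed, 09 Feb 1994 22:23:32 GMT") == 760832612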
iso_re = re.compile(
r"""^
(\d{4}) # year
[-\/]?
(\d\d?) # numerical month
[-\/]?
(\d\d?) # day
(?:
(?:\s+|[-:Tt]) # separator before clock
(\d\d?):?(\d\d) # hour:min
(?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional)
)? # optional clock
\s*
([-+]?\d\d?:?(:?\d\d)?
|Z|z)? # timezone (Z is "zero meridian", i.e. GMT)
\s*$""", re.X)
def iso2time(text):
"""
As for http2time, but parses the ISO 8601 formats:
1994-02-03 14:15:29 -0100 -- ISO 8601 format
1994-02-03 14:15:29 -- zone is optional
1994-02-03 -- only date
1994-02-03T14:15:29 -- Use T as separator
19940203T141529Z -- ISO 8601 compact format
19940203 -- only date
"""
# clean up
text = text.lstrip()
# tz is time zone specifier string
day, mon, yr, hr, min, sec, tz = [None]*7
# loose regexp parse
m = iso_re.search(text)
if m is not None:
# XXX there's an extra bit of the timezone I'm ignoring here: is
# this the right thing to do?
yr, mon, day, hr, min, sec, tz, _ = m.groups()
else:
return None # bad format
return _str2time(day, mon, yr, hr, min, sec, tz)

View File

@@ -1,2 +0,0 @@
"0.2.5"
__version__ = (0, 2, 5, None, None)

View File

@@ -516,7 +516,7 @@ def set_context_commands(item, parent_item):
from_action=item.action).tourl())))
# Añadir a Alfavoritos (Mis enlaces)
if item.channel not in ["favorites", "videolibrary", "help", ""] and parent_item.channel != "favorites":
context_commands.append(('[COLOR blue]Guardar enlace[/COLOR]', "XBMC.RunPlugin(%s?%s)" %
context_commands.append(('[COLOR blue]%s[/COLOR]' % config.get_localized_string(70557), "XBMC.RunPlugin(%s?%s)" %
(sys.argv[0], item.clone(channel="alfavorites", action="addFavourite",
from_channel=item.channel,
from_action=item.action).tourl())))
@@ -538,7 +538,7 @@ def set_context_commands(item, parent_item):
mediatype = 'tv'
else:
mediatype = item.contentType
context_commands.append(("[COLOR yellow]Buscar Similares[/COLOR]", "XBMC.Container.Update (%s?%s)" % (
context_commands.append(("[COLOR yellow]%s[/COLOR]" % config.get_localized_string(70561), "XBMC.Container.Update (%s?%s)" % (
sys.argv[0], item.clone(channel='search', action='discover_list', search_type='list', page='1',
list_type='%s/%s/similar' % (mediatype,item.infoLabels['tmdb_id'])).tourl())))
@@ -1044,6 +1044,8 @@ def torrent_client_installed(show_tuple=False):
def play_torrent(item, xlistitem, mediaurl):
logger.info()
import time
# Opciones disponibles para Reproducir torrents
torrent_options = list()
torrent_options.append(["Cliente interno (necesario libtorrent)"])
@@ -1066,28 +1068,32 @@ def play_torrent(item, xlistitem, mediaurl):
# Plugins externos
if seleccion > 1:
#### Compatibilidad con Kodi 18: evita cuelgues/cancelaciones cuando el .torrent se lanza desde pantalla convencional
if xbmc.getCondVisibility('Window.IsMedia'):
xbmcplugin.setResolvedUrl(int(sys.argv[1]), False, xlistitem) #Preparamos el entorno para evitar error Kodi 18
time.sleep(1) #Dejamos que se ejecute
mediaurl = urllib.quote_plus(item.url)
if ("quasar" in torrent_options[seleccion][1] or "elementum" in torrent_options[seleccion][1]) and item.infoLabels['tmdb_id']: #Llamada con más parámetros para completar el título
if item.contentType == 'episode' and "elementum" not in torrent_options[seleccion][1]:
mediaurl += "&episode=%s&library=&season=%s&show=%s&tmdb=%s&type=episode" % (item.infoLabels['episode'], item.infoLabels['season'], item.infoLabels['tmdb_id'], item.infoLabels['tmdb_id'])
elif item.contentType == 'movie':
mediaurl += "&library=&tmdb=%s&type=movie" % (item.infoLabels['tmdb_id'])
xbmc.executebuiltin("PlayMedia(" + torrent_options[seleccion][1] % mediaurl + ")")
if "quasar" in torrent_options[seleccion][1] or "elementum" in torrent_options[seleccion][1]: #Seleccionamos qué clientes torrent soportamos
if item.strm_path: #Sólo si es de Videoteca
import time
time_limit = time.time() + 150 #Marcamos el tiempo máx. de buffering
while not is_playing() and time.time() < time_limit: #Esperamos mientras buffera
time.sleep(5) #Repetimos cada intervalo
#logger.debug(str(time_limit))
if is_playing(): #Ha terminado de bufferar o ha cancelado
from platformcode import xbmc_videolibrary
xbmc_videolibrary.mark_auto_as_watched(item) #Marcamos como visto al terminar
#logger.debug("Llamado el marcado")
#else:
#logger.debug("Video cancelado o timeout")
xbmc.executebuiltin("PlayMedia(" + torrent_options[seleccion][1] % mediaurl + ")")
#Seleccionamos qué clientes torrent soportamos para el marcado de vídeos vistos
if "quasar" in torrent_options[seleccion][1] or "elementum" in torrent_options[seleccion][1]:
time_limit = time.time() + 150 #Marcamos el tiempo máx. de buffering
while not is_playing() and time.time() < time_limit: #Esperamos mientras buffera
time.sleep(5) #Repetimos cada intervalo
#logger.debug(str(time_limit))
if item.strm_path and is_playing(): #Sólo si es de Videoteca
from platformcode import xbmc_videolibrary
xbmc_videolibrary.mark_auto_as_watched(item) #Marcamos como visto al terminar
#logger.debug("Llamado el marcado")
if seleccion == 1:
from platformcode import mct

View File

@@ -1734,7 +1734,7 @@ msgid "[COLOR %s]Filter configuration for TV series...[/COLOR]"
msgstr ""
msgctxt "#60430"
msgid "FILTRO: Delete '%s'"
msgid "FILTER: Delete '%s'"
msgstr ""
msgctxt "#60431"
@@ -4804,14 +4804,142 @@ msgid "Verification of counters of videos seen / not seen (uncheck to verify)"
msgstr ""
msgctxt "#70527"
msgid "Now in Theatres "
msgid "My links"
msgstr ""
msgctxt "#70528"
msgid "Movies by Genre"
msgid "Default folder"
msgstr ""
msgctxt "#70529"
msgid "tv show"
msgid "Repeated link"
msgstr ""
msgctxt "#70530"
msgid "You already have this link in the folder"
msgstr ""
msgctxt "#70531"
msgid "Saved link"
msgstr ""
msgctxt "#70532"
msgid "Folder: %s"
msgstr ""
msgctxt "#70533"
msgid "Rename folder"
msgstr ""
msgctxt "#70534"
msgid "Delete folder"
msgstr ""
msgctxt "#70535"
msgid "Move up all"
msgstr ""
msgctxt "#70536"
msgid "Move up"
msgstr ""
msgctxt "#70537"
msgid "Move down"
msgstr ""
msgctxt "#70538"
msgid "Move down all"
msgstr ""
msgctxt "#70539"
msgid "* Create different folders to store your favorite links within Icarus. [CR]"
msgstr ""
msgctxt "#70540"
msgid "* To add links to folders, access the context menu from any point in Icarus.[CR]"
msgstr ""
msgctxt "#70541"
msgid "* The links can be channels, sections within the channels, searches, and even movies and series although for the latter it is preferable to use the video library."
msgstr ""
msgctxt "#70542"
msgid "Create new folder ..."
msgstr ""
msgctxt "#70543"
msgid "Move to another folder"
msgstr ""
msgctxt "#70544"
msgid "Change title"
msgstr ""
msgctxt "#70545"
msgid "Change color"
msgstr ""
msgctxt "#70546"
msgid "Save link in:"
msgstr ""
msgctxt "#70547"
msgid "Change thumbnail"
msgstr ""
msgctxt "#70548"
msgid "Delete link"
msgstr ""
msgctxt "#70549"
msgid "Select folder"
msgstr ""
msgctxt "#70550"
msgid "Create new folder"
msgstr ""
msgctxt "#70551"
msgid "Folder name"
msgstr ""
msgctxt "#70552"
msgid "Delete the folder and links it contains?"
msgstr ""
msgctxt "#70553"
msgid "Change link title"
msgstr ""
msgctxt "#70554"
msgid "Select thumbnail:"
msgstr ""
msgctxt "#70555"
msgid "Move link to:"
msgstr ""
msgctxt "#70556"
msgid "%d links in folder"
msgstr ""
msgctxt "#70557"
msgid "Save link"
msgstr ""
msgctxt "#70558"
msgid "Select color:"
msgstr ""
msgctxt "#70559"
msgid "Now in Theatres "
msgstr ""
msgctxt "#70560"
msgid "Movies by Genre"
msgstr ""
msgctxt "#70561"
msgid "Search Similar"
msgstr ""

View File

@@ -4792,14 +4792,143 @@ msgid "Verification of counters of videos seen / not seen (uncheck to verify)"
msgstr "Verifica dei contatori di video visti/non visti (deselezionare per verificare)"
msgctxt "#70527"
msgid "My links"
msgstr "I Miei Link"
msgctxt "#70528"
msgid "Default folder"
msgstr "Cartella di Default"
msgctxt "#70529"
msgid "Repeated link"
msgstr "Link ripetuto"
msgctxt "#70530"
msgid "You already have this link in the folder"
msgstr "C'è già un link nella cartella"
msgctxt "#70531"
msgid "Saved link"
msgstr "Link salvato"
msgctxt "#70532"
msgid "Folder: %s"
msgstr "Cartella: %s"
msgctxt "#70533"
msgid "Rename folder"
msgstr "Cambia nome alla cartella"
msgctxt "#70534"
msgid "Delete folder"
msgstr "Elimina la cartella"
msgctxt "#70535"
msgid "Move up all"
msgstr "Sposta tutto in alto"
msgctxt "#70536"
msgid "Move up"
msgstr "Sposta in su"
msgctxt "#70537"
msgid "Move down"
msgstr "Sposta in giù"
msgctxt "#70538"
msgid "Move down all"
msgstr "Sposta tutto in basso"
msgctxt "#70539"
msgid "* Create different folders to store your favorite links within Icarus. [CR]"
msgstr "* Crea diverse cartelle per memorizzare i tuoi collegamenti preferiti all'interno di Icarus."
msgctxt "#70540"
msgid "* To add links to folders, access the context menu from any point in Icarus.[CR]"
msgstr "* Per aggiungere collegamenti alle cartelle accedi al menu contestuale da qualsiasi punto di Icarus."
msgctxt "#70541"
msgid "* The links can be channels, sections within the channels, searches, and even movies and series although for the latter it is preferable to use the video library."
msgstr "* I collegamenti possono essere canali, sezioni all'interno dei canali, ricerche e persino film e serie, sebbene per quest'ultimo sia preferibile utilizzare la videoteca."
msgctxt "#70542"
msgid "Create new folder ..."
msgstr "Crea nuova cartella ..."
msgctxt "#70543"
msgid "Move to another folder"
msgstr "Sposta in altra cartella"
msgctxt "#70544"
msgid "Change title"
msgstr "Cambia titolo"
msgctxt "#70545"
msgid "Change color"
msgstr "Cambia colore"
msgctxt "#70546"
msgid "Save link in:"
msgstr "Salva link in:"
msgctxt "#70547"
msgid "Change thumbnail"
msgstr "Cambia thumbnail"
msgctxt "#70548"
msgid "Delete link"
msgstr "Elimina link"
msgctxt "#70549"
msgid "Select folder"
msgstr "Seleziona cartella"
msgctxt "#70550"
msgid "Create new folder"
msgstr "Crea nuova cartella"
msgctxt "#70551"
msgid "Folder name"
msgstr "Nome della cartella"
msgctxt "#70552"
msgid "Delete the folder and links it contains?"
msgstr "Eliminare la cartella con tutti i link?"
msgctxt "#70553"
msgid "Change link title"
msgstr "Cambia titolo del link"
msgctxt "#70554"
msgid "Select thumbnail:"
msgstr "Seleziona thumbnail:"
msgctxt "#70555"
msgid "Move link to:"
msgstr "Sposta link in:"
msgctxt "#70556"
msgid "%d links in folder"
msgstr "%d link nella cartella"
msgctxt "#70557"
msgid "Save link"
msgstr "Salva link"
msgctxt "#70558"
msgid "Select color:"
msgstr "Seleziona colore:"
msgctxt "#70559"
msgid "Now in Theatres "
msgstr "Oggi in Sala"
msgctxt "#70560"
msgid "Movies by Genre"
msgstr "Per genere"
msgctxt "#70561"
msgid "Search Similar"
msgstr "Cerca Simili"

View File

@@ -1734,7 +1734,7 @@ msgid "[COLOR %s]Filter configuration for TV series...[/COLOR]"
msgstr "[COLOR %s]Configurar filtro para series...[/COLOR]"
msgctxt "#60430"
msgid "FILTRO: Delete '%s'"
msgid "FILTER: Delete '%s'"
msgstr "FILTRO: Borrar '%s'"
msgctxt "#60431"
@@ -4792,13 +4792,150 @@ msgid "Verification of counters of videos seen / not seen (uncheck to verify)"
msgstr "Verificación de los contadores de vídeos vistos/no vistos (desmarcar para verificar)"
msgctxt "#70527"
msgid "My links"
msgstr "Mis enlaces"
msgctxt "#70528"
msgid "Default folder"
msgstr "Carpeta por defecto"
msgctxt "#70529"
msgid "Repeated link"
msgstr "Enlace repetido"
msgctxt "#70530"
msgid "You already have this link in the folder"
msgstr "Ya tienes este enlace en la carpeta"
msgctxt "#70531"
msgid "Saved link"
msgstr "Guardado enlace"
msgctxt "#70532"
msgid "Folder: %s"
msgstr "Carpeta: %s"
msgctxt "#70533"
msgid "Rename folder"
msgstr "Cambiar nombre de la carpeta"
msgctxt "#70534"
msgid "Delete folder"
msgstr "Eliminar la carpeta"
msgctxt "#70535"
msgid "Move up all"
msgstr "Mover arriba del todo"
msgctxt "#70536"
msgid "Move up"
msgstr "Mover hacia arriba"
msgctxt "#70537"
msgid "Move down"
msgstr "Mover hacia abajo"
msgctxt "#70538"
msgid "Move down all"
msgstr "Mover abajo del todo"
msgctxt "#70539"
msgid "* Create different folders to store your favorite links within Icarus. [CR]"
msgstr "* Crea diferentes carpetas para guardar tus enlaces favoritos dentro de Icarus.[CR]"
msgctxt "#70540"
msgid "* To add links to folders, access the context menu from any point in Icarus.[CR]"
msgstr "* Para añadir enlaces a las carpetas accede al menú contextual desde cualquier punto de Icarus.[CR]"
msgctxt "#70541"
msgid "* The links can be channels, sections within the channels, searches, and even movies and series although for the latter it is preferable to use the video library."
msgstr "* Los enlaces pueden ser canales, secciones dentro de los canales, búsquedas, e incluso películas y series aunque para esto último es preferible utilizar la videoteca."
msgctxt "#70542"
msgid "Create new folder ..."
msgstr "Crear nueva carpeta ..."
msgctxt "#70543"
msgid "Move to another folder"
msgstr "Mover a otra carpeta"
msgctxt "#70544"
msgid "Change title"
msgstr "Cambiar título"
msgctxt "#70545"
msgid "Change color"
msgstr "Cambiar color"
msgctxt "#70546"
msgid "Save link in:"
msgstr "Guardar enlace en:"
msgctxt "#70547"
msgid "Change thumbnail"
msgstr "Cambiar thumbnail"
msgctxt "#70548"
msgid "Delete link"
msgstr "Eliminar enlace"
msgctxt "#70549"
msgid "Select folder"
msgstr "Seleccionar carpeta"
msgctxt "#70550"
msgid "Create new folder"
msgstr "Crear nueva carpeta"
msgctxt "#70551"
msgid "Folder name"
msgstr "Nombre de la carpeta"
msgctxt "#70552"
msgid "Delete the folder and links it contains?"
msgstr "¿Borrar la carpeta y los enlaces que contiene?"
msgctxt "#70553"
msgid "Change link title"
msgstr "Cambiar título del enlace"
msgctxt "#70554"
msgid "Select thumbnail:"
msgstr "Seleccionar thumbnail:"
msgctxt "#70555"
msgid "Move link to:"
msgstr "Mover enlace a:"
msgctxt "#70556"
msgid "%d links in folder"
msgstr "%d enlaces en la carpeta"
msgctxt "#70557"
msgid "Save link"
msgstr "Guardar enlace"
msgctxt "#70558"
msgid "Select color:"
msgstr "Seleccionar color:"
msgctxt "#70559"
msgid "Now in Theatres "
msgstr "Ahora en cines"
msgctxt "#70560"
msgid "Movies by Genre"
msgstr "Por géneros"
msgctxt "#70561"
msgid "Search Similar"
msgstr "Buscar Similares"

View File

@@ -1734,7 +1734,7 @@ msgid "[COLOR %s]Filter configuration for TV series...[/COLOR]"
msgstr "[COLOR %s]Configurar filtro para series...[/COLOR]"
msgctxt "#60430"
msgid "FILTRO: Delete '%s'"
msgid "FILTER: Delete '%s'"
msgstr "FILTRO: Borrar '%s'"
msgctxt "#60431"
@@ -4792,13 +4792,150 @@ msgid "Verification of counters of videos seen / not seen (uncheck to verify)"
msgstr "Verificación de los contadores de vídeos vistos/no vistos (desmarcar para verificar)"
msgctxt "#70527"
msgid "My links"
msgstr "Mis enlaces"
msgctxt "#70528"
msgid "Default folder"
msgstr "Carpeta por defecto"
msgctxt "#70529"
msgid "Repeated link"
msgstr "Enlace repetido"
msgctxt "#70530"
msgid "You already have this link in the folder"
msgstr "Ya tienes este enlace en la carpeta"
msgctxt "#70531"
msgid "Saved link"
msgstr "Guardado enlace"
msgctxt "#70532"
msgid "Folder: %s"
msgstr "Carpeta: %s"
msgctxt "#70533"
msgid "Rename folder"
msgstr "Cambiar nombre de la carpeta"
msgctxt "#70534"
msgid "Delete folder"
msgstr "Eliminar la carpeta"
msgctxt "#70535"
msgid "Move up all"
msgstr "Mover arriba del todo"
msgctxt "#70536"
msgid "Move up"
msgstr "Mover hacia arriba"
msgctxt "#70537"
msgid "Move down"
msgstr "Mover hacia abajo"
msgctxt "#70538"
msgid "Move down all"
msgstr "Mover abajo del todo"
msgctxt "#70539"
msgid "* Create different folders to store your favorite links within Icarus. [CR]"
msgstr "* Crea diferentes carpetas para guardar tus enlaces favoritos dentro de Icarus.[CR]"
msgctxt "#70540"
msgid "* To add links to folders, access the context menu from any point in Icarus.[CR]"
msgstr "* Para añadir enlaces a las carpetas accede al menú contextual desde cualquier punto de Icarus.[CR]"
msgctxt "#70541"
msgid "* The links can be channels, sections within the channels, searches, and even movies and series although for the latter it is preferable to use the video library."
msgstr "* Los enlaces pueden ser canales, secciones dentro de los canales, búsquedas, e incluso películas y series aunque para esto último es preferible utilizar la videoteca."
msgctxt "#70542"
msgid "Create new folder ..."
msgstr "Crear nueva carpeta ..."
msgctxt "#70543"
msgid "Move to another folder"
msgstr "Mover a otra carpeta"
msgctxt "#70544"
msgid "Change title"
msgstr "Cambiar título"
msgctxt "#70545"
msgid "Change color"
msgstr "Cambiar color"
msgctxt "#70546"
msgid "Save link in:"
msgstr "Guardar enlace en:"
msgctxt "#70547"
msgid "Change thumbnail"
msgstr "Cambiar thumbnail"
msgctxt "#70548"
msgid "Delete link"
msgstr "Eliminar enlace"
msgctxt "#70549"
msgid "Select folder"
msgstr "Seleccionar carpeta"
msgctxt "#70550"
msgid "Create new folder"
msgstr "Crear nueva carpeta"
msgctxt "#70551"
msgid "Folder name"
msgstr "Nombre de la carpeta"
msgctxt "#70552"
msgid "Delete the folder and links it contains?"
msgstr "¿Borrar la carpeta y los enlaces que contiene?"
msgctxt "#70553"
msgid "Change link title"
msgstr "Cambiar título del enlace"
msgctxt "#70554"
msgid "Select thumbnail:"
msgstr "Seleccionar thumbnail:"
msgctxt "#70555"
msgid "Move link to:"
msgstr "Mover enlace a:"
msgctxt "#70556"
msgid "%d links in folder"
msgstr "%d enlaces en la carpeta"
msgctxt "#70557"
msgid "Save link"
msgstr "Guardar enlace"
msgctxt "#70558"
msgid "Select color:"
msgstr "Seleccionar color:"
msgctxt "#70559"
msgid "Now in Theatres "
msgstr "Ahora en cines"
msgctxt "#70560"
msgid "Movies by Genre"
msgstr "Por géneros"
msgctxt "#70561"
msgid "Search Similar"
msgstr "Buscar Similares"

View File

@@ -1734,7 +1734,7 @@ msgid "[COLOR %s]Filter configuration for TV series...[/COLOR]"
msgstr "[COLOR %s]Configurar filtro para series...[/COLOR]"
msgctxt "#60430"
msgid "FILTRO: Delete '%s'"
msgid "FILTER: Delete '%s'"
msgstr "FILTRO: Borrar '%s'"
msgctxt "#60431"
@@ -4792,13 +4792,150 @@ msgid "Verification of counters of videos seen / not seen (uncheck to verify)"
msgstr "Verificación de los contadores de vídeos vistos/no vistos (desmarcar para verificar)"
msgctxt "#70527"
msgid "My links"
msgstr "Mis enlaces"
msgctxt "#70528"
msgid "Default folder"
msgstr "Carpeta por defecto"
msgctxt "#70529"
msgid "Repeated link"
msgstr "Enlace repetido"
msgctxt "#70530"
msgid "You already have this link in the folder"
msgstr "Ya tienes este enlace en la carpeta"
msgctxt "#70531"
msgid "Saved link"
msgstr "Guardado enlace"
msgctxt "#70532"
msgid "Folder: %s"
msgstr "Carpeta: %s"
msgctxt "#70533"
msgid "Rename folder"
msgstr "Cambiar nombre de la carpeta"
msgctxt "#70534"
msgid "Delete folder"
msgstr "Eliminar la carpeta"
msgctxt "#70535"
msgid "Move up all"
msgstr "Mover arriba del todo"
msgctxt "#70536"
msgid "Move up"
msgstr "Mover hacia arriba"
msgctxt "#70537"
msgid "Move down"
msgstr "Mover hacia abajo"
msgctxt "#70538"
msgid "Move down all"
msgstr "Mover abajo del todo"
msgctxt "#70539"
msgid "* Create different folders to store your favorite links within Icarus. [CR]"
msgstr "* Crea diferentes carpetas para guardar tus enlaces favoritos dentro de Icarus.[CR]]"
msgctxt "#70540"
msgid "* To add links to folders, access the context menu from any point in Icarus.[CR]"
msgstr "* Para añadir enlaces a las carpetas accede al menú contextual desde cualquier punto de Icarus.[CR]"
msgctxt "#70541"
msgid "* The links can be channels, sections within the channels, searches, and even movies and series although for the latter it is preferable to use the video library."
msgstr "* Los enlaces pueden ser canales, secciones dentro de los canales, búsquedas, e incluso películas y series aunque para esto último es preferible utilizar la videoteca."
msgctxt "#70542"
msgid "Create new folder ..."
msgstr "Crear nueva carpeta ..."
msgctxt "#70543"
msgid "Move to another folder"
msgstr "Mover a otra carpeta"
msgctxt "#70544"
msgid "Change title"
msgstr "Cambiar título"
msgctxt "#70545"
msgid "Change color"
msgstr "Cambiar color"
msgctxt "#70546"
msgid "Save link in:"
msgstr "Guardar enlace en:"
msgctxt "#70547"
msgid "Change thumbnail"
msgstr "Cambiar thumbnail"
msgctxt "#70548"
msgid "Delete link"
msgstr "Eliminar enlace"
msgctxt "#70549"
msgid "Select folder"
msgstr "Seleccionar carpeta"
msgctxt "#70550"
msgid "Create new folder"
msgstr "Crear nueva carpeta"
msgctxt "#70551"
msgid "Folder name"
msgstr "Nombre de la carpeta"
msgctxt "#70552"
msgid "Delete the folder and links it contains?"
msgstr "¿Borrar la carpeta y los enlaces que contiene?"
msgctxt "#70553"
msgid "Change link title"
msgstr "Cambiar título del enlace"
msgctxt "#70554"
msgid "Select thumbnail:"
msgstr "Seleccionar thumbnail:"
msgctxt "#70555"
msgid "Move link to:"
msgstr "Mover enlace a:"
msgctxt "#70556"
msgid "%d links in folder"
msgstr "%d enlaces en la carpeta"
msgctxt "#70557"
msgid "Save link"
msgstr "Guardar enlace"
msgctxt "#70558"
msgid "Select color:"
msgstr "Seleccionar color:"
msgctxt "#70559"
msgid "Now in Theatres "
msgstr "Ahora en cines"
msgctxt "#70528"
msgctxt "#70560"
msgid "Movies by Genre"
msgstr "Por generos"
msgctxt "#70529"
msgid "tv show"
msgstr "serie"
msgctxt "#70561"
msgid "Search Similar
msgstr "Buscar Similares"

Binary file not shown. (After: 26 KiB)

Binary file not shown. (After: 35 KiB)

Binary file not shown. (After: 36 KiB)

View File

@@ -9,7 +9,7 @@ from platformcode import logger, config
def test_video_exists(page_url):
    logger.info("(page_url='%s')" % page_url)
    data = httptools.downloadpage(page_url).data
-    if "File Not Found" in data:
+    if "File Not Found" in data or "File was deleted" in data:
        return False, config.get_localized_string(70292) % "ClipWatching"
    return True, ""

View File

@@ -4,18 +4,14 @@
"ignore_urls": [],
"patterns": [
{
"pattern": "http://tusfiles.org/\\?([A-z0-9]+)",
"url": "http://tusfiles.org/?\\1/"
},
{
"pattern": "tusfiles.net/(?:embed-|)([A-z0-9]+)",
"url": "http://tusfiles.net/\\1"
"pattern": "megadrive.co/embed/([A-z0-9]+)",
"url": "https://megadrive.co/embed/\\1"
}
]
},
"free": true,
"id": "tusfiles",
"name": "tusfiles",
"id": "megadrive",
"name": "megadrive",
"settings": [
{
"default": false,
@@ -41,5 +37,6 @@
"type": "list",
"visible": false
}
-]
-}
+],
+"thumbnail": "https://s8.postimg.cc/kr5olxmad/megadrive1.png"
+}
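The find_videos block is what lets servertools recognize megadrive links embedded in channel pages: each pattern is matched against the page data and rewritten into url through backreferences. A rough sketch of that mechanic, using the regex and template from the JSON above; the page snippet is invented:

import re

pattern = "megadrive.co/embed/([A-z0-9]+)"
url_template = "https://megadrive.co/embed/\\1"

page = "<iframe src='http://megadrive.co/embed/a1B2c3'></iframe>"  # invented sample
for match in re.finditer(pattern, page):
    print match.expand(url_template)  # -> https://megadrive.co/embed/a1B2c3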

View File

@@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
from core import httptools
from core import scrapertools
from platformcode import logger
def test_video_exists(page_url):
    logger.info("(page_url='%s')" % page_url)
    data = httptools.downloadpage(page_url).data
    if "no longer exists" in data or "to copyright issues" in data:
        return False, "[Megadrive] El video ha sido borrado"
    if "please+try+again+later." in data:
        return False, "[Megadrive] Error de Megadrive, no se puede generar el enlace al video"
    if "File has been removed due to inactivity" in data:
        return False, "[Megadrive] El archivo ha sido removido por inactividad"
    return True, ""


def get_video_url(page_url, user="", password="", video_password=""):
    logger.info("(page_url='%s')" % page_url)
    data = httptools.downloadpage(page_url).data
    video_urls = []
    videourl = scrapertools.find_single_match(data, "<source.*?src='([^']+)")
    video_urls.append([".MP4 [megadrive]", videourl])
    return video_urls
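A hypothetical direct call to the new resolver; in the addon these functions are normally invoked through servertools, and both the import path and the video id here are assumptions:

from servers import megadrive  # assumed package path

page_url = "https://megadrive.co/embed/a1B2c3"  # made-up id
exists, message = megadrive.test_video_exists(page_url)
if exists:
    for label, url in megadrive.get_video_url(page_url):
        print label, url  # e.g. ".MP4 [megadrive]" plus the direct stream URL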

View File

@@ -0,0 +1,42 @@
{
"active": true,
"find_videos": {
"ignore_urls": [],
"patterns": [
{
"pattern": "(thevid.net/e/\\w+)",
"url": "https://\\1"
}
]
},
"free": true,
"id": "thevid",
"name": "thevid",
"settings": [
{
"default": false,
"enabled": true,
"id": "black_list",
"label": "@60654",
"type": "bool",
"visible": true
},
{
"default": 0,
"enabled": true,
"id": "favorites_servers_list",
"label": "@60655",
"lvalues": [
"No",
"1",
"2",
"3",
"4",
"5"
],
"type": "list",
"visible": false
}
],
"thumbnail": ""
}

View File

@@ -0,0 +1,30 @@
# -*- coding: utf-8 -*-
from core import httptools
from core import scrapertools
from lib import jsunpack
from platformcode import logger, config
def test_video_exists(page_url):
    logger.info("(page_url='%s')" % page_url)
    data = httptools.downloadpage(page_url).data
    if "Video not found..." in data:
        return False, config.get_localized_string(70292) % "Thevid"
    return True, ""


def get_video_url(page_url, user="", password="", video_password=""):
    logger.info("(page_url='%s')" % page_url)
    data = httptools.downloadpage(page_url).data
    packed = scrapertools.find_multiple_matches(data, "(?s)<script>\s*eval(.*?)\s*</script>")
    for pack in packed:
        unpacked = jsunpack.unpack(pack)
        if "file" in unpacked:
            videos = scrapertools.find_multiple_matches(unpacked, 'file.="(//[^"]+)')
    video_urls = []
    for video in videos:
        video = "https:" + video
        video_urls.append(["mp4 [Thevid]", video])
    logger.info("Url: %s" % videos)
    return video_urls
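The file.= regex above pulls every protocol-relative URL assigned to file1, file2, ... in the unpacked script. An illustrative run with an invented sample string:

from core import scrapertools

unpacked = 'file1="//cdn.example.net/v/abc.mp4";file2="//cdn.example.net/v/abc.480.mp4";'
for video in scrapertools.find_multiple_matches(unpacked, 'file.="(//[^"]+)'):
    print "https:" + video  # the resolver prefixes https: the same way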

View File

@@ -4,7 +4,7 @@
"ignore_urls": [],
"patterns": [
{
"pattern": "(?:thevideo.me|tvad.me|thevid.net|thevideo.ch|thevideo.us)/(?:embed-|)([A-z0-9]+)",
"pattern": "(?:thevideo.me|tvad.me|thevideo.ch|thevideo.us)/(?:embed-|)([A-z0-9]+)",
"url": "https://thevideo.me/embed-\\1.html"
}
]

View File

@@ -1,53 +0,0 @@
# -*- coding: utf-8 -*-
from core import httptools
from core import scrapertools
from platformcode import logger
def test_video_exists(page_url):
    logger.info("(page_url='%s')" % page_url)
    if "tusfiles.net" in page_url:
        data = httptools.downloadpage(page_url).data
        if "File Not Found" in data:
            return False, "[Tusfiles] El archivo no existe o ha sido borrado"
        if "download is no longer available" in data:
            return False, "[Tusfiles] El archivo ya no está disponible"
    return True, ""


def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    logger.info("page_url='%s'" % page_url)
    # Saca el código del vídeo
    data = httptools.downloadpage(page_url).data.replace("\\", "")
    video_urls = []
    if "tusfiles.org" in page_url:
        matches = scrapertools.find_multiple_matches(data,
                                                     '"label"\s*:\s*(.*?),"type"\s*:\s*"([^"]+)","file"\s*:\s*"([^"]+)"')
        for calidad, tipo, video_url in matches:
            tipo = tipo.replace("video/", "")
            video_urls.append([".%s %sp [tusfiles]" % (tipo, calidad), video_url])
        video_urls.sort(key=lambda it: int(it[0].split("p ", 1)[0].rsplit(" ")[1]))
    else:
        matches = scrapertools.find_multiple_matches(data, '<source src="([^"]+)" type="([^"]+)"')
        for video_url, tipo in matches:
            tipo = tipo.replace("video/", "")
            video_urls.append([".%s [tusfiles]" % tipo, video_url])
        id = scrapertools.find_single_match(data, 'name="id" value="([^"]+)"')
        rand = scrapertools.find_single_match(data, 'name="rand" value="([^"]+)"')
        if id and rand:
            post = "op=download2&id=%s&rand=%s&referer=&method_free=&method_premium=" % (id, rand)
            location = httptools.downloadpage(page_url, post, follow_redirects=False, only_headers=True).headers.get(
                "location")
            if location:
                ext = location[-4:]
                video_urls.append(["%s [tusfiles]" % ext, location])
    return video_urls

View File

@@ -0,0 +1,42 @@
{
"active": true,
"find_videos": {
"ignore_urls": [],
"patterns": [
{
"pattern": "(vev.io/embed/[A-z0-9]+)",
"url": "https://\\1"
}
]
},
"free": true,
"id": "vevio",
"name": "vevio",
"settings": [
{
"default": false,
"enabled": true,
"id": "black_list",
"label": "@60654",
"type": "bool",
"visible": true
},
{
"default": 0,
"enabled": true,
"id": "favorites_servers_list",
"label": "@60655",
"lvalues": [
"No",
"1",
"2",
"3",
"4",
"5"
],
"type": "list",
"visible": false
}
],
"thumbnail": "https://s8.postimg.cc/opp2c3p6d/vevio1.png"
}

View File

@@ -0,0 +1,29 @@
# -*- coding: utf-8 -*-
import urllib
from core import httptools
from core import scrapertools
from platformcode import logger, config
def test_video_exists(page_url):
    logger.info("(page_url='%s')" % page_url)
    data = httptools.downloadpage(page_url).data
    if "File was deleted" in data or "Page Cannot Be Found" in data or "<title>Video not found" in data:
        return False, "[vevio] El archivo ha sido eliminado o no existe"
    return True, ""


def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    logger.info("url=" + page_url)
    video_urls = []
    post = {}
    post = urllib.urlencode(post)
    url = page_url
    data = httptools.downloadpage("https://vev.io/api/serve/video/" + scrapertools.find_single_match(url, "embed/([A-z0-9]+)"), post=post).data
    bloque = scrapertools.find_single_match(data, 'qualities":\{(.*?)\}')
    matches = scrapertools.find_multiple_matches(bloque, '"([^"]+)":"([^"]+)')
    for res, media_url in matches:
        video_urls.append(
            [scrapertools.get_filename_from_url(media_url)[-4:] + " (" + res + ") [vevio.me]", media_url])
    return video_urls
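The vev.io endpoint is assumed to return JSON whose qualities object maps resolution to stream URL, which the two regexes above scrape. The same parse done with json, on an invented response body:

import json

data = '{"qualities": {"720": "https://cdn.example/v_720.mp4", "480": "https://cdn.example/v_480.mp4"}}'
for res, media_url in json.loads(data)["qualities"].items():
    print media_url[-4:] + " (" + res + ") [vevio.me]"  # mirrors the label format above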

View File

@@ -1,73 +0,0 @@
{
"active": true,
"find_videos": {
"ignore_urls": [
"http://vidspot.net/embed-theme.html",
"http://vidspot.net/embed-jquery.html",
"http://vidspot.net/embed-s.html",
"http://vidspot.net/embed-images.html",
"http://vidspot.net/embed-faq.html",
"http://vidspot.net/embed-embed.html",
"http://vidspot.net/embed-ri.html",
"http://vidspot.net/embed-d.html",
"http://vidspot.net/embed-css.html",
"http://vidspot.net/embed-js.html",
"http://vidspot.net/embed-player.html",
"http://vidspot.net/embed-cgi.html",
"http://vidspot.net/embed-i.html",
"http://vidspot.net/images",
"http://vidspot.net/theme",
"http://vidspot.net/xupload",
"http://vidspot.net/s",
"http://vidspot.net/js",
"http://vidspot.net/jquery",
"http://vidspot.net/login",
"http://vidspot.net/make",
"http://vidspot.net/i",
"http://vidspot.net/faq",
"http://vidspot.net/tos",
"http://vidspot.net/premium",
"http://vidspot.net/checkfiles",
"http://vidspot.net/privacy",
"http://vidspot.net/refund",
"http://vidspot.net/links",
"http://vidspot.net/contact"
],
"patterns": [
{
"pattern": "vidspot.(?:net/|php\\?id=)(?:embed-)?([a-z0-9]+)",
"url": "http://vidspot.net/\\1"
}
]
},
"free": true,
"id": "vidspot",
"name": "vidspot",
"settings": [
{
"default": false,
"enabled": true,
"id": "black_list",
"label": "@60654",
"type": "bool",
"visible": true
},
{
"default": 0,
"enabled": true,
"id": "favorites_servers_list",
"label": "@60655",
"lvalues": [
"No",
"1",
"2",
"3",
"4",
"5"
],
"type": "list",
"visible": false
}
],
"thumbnail": "server_vidspot.png"
}

View File

@@ -1,57 +0,0 @@
# -*- coding: utf-8 -*-
from core import scrapertools
from platformcode import logger
def test_video_exists(page_url):
    logger.info("(page_url='%s')" % page_url)
    # No existe / borrado: http://vidspot.net/8jcgbrzhujri
    data = scrapertools.cache_page("http://anonymouse.org/cgi-bin/anon-www.cgi/" + page_url)
    if "File Not Found" in data or "Archivo no encontrado" in data or '<b class="err">Deleted' in data \
            or '<b class="err">Removed' in data or '<font class="err">No such' in data:
        return False, "No existe o ha sido borrado de vidspot"
    return True, ""


def get_video_url(page_url, premium=False, user="", password="", video_password=""):
    logger.info("url=%s" % page_url)
    # Normaliza la URL
    videoid = scrapertools.get_match(page_url, "http://vidspot.net/([a-z0-9A-Z]+)")
    page_url = "http://vidspot.net/embed-%s-728x400.html" % videoid
    data = scrapertools.cachePage(page_url)
    if "Access denied" in data:
        geobloqueo = True
    else:
        geobloqueo = False
    if geobloqueo:
        url = "http://www.videoproxy.co/hide.php"
        post = "go=%s" % page_url
        location = scrapertools.get_header_from_response(url, post=post, header_to_get="location")
        url = "http://www.videoproxy.co/%s" % location
        data = scrapertools.cachePage(url)
    # Extrae la URL
    media_url = scrapertools.find_single_match(data, '"file" : "([^"]+)",')
    video_urls = []
    if media_url != "":
        if geobloqueo:
            url = "http://www.videoproxy.co/hide.php"
            post = "go=%s" % media_url
            location = scrapertools.get_header_from_response(url, post=post, header_to_get="location")
            media_url = "http://www.videoproxy.co/%s&direct=false" % location
        else:
            media_url += "&direct=false"
        video_urls.append([scrapertools.get_filename_from_url(media_url)[-4:] + " [vidspot]", media_url])
    for video_url in video_urls:
        logger.info("%s - %s" % (video_url[0], video_url[1]))
    return video_urls