# support functions that are needed by many channels, to not repeat the same code

import base64, urlparse, re, os, inspect

from core import httptools, scrapertoolsV2, servertools, tmdb
from core.item import Item
import urllib
from lib import unshortenit
from platformcode import logger, config


def hdpass_get_servers(item):
    # load the page
    data = httptools.downloadpage(item.url).data.replace('\n', '')
    patron = r'<iframe src="([^"]+)"[^>]+><\/iframe>'
    url = scrapertoolsV2.find_single_match(data, patron).replace("?alta", "")
    url = url.replace("&download=1", "")
    if 'https' not in url:
        url = 'https:' + url

    itemlist = []  # initialized here so the function also returns a list when the url is not an hdpass one
    if 'hdpass' in url:
        data = httptools.downloadpage(url).data

        start = data.find('<div class="row mobileRes">')
        end = data.find('<div id="playerFront">', start)
        data = data[start:end]

        patron_res = '<div class="row mobileRes">(.*?)</div>'
        patron_mir = '<div class="row mobileMirrs">(.*?)</div>'
        patron_media = r'<input type="hidden" name="urlEmbed" data-mirror="([^"]+)" id="urlEmbed" value="([^"]+)"/>'

        res = scrapertoolsV2.find_single_match(data, patron_res)

        for res_url, res_video in scrapertoolsV2.find_multiple_matches(res, '<option.*?value="([^"]+?)">([^<]+?)</option>'):
            data = httptools.downloadpage(urlparse.urljoin(url, res_url)).data.replace('\n', '')
            mir = scrapertoolsV2.find_single_match(data, patron_mir)

            for mir_url, server in scrapertoolsV2.find_multiple_matches(mir, '<option.*?value="([^"]+?)">([^<]+?)</option>'):
                data = httptools.downloadpage(urlparse.urljoin(url, mir_url)).data.replace('\n', '')
                for media_label, media_url in scrapertoolsV2.find_multiple_matches(data, patron_media):
                    itemlist.append(
                        Item(channel=item.channel,
                             action="play",
                             title=item.title + '[' + color(server, 'orange') + '] - ' + color(res_video, 'green'),
                             fulltitle=item.fulltitle,
                             quality=res_video,
                             show=item.show,
                             thumbnail=item.thumbnail,
                             contentType=item.contentType,
                             server=server,
                             url=url_decode(media_url)))
                    logger.info("video -> " + res_video)

    return itemlist


def url_decode(url_enc):
    # the url is obfuscated: its base64 is reversed and the two halves are swapped
    length = len(url_enc)
    if length % 2 == 0:
        len2 = length / 2
        first = url_enc[0:len2]
        last = url_enc[len2:length]
        url_enc = last + first
        reverse = url_enc[::-1]
        return base64.b64decode(reverse)

    # odd length: set the last char aside, decode the rest, then re-append it
    last_car = url_enc[length - 1]
    url_enc = url_enc[:length - 1]  # strings are immutable, so slice off the last char instead of assigning to it
    len1 = len(url_enc)
    len2 = len1 / 2
    first = url_enc[0:len2]
    last = url_enc[len2:len1]
    url_enc = last + first
    reverse = url_enc[::-1]
    reverse = reverse + last_car
    return base64.b64decode(reverse)


def color(text, color):
    return "[COLOR " + color + "]" + text + "[/COLOR]"
def scrape(item, patron='', listGroups=[], headers="", blacklist="", data="", patron_block="",
           patronNext="", action="findvideos", url_host=""):
    # patron: the pattern used to scrape the page, every capturing group must match an entry in listGroups
    # listGroups: a list naming, in order, the scraping info captured by your patron;
    #   accepted values are: url, title, thumb, quality, year, plot, duration, genre, rating
    # headers: values to pass in the request header
    # blacklist: titles that you want to exclude (service articles, for example)
    # data: pass the page data manually, e.g. when you need some custom replacement
    # patron_block: pattern to get parts of the page (to scrape with the patron attribute);
    #   if you need a "block inside another block" you can pass a list; please note that all matches
    #   will be packed as a string
    # patronNext: pattern for scraping the next-page link
    # action: if you want results to perform an action other than "findvideos", useful when scraping films by genre
    # url_host: string to prepend to scrapedurl, useful when the url doesn't contain the host
    # example usage:
    #   import support
    #
    #   patron = 'blablabla'
    #   headers = [['Referer', host]]
    #   blacklist = 'Request a TV serie!'
    #   return support.scrape(item, patron, ['thumb', 'quality', 'url', 'title', 'year', 'plot'],
    #                         headers=headers, blacklist=blacklist)

    itemlist = []

    if not data:
        # replace all ' with " and eliminate newlines and tabs, so patterns don't need to worry about them
        data = httptools.downloadpage(item.url, headers=headers).data.replace("'", '"')
        data = re.sub('\n|\t', '', data)
    logger.info('DATA =' + data)

    if patron_block:
        if isinstance(patron_block, str):
            patron_block = [patron_block]
        for n, regex in enumerate(patron_block):
            blocks = scrapertoolsV2.find_multiple_matches(data, regex)
            data = str(blocks)
            logger.info('BLOCK ' + str(n) + '=' + data)

    if patron and listGroups:
        matches = scrapertoolsV2.find_multiple_matches(data, patron)
        logger.info('MATCHES =' + str(matches))

        for match in matches:
            if len(listGroups) > len(match):  # pad the match so every index in listGroups exists
                match = list(match)
                match.extend([''] * (len(listGroups) - len(match)))

            scrapedurl = url_host + match[listGroups.index('url')] if 'url' in listGroups else ''
            scrapedtitle = match[listGroups.index('title')] if 'title' in listGroups else ''
            scrapedthumb = match[listGroups.index('thumb')] if 'thumb' in listGroups else ''
            scrapedquality = match[listGroups.index('quality')] if 'quality' in listGroups else ''
            scrapedyear = match[listGroups.index('year')] if 'year' in listGroups else ''
            scrapedplot = match[listGroups.index('plot')] if 'plot' in listGroups else ''
            scrapedduration = match[listGroups.index('duration')] if 'duration' in listGroups else ''
            scrapedgenre = match[listGroups.index('genre')] if 'genre' in listGroups else ''
            scrapedrating = match[listGroups.index('rating')] if 'rating' in listGroups else ''

            title = scrapertoolsV2.decodeHtmlentities(scrapedtitle)
            plot = scrapertoolsV2.decodeHtmlentities(scrapedplot)
            if scrapedquality:
                longtitle = '[B]' + title + '[/B] [COLOR blue][' + scrapedquality + '][/COLOR]'
            else:
                longtitle = '[B]' + title + '[/B]'

            infolabels = {}
            if scrapedyear:
                infolabels['year'] = scrapedyear
            if scrapedplot:
                infolabels['plot'] = plot
            if scrapedduration:
                infolabels['duration'] = scrapedduration
            if scrapedgenre:
                # delete all html tags and keep only the text
                infolabels['genre'] = scrapertoolsV2.find_multiple_matches(scrapedgenre, '(?:<[^<]+?>)?([^<>]+)')
            if scrapedrating:
                infolabels['rating'] = scrapertoolsV2.decodeHtmlentities(scrapedrating)

            if scrapedtitle not in blacklist:
                itemlist.append(
                    Item(channel=item.channel,
                         action=action,
                         contentType=item.contentType,
                         title=longtitle,
                         fulltitle=title,
                         show=title,
                         quality=scrapedquality,
                         url=scrapedurl,
                         infoLabels=infolabels,
                         thumbnail=scrapedthumb))

    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)

    if patronNext:
        next_page = scrapertoolsV2.find_single_match(data, patronNext)
        logger.info('NEXT ' + next_page)
        if next_page != "":
            itemlist.append(
                Item(channel=item.channel,
                     action="peliculas",
                     contentType=item.contentType,
                     title="[COLOR blue]" + config.get_localized_string(30992) + " >[/COLOR]",
                     url=next_page))

    return itemlist
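# A minimal sketch of a channel "peliculas" view built on scrape(); the site
# pattern, the group order and the next-page pattern below are hypothetical,
# every real channel supplies its own.
def _scrape_example(item):
    patron = r'<a href="([^"]+)" title="([^"]+)">\s*<img src="([^"]+)"'
    return scrape(item, patron, ['url', 'title', 'thumb'],
                  blacklist='Request a TV serie!',
                  patronNext='<a class="next" href="([^"]+)"')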
  • ([^<>]+)(?:([^<>]+))?' matches = scrapertoolsV2.find_multiple_matches(data, patron) ret = [] for type, post, nume, title, server in matches: postData = urllib.urlencode({ "action": "doo_player_ajax", "post": post, "nume": nume, "type": type }) dataAdmin = httptools.downloadpage(host + 'wp-admin/admin-ajax.php', post=postData,headers={'Referer': item.url}).data link = scrapertoolsV2.get_match(dataAdmin, "') if not match else match if not match: from lib import jsunpack try: data = scrapertoolsV2.get_match(data, r"(eval\s?\(function\(p,a,c,k,e,d.*?)") data = jsunpack.unpack(data) logger.debug("##### play /link/ unpack ##\n%s\n##" % data) except IndexError: logger.debug("##### The content is yet unpacked ##\n%s\n##" % data) data = scrapertoolsV2.find_single_match(data, r'var link(?:\s)?=(?:\s)?"([^"]+)";') data, c = unshortenit.unwrap_30x_only(data) else: data = match if data.startswith('/'): data = urlparse.urljoin("http://swzz.xyz", data) data = httptools.downloadpage(data).data logger.debug("##### play /link/ data ##\n%s\n##" % data) else: data = item.url return data def menu(itemlist, title='', action='', url='', contentType='movie'): frame = inspect.stack()[1] filename = frame[0].f_code.co_filename filename = os.path.basename(filename).replace('.py','') logger.info('FILENAME= ' + filename) itemlist.append(Item( channel = filename, title = title, action = action, url = url, contentType = contentType )) from channelselector import thumb itemlist = thumb(itemlist) return itemlist