From e9f6987324c469d4dfdb81d61cf50c5ddf5413de Mon Sep 17 00:00:00 2001 From: marco Date: Mon, 27 Apr 2020 20:31:31 +0200 Subject: [PATCH] parse torrent name --- channels/ilcorsaronero.py | 4 +- core/support.py | 43 ++++++++---- lib/PTN/__init__.py | 15 +++++ lib/PTN/parse.py | 136 ++++++++++++++++++++++++++++++++++++++ lib/PTN/patterns.py | 43 ++++++++++++ 5 files changed, 226 insertions(+), 15 deletions(-) create mode 100755 lib/PTN/__init__.py create mode 100755 lib/PTN/parse.py create mode 100755 lib/PTN/patterns.py diff --git a/channels/ilcorsaronero.py b/channels/ilcorsaronero.py index ac5a47d7..b456435f 100644 --- a/channels/ilcorsaronero.py +++ b/channels/ilcorsaronero.py @@ -40,9 +40,11 @@ def mainlist(item): @support.scrape def peliculas(item): + ptn = True patron = r'>(?P[^"<]+) ]+>(?P[^<]+)<[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>(?P<size>[^<]+)<[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>(?P<seed>[^<]+)' def itemHook(item): - item.title = item.title.replace('.',' ') + # item.title = item.title.replace('.',' ') + item.contentType = item.args[1] thumb = (item.args[1] if type(item.args) == list else item.args) + '.png' item.thumbnail = support.thumb(thumb=thumb) return item diff --git a/core/support.py b/core/support.py index 77d533f0..42576c01 100755 --- a/core/support.py +++ b/core/support.py @@ -6,6 +6,9 @@ import inspect import os import re import sys + +from lib.PTN import PTN + PY3 = False if sys.version_info[0] >= 3: PY3 = True; unicode = str; unichr = chr; long = int if PY3: @@ -171,7 +174,7 @@ def cleantitle(title): cleantitle = title.replace('"', "'").replace('×', 'x').replace('–', '-').strip() return cleantitle -def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, typeContentDict, typeActionDict, blacklist, search, pag, function, lang): +def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, typeContentDict, typeActionDict, blacklist, search, pag, 
function, lang, ptn): itemlist = [] log("scrapeBlock qui") if debug: @@ -240,17 +243,6 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t Type = scraped['type'] if scraped['type'] else '' plot = cleantitle(scraped["plot"]) if scraped["plot"] else '' - # make formatted Title [longtitle] - s = ' - ' - title = episode + (s if episode and title else '') + title - longtitle = title + (s if title and title2 else '') + title2 - longtitle = typo(longtitle, 'bold') - longtitle += typo(quality, '_ [] color kod') if quality else '' - longtitle += typo(scraped['size'], '_ [] color kod') if scraped['size'] else '' - longtitle += typo(scraped['seed']+ ' SEEDS', '_ [] color kod') if scraped['seed'] else '' - - lang1, longtitle = scrapeLang(scraped, lang, longtitle) - # if title is set, probably this is a list of episodes or video sources # necessaria l'aggiunta di == scraped["title"] altrimenti non prende i gruppi dopo le categorie if item.infoLabels["title"] == scraped["title"]: @@ -275,6 +267,28 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t if scraped["rating"]: infolabels['rating'] = scrapertools.decodeHtmlentities(scraped["rating"]) + # make formatted Title [longtitle] + s = ' - ' + title = episode + (s if episode and title else '') + title + longtitle = title + (s if title and title2 else '') + title2 + + lang1, longtitle = scrapeLang(scraped, lang, longtitle) + + if ptn: + titlePTN = PTN().parse(title) + title = titlePTN.get('title', '') + if titlePTN.get('quality', '') or titlePTN.get('resolution', ''): + quality = titlePTN.get('quality', '') + " " + titlePTN.get('resolution', '') + if not scraped['year']: + infolabels['year'] = titlePTN.get('year', '') + if titlePTN.get('episode', None) and titlePTN.get('season', None): + longtitle = title + ' - ' + str(titlePTN.get('episode')) + 'x' + str(titlePTN.get('season')) + + longtitle = typo(longtitle, 'bold') + longtitle += typo(quality, '_ [] color kod') if 
quality else '' + longtitle += typo(scraped['size'], '_ [] color kod') if scraped['size'] else '' + longtitle += typo(scraped['seed'] + ' SEEDS', '_ [] color kod') if scraped['seed'] else '' + AC = CT = '' if typeContentDict: for name, variants in typeContentDict.items(): @@ -380,6 +394,7 @@ def scrape(func): if 'pagination' in args and inspect.stack()[1][3] not in ['add_tvshow', 'get_episodes', 'update', 'find_episodes']: pagination = args['pagination'] if args['pagination'] else 20 else: pagination = '' lang = args['deflang'] if 'deflang' in args else '' + ptn = args.get('ptn', False) pag = item.page if item.page else 1 # pagination matches = [] @@ -402,7 +417,7 @@ def scrape(func): if 'season' in bl and bl['season']: item.season = bl['season'] blockItemlist, blockMatches = scrapeBlock(item, args, bl['block'], patron, headers, action, pagination, debug, - typeContentDict, typeActionDict, blacklist, search, pag, function, lang) + typeContentDict, typeActionDict, blacklist, search, pag, function, lang, ptn) for it in blockItemlist: if 'lang' in bl: it.contentLanguage, it.title = scrapeLang(bl, it.contentLanguage, it.title) @@ -413,7 +428,7 @@ def scrape(func): matches.extend(blockMatches) elif patron: itemlist, matches = scrapeBlock(item, args, data, patron, headers, action, pagination, debug, typeContentDict, - typeActionDict, blacklist, search, pag, function, lang) + typeActionDict, blacklist, search, pag, function, lang, ptn) if 'itemlistHook' in args: itemlist = args['itemlistHook'](itemlist) diff --git a/lib/PTN/__init__.py b/lib/PTN/__init__.py new file mode 100755 index 00000000..d4346dbd --- /dev/null +++ b/lib/PTN/__init__.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from .parse import PTN +# https://github.com/divijbindlish/parse-torrent-name +__author__ = 'Divij Bindlish' +__email__ = 'dvjbndlsh93@gmail.com' +__version__ = '1.1.1' +__license__ = 'MIT' + +ptn = PTN() + + +def parse(name): + return ptn.parse(name) diff --git 
class PTN(object):
    """Split a torrent release name into labelled parts.

    ``parse`` walks the ``patterns`` table, stores every hit in
    ``self.parts`` and treats the text located before the earliest hit as
    the title.  All scratch state lives on the instance and is rebuilt at
    the start of every ``parse`` call.
    """

    def _escape_regex(self, string):
        """Return ``string`` with every regex metacharacter escaped.

        The earlier implementation relied on the JavaScript ``$&``
        replacement token, which Python's ``re.sub`` writes out literally,
        so the resulting pattern could never match the text it came from.
        """
        return re.escape(string)

    def __init__(self):
        # Every attribute below is (re)initialised by parse().
        self.torrent = None     # {'name': ..., optional 'map': ...}
        self.excess_raw = None  # name with every recognised raw hit removed
        self.group_raw = None   # raw text of the release-group hit
        self.start = None       # offset where the title starts
        self.end = None         # offset where the title ends (None = unknown)
        self.title_raw = None   # reset by parse(); not otherwise used here
        self.parts = None       # result dictionary returned by parse()

    def _part(self, name, match, raw, clean):
        """Record one parsed part and update the title/excess bookkeeping.

        :param name: key under which ``clean`` is stored in ``self.parts``
        :param match: list whose first item is the raw hit (may be empty)
        :param raw: text to strip from the excess string, or ``None``
        :param clean: value stored for ``name``
        """
        self.parts[name] = clean

        if match:
            # The title spans from the end of a hit at offset 0 up to the
            # earliest later hit.
            index = self.torrent['name'].find(match[0])
            if index == 0:
                self.start = len(match[0])
            elif index > 0 and (self.end is None or index < self.end):
                # index == -1 means the hit is not present verbatim in the
                # original name (it was found in the underscore-normalised
                # copy); it must not be used as a title boundary.
                self.end = index

        if name != 'excess':
            if name == 'group':
                self.group_raw = raw
            if raw is not None:
                self.excess_raw = self.excess_raw.replace(raw, '')

    def _late(self, name, clean):
        """Store a part discovered after the main pattern loop.

        Only ``'group'`` and ``'episodeName'`` are handled; any other name
        is ignored.
        """
        if name == 'group':
            self._part(name, [], None, clean)
        elif name == 'episodeName':
            # Dots and underscores act as word separators in release names.
            clean = re.sub(r'[\._]', ' ', clean)
            self._part(name, [], None, clean.strip())

    def parse(self, name):
        """Parse ``name`` and return a dict of the recognised parts.

        Keys come from the ``patterns`` table plus ``'title'`` and, when
        applicable, ``'episodeName'`` and ``'excess'``.  Values are strings,
        except that ``types`` may mark a key as ``'integer'`` (converted
        with ``int``) or ``'boolean'`` (stored as ``True``).
        """
        self.parts = {}
        self.torrent = {'name': name}
        self.excess_raw = name
        self.group_raw = ''
        self.start = 0
        self.end = None
        self.title_raw = None

        pattern_by_key = dict(patterns)
        # Loop-invariant: patterns are matched against this copy.
        clean_name = name.replace('_', ' ')

        for key, pattern in patterns:
            if key not in ('season', 'episode', 'website'):
                pattern = r'\b%s\b' % pattern

            found = re.findall(pattern, clean_name, re.I)
            if not found:
                continue

            # Only the first hit is used.  With several capture groups
            # findall yields tuples: group 1 is the raw text, group 2 the
            # cleaned value.  Otherwise the hit itself is both raw and clean
            # (previously a *second* hit was mistaken for the clean value).
            first = found[0]
            if isinstance(first, tuple):
                match = list(first)
                raw = match[0]
                captured = match[1] if len(match) > 1 else match[0]
            else:
                match = [first]
                raw = captured = first

            kind = types.get(key)
            if kind == 'boolean':
                clean = True
            elif kind == 'integer':
                clean = int(captured)
            else:
                clean = captured

            if key == 'group':
                # A trailing "-x264" or "-HDTV" is a codec/quality tag,
                # not a release group.
                if re.search(pattern_by_key['codec'], clean, re.I) \
                        or re.search(pattern_by_key['quality'], clean):
                    continue
                if re.match(r'[^ ]+ [^ ]+ .+', clean):
                    key = 'episodeName'

            if key == 'episode':
                # Remember where the episode marker sat so the episode name
                # that follows it can be recognised later.
                self.torrent['map'] = re.sub(
                    self._escape_regex(raw), '{episode}', name
                )

            self._part(key, match, raw, clean)

        # Title: the text between self.start and self.end.
        raw = name
        if self.end is not None:
            raw = raw[self.start:self.end].split('(')[0]

        clean = re.sub(r'^ -', '', raw)
        if ' ' not in clean and '.' in clean:
            clean = clean.replace('.', ' ')
        clean = clean.replace('_', ' ')
        clean = re.sub(r'([\[\(_]|- )$', '', clean).strip()

        self._part('title', [], raw, clean)

        # Excess: whatever no pattern (and not the title) consumed.
        leftover = re.sub(r'(^[-\. ()]+)|([-\. ]+$)', '', self.excess_raw)
        leftover = re.sub(r'[\(\)\/]', ' ', leftover)
        tokens = [
            token.strip('-')
            for token in re.split(r'\.\.+| +', leftover)
            if token and token != '-'
        ]

        if tokens:
            group_pattern = tokens[-1] + self.group_raw
            if name.find(group_pattern) == len(name) - len(group_pattern):
                self._late('group', tokens.pop() + self.group_raw)

            if 'map' in self.torrent and tokens:
                episode_name_pattern = (
                    '{episode}' + re.sub(r'_+$', '', tokens[0])
                )
                if self.torrent['map'].find(episode_name_pattern) != -1:
                    self._late('episodeName', tokens.pop(0))

        if tokens:
            excess = tokens[0] if len(tokens) == 1 else tokens
            self._part('excess', [], self.excess_raw, excess)
        return self.parts
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Regex tables used to recognise the parts of a torrent release name."""

# (key, regex) pairs, tried in this order.  Order is significant: entries
# are applied in sequence and code may address entries by position, so
# append new entries rather than reordering existing ones.
# All literals are raw strings so that regex escapes such as ``\.`` or
# ``\d`` are not treated as (invalid) Python string escapes.
patterns = [
    ('season', r'(s?([0-9]{1,2}))[ex]'),
    ('episode', r'([ex]([0-9]{2})(?:[^0-9]|$))'),
    # 1900-2029.  The upper bound deliberately stops at 2029 so that
    # numbers in titles such as "2049" are not mistaken for a release year.
    ('year', r'([\[\(]?((?:19[0-9]|20[0-2])[0-9])[\]\)]?)'),
    ('resolution', r'([0-9]{3,4}p)'),
    ('quality', (r'((?:PPV\.)?[HP]DTV|(?:HD)?CAM|B[DR]Rip|(?:HD-?)?TS|'
                 r'(?:PPV )?WEB-?DL(?: DVDRip)?|HDRip|DVDRip|DVDRIP|'
                 r'CamRip|W[EB]BRip|BluRay|DvDScr|hdtv|telesync)')),
    ('codec', r'(xvid|[hx]\.?26[45])'),
    ('audio', (r'(MP3|DD5\.?1|Dual[\- ]Audio|LiNE|DTS|'
               r'AAC[.-]LC|AAC(?:\.?2\.0)?|'
               r'AC3(?:\.5\.1)?)')),
    ('group', r'(- ?([^-]+(?:-={[^-]+-?$)?))$'),
    ('region', r'R[0-9]'),
    # "(?:" (non-capturing) was previously mistyped as "(:?".
    ('extended', r'(EXTENDED(?:.CUT)?)'),
    ('hardcoded', r'HC'),
    ('proper', r'PROPER'),
    ('repack', r'REPACK'),
    ('container', r'(MKV|AVI|MP4)'),
    ('widescreen', r'WS'),
    ('website', r'^(\[ ?([^\]]+?) ?\])'),
    ('language', r'(rus\.eng|ita\.eng)'),
    ('sbs', r'(?:Half-)?SBS'),
    ('unrated', r'UNRATED'),
    ('size', r'(\d+(?:\.\d+)?(?:GB|MB))'),
    ('3d', r'3D'),
]

# Value type per key.  Keys not listed here keep the matched string.
types = {
    'season': 'integer',
    'episode': 'integer',
    'year': 'integer',
    'extended': 'boolean',
    'hardcoded': 'boolean',
    'proper': 'boolean',
    'repack': 'boolean',
    'widescreen': 'boolean',
    'unrated': 'boolean',
    '3d': 'boolean',
}