parse torrent name
This commit is contained in:
@@ -40,9 +40,11 @@ def mainlist(item):
|
|||||||
|
|
||||||
@support.scrape
|
@support.scrape
|
||||||
def peliculas(item):
|
def peliculas(item):
|
||||||
|
ptn = True
|
||||||
patron = r'>(?P<quality>[^"<]+)</td> <TD[^>]+><A class="tab" HREF="(?P<url>[^"]+)"\s*>(?P<title>[^<]+)<[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>(?P<size>[^<]+)<[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>(?P<seed>[^<]+)'
|
patron = r'>(?P<quality>[^"<]+)</td> <TD[^>]+><A class="tab" HREF="(?P<url>[^"]+)"\s*>(?P<title>[^<]+)<[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>(?P<size>[^<]+)<[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>(?P<seed>[^<]+)'
|
||||||
def itemHook(item):
|
def itemHook(item):
|
||||||
item.title = item.title.replace('.',' ')
|
# item.title = item.title.replace('.',' ')
|
||||||
|
item.contentType = item.args[1]
|
||||||
thumb = (item.args[1] if type(item.args) == list else item.args) + '.png'
|
thumb = (item.args[1] if type(item.args) == list else item.args) + '.png'
|
||||||
item.thumbnail = support.thumb(thumb=thumb)
|
item.thumbnail = support.thumb(thumb=thumb)
|
||||||
return item
|
return item
|
||||||
|
|||||||
+29
-14
@@ -6,6 +6,9 @@ import inspect
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
from lib.PTN import PTN
|
||||||
|
|
||||||
PY3 = False
|
PY3 = False
|
||||||
if sys.version_info[0] >= 3: PY3 = True; unicode = str; unichr = chr; long = int
|
if sys.version_info[0] >= 3: PY3 = True; unicode = str; unichr = chr; long = int
|
||||||
if PY3:
|
if PY3:
|
||||||
@@ -171,7 +174,7 @@ def cleantitle(title):
|
|||||||
cleantitle = title.replace('"', "'").replace('×', 'x').replace('–', '-').strip()
|
cleantitle = title.replace('"', "'").replace('×', 'x').replace('–', '-').strip()
|
||||||
return cleantitle
|
return cleantitle
|
||||||
|
|
||||||
def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, typeContentDict, typeActionDict, blacklist, search, pag, function, lang):
|
def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, typeContentDict, typeActionDict, blacklist, search, pag, function, lang, ptn):
|
||||||
itemlist = []
|
itemlist = []
|
||||||
log("scrapeBlock qui")
|
log("scrapeBlock qui")
|
||||||
if debug:
|
if debug:
|
||||||
@@ -240,17 +243,6 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t
|
|||||||
Type = scraped['type'] if scraped['type'] else ''
|
Type = scraped['type'] if scraped['type'] else ''
|
||||||
plot = cleantitle(scraped["plot"]) if scraped["plot"] else ''
|
plot = cleantitle(scraped["plot"]) if scraped["plot"] else ''
|
||||||
|
|
||||||
# make formatted Title [longtitle]
|
|
||||||
s = ' - '
|
|
||||||
title = episode + (s if episode and title else '') + title
|
|
||||||
longtitle = title + (s if title and title2 else '') + title2
|
|
||||||
longtitle = typo(longtitle, 'bold')
|
|
||||||
longtitle += typo(quality, '_ [] color kod') if quality else ''
|
|
||||||
longtitle += typo(scraped['size'], '_ [] color kod') if scraped['size'] else ''
|
|
||||||
longtitle += typo(scraped['seed']+ ' SEEDS', '_ [] color kod') if scraped['seed'] else ''
|
|
||||||
|
|
||||||
lang1, longtitle = scrapeLang(scraped, lang, longtitle)
|
|
||||||
|
|
||||||
# if title is set, probably this is a list of episodes or video sources
|
# if title is set, probably this is a list of episodes or video sources
|
||||||
# necessaria l'aggiunta di == scraped["title"] altrimenti non prende i gruppi dopo le categorie
|
# necessaria l'aggiunta di == scraped["title"] altrimenti non prende i gruppi dopo le categorie
|
||||||
if item.infoLabels["title"] == scraped["title"]:
|
if item.infoLabels["title"] == scraped["title"]:
|
||||||
@@ -275,6 +267,28 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t
|
|||||||
if scraped["rating"]:
|
if scraped["rating"]:
|
||||||
infolabels['rating'] = scrapertools.decodeHtmlentities(scraped["rating"])
|
infolabels['rating'] = scrapertools.decodeHtmlentities(scraped["rating"])
|
||||||
|
|
||||||
|
# make formatted Title [longtitle]
|
||||||
|
s = ' - '
|
||||||
|
title = episode + (s if episode and title else '') + title
|
||||||
|
longtitle = title + (s if title and title2 else '') + title2
|
||||||
|
|
||||||
|
lang1, longtitle = scrapeLang(scraped, lang, longtitle)
|
||||||
|
|
||||||
|
if ptn:
|
||||||
|
titlePTN = PTN().parse(title)
|
||||||
|
title = titlePTN.get('title', '')
|
||||||
|
if titlePTN.get('quality', '') or titlePTN.get('resolution', ''):
|
||||||
|
quality = titlePTN.get('quality', '') + " " + titlePTN.get('resolution', '')
|
||||||
|
if not scraped['year']:
|
||||||
|
infolabels['year'] = titlePTN.get('year', '')
|
||||||
|
if titlePTN.get('episode', None) and titlePTN.get('season', None):
|
||||||
|
longtitle = title + ' - ' + str(titlePTN.get('episode')) + 'x' + str(titlePTN.get('season'))
|
||||||
|
|
||||||
|
longtitle = typo(longtitle, 'bold')
|
||||||
|
longtitle += typo(quality, '_ [] color kod') if quality else ''
|
||||||
|
longtitle += typo(scraped['size'], '_ [] color kod') if scraped['size'] else ''
|
||||||
|
longtitle += typo(scraped['seed'] + ' SEEDS', '_ [] color kod') if scraped['seed'] else ''
|
||||||
|
|
||||||
AC = CT = ''
|
AC = CT = ''
|
||||||
if typeContentDict:
|
if typeContentDict:
|
||||||
for name, variants in typeContentDict.items():
|
for name, variants in typeContentDict.items():
|
||||||
@@ -380,6 +394,7 @@ def scrape(func):
|
|||||||
if 'pagination' in args and inspect.stack()[1][3] not in ['add_tvshow', 'get_episodes', 'update', 'find_episodes']: pagination = args['pagination'] if args['pagination'] else 20
|
if 'pagination' in args and inspect.stack()[1][3] not in ['add_tvshow', 'get_episodes', 'update', 'find_episodes']: pagination = args['pagination'] if args['pagination'] else 20
|
||||||
else: pagination = ''
|
else: pagination = ''
|
||||||
lang = args['deflang'] if 'deflang' in args else ''
|
lang = args['deflang'] if 'deflang' in args else ''
|
||||||
|
ptn = args.get('ptn', False)
|
||||||
pag = item.page if item.page else 1 # pagination
|
pag = item.page if item.page else 1 # pagination
|
||||||
matches = []
|
matches = []
|
||||||
|
|
||||||
@@ -402,7 +417,7 @@ def scrape(func):
|
|||||||
if 'season' in bl and bl['season']:
|
if 'season' in bl and bl['season']:
|
||||||
item.season = bl['season']
|
item.season = bl['season']
|
||||||
blockItemlist, blockMatches = scrapeBlock(item, args, bl['block'], patron, headers, action, pagination, debug,
|
blockItemlist, blockMatches = scrapeBlock(item, args, bl['block'], patron, headers, action, pagination, debug,
|
||||||
typeContentDict, typeActionDict, blacklist, search, pag, function, lang)
|
typeContentDict, typeActionDict, blacklist, search, pag, function, lang, ptn)
|
||||||
for it in blockItemlist:
|
for it in blockItemlist:
|
||||||
if 'lang' in bl:
|
if 'lang' in bl:
|
||||||
it.contentLanguage, it.title = scrapeLang(bl, it.contentLanguage, it.title)
|
it.contentLanguage, it.title = scrapeLang(bl, it.contentLanguage, it.title)
|
||||||
@@ -413,7 +428,7 @@ def scrape(func):
|
|||||||
matches.extend(blockMatches)
|
matches.extend(blockMatches)
|
||||||
elif patron:
|
elif patron:
|
||||||
itemlist, matches = scrapeBlock(item, args, data, patron, headers, action, pagination, debug, typeContentDict,
|
itemlist, matches = scrapeBlock(item, args, data, patron, headers, action, pagination, debug, typeContentDict,
|
||||||
typeActionDict, blacklist, search, pag, function, lang)
|
typeActionDict, blacklist, search, pag, function, lang, ptn)
|
||||||
|
|
||||||
if 'itemlistHook' in args:
|
if 'itemlistHook' in args:
|
||||||
itemlist = args['itemlistHook'](itemlist)
|
itemlist = args['itemlistHook'](itemlist)
|
||||||
|
|||||||
Executable
+15
@@ -0,0 +1,15 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from .parse import PTN
|
||||||
|
# https://github.com/divijbindlish/parse-torrent-name
|
||||||
|
__author__ = 'Divij Bindlish'
|
||||||
|
__email__ = 'dvjbndlsh93@gmail.com'
|
||||||
|
__version__ = '1.1.1'
|
||||||
|
__license__ = 'MIT'
|
||||||
|
|
||||||
|
ptn = PTN()
|
||||||
|
|
||||||
|
|
||||||
|
def parse(name):
    """Parse a torrent release name with the shared module-level parser.

    Delegates to the ``parse`` method of the module-level ``ptn`` instance
    and returns whatever that call returns.
    """
    parsed = ptn.parse(name)
    return parsed
|
||||||
Executable
+136
@@ -0,0 +1,136 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import re
|
||||||
|
from .patterns import patterns, types
|
||||||
|
|
||||||
|
|
||||||
|
class PTN(object):
    """Split a torrent release name into labelled parts.

    ``parse`` runs every ``(key, regex)`` pair from ``patterns`` against the
    name, stores the hits in a dict, derives the title from the text that
    precedes the first recognised part, and files whatever is left over under
    ``group``, ``episodeName`` or ``excess``.

    All per-parse state lives on the instance and is reset at the start of
    every ``parse`` call.
    """

    def _escape_regex(self, string):
        """Return ``string`` with regex metacharacters backslash-escaped.

        The replacement uses ``\\g<0>`` (the whole match). The JavaScript-style
        ``$&`` token is not understood by Python's ``re.sub`` and would be
        inserted literally, producing a pattern that never matches.
        """
        return re.sub(r'[\-\[\]{}()*+?.,\\^$|#\s]', r'\\\g<0>', string)

    def __init__(self):
        self.torrent = None      # {'name': ..., optionally 'map': ...}
        self.excess_raw = None   # name with every recognised raw part removed
        self.group_raw = None    # raw text of the matched release group
        self.start = None        # index where the title starts
        self.end = None          # index where the title ends (None = unknown)
        self.title_raw = None
        self.parts = None        # result dict built by parse()

    def _part(self, name, match, raw, clean):
        """Store one parsed field and update title/excess bookkeeping.

        :param name: key under which ``clean`` is stored in ``self.parts``
        :param match: list of matched strings; ``match[0]`` is located in the
            original name to move the title boundaries (may be empty)
        :param raw: raw matched text to strip from the excess (may be None)
        :param clean: cleaned value to store
        """
        # The main core instructions
        self.parts[name] = clean

        if match:
            # A part found at position 0 pushes the title start forward;
            # otherwise the earliest part found marks where the title ends.
            index = self.torrent['name'].find(match[0])
            if index == 0:
                self.start = len(match[0])
            elif self.end is None or index < self.end:
                self.end = index

        if name != 'excess':
            # Everything recognised is removed from the excess text.
            if name == 'group':
                self.group_raw = raw
            if raw is not None:
                self.excess_raw = self.excess_raw.replace(raw, '')

    def _late(self, name, clean):
        """Store a field discovered while processing the excess text.

        Only ``group`` and ``episodeName`` are handled; other names are
        ignored.
        """
        if name == 'group':
            self._part(name, [], None, clean)
        elif name == 'episodeName':
            clean = re.sub(r'[\._]', ' ', clean)
            clean = re.sub('_+$', '', clean)
            self._part(name, [], None, clean.strip())

    def parse(self, name):
        """Parse ``name`` and return a dict of the parts that were found.

        Keys come from ``patterns`` (converted according to ``types``), plus
        ``title`` and, when present, ``group``, ``episodeName`` and ``excess``.
        """
        self.parts = {}
        self.torrent = {'name': name}
        self.excess_raw = name
        self.group_raw = ''
        self.start = 0
        self.end = None
        self.title_raw = None

        # Look the codec/quality regexes up by key rather than by list index.
        pattern_by_key = dict(patterns)
        # Underscores count as separators; the name never changes in the loop.
        clean_name = re.sub('_', ' ', name)

        for key, pattern in patterns:
            if key not in ('season', 'episode', 'website'):
                pattern = r'\b%s\b' % pattern

            match = re.findall(pattern, clean_name, re.I)
            if not match:
                continue

            # With several groups findall yields tuples: keep the first hit.
            if isinstance(match[0], tuple):
                match = list(match[0])
            raw_value = match[0]
            clean_index = 1 if len(match) > 1 else 0

            kind = types.get(key)
            if kind == 'boolean':
                clean = True
            else:
                clean = match[clean_index]
                if kind == 'integer':
                    clean = int(clean)

            if key == 'group':
                if re.search(pattern_by_key['codec'], clean, re.I) \
                        or re.search(pattern_by_key['quality'], clean):
                    continue  # Codec and quality.
                if re.match('[^ ]+ [^ ]+ .+', clean):
                    # Three or more space-separated words are stored as an
                    # episode name instead of a group.
                    key = 'episodeName'

            if key == 'episode':
                # Remember where the episode token was so an episode name
                # directly following it can be recognised later.
                sub_pattern = self._escape_regex(raw_value)
                self.torrent['map'] = re.sub(sub_pattern, '{episode}', name)

            self._part(key, match, raw_value, clean)

        # Start process for title
        raw = name
        if self.end is not None:
            raw = raw[self.start:self.end].split('(')[0]

        clean = re.sub('^ -', '', raw)
        if clean.find(' ') == -1 and clean.find('.') != -1:
            # Dot-separated title without spaces: dots are word separators.
            clean = re.sub(r'\.', ' ', clean)
        clean = re.sub('_', ' ', clean)
        clean = re.sub(r'([\[\(_]|- )$', '', clean).strip()

        self._part('title', [], raw, clean)

        # Start process for end: classify what is left of the name.
        clean = re.sub(r'(^[-\. ()]+)|([-\. ]+$)', '', self.excess_raw)
        clean = re.sub(r'[\(\)\/]', ' ', clean)
        pieces = re.split(r'\.\.+| +', clean)
        clean = [piece.strip('-') for piece in pieces
                 if piece and piece != '-']

        if clean:
            # A trailing leftover that ends the original name is the group.
            group_pattern = clean[-1] + self.group_raw
            if name.find(group_pattern) == len(name) - len(group_pattern):
                self._late('group', clean.pop() + self.group_raw)

            if 'map' in self.torrent and clean:
                episode_name_pattern = (
                    '{episode}' + re.sub('_+$', '', clean[0])
                )
                if self.torrent['map'].find(episode_name_pattern) != -1:
                    self._late('episodeName', clean.pop(0))

        if clean:
            if len(clean) == 1:
                clean = clean[0]
            self._part('excess', [], self.excess_raw, clean)
        return self.parts
|
||||||
Executable
+43
@@ -0,0 +1,43 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Ordered (key, regex) pairs. Order matters: entries are tried from top to
# bottom, and 'quality' / 'codec' must stay at indices 4 and 5 for code that
# still addresses them by position.
# All regexes are raw strings so that backslashes reach the regex engine
# unchanged and no invalid-escape warnings are raised.
patterns = [
    ('season', r'(s?([0-9]{1,2}))[ex]'),
    ('episode', r'([ex]([0-9]{2})(?:[^0-9]|$))'),
    # Years 1900-2099 (the previous upper bound of 2019 missed newer releases).
    ('year', r'([\[\(]?((?:19[0-9]|20[0-9])[0-9])[\]\)]?)'),
    ('resolution', r'([0-9]{3,4}p)'),
    ('quality', (r'((?:PPV\.)?[HP]DTV|(?:HD)?CAM|B[DR]Rip|(?:HD-?)?TS|'
                 r'(?:PPV )?WEB-?DL(?: DVDRip)?|HDRip|DVDRip|DVDRIP|'
                 r'CamRip|W[EB]BRip|BluRay|DvDScr|hdtv|telesync)')),
    ('codec', r'(xvid|[hx]\.?26[45])'),
    ('audio', (r'(MP3|DD5\.?1|Dual[\- ]Audio|LiNE|DTS|'
               r'AAC[.-]LC|AAC(?:\.?2\.0)?|'
               r'AC3(?:\.5\.1)?)')),
    ('group', r'(- ?([^-]+(?:-={[^-]+-?$)?))$'),
    ('region', r'R[0-9]'),
    # '(?:' is a non-capturing group; the former '(:?' was a typo that
    # captured an optional colon instead.
    ('extended', r'(EXTENDED(?:.CUT)?)'),
    ('hardcoded', r'HC'),
    ('proper', r'PROPER'),
    ('repack', r'REPACK'),
    ('container', r'(MKV|AVI|MP4)'),
    ('widescreen', r'WS'),
    ('website', r'^(\[ ?([^\]]+?) ?\])'),
    ('language', r'(rus\.eng|ita\.eng)'),
    ('sbs', r'(?:Half-)?SBS'),
    ('unrated', r'UNRATED'),
    ('size', r'(\d+(?:\.\d+)?(?:GB|MB))'),
    ('3d', r'3D'),
]

# How the matched text of a key is converted: 'integer' keys are passed
# through int(), 'boolean' keys are stored as True when the pattern matches.
# Keys not listed here keep the matched string.
types = {
    'season': 'integer',
    'episode': 'integer',
    'year': 'integer',
    'extended': 'boolean',
    'hardcoded': 'boolean',
    'proper': 'boolean',
    'repack': 'boolean',
    'widescreen': 'boolean',
    'unrated': 'boolean',
    '3d': 'boolean',
}
|
||||||
Reference in New Issue
Block a user