ops
This commit is contained in:
+9
-9
@@ -389,8 +389,16 @@ def downloadpage(url, **opt):
|
|||||||
response_code = req.status_code
|
response_code = req.status_code
|
||||||
response['url'] = req.url
|
response['url'] = req.url
|
||||||
|
|
||||||
|
response['data'] = req.content if req.content else ''
|
||||||
|
|
||||||
|
if type(response['data']) != str:
|
||||||
|
try:
|
||||||
|
response['data'] = response['data'].decode('utf-8')
|
||||||
|
except:
|
||||||
|
response['data'] = response['data'].decode('ISO-8859-1')
|
||||||
|
|
||||||
if req.headers.get('Server', '').startswith('cloudflare') and response_code in [429, 503, 403]\
|
if req.headers.get('Server', '').startswith('cloudflare') and response_code in [429, 503, 403]\
|
||||||
and not opt.get('CF', False) and 'Please turn JavaScript on and reload the page' in req.content:
|
and not opt.get('CF', False) and 'Please turn JavaScript on and reload the page' in response['data']:
|
||||||
logger.debug("CF retry... for domain: %s" % domain)
|
logger.debug("CF retry... for domain: %s" % domain)
|
||||||
from lib import proxytranslate
|
from lib import proxytranslate
|
||||||
gResp = proxytranslate.process_request_proxy(url)
|
gResp = proxytranslate.process_request_proxy(url)
|
||||||
@@ -399,14 +407,6 @@ def downloadpage(url, **opt):
|
|||||||
response_code = req.status_code
|
response_code = req.status_code
|
||||||
response['url'] = gResp['url']
|
response['url'] = gResp['url']
|
||||||
response['data'] = gResp['data']
|
response['data'] = gResp['data']
|
||||||
else:
|
|
||||||
response['data'] = req.content if req.content else ''
|
|
||||||
|
|
||||||
if type(response['data']) != str:
|
|
||||||
try:
|
|
||||||
response['data'] = response['data'].decode('utf-8')
|
|
||||||
except:
|
|
||||||
response['data'] = response['data'].decode('ISO-8859-1')
|
|
||||||
|
|
||||||
if not response['data']:
|
if not response['data']:
|
||||||
response['data'] = ''
|
response['data'] = ''
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
# based on https://github.com/MrCl0wnLab/ProxyGoogleTranslate
|
# based on https://github.com/MrCl0wnLab/ProxyGoogleTranslate
|
||||||
|
import sys
|
||||||
|
if sys.version_info[0] >= 3:
|
||||||
|
PY3 = True
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
|
|
||||||
@@ -61,20 +63,22 @@ def process_request_proxy(url):
|
|||||||
|
|
||||||
logger.debug(url_request_proxy)
|
logger.debug(url_request_proxy)
|
||||||
|
|
||||||
|
data = None
|
||||||
result = None
|
result = None
|
||||||
while not result or 'Sto traducendo' in result.content:
|
while not data or 'Sto traducendo' in data:
|
||||||
time.sleep(0.5)
|
time.sleep(0.5)
|
||||||
result = requests.get(
|
result = requests.get(
|
||||||
url_request_proxy,
|
url_request_proxy,
|
||||||
timeout=20,
|
timeout=20,
|
||||||
headers={'User-Agent': 'android'}
|
headers={'User-Agent': 'android'}
|
||||||
)
|
)
|
||||||
|
data = result.content.decode('utf-8', 'ignore')
|
||||||
|
if not PY3:
|
||||||
|
data = data.encode('utf-8')
|
||||||
logger.debug()
|
logger.debug()
|
||||||
|
|
||||||
data = result.content.decode('utf-8', 'ignore').encode('utf-8')
|
|
||||||
data = re.sub('\s(\w+)=(?!")([^<>\s]+)', r' \1="\2"', data)
|
data = re.sub('\s(\w+)=(?!")([^<>\s]+)', r' \1="\2"', data)
|
||||||
data = re.sub('https://translate\.googleusercontent\.com/.*?u=(.*?)&usg=[A-Za-z0-9_-]+',
|
data = re.sub('https://translate\.googleusercontent\.com/.*?u=(.*?)&usg=[A-Za-z0-9_-]+', '\\1', data)
|
||||||
'\\1', data)
|
|
||||||
|
|
||||||
return {'url': url.strip(), 'result': result, 'data': data.replace('&amp;', '&')}
|
return {'url': url.strip(), 'result': result, 'data': data.replace('&amp;', '&')}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
Reference in New Issue
Block a user