This commit is contained in:
marco
2020-12-10 22:53:20 +01:00
parent 1c623ad47a
commit 23e61f23c6
2 changed files with 22 additions and 17 deletions

View File

@@ -389,8 +389,16 @@ def downloadpage(url, **opt):
response_code = req.status_code
response['url'] = req.url
response['data'] = req.content if req.content else ''
if type(response['data']) != str:
try:
response['data'] = response['data'].decode('utf-8')
except:
response['data'] = response['data'].decode('ISO-8859-1')
if req.headers.get('Server', '').startswith('cloudflare') and response_code in [429, 503, 403]\
and not opt.get('CF', False) and 'Please turn JavaScript on and reload the page' in req.content:
and not opt.get('CF', False) and 'Please turn JavaScript on and reload the page' in response['data']:
logger.debug("CF retry... for domain: %s" % domain)
from lib import proxytranslate
gResp = proxytranslate.process_request_proxy(url)
@@ -399,14 +407,6 @@ def downloadpage(url, **opt):
response_code = req.status_code
response['url'] = gResp['url']
response['data'] = gResp['data']
else:
response['data'] = req.content if req.content else ''
if type(response['data']) != str:
try:
response['data'] = response['data'].decode('utf-8')
except:
response['data'] = response['data'].decode('ISO-8859-1')
if not response['data']:
response['data'] = ''

View File

@@ -1,4 +1,11 @@
# based on https://github.com/MrCl0wnLab/ProxyGoogleTranslate
import sys

# Interpreter-compatibility shim.
# PY3 is True when the running interpreter's major version is 3 or higher.
# `request` is bound to `urllib.request` on Python 3 and to the top-level
# `urllib` module on Python 2, so the rest of the module can use the single
# name `request` on either interpreter.
if sys.version_info[0] >= 3:
    from urllib import request
    PY3 = True
else:
    PY3 = False
    import urllib as request
import re
import time
@@ -6,10 +13,6 @@ import time
import requests
from platformcode import logger
try:
from urllib import request
except:
import urllib as request
HEADERS = {
'Host': 'translate.google.com',
'User-Agent': 'android'
@@ -61,20 +64,22 @@ def process_request_proxy(url):
logger.debug(url_request_proxy)
data = None
result = None
while not result or 'Sto traducendo' in result.content:
while not data or 'Sto traducendo' in data:
time.sleep(0.5)
result = requests.get(
url_request_proxy,
timeout=20,
headers={'User-Agent': 'android'}
)
data = result.content.decode('utf-8', 'ignore')
if not PY3:
data = data.encode('utf-8')
logger.debug()
data = result.content.decode('utf-8', 'ignore').encode('utf-8')
data = re.sub('\s(\w+)=(?!")([^<>\s]+)', r' \1="\2"', data)
data = re.sub('https://translate\.googleusercontent\.com/.*?u=(.*?)&amp;usg=[A-Za-z0-9_-]+',
'\\1', data)
data = re.sub('https://translate\.googleusercontent\.com/.*?u=(.*?)&amp;usg=[A-Za-z0-9_-]+', '\\1', data)
return {'url': url.strip(), 'result': result, 'data': data.replace('&amp;', '&')}
except Exception as e: