addon/lib/githash.py
Latest commit 3fb9b068d9 by mac12m99: KoD 0.4 (#57)
* fix next page

* testing new filmontv

* Wstream quick fix, no resolution displayed :(

* new filmontv

* now regex is ok

* fix .po files

* added netlovers

* working on filmontv

* fix debriders

* new updater

* updater

* fix crash

* fix updater and re-add dev mode

* new url eurostreaming

* Delete netlovers.py

* Delete netlovers.json

* removed netlovers from the menu

* fix eurostreaming: season number and newest (#50)

* fix channel

* fix newest

* fix episode number

* cleanup

* cleanup 2

* fix updater crash on windows

* Fix Animeworld

* New Autorenumber

* initial background downloader support

* oops

* Update channels.json

* Update channels.json

* fix openload

* move json update to coexist with updater

* disable json url updates

* fix typo

* fix typo 2

* Add files via upload

* Add files via upload

* fix autoplay in community channels

* fix toonitalia

* Fix Toonitalia

* workaround serietvsubita

* New automatic renumbering

* Fix for automatic renumbering

* workaround updater

* Fix on air

* oops

* Customized the "Oggi in TV" (Today on TV) section

* Added show times to the Oggi in TV section

* added bit.ly (#56)

* added bit.ly

* Added homepage customization

* Revert "initial background downloader support"

This reverts commit f676ab0f

* KoD 0.4
2019-06-30 10:35:48 +02:00


# https://github.com/chris3torek/scripts/blob/master/githash.py
#! /usr/bin/env python
"""
Compute git hash values.
This is meant to work with both Python2 and Python3; it
has been tested with Python2.7 and Python 3.4.
"""
from __future__ import print_function
import argparse
import os
import stat
import sys
from hashlib import sha1
if sys.version_info[0] >= 3:
    # Python3 encodes "impossible" strings using UTF-8 and
    # surrogate escapes. For instance, a file named <\300><\300>eek
    # (where \300 is octal 300, 0xc0 hex) turns into '\udcc0\udcc0eek'.
    # This is how we can losslessly re-encode this as a byte string:
    path_to_bytes = lambda path: path.encode('utf8', 'surrogateescape')
    # If we wish to print one of these byte strings, we have a
    # problem, because they're not valid UTF-8. This method
    # treats the encoded bytes as pass-through, which is
    # probably the best we can do.
    bpath_to_str = lambda path: path.decode('unicode_escape')
else:
    # Python2 just uses byte strings, so OS paths are already
    # byte strings and we return them unmodified.
    path_to_bytes = lambda path: path
    bpath_to_str = lambda path: path
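# Illustration (not part of the original script): with the Python3
# definitions above, a non-UTF-8 directory entry that os.listdir() reports
# as '\udcc0\udcc0eek' round-trips losslessly:
#   path_to_bytes('\udcc0\udcc0eek') == b'\xc0\xc0eek'
# i.e. exactly the raw bytes that git itself would hash for that name.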
def strmode(mode):
    """
    Turn internal mode (octal with leading 0s suppressed) into
    print form (i.e., left pad => right justify with 0s as needed).
    """
    return mode.rjust(6, '0')
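# Illustration (not part of the original script): strmode('40000') returns
# '040000', the zero-padded form that `git ls-tree` prints for a sub-tree.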
#
def classify(path):
    """
    Return git classification of a path (as both mode,
    100644/100755 etc, and git object type, i.e., blob vs tree).
    Also throw in st_size field since we want it for file blobs.
    """
    # We need the X bit of regular files for the mode, so
    # might as well just use lstat rather than os.isdir().
    st = os.lstat(path)
    if stat.S_ISLNK(st.st_mode):
        gitclass = 'blob'
        mode = '120000'
    elif stat.S_ISDIR(st.st_mode):
        gitclass = 'tree'
        mode = '40000'  # note: no leading 0!
    elif stat.S_ISREG(st.st_mode):
        # 100755 if any execute permission bit set, else 100644
        gitclass = 'blob'
        mode = '100755' if (st.st_mode & 0o111) != 0 else '100644'
    else:
        raise ValueError('un-git-able file system entity %s' % path)
    return mode, gitclass, st.st_size
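# Illustration (hypothetical paths, not in the original): for a plain,
# non-executable file classify() returns something like
# ('100644', 'blob', 1234), while a directory yields ('40000', 'tree', ...).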
#
def blob_hash(stream, size):
    """
    Return (as hash instance) the hash of a blob,
    as read from the given stream.
    """
    hasher = sha1()
    hasher.update(('blob %u\0' % size).encode('ascii'))
    nread = 0
    while True:
        # We read just 64K at a time to be kind to
        # runtime storage requirements.
        data = stream.read(65536)
        if data == b'':
            break
        nread += len(data)
        hasher.update(data)
    if nread != size:
        raise ValueError('%s: expected %u bytes, found %u bytes' %
                         (stream.name, size, nread))
    return hasher
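# Sanity check (illustrative, not in the original script): hashing an empty
# stream reproduces git's well-known empty-blob id, the same value that
# `git hash-object` prints for an empty file:
#   import io
#   blob_hash(io.BytesIO(b''), 0).hexdigest()
#   # -> 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391'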
def symlink_hash(path):
    """
    Return (as hash instance) the hash of a symlink.
    Caller must use hexdigest() or digest() as needed on
    the result.
    """
    hasher = sha1()
    data = path_to_bytes(os.readlink(path))
    hasher.update(('blob %u\0' % len(data)).encode('ascii'))
    hasher.update(data)
    return hasher
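# Note (added for clarity): a symlink is hashed as a blob whose content is
# the link target itself, so this matches `git hash-object --stdin` fed the
# target path with no trailing newline.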
def tree_hash(path):
    """
    Return the hash of a tree. We need to know all
    files and sub-trees. Since order matters, we must
    walk the sub-trees and files in their natural (byte) order,
    so we cannot use os.walk.
    This is also slightly defective in that it does not know
    about .gitignore files (we can't just read them since git
    retains files that are in the index, even if they would be
    ignored by a .gitignore directive).
    We also do not (cannot) deal with submodules here.
    """
    # Annoyingly, the tree object encodes its size, which requires
    # two passes, one to find the size and one to compute the hash.
    contents = os.listdir(path)
    tsize = 0
    to_skip = ('.', '..', '.git', '.DS_Store', '.idea', '.directory')
    to_skip_ext = ('pyo', 'pyc')
    pass1 = []
    for entry in contents:
        # Skip git metadata and editor/OS leftovers, plus compiled Python
        # files (compare against the last dot-separated component, so names
        # with more than one dot are handled correctly).
        if entry in to_skip:
            continue
        if '.' in entry and entry.rsplit('.', 1)[-1] in to_skip_ext:
            continue
        fullpath = os.path.join(path, entry)
        mode, gitclass, esize = classify(fullpath)
        # git stores as mode<sp><entry-name>\0<digest-bytes>
        encoded_form = path_to_bytes(entry)
        tsize += len(mode) + 1 + len(encoded_form) + 1 + 20
        pass1.append((fullpath, mode, gitclass, esize, encoded_form))
    # Git's cache sorts foo/bar before fooXbar but after foo-bar,
    # because it actually stores foo/bar as the literal string
    # "foo/bar" in the index, rather than using recursion. That is,
    # a directory name should sort as if it ends with '/' rather than
    # with '\0'. Sort pass1 contents with funky sorting.
    #
    # (i[4] is the utf-8 encoded form of the name, i[1] is the
    # mode which is '40000' for directories.)
    pass1.sort(key=lambda i: i[4] + b'/' if i[1] == '40000' else i[4])
    hasher = sha1()
    hasher.update(('tree %u\0' % tsize).encode('ascii'))
    for (fullpath, mode, gitclass, esize, encoded_form) in pass1:
        sub_hash = generic_hash(fullpath, mode, esize)
        # Annoyingly, git stores the tree hash as 20 bytes, rather
        # than 40 ASCII characters. This is why we return the
        # hash instance (so we can use .digest() directly).
        # The format here is <mode><sp><path>\0<raw-hash>.
        hasher.update(mode.encode('ascii'))
        hasher.update(b' ')
        hasher.update(encoded_form)
        hasher.update(b'\0')
        hasher.update(sub_hash.digest())
    return hasher
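# Note (added for clarity): for a clean, fully committed checkout this
# should reproduce what `git write-tree` would print, except where the
# to_skip/to_skip_ext filters above deliberately ignore entries that git
# itself might still track.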
def generic_hash(path, mode, size):
    """
    Hash an object based on its mode.
    """
    if mode == '120000':
        hasher = symlink_hash(path)
    elif mode == '40000':
        hasher = tree_hash(path)
    else:
        # 100755 if any execute permission bit set, else 100644
        with open(path, 'rb') as stream:
            hasher = blob_hash(stream, size)
    return hasher
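# Minimal usage sketch (assumed wrapper, not part of this excerpt; the
# script's actual CLI entry point is not shown here):
#
#   def hash_path(path):
#       mode, gitclass, size = classify(path)
#       return generic_hash(path, mode, size).hexdigest()
#
#   print(hash_path('addon/lib'))  # git-style tree hash of a directory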