1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
|
from bencodepy import decode
from enum import Enum
from hashlib import sha1, sha256
from os import scandir
from re import search, IGNORECASE
class Type(Enum):
    """Metadata flavour that ``Torrent.parse`` detected for a torrent.

    Every member carries a plain ``int`` value. Earlier revisions had stray
    trailing commas after the first three members, which silently turned
    their values into one-element tuples (``(0,)``, ``(1,)``, ``(2,)``)
    while ``HYBRID`` stayed a bare ``3``; lookups by value such as
    ``Type(1)`` therefore failed.
    """

    # Initial state of a Torrent before anything has been parsed.
    UNDEF = 0
    # Info dictionary has a b'files' list (or is a plain single-file torrent).
    V1 = 1
    # Info dictionary has a b'file tree' and no b'files' list.
    V2 = 2
    # Both layouts are present, or a b'file tree' torrent also had b'pieces'.
    HYBRID = 3
class Torrent():
    """Parsed view of a single bencoded torrent file.

    After ``parse()`` the instance exposes digests of the raw info
    dictionary, the detected ``Type`` and a nested file listing that can be
    searched with ``matches()`` / ``matching_files()``.
    """

    def __init__(self):
        """Create an empty torrent; call ``file()`` or ``parse()`` to fill it."""
        # Digests of the raw bytes sliced out as the info dictionary.
        self.sha1 = b''
        self.sha256 = b''
        # Decoded bencode dictionary. parse() removes b'pieces', b'files'
        # and b'file tree' from its b'info' sub-dictionary.
        self.dict = {}
        # Nested mapping: name (bytes) -> size for a file, dict for a directory.
        self.files = {}
        self.type = Type.UNDEF
        # "|"-separated searchable text, built lazily by matches(cache=True).
        self.cache = None
        # True when the info dictionary contained a b'pieces' entry.
        self.hadpieces = False

    def file(self, f):
        """Read the file at path *f* and parse its contents.

        The handle is closed before parsing starts, so a parse error never
        leaves the file open.
        """
        with open(f, "rb") as handle:
            data = handle.read()
        self.parse(data)

    @staticmethod
    def _is_padding(entry):
        """Return True when a b'files' entry looks like a padding file."""
        attr = entry.get(b'attr')
        if attr is not None and b'p' in attr:
            return True
        path = entry.get(b'path')
        joined = b'/'.join(path)
        return (
            b'padding.file' in joined
            # List membership: a path component exactly equal to b'.pad'.
            or b'.pad' in path
            or b'_____padding_file_' in joined
        )

    @staticmethod
    def _insert_file(tree, path, length):
        """Store *length* in nested dict *tree* under the components of *path*."""
        leaf = path[-1]  # IndexError on an empty path, as before
        node = tree
        for directory in path[:-1]:
            node = node.setdefault(directory, {})
        node[leaf] = length

    @staticmethod
    def _file_tree(names):
        """Convert a b'file tree' node into the nested ``files`` layout.

        A node holding the key ``b''`` is a file and collapses to its
        b'length'; any other node is a directory and is converted
        recursively.
        """
        tree = {}
        for key, child in names.items():
            if key == b'':
                return child.get(b'length')
            tree[key] = Torrent._file_tree(child)
        return tree

    def parse(self, b):
        """Parse raw torrent bytes *b* and populate this instance.

        State left over from a previous ``parse()`` call (file listing,
        type, search cache, pieces flag) is discarded first.

        Raises whatever ``decode`` raises for malformed input, and
        ``TypeError`` / ``AttributeError`` when the expected dictionaries are
        missing.
        """
        self.files = {}
        self.type = Type.UNDEF
        self.cache = None
        self.hadpieces = False

        # Hash the raw bytes between the b'4:info' key and the last
        # b'6:sourced2:ip' marker instead of re-encoding the decoded dict.
        # NOTE(review): presumably the writer of these files appends a
        # b'source' dictionary right after the info dictionary - confirm.
        infodict = b[b.find(b'4:info') + 6:b.rfind(b'6:sourced2:ip')]
        self.sha1 = sha1(infodict).digest()
        self.sha256 = sha256(infodict).digest()
        self.dict = decode(b)
        info = self.dict.get(b'info')

        if b'pieces' in info:
            # The piece hashes are dropped; only their presence is recorded.
            info.pop(b'pieces')
            self.hadpieces = True

        if b'files' in info:
            self.type = Type.V1
            for entry in info.get(b'files'):
                if self._is_padding(entry):
                    continue
                self._insert_file(
                    self.files, entry.get(b'path'), entry.get(b'length')
                )
            info.pop(b'files')

        # some torrents have broken file trees so we use files first
        if b'file tree' in info:
            if self.type is Type.V1:
                self.type = Type.HYBRID
            else:
                self.type = Type.V2
                self.files = self._file_tree(info.get(b'file tree'))
            info.pop(b'file tree')

        if not self.files:
            # No usable listing: treat it as a single-file torrent.
            self.type = Type.V1
            self.files[info.get(b'name')] = info.get(b'length')

        first_filename = next(iter(self.files))
        if self.type == Type.V2 and self.hadpieces:
            self.type = Type.HYBRID
        if len(self.files) == 1 and self.files[first_filename] == {}:
            print("fixed bad single file torrent", self.sha1.hex())
            self.files[first_filename] = info.get(b'length')

    def paths(self):
        """Yield ``(path, size)`` for every file in ``self.files``.

        *path* is a fresh list of byte-string components from the root down
        to the file. Any value that is not a dict is treated as a file, so a
        file whose size is missing is yielded with ``None`` instead of
        aborting the walk.
        """
        def walk(node, prefix):
            for name, value in node.items():
                current = prefix + [name]
                if isinstance(value, dict):
                    yield from walk(value, current)
                else:
                    yield current, value

        yield from walk(self.files, [])

    def matches(self, r, cache=False):
        """Return True when regex *r* matches this torrent, ignoring case.

        The source IP (when a b'source' dictionary is present), the torrent
        name and every file path are searched.

        Args:
            r: regular expression as a string; flags are passed to
                ``re.search``, which rejects pre-compiled patterns.
            cache: when true, all searchable text is joined into
                ``self.cache`` on the first call and later calls search that
                single string. When false, the cache is neither read nor
                written and the search stops at the first hit.

        Returns:
            bool
        """
        if cache and self.cache:
            return search(r, self.cache, IGNORECASE) is not None

        raw_name = self.dict.get(b'info').get(b'name')
        try:
            name = raw_name.decode()
        except UnicodeDecodeError:
            name = raw_name.decode("iso-8859-2")
        except AttributeError:
            # Name is absent or not bytes; fall back to its str() form.
            name = str(raw_name)

        # Tolerate torrents without the b'source' dictionary.
        raw_ip = (self.dict.get(b'source') or {}).get(b'ip')
        ip = raw_ip.decode() if raw_ip is not None else ""

        found = False
        if ip and search(r, ip, IGNORECASE):
            if not cache:
                return True
            found = True
        if search(r, name, IGNORECASE):
            if not cache:
                return True
            found = True

        parts = [ip, name]
        for path, _size in self.paths():
            joined = b'/'.join(path)
            try:
                text = joined.decode()
            except UnicodeDecodeError:
                text = joined.decode("iso-8859-2")
            parts.append(text)
            if search(r, text, IGNORECASE):
                if not cache:
                    return True
                found = True

        if cache:
            # Layout: "ip|name|path|path|...|" with a trailing separator.
            self.cache = "|".join(parts) + "|"
        return found

    def matching_files(self, r, decode=False):
        """Return the sub-tree of ``self.files`` whose names match regex *r*.

        A matching file maps to its size. A directory is included when its
        own name matches (as ``{}`` if nothing below it matches) or when
        anything below it matches (as the filtered sub-tree).

        Args:
            r: regular expression string, searched case-insensitively.
            decode: when true, keys of the result are decoded ``str`` names
                instead of the original ``bytes``.
        """
        def collect(directory):
            selected = {}
            for name, content in directory.items():
                try:
                    decoded = name.decode()
                except UnicodeDecodeError:
                    # TODO we could try detecting the encoding
                    decoded = name.decode("iso-8859-2")
                key = decoded if decode else name
                if search(r, decoded, IGNORECASE):
                    selected[key] = content if isinstance(content, int) else {}
                if isinstance(content, dict):
                    below = collect(content)
                    if below:
                        selected[key] = below
            return selected

        return collect(self.files)

    def __repr__(self):
        """Return the instance attribute dictionary rendered as a string."""
        return str(self.__dict__)

    def __hash__(self):
        """Hash by SHA-1 digest when one is set, otherwise by identity.

        No ``__eq__`` is defined, so equality stays identity-based.
        """
        if len(self.sha1):
            return int.from_bytes(self.sha1, byteorder="big")
        return id(self)
def glob(d):
    """Parse every ``*.torrent`` file located directly inside directory *d*.

    Sub-directories are not descended into. A file that cannot be read or
    parsed is reported on stdout and skipped, so one broken torrent never
    aborts the scan.

    Returns:
        dict mapping each torrent's SHA-1 digest (``bytes``) to its
        ``Torrent`` instance; a later file with the same digest replaces an
        earlier one.
    """
    found = {}
    # Use scandir as a context manager so the directory handle is released
    # even if the loop is interrupted before the iterator is exhausted.
    with scandir(d) as entries:
        for entry in entries:
            try:
                if entry.name.endswith(".torrent") and entry.is_file():
                    torrent = Torrent()
                    torrent.file(entry.path)
                    found[torrent.sha1] = torrent
            except Exception as e:
                # Deliberate best-effort boundary: report and carry on.
                print(f"skipping broken torrent {entry.name} due to exception:")
                print(e)
    return found
|