feat: concurrent ftp listings

This commit is contained in:
2024-05-31 00:56:12 +02:00
parent 30b4b94d2e
commit 89c30a7d7d

View File

@@ -106,10 +106,14 @@ class FTP(DataBackend):
paths_listed = {} paths_listed = {}
def _connect(self): def _connect(self, individual_connection=False):
if self.ftp: if self.ftp and not individual_connection:
try:
self.ftp.voidcmd("NOOP")
return self.ftp return self.ftp
except ssl.SSLError:
pass # reconnect
if self.server.startswith("ftp://"): if self.server.startswith("ftp://"):
tls = False tls = False
@@ -155,6 +159,7 @@ class FTP(DataBackend):
# cache dir is automatically set # # cache dir is automatically set #
self.cache_dir = None self.cache_dir = None
if not individual_connection:
self.ftp = ftp self.ftp = ftp
return ftp return ftp
@@ -213,7 +218,7 @@ class FTP(DataBackend):
return local_file return local_file
def list(self, path, fullpaths=False): def list(self, path, fullpaths=False, new_connection=False):
# prepend root dir if not given # # prepend root dir if not given #
fullpath = path fullpath = path
@@ -231,14 +236,20 @@ class FTP(DataBackend):
paths = self.paths_listed[fullpath] paths = self.paths_listed[fullpath]
#print("Retrieved paths from cache:", fullpath, paths) #print("Retrieved paths from cache:", fullpath, paths)
else: else:
ftp = self._connect() ftp = self._connect(individual_connection=new_connection)
print("Listing previously unlisted path: {}".format(fullpath))
self.paths_listed.update({fullpath: []}) # in case dir does not exit self.paths_listed.update({fullpath: []}) # in case dir does not exit
paths = ftp.nlst(fullpath) paths = ftp.nlst(fullpath)
self.paths_listed.update({fullpath: paths}) self.paths_listed.update({fullpath: paths})
if new_connection: # close individual connections
ftp.close()
if not fullpaths: if not fullpaths:
return paths return paths
return [ os.path.join(path, filename).replace("\\", "/") for filename in paths ] return [ os.path.join(path, filename).replace("\\", "/") for filename in paths ]
except ftplib.error_perm as e: except ftplib.error_perm as e:
if "550 No files found" in str(e): if "550 No files found" in str(e):
print("No files in this directory: {}".format(fullpath)) print("No files in this directory: {}".format(fullpath))
@@ -254,15 +265,28 @@ class FTP(DataBackend):
local_meta_file_list = [] local_meta_file_list = []
root_elements = self.list(self.remote_root_dir) root_elements = self.list(self.remote_root_dir)
for s in root_elements: import concurrent.futures
with concurrent.futures.ThreadPoolExecutor(max_workers=os.cpu_count()*5) as executor:
software_dir_contents = list(executor.map(
lambda s: self.list(s, fullpaths=True, new_connection=True), root_elements))
# this caches the paths, don't remove it #
cache_list = [os.path.join(s, "registry_files") for s in root_elements ]
cache_list += [os.path.join(s, "pictures") for s in root_elements ]
picture_contents_async_cache = list(executor.map(
lambda s: self.list(s, fullpaths=True, new_connection=True), cache_list))
for files in software_dir_contents:
#print(s) #print(s)
files = self.list(s, fullpaths=True) #files = self.list(s, fullpaths=True)
#print(files) print(files)
for f in files: for f in files:
if f.endswith("meta.yaml"): if f.endswith("meta.yaml"):
meta_file_content = self.get(f, cache_dir="cache", return_content=True) meta_file_content = self.get(f, cache_dir="cache", return_content=True)
#print(meta_file_content) #print(meta_file_content)
local_meta_file_list.append(f) local_meta_file_list.append(f)
return list(filter(lambda x: not x.invalid, [ software.Software(meta_file, self, self.progress_bar_wrapper) return list(filter(lambda x: not x.invalid,
[ software.Software(meta_file, self, self.progress_bar_wrapper)
for meta_file in local_meta_file_list ])) for meta_file in local_meta_file_list ]))