feat: concurrent ftp listings

This commit is contained in:
2024-05-31 00:56:12 +02:00
parent 30b4b94d2e
commit 89c30a7d7d

View File

@@ -106,10 +106,14 @@ class FTP(DataBackend):
paths_listed = {}
def _connect(self):
def _connect(self, individual_connection=False):
if self.ftp:
return self.ftp
if self.ftp and not individual_connection:
try:
self.ftp.voidcmd("NOOP")
return self.ftp
except ssl.SSLError:
pass # reconnect
if self.server.startswith("ftp://"):
tls = False
@@ -155,7 +159,8 @@ class FTP(DataBackend):
# cache dir is automatically set #
self.cache_dir = None
self.ftp = ftp
if not individual_connection:
self.ftp = ftp
return ftp
@@ -213,7 +218,7 @@ class FTP(DataBackend):
return local_file
def list(self, path, fullpaths=False):
def list(self, path, fullpaths=False, new_connection=False):
# prepend root dir if not given #
fullpath = path
@@ -231,14 +236,20 @@ class FTP(DataBackend):
paths = self.paths_listed[fullpath]
#print("Retrieved paths from cache:", fullpath, paths)
else:
ftp = self._connect()
ftp = self._connect(individual_connection=new_connection)
print("Listing previously unlisted path: {}".format(fullpath))
self.paths_listed.update({fullpath: []}) # in case dir does not exit
paths = ftp.nlst(fullpath)
self.paths_listed.update({fullpath: paths})
if new_connection: # close individual connections
ftp.close()
if not fullpaths:
return paths
return [ os.path.join(path, filename).replace("\\", "/") for filename in paths ]
except ftplib.error_perm as e:
if "550 No files found" in str(e):
print("No files in this directory: {}".format(fullpath))
@@ -254,15 +265,28 @@ class FTP(DataBackend):
local_meta_file_list = []
root_elements = self.list(self.remote_root_dir)
for s in root_elements:
import concurrent.futures
with concurrent.futures.ThreadPoolExecutor(max_workers=os.cpu_count()*5) as executor:
software_dir_contents = list(executor.map(
lambda s: self.list(s, fullpaths=True, new_connection=True), root_elements))
# this caches the paths, don't remove it #
cache_list = [os.path.join(s, "registry_files") for s in root_elements ]
cache_list += [os.path.join(s, "pictures") for s in root_elements ]
picture_contents_async_cache = list(executor.map(
lambda s: self.list(s, fullpaths=True, new_connection=True), cache_list))
for files in software_dir_contents:
#print(s)
files = self.list(s, fullpaths=True)
#print(files)
#files = self.list(s, fullpaths=True)
print(files)
for f in files:
if f.endswith("meta.yaml"):
meta_file_content = self.get(f, cache_dir="cache", return_content=True)
#print(meta_file_content)
local_meta_file_list.append(f)
return list(filter(lambda x: not x.invalid, [ software.Software(meta_file, self, self.progress_bar_wrapper)
for meta_file in local_meta_file_list ]))
return list(filter(lambda x: not x.invalid,
[ software.Software(meta_file, self, self.progress_bar_wrapper)
for meta_file in local_meta_file_list ]))