from ftplib import FTP, Error, error_perm
from socket import timeout
import uuid
from ..spec import AbstractBufferedFile, AbstractFileSystem
from ..utils import infer_storage_options
class FTPFileSystem(AbstractFileSystem):
    """A filesystem over classic FTP.

    A single ``ftplib.FTP`` control connection is opened when the instance
    is created and is reused for every operation. Directory listings are
    memoised in ``self.dircache``, keyed by the normalised directory path.
    """

    root_marker = "/"
    cachable = False

    def __init__(
        self,
        host,
        port=21,
        username=None,
        password=None,
        acct=None,
        block_size=None,
        tempdir="/tmp",
        timeout=30,
        **kwargs
    ):
        """
        You can use _get_kwargs_from_urls to get some kwargs from
        a reasonable FTP url.

        Authentication will be anonymous if username/password are not
        given.

        Parameters
        ----------
        host: str
            The remote server name/ip to connect to
        port: int
            Port to connect with
        username: str or None
            If authenticating, the user's identifier
        password: str or None
            User's password on the server, if using
        acct: str or None
            Some servers also need an "account" string for auth
        block_size: int or None
            If given, the read-ahead or write buffer size.
            Defaults to 2 ** 16 bytes.
        tempdir: str
            Directory on remote to put temporary files when in a transaction
        timeout: int
            Timeout handed to ``ftplib.FTP`` for the connection
        """
        super(FTPFileSystem, self).__init__(**kwargs)
        self.host = host
        self.port = port
        self.tempdir = tempdir
        self.cred = username, password, acct
        self.timeout = timeout
        if block_size is not None:
            self.blocksize = block_size
        else:
            self.blocksize = 2 ** 16
        self._connect()

    def _connect(self):
        """(Re)create ``self.ftp``, connect to the host and log in."""
        self.ftp = FTP(timeout=self.timeout)
        self.ftp.connect(self.host, self.port)
        self.ftp.login(*self.cred)

    @classmethod
    def _strip_protocol(cls, path):
        """Return the path part of ``path`` as ``"/" + <path without
        leading/trailing slashes>``; the root becomes ``"/"``."""
        return "/" + infer_storage_options(path)["path"].lstrip("/").rstrip("/")

    @staticmethod
    def _get_kwargs_from_urls(urlpath):
        """Return the options inferred from ``urlpath`` minus the ``path``
        and ``protocol`` entries."""
        out = infer_storage_options(urlpath)
        out.pop("path", None)
        out.pop("protocol", None)
        return out

    def invalidate_cache(self, path=None):
        """Forget the cached listing of ``path``, or every cached listing
        when ``path`` is None. ``path`` is used as given (not normalised)."""
        if path is not None:
            self.dircache.pop(path, None)
        else:
            self.dircache.clear()

    def ls(self, path, detail=True):
        """List the entries of a directory.

        Parameters
        ----------
        path: str
            Directory to list (or a file, see below).
        detail: bool
            If True, return the list of per-entry detail dicts; otherwise
            return the sorted list of entry names as reported by the server.

        Raises
        ------
        FileNotFoundError
            If the listing fails and ``path`` cannot be resolved via
            ``info`` either.
        """
        path = self._strip_protocol(path)
        out = []
        if path not in self.dircache:
            try:
                try:
                    out = [
                        (fn, details)
                        for (fn, details) in self.ftp.mlsd(path)
                        if fn not in [".", ".."]
                        and details["type"] not in ["pdir", "cdir"]
                    ]
                except error_perm:
                    out = _mlsd2(self.ftp, path)  # Not platform independent
                # Use a separate prefix for forming names so that ``path``
                # itself is left intact: the listing must be cached under
                # the same key that the lookup above uses ("/" for the root).
                prefix = "" if path == "/" else path
                for fn, details in out:
                    details["name"] = "/".join([prefix, fn.lstrip("/")])
                    if details["type"] == "file":
                        details["size"] = int(details["size"])
                    else:
                        details["size"] = 0
                self.dircache[path] = out
            except Error:
                # Listing failed: ``path`` may be a plain file, in which
                # case it is reported as a single-entry listing.
                try:
                    info = self.info(path)
                    if info["type"] == "file":
                        out = [(path, info)]
                except (Error, IndexError):
                    raise FileNotFoundError(path)
        files = self.dircache.get(path, out)
        if not detail:
            return sorted([fn for fn, details in files])
        return [details for fn, details in files]

    def info(self, path, **kwargs):
        """Return the detail dict of ``path``, looked up in its parent's
        listing.

        Raises
        ------
        FileNotFoundError
            If the parent listing has no entry named ``path``.
        """
        path = self._strip_protocol(path)
        if path == "/":
            # The root has no entry in any listing; answering directly also
            # stops ls() <-> info() recursing when listing the root fails.
            # "dir" matches the type value used for directories by ls().
            return {"name": "/", "size": 0, "type": "dir"}
        files = self.ls(self._parent(path).lstrip("/"), True)
        for details in files:
            if details["name"] == path:
                return details
        raise FileNotFoundError(path)

    def _open(
        self,
        path,
        mode="rb",
        block_size=None,
        cache_options=None,
        autocommit=True,
        **kwargs
    ):
        """Create an ``FTPFile`` for ``path``; ``block_size`` falls back to
        the filesystem's ``blocksize`` when not given."""
        path = self._strip_protocol(path)
        block_size = block_size or self.blocksize
        return FTPFile(
            self,
            path,
            mode=mode,
            block_size=block_size,
            tempdir=self.tempdir,
            autocommit=autocommit,
            cache_options=cache_options,
        )

    def _rm(self, path):
        """Delete a single remote file and drop its parent's cached listing."""
        path = self._strip_protocol(path)
        self.ftp.delete(path)
        # _parent() gives "/" for root-level files, matching the cache key
        # used by ls() (a bare rsplit would give "").
        self.invalidate_cache(self._parent(path))

    def mkdir(self, path, **kwargs):
        """Create a remote directory and drop its parent's cached listing."""
        path = self._strip_protocol(path)
        self.ftp.mkd(path)
        self.invalidate_cache(self._parent(path))

    def rmdir(self, path):
        """Remove a remote directory and drop the cached listings that
        mention it (its own and its parent's)."""
        path = self._strip_protocol(path)
        self.ftp.rmd(path)
        self.invalidate_cache(path)
        self.invalidate_cache(self._parent(path))

    def mv(self, path1, path2, **kwargs):
        """Rename ``path1`` to ``path2`` on the server and drop the cached
        listings of both parent directories."""
        path1 = self._strip_protocol(path1)
        path2 = self._strip_protocol(path2)
        self.ftp.rename(path1, path2)
        self.invalidate_cache(self._parent(path1))
        self.invalidate_cache(self._parent(path2))

    def __del__(self):
        # ``ftp`` is missing if construction failed before _connect() ran.
        ftp = getattr(self, "ftp", None)
        if ftp is not None:
            ftp.close()
class TransferDone(Exception):
    """Raised from a download callback to stop a transfer early.

    Purely internal: it signals that enough bytes have been received.
    """
class FTPFile(AbstractBufferedFile):
    """Interact with a remote FTP file with read/write buffering"""

    def __init__(
        self,
        fs,
        path,
        mode="rb",
        block_size="default",
        autocommit=True,
        cache_type="readahead",
        cache_options=None,
        **kwargs
    ):
        """
        Parameters
        ----------
        fs: FTPFileSystem
            Filesystem whose ``ftp`` connection is used for transfers
        path: str
            Remote path of the file
        mode: str
            File mode, passed on to the base class
        block_size: int or "default"
            Buffer size, passed on to the base class
        autocommit: bool
            If False, ``path`` is remembered as ``self.target`` and the file
            is redirected to a uniquely named path under
            ``kwargs["tempdir"]`` (required in that case); ``commit`` later
            moves it to the target.
        cache_type, cache_options:
            Passed on to the base class
        """
        super().__init__(
            fs,
            path,
            mode=mode,
            block_size=block_size,
            autocommit=autocommit,
            cache_type=cache_type,
            cache_options=cache_options,
            **kwargs
        )
        if not autocommit:
            self.target = self.path
            self.path = "/".join([kwargs["tempdir"], str(uuid.uuid4())])

    def commit(self):
        """Move the temporary file to its final target path."""
        self.fs.mv(self.path, self.target)

    def discard(self):
        """Remove the (temporary) file without committing it."""
        self.fs.rm(self.path)

    def _fetch_range(self, start, end):
        """Get bytes between given byte limits

        Implemented by raising an exception in the fetch callback when the
        number of bytes received reaches the requested amount.

        Will fail if the server does not respect the REST command on
        retrieve requests.

        Returns
        -------
        bytes: at most ``end - start`` bytes, fewer if the transfer ends
        first.
        """
        wanted = end - start
        chunks = []
        received = 0

        def callback(data):
            nonlocal received
            received += len(data)
            if received > wanted:
                # ``wanted - received`` is negative: trim the excess tail.
                chunks.append(data[: wanted - received])
                raise TransferDone
            chunks.append(data)
            if received == wanted:
                raise TransferDone

        try:
            self.fs.ftp.retrbinary(
                "RETR %s" % self.path,
                blocksize=self.blocksize,
                rest=start,
                callback=callback,
            )
        except TransferDone:
            try:
                # Stop the transfer: we have enough bytes for this block.
                self.fs.ftp.abort()
                self.fs.ftp.voidresp()
            except (Error, timeout):
                # Whether the server stalled or answered the abort with an
                # unexpected reply, the control connection can no longer be
                # trusted, so start over with a fresh one.
                self.fs._connect()
        return b"".join(chunks)

    def _upload_chunk(self, final=False):
        """Send the current buffer to the server with STOR, restarting at
        ``self.offset``. Always returns True."""
        self.buffer.seek(0)
        self.fs.ftp.storbinary(
            "STOR " + self.path, self.buffer, blocksize=self.blocksize, rest=self.offset
        )
        return True
def _mlsd2(ftp, path="."):
    """
    Fall back to using `dir` instead of `mlsd` if not supported.

    This parses a Linux style `ls -l` response to `dir`, but the response may
    be platform dependent.

    Lines with fewer than nine whitespace-separated fields (such as a
    ``total N`` header or a blank line) are skipped, as are the ``.`` and
    ``..`` entries. For symlink lines (mode starting with ``l``) the name is
    the part before ``" -> "``.

    Parameters
    ----------
    ftp: ftplib.FTP
    path: str
        Expects to be given path, but defaults to ".".

    Returns
    -------
    list of (name, details) tuples, where details holds the string fields
    "modify", "unix.owner", "unix.group", "unix.mode", "size" and a "type"
    of "dir" or "file".
    """
    lines = []
    minfo = []
    ftp.dir(path, lines.append)
    for line in lines:
        # maxsplit=8 keeps a name containing spaces whole as the ninth field
        fields = line.split(None, 8)
        if len(fields) < 9:
            continue
        mode = fields[0]
        name = fields[8]
        if mode[0] == "l" and " -> " in name:
            # "link -> target": the directory entry is the link itself
            name = name.split(" -> ", 1)[0]
        if name in (".", ".."):
            continue
        details = {
            "modify": " ".join(fields[5:8]),
            "unix.owner": fields[2],
            "unix.group": fields[3],
            "unix.mode": mode,
            "size": fields[4],
            "type": "dir" if mode[0] == "d" else "file",
        }
        minfo.append((name, details))
    return minfo