#include "xfs_platform.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_quota.h"
#include "xfs_bmap_util.h"
#include "xfs_reflink.h"
#include "xfs_trace.h"
#include "xfs_exchrange.h"
#include "xfs_exchmaps.h"
#include "xfs_sb.h"
#include "xfs_icache.h"
#include "xfs_log.h"
#include "xfs_rtbitmap.h"
#include <linux/fsnotify.h>
/*
 * Take the ILOCK exclusively on the two inodes of an exchange and, if a
 * transaction was supplied, join the inodes to it.  The inodes may be one and
 * the same, in which case it is locked and joined exactly once.
 */
void
xfs_exchrange_ilock(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip1,
	struct xfs_inode	*ip2)
{
	/* Single-inode case: one lock, at most one join. */
	if (ip1 == ip2) {
		xfs_ilock(ip1, XFS_ILOCK_EXCL);
		if (tp)
			xfs_trans_ijoin(tp, ip1, 0);
		return;
	}

	xfs_lock_two_inodes(ip1, XFS_ILOCK_EXCL, ip2, XFS_ILOCK_EXCL);
	if (!tp)
		return;

	/*
	 * Join with lock flags of 0; the callers in this file drop the ILOCKs
	 * themselves through xfs_exchrange_iunlock().
	 */
	xfs_trans_ijoin(tp, ip1, 0);
	xfs_trans_ijoin(tp, ip2, 0);
}
/*
 * Release the exclusive ILOCKs taken by xfs_exchrange_ilock().  When the two
 * inodes differ, ip2 is unlocked before ip1.
 */
void
xfs_exchrange_iunlock(
	struct xfs_inode	*ip1,
	struct xfs_inode	*ip2)
{
	if (ip1 == ip2) {
		xfs_iunlock(ip1, XFS_ILOCK_EXCL);
		return;
	}

	xfs_iunlock(ip2, XFS_ILOCK_EXCL);
	xfs_iunlock(ip1, XFS_ILOCK_EXCL);
}
/*
 * Run xfs_exchmaps_estimate() on @req while holding the ILOCK of both inodes
 * named in the request.  No transaction is involved, so the inodes are only
 * locked, not joined.
 *
 * Returns whatever xfs_exchmaps_estimate() returned.
 */
int
xfs_exchrange_estimate(
	struct xfs_exchmaps_req	*req)
{
	int			ret;

	xfs_exchrange_ilock(NULL, req->ip1, req->ip2);
	ret = xfs_exchmaps_estimate(req);
	xfs_exchrange_iunlock(req->ip1, req->ip2);

	return ret;
}
STATIC int
xfs_exchrange_check_freshness(
const struct xfs_exchrange *fxr,
struct xfs_inode *ip2)
{
struct inode *inode2 = VFS_I(ip2);
struct timespec64 ctime = inode_get_ctime(inode2);
struct timespec64 mtime = inode_get_mtime(inode2);
trace_xfs_exchrange_freshness(fxr, ip2);
if (fxr->file2_ino != ip2->i_ino ||
fxr->file2_gen != inode2->i_generation ||
!timespec64_equal(&fxr->file2_ctime, &ctime) ||
!timespec64_equal(&fxr->file2_mtime, &mtime))
return -EBUSY;
return 0;
}
/*
 * Bits reported through the qretry out-parameter of
 * xfs_exchrange_reserve_quota().  Each bit records that the quota reservation
 * for the corresponding inode (ip1 or ip2) failed with -EDQUOT or -ENOSPC;
 * xfs_exchrange_mappings() uses them to pick which inode(s) to pass to
 * xfs_blockgc_free_quota() before retrying.
 */
#define QRETRY_IP1 (0x1)
#define QRETRY_IP2 (0x2)
/*
 * Reserve quota for an exchange described by @req within transaction @tp.
 *
 * Nothing is reserved (and *qretry is left untouched) when quota is not on,
 * when both inodes are the same, or when all three dquot pointers of the two
 * inodes are identical.
 *
 * Otherwise *qretry is zeroed and two rounds of reservations are made:
 *  1. a non-forced reservation of the net block gain for whichever inode
 *     gains data and/or realtime blocks; -EDQUOT/-ENOSPC failures set
 *     QRETRY_IP1 / QRETRY_IP2 in *qretry;
 *  2. a forced reservation of each inode's full ip?_bcount / ip?_rtbcount.
 *
 * A soft failure on ip1 is held back until the ip2 reservation has also been
 * attempted, so that *qretry can carry both bits.
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
xfs_exchrange_reserve_quota(
	struct xfs_trans		*tp,
	const struct xfs_exchmaps_req	*req,
	unsigned int			*qretry)
{
	int64_t				dblk_delta, rtblk_delta;
	int				deferred_ip1 = 0;
	int				ret;

	ASSERT(!xfs_is_metadir_inode(req->ip1));
	ASSERT(!xfs_is_metadir_inode(req->ip2));

	/* Cases in which no reservation is made at all. */
	if (!XFS_IS_QUOTA_ON(tp->t_mountp))
		return 0;
	if (req->ip1 == req->ip2)
		return 0;
	if (req->ip1->i_udquot == req->ip2->i_udquot &&
	    req->ip1->i_gdquot == req->ip2->i_gdquot &&
	    req->ip1->i_pdquot == req->ip2->i_pdquot)
		return 0;

	*qretry = 0;

	/* Positive deltas are a net gain for ip1, negative ones for ip2. */
	dblk_delta = req->ip2_bcount - req->ip1_bcount;
	rtblk_delta = req->ip2_rtbcount - req->ip1_rtbcount;

	if (dblk_delta > 0 || rtblk_delta > 0) {
		ret = xfs_trans_reserve_quota_nblks(tp, req->ip1,
				dblk_delta > 0 ? dblk_delta : 0,
				rtblk_delta > 0 ? rtblk_delta : 0,
				false);
		switch (ret) {
		case 0:
			break;
		case -EDQUOT:
		case -ENOSPC:
			/* Remember it, but still try ip2 below. */
			*qretry |= QRETRY_IP1;
			deferred_ip1 = ret;
			break;
		default:
			return ret;
		}
	}

	if (dblk_delta < 0 || rtblk_delta < 0) {
		ret = xfs_trans_reserve_quota_nblks(tp, req->ip2,
				dblk_delta < 0 ? -dblk_delta : 0,
				rtblk_delta < 0 ? -rtblk_delta : 0,
				false);
		switch (ret) {
		case 0:
			break;
		case -EDQUOT:
		case -ENOSPC:
			*qretry |= QRETRY_IP2;
			return ret;
		default:
			return ret;
		}
	}

	if (deferred_ip1)
		return deferred_ip1;

	/* Forced reservations of the gross block counts of each file. */
	ret = xfs_trans_reserve_quota_nblks(tp, req->ip1, req->ip1_bcount,
			req->ip1_rtbcount, true);
	if (ret)
		return ret;

	return xfs_trans_reserve_quota_nblks(tp, req->ip2, req->ip2_bcount,
			req->ip2_rtbcount, true);
}
/*
 * Exchange the file mappings of @ip1 and @ip2 for the byte ranges described by
 * @fxr, inside a single transaction.
 *
 * The sequence is: build an exchmaps request, estimate its resource needs,
 * allocate a transaction, lock and join both inodes, validate the forks,
 * reserve quota, optionally bump timestamps, perform the exchange and commit.
 * A quota reservation failure of -EDQUOT/-ENOSPC is retried exactly once after
 * calling xfs_blockgc_free_quota() on the inode(s) that failed.
 *
 * With XFS_EXCHANGE_RANGE_DRY_RUN set, everything up to and including the
 * quota reservation runs, then the transaction is cancelled and 0 is returned.
 *
 * The ILOCKs taken here are dropped on every exit path that follows
 * xfs_exchrange_ilock().
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
xfs_exchrange_mappings(
const struct xfs_exchrange *fxr,
struct xfs_inode *ip1,
struct xfs_inode *ip2)
{
struct xfs_mount *mp = ip1->i_mount;
/*
 * Byte offsets and length from @fxr are converted with XFS_B_TO_FSBT() and
 * XFS_B_TO_FSB() respectively (presumably truncating vs. rounding up to
 * filesystem blocks -- confirm against the macro definitions).
 */
struct xfs_exchmaps_req req = {
.ip1 = ip1,
.ip2 = ip2,
.startoff1 = XFS_B_TO_FSBT(mp, fxr->file1_offset),
.startoff2 = XFS_B_TO_FSBT(mp, fxr->file2_offset),
.blockcount = XFS_B_TO_FSB(mp, fxr->length),
};
struct xfs_trans *tp;
/*
 * Only read after xfs_exchrange_reserve_quota() fails with -EDQUOT or
 * -ENOSPC; that function zeroes it before attempting any reservation.
 */
unsigned int qretry;
bool retried = false;
int error;
trace_xfs_exchrange_mappings(fxr, ip1, ip2);
/* Translate the public request flags into exchmaps request flags. */
if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)
req.flags |= XFS_EXCHMAPS_SET_SIZES;
if (fxr->flags & XFS_EXCHANGE_RANGE_FILE1_WRITTEN)
req.flags |= XFS_EXCHMAPS_INO1_WRITTEN;
/*
 * For inodes reporting xfs_inode_has_bigrtalloc(), adjust the block count
 * with xfs_blen_roundup_rtx() (presumably rounding up to a whole rt
 * extent -- confirm).
 */
if (xfs_inode_has_bigrtalloc(ip2))
req.blockcount = xfs_blen_roundup_rtx(mp, req.blockcount);
/* The estimate fills in req.resblks, consumed by xfs_trans_alloc() below. */
error = xfs_exchrange_estimate(&req);
if (error)
return error;
retry:
/* Allocate the transaction, then lock both inodes and join them to it. */
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, req.resblks, 0,
XFS_TRANS_RES_FDBLKS, &tp);
if (error)
return error;
xfs_exchrange_ilock(tp, ip1, ip2);
trace_xfs_exchrange_before(ip2, 2);
trace_xfs_exchrange_before(ip1, 1);
error = xfs_exchmaps_check_forks(mp, &req);
if (error)
goto out_trans_cancel;
/*
 * On the first -EDQUOT/-ENOSPC from the quota reservation, undo the
 * transaction and locks, ask blockgc to free quota for whichever inode(s)
 * qretry names, and start over.  A second failure is returned to the caller.
 */
error = xfs_exchrange_reserve_quota(tp, &req, &qretry);
if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
xfs_trans_cancel(tp);
xfs_exchrange_iunlock(ip1, ip2);
if (qretry & QRETRY_IP1)
xfs_blockgc_free_quota(ip1, 0);
if (qretry & QRETRY_IP2)
xfs_blockgc_free_quota(ip2, 0);
retried = true;
goto retry;
}
if (error)
goto out_trans_cancel;
/* Dry run: all checks passed, so cancel and return 0 (error is 0 here). */
if (fxr->flags & XFS_EXCHANGE_RANGE_DRY_RUN)
goto out_trans_cancel;
/* Update mtime and ctime on the files whose private UPD_CMTIME flag is set. */
if (fxr->flags & __XFS_EXCHANGE_RANGE_UPD_CMTIME1)
xfs_trans_ichgtime(tp, ip1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
if (fxr->flags & __XFS_EXCHANGE_RANGE_UPD_CMTIME2)
xfs_trans_ichgtime(tp, ip2, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
xfs_exchange_mappings(tp, &req);
/* Make the commit synchronous for wsync mounts or DSYNC requests. */
if (xfs_has_wsync(mp) || (fxr->flags & XFS_EXCHANGE_RANGE_DSYNC))
xfs_trans_set_sync(tp);
error = xfs_trans_commit(tp);
trace_xfs_exchrange_after(ip2, 2);
trace_xfs_exchrange_after(ip1, 1);
if (error)
goto out_unlock;
/* For exchange-to-EOF requests, swap the two in-core VFS inode sizes. */
if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) {
loff_t temp;
temp = i_size_read(VFS_I(ip2));
i_size_write(VFS_I(ip2), i_size_read(VFS_I(ip1)));
i_size_write(VFS_I(ip1), temp);
}
out_unlock:
xfs_exchrange_iunlock(ip1, ip2);
return error;
out_trans_cancel:
/* Cancel the uncommitted transaction, then fall into the unlock path. */
xfs_trans_cancel(tp);
goto out_unlock;
}
/*
 * Validate a byte-range exchange request at the struct file / VFS inode level.
 *
 * In this file it is reached through xfs_exchrange_prep(), which
 * xfs_exchrange_contents() calls after xfs_ilock2_io_mmap() has succeeded.
 *
 * When XFS_EXCHANGE_RANGE_TO_EOF is set, fxr->length is overwritten with the
 * larger of the two distances from the starting offset to EOF.
 *
 * @alloc_unit is also used to build a bit mask (alloc_unit - 1), so it is
 * presumably expected to be a power of two; the caller in this file passes the
 * filesystem block size when the inode's allocation unit is not one.
 *
 * Returns 0 if the request is acceptable, -EPERM for immutable inodes,
 * -ETXTBSY for swapfiles, -EINVAL for range/alignment/overlap problems, or the
 * error returned by generic_write_check_limits().
 */
static inline int
xfs_exchange_range_checks(
struct xfs_exchrange *fxr,
unsigned int alloc_unit)
{
struct inode *inode1 = file_inode(fxr->file1);
loff_t size1 = i_size_read(inode1);
struct inode *inode2 = file_inode(fxr->file2);
loff_t size2 = i_size_read(inode2);
uint64_t allocmask = alloc_unit - 1;
int64_t test_len;
uint64_t blen;
loff_t tmp;
int error;
/* Immutable files and swapfiles are never exchanged. */
if (IS_IMMUTABLE(inode1) || IS_IMMUTABLE(inode2))
return -EPERM;
if (IS_SWAPFILE(inode1) || IS_SWAPFILE(inode2))
return -ETXTBSY;
/* A range may start at EOF, but not beyond it. */
if (fxr->file1_offset > size1 || fxr->file2_offset > size2)
return -EINVAL;
if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) {
/* Cover everything up to the end of whichever file extends further. */
fxr->length = max_t(int64_t, size1 - fxr->file1_offset,
size2 - fxr->file2_offset);
} else {
/* Without TO_EOF, both ranges must end at or before EOF. */
if (fxr->file1_offset + fxr->length > size1 ||
fxr->file2_offset + fxr->length > size2)
return -EINVAL;
}
/* Both starting offsets must be aligned to the allocation unit. */
if (!IS_ALIGNED(fxr->file1_offset, alloc_unit) ||
!IS_ALIGNED(fxr->file2_offset, alloc_unit))
return -EINVAL;
/* Reject ranges whose end offset overflows; tmp only receives the sum. */
if (check_add_overflow(fxr->file1_offset, fxr->length, &tmp) ||
check_add_overflow(fxr->file2_offset, fxr->length, &tmp))
return -EINVAL;
/*
 * Run the generic write limit checks on both files, sharing one length
 * variable.  If either call changed test_len, the request is rejected
 * rather than exchanged partially.
 */
test_len = fxr->length;
error = generic_write_check_limits(fxr->file2, fxr->file2_offset,
&test_len);
if (error)
return error;
error = generic_write_check_limits(fxr->file1, fxr->file1_offset,
&test_len);
if (error)
return error;
if (test_len != fxr->length)
return -EINVAL;
/*
 * Compute the length used for the overlap test.  A range ending exactly at
 * a file's EOF is extended to the allocation unit boundary above that EOF;
 * any other range must itself be a multiple of the allocation unit.
 */
if (fxr->file1_offset + fxr->length == size1)
blen = ALIGN(size1, alloc_unit) - fxr->file1_offset;
else if (fxr->file2_offset + fxr->length == size2)
blen = ALIGN(size2, alloc_unit) - fxr->file2_offset;
else if (!IS_ALIGNED(fxr->length, alloc_unit))
return -EINVAL;
else
blen = fxr->length;
/* Within a single inode the two ranges must not overlap. */
if (inode1 == inode2 &&
fxr->file2_offset + blen > fxr->file1_offset &&
fxr->file1_offset + blen > fxr->file2_offset)
return -EINVAL;
/* An allocation-unit-aligned length needs no further checks. */
if ((fxr->length & allocmask) == 0)
return 0;
/*
 * The length is unaligned.  For each file whose range ends before that
 * file's EOF, mask blen down to an allocation unit boundary; if that
 * changes blen, the request is rejected below.
 */
blen = fxr->length;
if (fxr->file2_offset + blen < size2)
blen &= ~allocmask;
if (fxr->file1_offset + blen < size1)
blen &= ~allocmask;
return blen == fxr->length ? 0 : -EINVAL;
}
/*
 * Validate the request and quiesce I/O on both files before an exchange:
 * run xfs_exchange_range_checks(), wait for direct I/O on each distinct
 * inode, and write back and wait on the affected range of each file's page
 * cache.  If either file is open with O_SYNC or either inode is IS_SYNC,
 * XFS_EXCHANGE_RANGE_DSYNC is added to fxr->flags.
 *
 * Returns 0 on success (including a zero-length request, for which nothing is
 * flushed) or a negative errno.
 */
static inline int
xfs_exchange_range_prep(
	struct xfs_exchrange	*fxr,
	unsigned int		alloc_unit)
{
	struct inode		*inode1 = file_inode(fxr->file1);
	struct inode		*inode2 = file_inode(fxr->file2);
	int			error;

	error = xfs_exchange_range_checks(fxr, alloc_unit);
	if (error)
		return error;
	if (fxr->length == 0)
		return 0;

	/* Wait for direct I/O once per distinct inode. */
	inode_dio_wait(inode1);
	if (inode1 != inode2)
		inode_dio_wait(inode2);

	/* Write back the byte range [offset, offset + length - 1] of each file. */
	error = filemap_write_and_wait_range(inode1->i_mapping,
			fxr->file1_offset,
			fxr->file1_offset + fxr->length - 1);
	if (error)
		return error;

	error = filemap_write_and_wait_range(inode2->i_mapping,
			fxr->file2_offset,
			fxr->file2_offset + fxr->length - 1);
	if (error)
		return error;

	if ((fxr->file1->f_flags & O_SYNC) ||
	    (fxr->file2->f_flags & O_SYNC) ||
	    IS_SYNC(inode1) || IS_SYNC(inode2))
		fxr->flags |= XFS_EXCHANGE_RANGE_DSYNC;

	return 0;
}
/*
 * Post-exchange step: call file_remove_privs() on file1 and, if file2 refers
 * to a different inode, on file2 as well.
 *
 * Returns 0 on success or the first error from file_remove_privs().
 */
static inline int
xfs_exchange_range_finish(
	struct xfs_exchrange	*fxr)
{
	int			ret = file_remove_privs(fxr->file1);

	/* Stop on failure, or when both files share one inode. */
	if (ret || file_inode(fxr->file1) == file_inode(fxr->file2))
		return ret;

	return file_remove_privs(fxr->file2);
}
/*
 * Alignment checks for an allocation unit that need not be a power of two
 * (the caller in this file only uses it when is_power_of_2() fails).  Unlike
 * the generic checks, this never modifies @fxr; an exchange-to-EOF length is
 * computed into a local.
 *
 * Returns 0 if the request is acceptably aligned, -EINVAL otherwise.
 */
static int
xfs_exchrange_check_rtalign(
	const struct xfs_exchrange	*fxr,
	struct xfs_inode		*ip1,
	struct xfs_inode		*ip2,
	unsigned int			alloc_unit)
{
	loff_t				size1 = i_size_read(VFS_I(ip1));
	loff_t				size2 = i_size_read(VFS_I(ip2));
	uint64_t			length = fxr->length;
	uint64_t			blen;

	/* Both starting offsets must be multiples of the allocation unit. */
	if (!isaligned_64(fxr->file1_offset, alloc_unit))
		return -EINVAL;
	if (!isaligned_64(fxr->file2_offset, alloc_unit))
		return -EINVAL;

	if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)
		length = max_t(int64_t, size1 - fxr->file1_offset,
					size2 - fxr->file2_offset);

	/*
	 * Length used for the overlap test: a range ending exactly at a
	 * file's EOF is extended to the allocation unit boundary above that
	 * EOF; any other range must be a multiple of the allocation unit.
	 */
	if (fxr->file1_offset + length == size1) {
		blen = roundup_64(size1, alloc_unit) - fxr->file1_offset;
	} else if (fxr->file2_offset + length == size2) {
		blen = roundup_64(size2, alloc_unit) - fxr->file2_offset;
	} else {
		if (!isaligned_64(length, alloc_unit))
			return -EINVAL;
		blen = length;
	}

	/* Ranges within one inode must not overlap. */
	if (ip1 == ip2 &&
	    fxr->file2_offset + blen > fxr->file1_offset &&
	    fxr->file1_offset + blen > fxr->file2_offset)
		return -EINVAL;

	if (isaligned_64(length, alloc_unit))
		return 0;

	/*
	 * Unaligned length: round blen down for each file whose range ends
	 * before that file's EOF, and reject the request if that changed it.
	 */
	blen = length;
	if (fxr->file2_offset + length < size2)
		blen = rounddown_64(blen, alloc_unit);
	if (fxr->file1_offset + blen < size1)
		blen = rounddown_64(blen, alloc_unit);

	if (blen != length)
		return -EINVAL;
	return 0;
}
/*
 * XFS-level preparation for an exchange between @ip1 and @ip2:
 *  - both inodes must agree on XFS_IS_REALTIME_INODE();
 *  - alignment checks, using xfs_exchrange_check_rtalign() plus the
 *    filesystem block size when ip2's allocation unit is not a power of two;
 *  - generic range preparation (xfs_exchange_range_prep());
 *  - the file2 freshness check, if __XFS_EXCHANGE_RANGE_CHECK_FRESH2 is set;
 *  - dquot attachment, flush/unmap of both ranges, and cancellation of CoW
 *    ranges on inodes that have CoW data.
 *
 * Returns 0 on success (or for a zero-length request, right after the generic
 * preparation) or a negative errno.
 */
STATIC int
xfs_exchrange_prep(
	struct xfs_exchrange	*fxr,
	struct xfs_inode	*ip1,
	struct xfs_inode	*ip2)
{
	unsigned int		alloc_unit = xfs_inode_alloc_unitsize(ip2);
	int			error;

	trace_xfs_exchrange_prep(fxr, ip1, ip2);

	if (XFS_IS_REALTIME_INODE(ip1) != XFS_IS_REALTIME_INODE(ip2))
		return -EINVAL;

	if (!is_power_of_2(alloc_unit)) {
		error = xfs_exchrange_check_rtalign(fxr, ip1, ip2, alloc_unit);
		if (error)
			return error;

		/* The generic checks below then run at fs block granularity. */
		alloc_unit = ip2->i_mount->m_sb.sb_blocksize;
	}

	error = xfs_exchange_range_prep(fxr, alloc_unit);
	if (error)
		return error;
	if (fxr->length == 0)
		return 0;

	if (fxr->flags & __XFS_EXCHANGE_RANGE_CHECK_FRESH2) {
		error = xfs_exchrange_check_freshness(fxr, ip2);
		if (error)
			return error;
	}

	/* Attach dquots: ip2 first, then ip1. */
	error = xfs_qm_dqattach(ip2);
	if (error)
		return error;
	error = xfs_qm_dqattach(ip1);
	if (error)
		return error;

	trace_xfs_exchrange_flush(fxr, ip1, ip2);

	/* Flush and unmap the affected range of each file, ip2 first. */
	error = xfs_flush_unmap_range(ip2, fxr->file2_offset, fxr->length);
	if (error)
		return error;
	error = xfs_flush_unmap_range(ip1, fxr->file1_offset, fxr->length);
	if (error)
		return error;

	/* Cancel CoW ranges on whichever inode reports CoW data. */
	if (xfs_inode_has_cow_data(ip1)) {
		error = xfs_reflink_cancel_cow_range(ip1, fxr->file1_offset,
				fxr->length, true);
		if (error)
			return error;
	}

	if (!xfs_inode_has_cow_data(ip2))
		return 0;

	return xfs_reflink_cancel_cow_range(ip2, fxr->file2_offset,
			fxr->length, true);
}
/*
 * Exchange the contents of the two files named in @fxr.
 *
 * After feature, flag and shutdown checks, both inodes are locked with
 * xfs_ilock2_io_mmap(); the exchange is then prepared, performed and finished,
 * stopping at the first failing step.  Errors that occur from the locking
 * step onward are reported through the xfs_exchrange_error tracepoint.
 *
 * Returns 0 on success, -EOPNOTSUPP if the filesystem lacks the exchange-range
 * feature, -EINVAL for unknown flags, -EIO if the filesystem is shut down, or
 * a negative errno from one of the steps.
 */
STATIC int
xfs_exchrange_contents(
	struct xfs_exchrange	*fxr)
{
	struct xfs_inode	*ip1 = XFS_I(file_inode(fxr->file1));
	struct xfs_inode	*ip2 = XFS_I(file_inode(fxr->file2));
	struct xfs_mount	*mp = ip1->i_mount;
	int			error;

	if (!xfs_has_exchange_range(mp))
		return -EOPNOTSUPP;

	if (fxr->flags & ~(XFS_EXCHANGE_RANGE_ALL_FLAGS |
			   XFS_EXCHANGE_RANGE_PRIV_FLAGS))
		return -EINVAL;

	if (xfs_is_shutdown(mp))
		return -EIO;

	error = xfs_ilock2_io_mmap(ip1, ip2);
	if (error)
		goto out_err;

	/* Each step runs only if all earlier ones succeeded. */
	error = xfs_exchrange_prep(fxr, ip1, ip2);
	if (!error)
		error = xfs_exchrange_mappings(fxr, ip1, ip2);
	if (!error)
		error = xfs_exchange_range_finish(fxr);

	xfs_iunlock2_io_mmap(ip1, ip2);
out_err:
	if (error)
		trace_xfs_exchrange_error(ip2, error, _RET_IP_);
	return error;
}
static int
xfs_exchange_range(
struct xfs_exchrange *fxr)
{
struct inode *inode1 = file_inode(fxr->file1);
struct inode *inode2 = file_inode(fxr->file2);
loff_t check_len = fxr->length;
int ret;
BUILD_BUG_ON(XFS_EXCHANGE_RANGE_ALL_FLAGS &
XFS_EXCHANGE_RANGE_PRIV_FLAGS);
if (fxr->file1->f_path.mnt != fxr->file2->f_path.mnt)
return -EXDEV;
if (fxr->flags & ~(XFS_EXCHANGE_RANGE_ALL_FLAGS |
__XFS_EXCHANGE_RANGE_CHECK_FRESH2))
return -EINVAL;
if (S_ISDIR(inode1->i_mode) || S_ISDIR(inode2->i_mode))
return -EISDIR;
if (!S_ISREG(inode1->i_mode) || !S_ISREG(inode2->i_mode))
return -EINVAL;
if (!(fxr->file1->f_mode & FMODE_READ) ||
!(fxr->file1->f_mode & FMODE_WRITE) ||
!(fxr->file2->f_mode & FMODE_READ) ||
!(fxr->file2->f_mode & FMODE_WRITE))
return -EBADF;
if ((fxr->file1->f_flags & O_APPEND) ||
(fxr->file2->f_flags & O_APPEND))
return -EBADF;
if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)
check_len = 0;
ret = remap_verify_area(fxr->file1, fxr->file1_offset, check_len, true);
if (ret)
return ret;
ret = remap_verify_area(fxr->file2, fxr->file2_offset, check_len, true);
if (ret)
return ret;
if (!(fxr->file1->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode1))
fxr->flags |= __XFS_EXCHANGE_RANGE_UPD_CMTIME1;
if (!(fxr->file2->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode2))
fxr->flags |= __XFS_EXCHANGE_RANGE_UPD_CMTIME2;
file_start_write(fxr->file2);
ret = xfs_exchrange_contents(fxr);
file_end_write(fxr->file2);
if (ret)
return ret;
fsnotify_modify(fxr->file1);
if (fxr->file2 != fxr->file1)
fsnotify_modify(fxr->file2);
return 0;
}
/*
 * ioctl handler for an exchange-range request.  @file becomes file2 of the
 * exchange; file1 is looked up from the file descriptor supplied in the
 * userspace argument structure.
 *
 * Returns 0 on success, -EFAULT if the arguments cannot be copied in, -EINVAL
 * for non-zero padding or unknown flags, -EBADF for a bad file1 descriptor, or
 * a negative errno from xfs_exchange_range().
 */
long
xfs_ioc_exchange_range(
	struct file			*file,
	struct xfs_exchange_range __user *argp)
{
	struct xfs_exchange_range	args;
	struct xfs_exchrange		fxr = {
		.file2			= file,
	};

	if (copy_from_user(&args, argp, sizeof(args)))
		return -EFAULT;

	/* Padding must be all zeroes and only public flags are accepted. */
	if (memchr_inv(&args.pad, 0, sizeof(args.pad)) ||
	    (args.flags & ~XFS_EXCHANGE_RANGE_ALL_FLAGS))
		return -EINVAL;

	/* Scope-managed fd reference; released when file1 goes out of scope. */
	CLASS(fd, file1)(args.file1_fd);
	if (fd_empty(file1))
		return -EBADF;

	fxr.file1		= fd_file(file1);
	fxr.file1_offset	= args.file1_offset;
	fxr.file2_offset	= args.file2_offset;
	fxr.length		= args.length;
	fxr.flags		= args.flags;

	return xfs_exchange_range(&fxr);
}
/*
 * Kernel's view of the file2_freshness area of struct xfs_commit_range; a
 * BUILD_BUG_ON in xfs_ioc_start_commit() requires both to have the same size.
 * xfs_ioc_start_commit() fills one of these in and copies it to userspace;
 * xfs_ioc_commit_range() reads it back and uses it for the file2 freshness
 * check.
 */
struct xfs_commit_range_fresh {
/* copy of the mount's m_fixedfsid */
xfs_fsid_t fsid;
/* file2 inode number */
__u64 file2_ino;
/* file2 mtime and ctime, seconds part */
__s64 file2_mtime;
__s64 file2_ctime;
/* file2 mtime and ctime, nanoseconds part */
__s32 file2_mtime_nsec;
__s32 file2_ctime_nsec;
/* file2 inode generation */
__u32 file2_gen;
/* must equal XCR_FRESH_MAGIC */
__u32 magic;
};
/*
 * Value stored in xfs_commit_range_fresh.magic by xfs_ioc_start_commit();
 * xfs_ioc_commit_range() returns -EBUSY if it finds anything else there.
 */
#define XCR_FRESH_MAGIC 0x444F524B
/*
 * ioctl handler that snapshots the state of @file (file2) for a later
 * commit-range call.  The filesystem id, inode number, generation, ctime and
 * mtime are recorded, with the inode fields sampled while holding the IOLOCK,
 * MMAPLOCK and ILOCK in shared mode, and written to the file2_freshness area
 * of the userspace structure.
 *
 * Returns 0 on success or -EFAULT if the snapshot cannot be copied out.
 */
long
xfs_ioc_start_commit(
	struct file			*file,
	struct xfs_commit_range __user	*argp)
{
	const unsigned int		lockflags = XFS_IOLOCK_SHARED |
						    XFS_MMAPLOCK_SHARED |
						    XFS_ILOCK_SHARED;
	struct xfs_commit_range		args = { };
	struct kstat			kstat = { };
	struct inode			*inode2 = file_inode(file);
	struct xfs_inode		*ip2 = XFS_I(inode2);
	struct xfs_commit_range_fresh	*kern_f =
		(struct xfs_commit_range_fresh *)&args.file2_freshness;
	struct xfs_commit_range_fresh	__user *user_f =
		(struct xfs_commit_range_fresh __user *)&argp->file2_freshness;

	BUILD_BUG_ON(sizeof(struct xfs_commit_range_fresh) !=
		     sizeof(args.file2_freshness));

	/* Fields that do not depend on the inode's current state. */
	kern_f->magic = XCR_FRESH_MAGIC;
	memcpy(&kern_f->fsid, ip2->i_mount->m_fixedfsid, sizeof(xfs_fsid_t));

	/* Sample the inode while it is locked. */
	xfs_ilock(ip2, lockflags);
	fill_mg_cmtime(&kstat, STATX_CTIME | STATX_MTIME, inode2);
	kern_f->file2_ino		= ip2->i_ino;
	kern_f->file2_gen		= inode2->i_generation;
	kern_f->file2_mtime		= kstat.mtime.tv_sec;
	kern_f->file2_mtime_nsec	= kstat.mtime.tv_nsec;
	kern_f->file2_ctime		= kstat.ctime.tv_sec;
	kern_f->file2_ctime_nsec	= kstat.ctime.tv_nsec;
	xfs_iunlock(ip2, lockflags);

	if (copy_to_user(user_f, kern_f, sizeof(*kern_f)))
		return -EFAULT;

	return 0;
}
/*
 * ioctl handler for a commit-range request: an exchange in which @file
 * (file2) must still match the freshness snapshot supplied by userspace.  The
 * snapshot is unpacked into the exchange request and
 * __XFS_EXCHANGE_RANGE_CHECK_FRESH2 is added to the flags.
 *
 * Returns 0 on success, -EFAULT if the arguments cannot be copied in, -EINVAL
 * for unknown flags, -EBUSY if the snapshot's magic or filesystem id does not
 * match, -EBADF for a bad file1 descriptor, or a negative errno from
 * xfs_exchange_range().
 */
long
xfs_ioc_commit_range(
	struct file			*file,
	struct xfs_commit_range __user	*argp)
{
	struct xfs_commit_range		args;
	struct xfs_commit_range_fresh	*kern_f =
		(struct xfs_commit_range_fresh *)&args.file2_freshness;
	struct xfs_inode		*ip2 = XFS_I(file_inode(file));
	struct xfs_mount		*mp = ip2->i_mount;
	struct xfs_exchrange		fxr = {
		.file2			= file,
	};

	if (copy_from_user(&args, argp, sizeof(args)))
		return -EFAULT;
	if (args.flags & ~XFS_EXCHANGE_RANGE_ALL_FLAGS)
		return -EINVAL;

	/* The snapshot must carry our magic and belong to this filesystem. */
	if (kern_f->magic != XCR_FRESH_MAGIC ||
	    memcmp(&kern_f->fsid, mp->m_fixedfsid, sizeof(xfs_fsid_t)) != 0)
		return -EBUSY;

	fxr.flags		= args.flags | __XFS_EXCHANGE_RANGE_CHECK_FRESH2;
	fxr.length		= args.length;
	fxr.file1_offset	= args.file1_offset;
	fxr.file2_offset	= args.file2_offset;

	/* Snapshot of file2 to be compared by the freshness check. */
	fxr.file2_ino		= kern_f->file2_ino;
	fxr.file2_gen		= kern_f->file2_gen;
	fxr.file2_ctime.tv_sec	= kern_f->file2_ctime;
	fxr.file2_ctime.tv_nsec	= kern_f->file2_ctime_nsec;
	fxr.file2_mtime.tv_sec	= kern_f->file2_mtime;
	fxr.file2_mtime.tv_nsec	= kern_f->file2_mtime_nsec;

	/* Scope-managed fd reference; released when file1 goes out of scope. */
	CLASS(fd, file1)(args.file1_fd);
	if (fd_empty(file1))
		return -EBADF;
	fxr.file1 = fd_file(file1);

	return xfs_exchange_range(&fxr);
}