// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem high-level (buffered) writeback.
 *
 * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * To support network filesystems with local caching, we manage a situation
 * that can be envisioned like the following:
 *
 *               +---+---+-----+-----+---+----------+
 *    Folios:    |   |   |     |     |   |          |
 *               +---+---+-----+-----+---+----------+
 *
 *                 +------+------+     +----+----+
 *    Upload:      |      |      |.....|    |    |
 *  (Stream 0)     +------+------+     +----+----+
 *
 *               +------+------+------+------+------+
 *    Cache:     |      |      |      |      |      |
 *  (Stream 1)   +------+------+------+------+------+
 *
 * Where we have a sequence of folios of varying sizes that we need to overlay
 * with multiple parallel streams of I/O requests, where the I/O requests in a
 * stream may also be of various sizes (in cifs, for example, the sizes are
 * negotiated with the server; in something like ceph, they may represent the
 * sizes of storage objects).
 *
 * The sequence in each stream may contain gaps and noncontiguous subrequests
 * may be glued together into single vectored write RPCs.
 */

#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include "internal.h"

/*
 * Kill all dirty folios in the event of an unrecoverable error, starting with
 * a locked folio we've already obtained from writeback_iter().
 */
static void netfs_kill_dirty_pages(struct address_space *mapping,
                                   struct writeback_control *wbc,
                                   struct folio *folio)
{
        int error = 0;

        do {
                enum netfs_folio_trace why = netfs_folio_trace_kill;
                struct netfs_group *group = NULL;
                struct netfs_folio *finfo = NULL;
                void *priv;

                priv = folio_detach_private(folio);
                if (priv) {
                        finfo = __netfs_folio_info(priv);
                        if (finfo) {
                                /* Kill folio from streaming write. */
                                group = finfo->netfs_group;
                                why = netfs_folio_trace_kill_s;
                        } else {
                                group = priv;
                                if (group == NETFS_FOLIO_COPY_TO_CACHE) {
                                        /* Kill copy-to-cache folio */
                                        why = netfs_folio_trace_kill_cc;
                                        group = NULL;
                                } else {
                                        /* Kill folio with group */
                                        why = netfs_folio_trace_kill_g;
                                }
                        }
                }

                trace_netfs_folio(folio, why);

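                /* Cycle the folio through the writeback state without doing
                 * any I/O so that anyone waiting on its writeback gets woken.
                 */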
                folio_start_writeback(folio);
                folio_unlock(folio);
                folio_end_writeback(folio);

                netfs_put_group(group);
                kfree(finfo);

        } while ((folio = writeback_iter(mapping, wbc, folio, &error)));
}

/*
 * Create a write request and set it up appropriately for the origin type.
 */
struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
                                                struct file *file,
                                                loff_t start,
                                                enum netfs_io_origin origin)
{
        struct netfs_io_request *wreq;
        struct netfs_inode *ictx;
        bool is_cacheable = (origin == NETFS_WRITEBACK ||
                             origin == NETFS_WRITEBACK_SINGLE ||
                             origin == NETFS_WRITETHROUGH ||
                             origin == NETFS_PGPRIV2_COPY_TO_CACHE);

        wreq = netfs_alloc_request(mapping, file, start, 0, origin);
        if (IS_ERR(wreq))
                return wreq;

        _enter("R=%x", wreq->debug_id);

        ictx = netfs_inode(wreq->inode);
        if (is_cacheable && netfs_is_cache_enabled(ictx))
                fscache_begin_write_operation(&wreq->cache_resources, netfs_i_cookie(ictx));
        if (rolling_buffer_init(&wreq->buffer, wreq->debug_id, ITER_SOURCE) < 0)
                goto nomem;

        wreq->cleaned_to = wreq->start;

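        /* Stream 0 always carries the upload to the server; stream 1 writes
         * to the cache and is only marked available below if we managed to
         * begin a cache write operation.
         */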
        wreq->io_streams[0].stream_nr           = 0;
        wreq->io_streams[0].source              = NETFS_UPLOAD_TO_SERVER;
        wreq->io_streams[0].prepare_write       = ictx->ops->prepare_write;
        wreq->io_streams[0].issue_write         = ictx->ops->issue_write;
        wreq->io_streams[0].collected_to        = start;
        wreq->io_streams[0].transferred         = 0;

        wreq->io_streams[1].stream_nr           = 1;
        wreq->io_streams[1].source              = NETFS_WRITE_TO_CACHE;
        wreq->io_streams[1].collected_to        = start;
        wreq->io_streams[1].transferred         = 0;
        if (fscache_resources_valid(&wreq->cache_resources)) {
                wreq->io_streams[1].avail       = true;
                wreq->io_streams[1].active      = true;
                wreq->io_streams[1].prepare_write = wreq->cache_resources.ops->prepare_write_subreq;
                wreq->io_streams[1].issue_write = wreq->cache_resources.ops->issue_write;
        }

        return wreq;
nomem:
        netfs_put_failed_request(wreq);
        return ERR_PTR(-ENOMEM);
}

/**
 * netfs_prepare_write_failed - Note write preparation failed
 * @subreq: The subrequest to mark
 *
 * Mark a subrequest to note that preparation for write failed.
 */
void netfs_prepare_write_failed(struct netfs_io_subrequest *subreq)
{
        __set_bit(NETFS_SREQ_FAILED, &subreq->flags);
        trace_netfs_sreq(subreq, netfs_sreq_trace_prep_failed);
}
EXPORT_SYMBOL(netfs_prepare_write_failed);

/*
 * Prepare a write subrequest.  We need to allocate a new subrequest
 * if we don't have one.
 */
void netfs_prepare_write(struct netfs_io_request *wreq,
                         struct netfs_io_stream *stream,
                         loff_t start)
{
        struct netfs_io_subrequest *subreq;
        struct iov_iter *wreq_iter = &wreq->buffer.iter;

        /* Make sure we don't point the iterator at a used-up folio_queue
         * struct being used as a placeholder to prevent the queue from
         * collapsing.  In such a case, extend the queue.
         */
        if (iov_iter_is_folioq(wreq_iter) &&
            wreq_iter->folioq_slot >= folioq_nr_slots(wreq_iter->folioq))
                rolling_buffer_make_space(&wreq->buffer);

        subreq = netfs_alloc_subrequest(wreq);
        subreq->source          = stream->source;
        subreq->start           = start;
        subreq->stream_nr       = stream->stream_nr;
        subreq->io_iter         = *wreq_iter;

        _enter("R=%x[%x]", wreq->debug_id, subreq->debug_index);

        trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);

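        /* Assume no limit on the subrequest size or segment count; the cases
         * below and the ->prepare_write() hook may tighten these.
         */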
        stream->sreq_max_len    = UINT_MAX;
        stream->sreq_max_segs   = INT_MAX;
        switch (stream->source) {
        case NETFS_UPLOAD_TO_SERVER:
                netfs_stat(&netfs_n_wh_upload);
                stream->sreq_max_len = wreq->wsize;
                break;
        case NETFS_WRITE_TO_CACHE:
                netfs_stat(&netfs_n_wh_write);
                break;
        default:
                WARN_ON_ONCE(1);
                break;
        }

        if (stream->prepare_write)
                stream->prepare_write(subreq);

        __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);

        /* We add to the end of the list whilst the collector may be walking
         * the list.  The collector only goes nextwards and uses the lock to
         * remove entries off of the front.
         */
        spin_lock(&wreq->lock);
        list_add_tail(&subreq->rreq_link, &stream->subrequests);
        if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
                if (!stream->active) {
                        stream->collected_to = subreq->start;
                        /* Write list pointers before active flag */
                        smp_store_release(&stream->active, true);
                }
        }

        spin_unlock(&wreq->lock);

        stream->construct = subreq;
}

/*
 * Set the I/O iterator for the filesystem/cache to use and dispatch the I/O
 * operation.  The operation may be asynchronous and should call
 * netfs_write_subrequest_terminated() when complete.
 */
static void netfs_do_issue_write(struct netfs_io_stream *stream,
                                 struct netfs_io_subrequest *subreq)
{
        struct netfs_io_request *wreq = subreq->rreq;

        _enter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len);

        if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
                return netfs_write_subrequest_terminated(subreq, subreq->error);

        trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
        stream->issue_write(subreq);
}

void netfs_reissue_write(struct netfs_io_stream *stream,
                         struct netfs_io_subrequest *subreq,
                         struct iov_iter *source)
{
        size_t size = subreq->len - subreq->transferred;

        // TODO: Use encrypted buffer
        subreq->io_iter = *source;
        iov_iter_advance(source, size);
        iov_iter_truncate(&subreq->io_iter, size);

        subreq->retry_count++;
        subreq->error = 0;
        __clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
        __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
        netfs_stat(&netfs_n_wh_retry_write_subreq);
        netfs_do_issue_write(stream, subreq);
}

void netfs_issue_write(struct netfs_io_request *wreq,
                       struct netfs_io_stream *stream)
{
        struct netfs_io_subrequest *subreq = stream->construct;

        if (!subreq)
                return;
        stream->construct = NULL;
        subreq->io_iter.count = subreq->len;
        netfs_do_issue_write(stream, subreq);
}

/*
 * Add data to the write subrequest, dispatching each as we fill it up or if it
 * is discontiguous with the previous.  We only fill one part at a time so that
 * we can avoid overrunning the credits obtained (cifs) and try to parallelise
 * content-crypto preparation with network writes.
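 *
 * Returns the amount of data added to the subrequest; if the stream is
 * unavailable, the whole of @len is returned so that the caller treats the
 * region as consumed.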
 */
size_t netfs_advance_write(struct netfs_io_request *wreq,
                           struct netfs_io_stream *stream,
                           loff_t start, size_t len, bool to_eof)
{
        struct netfs_io_subrequest *subreq = stream->construct;
        size_t part;

        if (!stream->avail) {
                _leave("no write");
                return len;
        }

        _enter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0);

        if (subreq && start != subreq->start + subreq->len) {
                netfs_issue_write(wreq, stream);
                subreq = NULL;
        }

        if (!stream->construct)
                netfs_prepare_write(wreq, stream, start);
        subreq = stream->construct;

        part = umin(stream->sreq_max_len - subreq->len, len);
        _debug("part %zx/%zx %zx/%zx", subreq->len, stream->sreq_max_len, part, len);
        subreq->len += part;
        subreq->nr_segs++;
        stream->submit_extendable_to -= part;

        if (subreq->len >= stream->sreq_max_len ||
            subreq->nr_segs >= stream->sreq_max_segs ||
            to_eof) {
                netfs_issue_write(wreq, stream);
                subreq = NULL;
        }

        return part;
}

/*
 * Write some of a pending folio data back to the server.
 */
static int netfs_write_folio(struct netfs_io_request *wreq,
                             struct writeback_control *wbc,
                             struct folio *folio)
{
        struct netfs_io_stream *upload = &wreq->io_streams[0];
        struct netfs_io_stream *cache  = &wreq->io_streams[1];
        struct netfs_io_stream *stream;
        struct netfs_group *fgroup; /* TODO: Use this with ceph */
        struct netfs_folio *finfo;
        size_t iter_off = 0;
        size_t fsize = folio_size(folio), flen = fsize, foff = 0;
        loff_t fpos = folio_pos(folio), i_size;
        bool to_eof = false, streamw = false;
        bool debug = false;

        _enter("");

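        /* Make sure there's a slot in the rolling buffer for this folio. */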
        if (rolling_buffer_make_space(&wreq->buffer) < 0)
                return -ENOMEM;

        /* netfs_perform_write() may shift i_size around the page or from out
         * of the page to beyond it, but cannot move i_size into or through the
         * page since we have it locked.
         */
        i_size = i_size_read(wreq->inode);

        if (fpos >= i_size) {
                /* mmap beyond eof. */
                _debug("beyond eof");
                folio_start_writeback(folio);
                folio_unlock(folio);
                wreq->nr_group_rel += netfs_folio_written_back(folio);
                netfs_put_group_many(wreq->group, wreq->nr_group_rel);
                wreq->nr_group_rel = 0;
                return 0;
        }

        if (fpos + fsize > wreq->i_size)
                wreq->i_size = i_size;

        fgroup = netfs_folio_group(folio);
        finfo = netfs_folio_info(folio);
        if (finfo) {
                foff = finfo->dirty_offset;
                flen = foff + finfo->dirty_len;
                streamw = true;
        }

        if (wreq->origin == NETFS_WRITETHROUGH) {
                to_eof = false;
                if (flen > i_size - fpos)
                        flen = i_size - fpos;
        } else if (flen > i_size - fpos) {
                flen = i_size - fpos;
                if (!streamw)
                        folio_zero_segment(folio, flen, fsize);
                to_eof = true;
        } else if (flen == i_size - fpos) {
                to_eof = true;
        }
        flen -= foff;

        _debug("folio %zx %zx %zx", foff, flen, fsize);

        /* Deal with discontinuities in the stream of dirty pages.  These can
         * arise from a number of sources:
         *
         * (1) Intervening non-dirty pages from random-access writes, multiple
         *     flushers writing back different parts simultaneously and manual
         *     syncing.
         *
         * (2) Partially-written pages from write-streaming.
         *
         * (3) Pages that belong to a different write-back group (eg.  Ceph
         *     snapshots).
         *
         * (4) Actually-clean pages that were marked for write to the cache
         *     when they were read.  Note that these appear as a special
         *     write-back group.
         */
        if (fgroup == NETFS_FOLIO_COPY_TO_CACHE) {
                netfs_issue_write(wreq, upload);
        } else if (fgroup != wreq->group) {
                /* We can't write this page to the server yet. */
                kdebug("wrong group");
                folio_redirty_for_writepage(wbc, folio);
                folio_unlock(folio);
                netfs_issue_write(wreq, upload);
                netfs_issue_write(wreq, cache);
                return 0;
        }

        if (foff > 0)
                netfs_issue_write(wreq, upload);
        if (streamw)
                netfs_issue_write(wreq, cache);

        /* Flip the page to the writeback state and unlock.  If we're called
         * from write-through, then the page has already been put into the wb
         * state.
         */
        if (wreq->origin == NETFS_WRITEBACK)
                folio_start_writeback(folio);
        folio_unlock(folio);

        if (fgroup == NETFS_FOLIO_COPY_TO_CACHE) {
                if (!cache->avail) {
                        trace_netfs_folio(folio, netfs_folio_trace_cancel_copy);
                        netfs_issue_write(wreq, upload);
                        netfs_folio_written_back(folio);
                        return 0;
                }
                trace_netfs_folio(folio, netfs_folio_trace_store_copy);
        } else if (!upload->avail && !cache->avail) {
                trace_netfs_folio(folio, netfs_folio_trace_cancel_store);
                netfs_folio_written_back(folio);
                return 0;
        } else if (!upload->construct) {
                trace_netfs_folio(folio, netfs_folio_trace_store);
        } else {
                trace_netfs_folio(folio, netfs_folio_trace_store_plus);
        }

        /* Attach the folio to the rolling buffer. */
        rolling_buffer_append(&wreq->buffer, folio, 0);

        /* Move the submission point forward to allow for write-streaming data
         * not starting at the front of the page.  We don't do write-streaming
         * with the cache as the cache requires DIO alignment.
         *
         * Also skip uploading for data that's been read and just needs copying
         * to the cache.
         */
        for (int s = 0; s < NR_IO_STREAMS; s++) {
                stream = &wreq->io_streams[s];
                stream->submit_off = foff;
                stream->submit_len = flen;
                if (!stream->avail ||
                    (stream->source == NETFS_WRITE_TO_CACHE && streamw) ||
                    (stream->source == NETFS_UPLOAD_TO_SERVER &&
                     fgroup == NETFS_FOLIO_COPY_TO_CACHE)) {
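                        /* Nothing to submit to this stream for this folio. */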
                        stream->submit_off = UINT_MAX;
                        stream->submit_len = 0;
                }
        }

        /* Attach the folio to one or more subrequests.  For a big folio, we
         * could end up with thousands of subrequests if the wsize is small -
         * but we might need to wait during the creation of subrequests for
         * network resources (eg. SMB credits).
         */
        for (;;) {
                ssize_t part;
                size_t lowest_off = ULONG_MAX;
                int choose_s = -1;

                /* Always add to the lowest-submitted stream first. */
                for (int s = 0; s < NR_IO_STREAMS; s++) {
                        stream = &wreq->io_streams[s];
                        if (stream->submit_len > 0 &&
                            stream->submit_off < lowest_off) {
                                lowest_off = stream->submit_off;
                                choose_s = s;
                        }
                }

                if (choose_s < 0)
                        break;
                stream = &wreq->io_streams[choose_s];

                /* Advance the iterator(s). */
                if (stream->submit_off > iter_off) {
                        rolling_buffer_advance(&wreq->buffer, stream->submit_off - iter_off);
                        iter_off = stream->submit_off;
                }

                atomic64_set(&wreq->issued_to, fpos + stream->submit_off);
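                /* Note how much further into the folio the subrequest could
                 * be extended.
                 */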
                stream->submit_extendable_to = fsize - stream->submit_off;
                part = netfs_advance_write(wreq, stream, fpos + stream->submit_off,
                                           stream->submit_len, to_eof);
                stream->submit_off += part;
                if (part > stream->submit_len)
                        stream->submit_len = 0;
                else
                        stream->submit_len -= part;
                if (part > 0)
                        debug = true;
        }

        if (fsize > iter_off)
                rolling_buffer_advance(&wreq->buffer, fsize - iter_off);
        atomic64_set(&wreq->issued_to, fpos + fsize);

        if (!debug)
                kdebug("R=%x: No submit", wreq->debug_id);

        if (foff + flen < fsize)
                for (int s = 0; s < NR_IO_STREAMS; s++)
                        netfs_issue_write(wreq, &wreq->io_streams[s]);

        _leave(" = 0");
        return 0;
}

/*
 * End the issuing of writes, letting the collector know we're done.
 */
static void netfs_end_issue_write(struct netfs_io_request *wreq)
{
        bool needs_poke = true;

        smp_wmb(); /* Write subreq lists before ALL_QUEUED. */
        set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);

        for (int s = 0; s < NR_IO_STREAMS; s++) {
                struct netfs_io_stream *stream = &wreq->io_streams[s];

                if (!stream->active)
                        continue;
                if (!list_empty(&stream->subrequests))
                        needs_poke = false;
                netfs_issue_write(wreq, stream);
        }

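        /* If nothing was ever queued on any active stream, there's no I/O in
         * flight to drive collection, so wake the collector directly.
         */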
        if (needs_poke)
                netfs_wake_collector(wreq);
}

/*
 * Write some of the pending data back to the server
 */
int netfs_writepages(struct address_space *mapping,
                     struct writeback_control *wbc)
{
        struct netfs_inode *ictx = netfs_inode(mapping->host);
        struct netfs_io_request *wreq = NULL;
        struct folio *folio;
        int error = 0;

        if (!mutex_trylock(&ictx->wb_lock)) {
                if (wbc->sync_mode == WB_SYNC_NONE) {
                        netfs_stat(&netfs_n_wb_lock_skip);
                        return 0;
                }
                netfs_stat(&netfs_n_wb_lock_wait);
                mutex_lock(&ictx->wb_lock);
        }

        /* Need the first folio to be able to set up the op. */
        folio = writeback_iter(mapping, wbc, NULL, &error);
        if (!folio)
                goto out;

        wreq = netfs_create_write_req(mapping, NULL, folio_pos(folio), NETFS_WRITEBACK);
        if (IS_ERR(wreq)) {
                error = PTR_ERR(wreq);
                goto couldnt_start;
        }

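        /* Have the results collected by a work item rather than in the
         * caller's context.
         */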
        __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags);
        trace_netfs_write(wreq, netfs_write_trace_writeback);
        netfs_stat(&netfs_n_wh_writepages);

        do {
                _debug("wbiter %lx %llx", folio->index, atomic64_read(&wreq->issued_to));

                /* It appears we don't have to handle cyclic writeback wrapping. */
                WARN_ON_ONCE(wreq && folio_pos(folio) < atomic64_read(&wreq->issued_to));

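                /* Don't tell the filesystem that writeback is starting until
                 * we see the first folio that actually needs uploading;
                 * copy-to-cache-only folios don't count.
                 */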
                if (netfs_folio_group(folio) != NETFS_FOLIO_COPY_TO_CACHE &&
                    unlikely(!test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))) {
                        set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
                        wreq->netfs_ops->begin_writeback(wreq);
                }

                error = netfs_write_folio(wreq, wbc, folio);
                if (error < 0)
                        break;
        } while ((folio = writeback_iter(mapping, wbc, folio, &error)));

        netfs_end_issue_write(wreq);

        mutex_unlock(&ictx->wb_lock);
        netfs_wake_collector(wreq);

        netfs_put_request(wreq, netfs_rreq_trace_put_return);
        _leave(" = %d", error);
        return error;

couldnt_start:
        netfs_kill_dirty_pages(mapping, wbc, folio);
out:
        mutex_unlock(&ictx->wb_lock);
        _leave(" = %d", error);
        return error;
}
EXPORT_SYMBOL(netfs_writepages);

/*
 * Begin a write operation for writing through the pagecache.
 */
struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len)
{
        struct netfs_io_request *wreq = NULL;
        struct netfs_inode *ictx = netfs_inode(file_inode(iocb->ki_filp));

        mutex_lock(&ictx->wb_lock);

        wreq = netfs_create_write_req(iocb->ki_filp->f_mapping, iocb->ki_filp,
                                      iocb->ki_pos, NETFS_WRITETHROUGH);
        if (IS_ERR(wreq)) {
                mutex_unlock(&ictx->wb_lock);
                return wreq;
        }

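        /* Writethrough always has the upload stream available. */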
        wreq->io_streams[0].avail = true;
        trace_netfs_write(wreq, netfs_write_trace_writethrough);
        return wreq;
}

/*
 * Advance the state of the write operation used when writing through the
 * pagecache.  Data has been copied into the pagecache that we need to append
 * to the request.  If we've added more than wsize then we need to create a new
 * subrequest.
 */
int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
                               struct folio *folio, size_t copied, bool to_page_end,
                               struct folio **writethrough_cache)
{
        _enter("R=%x ic=%zu ws=%u cp=%zu tp=%u",
               wreq->debug_id, wreq->buffer.iter.count, wreq->wsize, copied, to_page_end);

        if (!*writethrough_cache) {
                if (folio_test_dirty(folio))
                        /* Sigh.  mmap. */
                        folio_clear_dirty_for_io(folio);

                /* We can make multiple writes to the folio... */
                folio_start_writeback(folio);
                if (wreq->len == 0)
                        trace_netfs_folio(folio, netfs_folio_trace_wthru);
                else
                        trace_netfs_folio(folio, netfs_folio_trace_wthru_plus);
                *writethrough_cache = folio;
        }

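        /* Add in what was copied; only flush the folio onwards once the
         * caller has finished with it (i.e. reached the end of the page).
         */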
        wreq->len += copied;
        if (!to_page_end)
                return 0;

        *writethrough_cache = NULL;
        return netfs_write_folio(wreq, wbc, folio);
}

/*
 * End a write operation used when writing through the pagecache.
 */
ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
                               struct folio *writethrough_cache)
{
        struct netfs_inode *ictx = netfs_inode(wreq->inode);
        ssize_t ret;

        _enter("R=%x", wreq->debug_id);

        if (writethrough_cache)
                netfs_write_folio(wreq, wbc, writethrough_cache);

        netfs_end_issue_write(wreq);

        mutex_unlock(&ictx->wb_lock);

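        /* For an async kiocb, the result will be delivered through
         * ->ki_complete() when collection finishes; otherwise wait for the
         * write to complete.
         */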
        if (wreq->iocb)
                ret = -EIOCBQUEUED;
        else
                ret = netfs_wait_for_write(wreq);
        netfs_put_request(wreq, netfs_rreq_trace_put_return);
        return ret;
}

/*
 * Write some of a pending folio data back to the server and/or the cache.
 */
static int netfs_write_folio_single(struct netfs_io_request *wreq,
                                    struct folio *folio)
{
        struct netfs_io_stream *upload = &wreq->io_streams[0];
        struct netfs_io_stream *cache  = &wreq->io_streams[1];
        struct netfs_io_stream *stream;
        size_t iter_off = 0;
        size_t fsize = folio_size(folio), flen;
        loff_t fpos = folio_pos(folio);
        bool to_eof = false;
        bool no_debug = false;

        _enter("");

        flen = folio_size(folio);
        if (flen > wreq->i_size - fpos) {
                flen = wreq->i_size - fpos;
                folio_zero_segment(folio, flen, fsize);
                to_eof = true;
        } else if (flen == wreq->i_size - fpos) {
                to_eof = true;
        }

        _debug("folio %zx/%zx", flen, fsize);

        if (!upload->avail && !cache->avail) {
                trace_netfs_folio(folio, netfs_folio_trace_cancel_store);
                return 0;
        }

        if (!upload->construct)
                trace_netfs_folio(folio, netfs_folio_trace_store);
        else
                trace_netfs_folio(folio, netfs_folio_trace_store_plus);

        /* Attach the folio to the rolling buffer. */
        folio_get(folio);
        rolling_buffer_append(&wreq->buffer, folio, NETFS_ROLLBUF_PUT_MARK);

        /* Move the submission point forward to allow for write-streaming data
         * not starting at the front of the page.  We don't do write-streaming
         * with the cache as the cache requires DIO alignment.
         *
         * Also skip uploading for data that's been read and just needs copying
         * to the cache.
         */
        for (int s = 0; s < NR_IO_STREAMS; s++) {
                stream = &wreq->io_streams[s];
                stream->submit_off = 0;
                stream->submit_len = flen;
                if (!stream->avail) {
                        stream->submit_off = UINT_MAX;
                        stream->submit_len = 0;
                }
        }

        /* Attach the folio to one or more subrequests.  For a big folio, we
         * could end up with thousands of subrequests if the wsize is small -
         * but we might need to wait during the creation of subrequests for
         * network resources (eg. SMB credits).
         */
        for (;;) {
                ssize_t part;
                size_t lowest_off = ULONG_MAX;
                int choose_s = -1;

                /* Always add to the lowest-submitted stream first. */
                for (int s = 0; s < NR_IO_STREAMS; s++) {
                        stream = &wreq->io_streams[s];
                        if (stream->submit_len > 0 &&
                            stream->submit_off < lowest_off) {
                                lowest_off = stream->submit_off;
                                choose_s = s;
                        }
                }

                if (choose_s < 0)
                        break;
                stream = &wreq->io_streams[choose_s];

                /* Advance the iterator(s). */
                if (stream->submit_off > iter_off) {
                        rolling_buffer_advance(&wreq->buffer, stream->submit_off - iter_off);
                        iter_off = stream->submit_off;
                }

                atomic64_set(&wreq->issued_to, fpos + stream->submit_off);
                stream->submit_extendable_to = fsize - stream->submit_off;
                part = netfs_advance_write(wreq, stream, fpos + stream->submit_off,
                                           stream->submit_len, to_eof);
                stream->submit_off += part;
                if (part > stream->submit_len)
                        stream->submit_len = 0;
                else
                        stream->submit_len -= part;
                if (part > 0)
                        no_debug = true;
        }

        wreq->buffer.iter.iov_offset = 0;
        if (fsize > iter_off)
                rolling_buffer_advance(&wreq->buffer, fsize - iter_off);
        atomic64_set(&wreq->issued_to, fpos + fsize);

        if (!no_debug)
                kdebug("R=%x: No submit", wreq->debug_id);
        _leave(" = 0");
        return 0;
}

/**
 * netfs_writeback_single - Write back a monolithic payload
 * @mapping: The mapping to write from
 * @wbc: Hints from the VM
 * @iter: Data to write, must be ITER_FOLIOQ.
 *
 * Write a monolithic, non-pagecache object back to the server and/or
 * the cache.
 */
int netfs_writeback_single(struct address_space *mapping,
                           struct writeback_control *wbc,
                           struct iov_iter *iter)
{
        struct netfs_io_request *wreq;
        struct netfs_inode *ictx = netfs_inode(mapping->host);
        struct folio_queue *fq;
        size_t size = iov_iter_count(iter);
        int ret;

        if (WARN_ON_ONCE(!iov_iter_is_folioq(iter)))
                return -EIO;

        if (!mutex_trylock(&ictx->wb_lock)) {
                if (wbc->sync_mode == WB_SYNC_NONE) {
                        netfs_stat(&netfs_n_wb_lock_skip);
                        return 0;
                }
                netfs_stat(&netfs_n_wb_lock_wait);
                mutex_lock(&ictx->wb_lock);
        }

        wreq = netfs_create_write_req(mapping, NULL, 0, NETFS_WRITEBACK_SINGLE);
        if (IS_ERR(wreq)) {
                ret = PTR_ERR(wreq);
                goto couldnt_start;
        }

        __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags);
        trace_netfs_write(wreq, netfs_write_trace_writeback_single);
        netfs_stat(&netfs_n_wh_writepages);

        if (!__test_and_set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
                wreq->netfs_ops->begin_writeback(wreq);

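        /* Walk the folio_queue chain, writing out each folio until we've
         * gone through the amount of data given by the iterator.
         */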
        for (fq = (struct folio_queue *)iter->folioq; fq; fq = fq->next) {
                for (int slot = 0; slot < folioq_count(fq); slot++) {
                        struct folio *folio = folioq_folio(fq, slot);
                        size_t part = umin(folioq_folio_size(fq, slot), size);

                        _debug("wbiter %lx %llx", folio->index, atomic64_read(&wreq->issued_to));

                        ret = netfs_write_folio_single(wreq, folio);
                        if (ret < 0)
                                goto stop;
                        size -= part;
                        if (size <= 0)
                                goto stop;
                }
        }

stop:
        for (int s = 0; s < NR_IO_STREAMS; s++)
                netfs_issue_write(wreq, &wreq->io_streams[s]);
        smp_wmb(); /* Write lists before ALL_QUEUED. */
        set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);

        mutex_unlock(&ictx->wb_lock);
        netfs_wake_collector(wreq);

        netfs_put_request(wreq, netfs_rreq_trace_put_return);
        _leave(" = %d", ret);
        return ret;

couldnt_start:
        mutex_unlock(&ictx->wb_lock);
        _leave(" = %d", ret);
        return ret;
}
EXPORT_SYMBOL(netfs_writeback_single);