root/sys/geom/geom_vfs.c
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2004 Poul-Henning Kamp
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#define EXTERR_CATEGORY EXTERR_CAT_GEOMVFS
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/exterrvar.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/sbuf.h>
#include <sys/vnode.h>
#include <sys/mount.h>

#include <geom/geom.h>
#include <geom/geom_vfs.h>

/*
 * Subroutines for use by filesystems.
 *
 * XXX: should maybe live somewhere else?
 */
#include <sys/buf.h>

/*
 * Per-geom state for a filesystem's GEOM consumer.  Allocated in
 * g_vfs_open(), hung off gp->softc and released in g_vfs_close().
 */
struct g_vfs_softc {
        /* Held around updates of sc_active, sc_orphaned and sc_event. */
        struct mtx       sc_mtx;
        /* Bufobj of the device vnode passed to g_vfs_open(). */
        struct bufobj   *sc_bo;
        /*
         * Event preallocated by g_vfs_orphan() when requests are still
         * in flight; g_vfs_done() consumes it to post g_vfs_destroy().
         */
        struct g_event  *sc_event;
        /*
         * Requests issued by g_vfs_strategy() that have not yet been
         * completed by g_vfs_done().
         */
        int              sc_active;
        /* Set by g_vfs_orphan(); new requests then fail with ENXIO. */
        bool             sc_orphaned;
        /*
         * Latched to 1 by g_vfs_done() when a failed buf carries
         * BX_CVTENXIO; afterwards every error is reported as ENXIO and
         * g_vfs_strategy() refuses new requests.
         */
        int              sc_enxio_active;
        /* Latched to 1 once the first ENXIO error has been printed. */
        int              sc_enxio_reported;
};

/*
 * Buffer operations installed on a device vnode's bufobj by g_vfs_open().
 * Strategy requests go to GEOM through g_vfs_strategy(); the write, sync
 * and bdflush operations use bufwrite, bufsync and bufbdflush.
 */
static struct buf_ops __g_vfs_bufops = {
        .bop_name =     "GEOM_VFS",
        .bop_write =    bufwrite,
        .bop_strategy = g_vfs_strategy, 
        .bop_sync =     bufsync,        
        .bop_bdflush =  bufbdflush
};

/* Non-static pointer to the operations vector above. */
struct buf_ops *g_vfs_bufops = &__g_vfs_bufops;

/* Forward declaration; the orphan handler is defined later in this file. */
static g_orphan_t g_vfs_orphan;

/*
 * The "VFS" GEOM class.  Geoms of this class are created by g_vfs_open();
 * the only class method provided is the orphan handler.
 */
static struct g_class g_vfs_class = {
        .name =         "VFS",
        .version =      G_VERSION,
        .orphan =       g_vfs_orphan,
};

DECLARE_GEOM_CLASS(g_vfs_class, g_vfs);

/*
 * Tear down an orphaned consumer: drop whatever access counts it still
 * holds and detach it from its provider.  Called directly by
 * g_vfs_orphan() when no I/O is in flight, or posted as an event by
 * g_vfs_done() once the last request has completed.
 *
 * If the softc is already gone (g_vfs_close() clears gp->softc), the geom
 * is withered here as well.
 *
 * arg:   the struct g_consumer to destroy.
 * flags: unused.
 */
static void
g_vfs_destroy(void *arg, int flags __unused)
{
        struct g_consumer *consumer;
        bool opened;

        g_topology_assert();

        consumer = arg;
        opened = consumer->acr > 0 || consumer->acw > 0 ||
            consumer->ace > 0;
        if (opened) {
                g_access(consumer, -consumer->acr, -consumer->acw,
                    -consumer->ace);
        }
        g_detach(consumer);

        /* While the softc is still present the geom is left alone. */
        if (consumer->geom->softc != NULL)
                return;
        g_wither_geom(consumer->geom, ENXIO);
}

/*
 * Completion handler for the bios issued by g_vfs_strategy().
 *
 * Updates the per-mount sync/async read and write counters, applies the
 * ENXIO conversion policy to failed requests, copies the result from the
 * bio back into the originating buf, drops the softc's count of active
 * requests and finally completes the buf with bufdone().  If this was the
 * last active request of an orphaned consumer, g_vfs_destroy() is posted
 * using the event stashed by g_vfs_orphan().
 *
 * bip: the finished bio; bio_caller2 holds the buf it was built from.
 */
static void
g_vfs_done(struct bio *bip)
{
        struct g_consumer *cp;
        struct g_event *event;
        struct g_vfs_softc *sc;
        struct buf *bp;
        int destroy;
        struct mount *mp;
        struct vnode *vp;
        struct cdev *cdevp;

        /*
         * Collect statistics on synchronous and asynchronous read
         * and write counts for disks that have associated filesystems.
         */
        bp = bip->bio_caller2;
        vp = bp->b_vp;
        if (vp != NULL) {
                /*
                 * If not a disk vnode, use its associated mount point
                 * otherwise use the mountpoint associated with the disk.
                 */
                VI_LOCK(vp);
                if (vp->v_type != VCHR ||
                    (cdevp = vp->v_rdev) == NULL ||
                    cdevp->si_devsw == NULL ||
                    (cdevp->si_devsw->d_flags & D_DISK) == 0)
                        mp = vp->v_mount;
                else
                        mp = cdevp->si_mountpt;
                if (mp != NULL) {
                        /* A disowned buf is counted as asynchronous I/O. */
                        if (bp->b_iocmd == BIO_READ) {
                                if (BUF_DISOWNED(bp))
                                        mp->mnt_stat.f_asyncreads++;
                                else
                                        mp->mnt_stat.f_syncreads++;
                        } else if (bp->b_iocmd == BIO_WRITE) {
                                if (BUF_DISOWNED(bp))
                                        mp->mnt_stat.f_asyncwrites++;
                                else
                                        mp->mnt_stat.f_syncwrites++;
                        }
                }
                VI_UNLOCK(vp);
        }

        cp = bip->bio_from;
        sc = cp->geom->softc;
        /* EOPNOTSUPP is neither reported nor converted. */
        if (bip->bio_error != 0 && bip->bio_error != EOPNOTSUPP) {
                /*
                 * The first failed buf marked BX_CVTENXIO latches
                 * sc_enxio_active; the cmpset ensures the message is
                 * printed only once.
                 */
                if ((bp->b_xflags & BX_CVTENXIO) != 0) {
                        if (atomic_cmpset_int(&sc->sc_enxio_active, 0, 1))
                                printf("g_vfs_done(): %s converting all errors to ENXIO\n",
                                    bip->bio_to->name);
                }
                if (sc->sc_enxio_active)
                        bip->bio_error = ENXIO;
                /*
                 * Errors other than ENXIO are always printed; ENXIO is
                 * printed only the first time, guarded by
                 * sc_enxio_reported.
                 */
                if (bip->bio_error != ENXIO ||
                    atomic_cmpset_int(&sc->sc_enxio_reported, 0, 1)) {
                        g_print_bio("g_vfs_done():", bip, "error = %d%s",
                            bip->bio_error,
                            bip->bio_error != ENXIO ? "" :
                            " suppressing further ENXIO");
                }
        }
        /* Hand the bio's flags, error and residual back to the buf. */
        bp->b_ioflags = bip->bio_flags;
        if (bip->bio_error)
                bp->b_ioflags |= BIO_ERROR;
        if ((bp->b_ioflags & BIO_EXTERR) != 0)
                bp->b_exterr = bip->bio_exterr;
        else
                bp->b_error = bip->bio_error;
        bp->b_resid = bp->b_bcount - bip->bio_completed;
        g_destroy_bio(bip);

        /*
         * Drop this request from the active count.  If it was the last
         * one and the consumer has been orphaned, take over the event
         * that g_vfs_orphan() left in the softc.
         */
        mtx_lock(&sc->sc_mtx);
        destroy = ((--sc->sc_active) == 0 && sc->sc_orphaned);
        if (destroy) {
                event = sc->sc_event;
                sc->sc_event = NULL;
        } else
                event = NULL;
        mtx_unlock(&sc->sc_mtx);
        /* Posted only after the mutex has been released. */
        if (destroy)
                g_post_event_ep(g_vfs_destroy, cp, event, NULL);

        bufdone(bp);
}

void
g_vfs_strategy(struct bufobj *bo, struct buf *bp)
{
        struct g_vfs_softc *sc;
        struct g_consumer *cp;
        struct bio *bip;

        cp = bo->bo_private;
        sc = cp->geom->softc;

        /*
         * If the provider has orphaned us, just return ENXIO.
         */
        mtx_lock(&sc->sc_mtx);
        if (sc->sc_orphaned || sc->sc_enxio_active) {
                mtx_unlock(&sc->sc_mtx);
                bp->b_error = ENXIO;
                bp->b_ioflags |= BIO_ERROR;
                EXTERROR_KE(&bp->b_exterr, ENXIO,
                    "orphaned or enxio active");
                bufdone(bp);
                return;
        }
        sc->sc_active++;
        mtx_unlock(&sc->sc_mtx);

        bip = g_alloc_bio();
        bip->bio_cmd = bp->b_iocmd;
        bip->bio_offset = bp->b_iooffset;
        bip->bio_length = bp->b_bcount;
        bdata2bio(bp, bip);
        if ((bp->b_flags & B_BARRIER) != 0) {
                bip->bio_flags |= BIO_ORDERED;
                bp->b_flags &= ~B_BARRIER;
        }
        if (bp->b_iocmd == BIO_SPEEDUP)
                bip->bio_flags |= bp->b_ioflags;
        bip->bio_done = g_vfs_done;
        bip->bio_caller2 = bp;
#if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING)
        buf_track(bp, __func__);
        bip->bio_track_bp = bp;
#endif
        g_io_request(bip, cp);
}

static void
g_vfs_orphan(struct g_consumer *cp)
{
        struct g_geom *gp;
        struct g_event *event;
        struct g_vfs_softc *sc;
        int destroy;

        g_topology_assert();

        gp = cp->geom;
        g_trace(G_T_TOPOLOGY, "g_vfs_orphan(%p(%s))", cp, gp->name);
        sc = gp->softc;
        if (sc == NULL)
                return;
        event = g_alloc_event(M_WAITOK);
        mtx_lock(&sc->sc_mtx);
        KASSERT(sc->sc_event == NULL, ("g_vfs %p already has an event", sc));
        sc->sc_orphaned = true;
        destroy = (sc->sc_active == 0);
        if (!destroy) {
                sc->sc_event = event;
                event = NULL;
        }
        mtx_unlock(&sc->sc_mtx);
        if (destroy) {
                g_free(event);
                g_vfs_destroy(cp, 0);
        }

        /*
         * Do not destroy the geom.  Filesystem will do that during unmount.
         */
}

int
g_vfs_open(struct vnode *vp, struct g_consumer **cpp, const char *fsname, int wr)
{
        struct g_geom *gp;
        struct g_provider *pp;
        struct g_consumer *cp;
        struct g_vfs_softc *sc;
        struct bufobj *bo;
        int error;

        g_topology_assert();

        *cpp = NULL;
        bo = &vp->v_bufobj;
        if (bo->bo_private != vp)
                return (EBUSY);

        pp = g_dev_getprovider(vp->v_rdev);
        if (pp == NULL)
                return (ENOENT);
        gp = g_new_geomf(&g_vfs_class, "%s.%s", fsname, pp->name);
        sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
        mtx_init(&sc->sc_mtx, "g_vfs", NULL, MTX_DEF);
        sc->sc_bo = bo;
        gp->softc = sc;
        cp = g_new_consumer(gp);
        error = g_attach(cp, pp);
        if (error) {
                g_wither_geom(gp, ENXIO);
                return (error);
        }
        error = g_access(cp, 1, wr, wr);
        if (error) {
                g_wither_geom(gp, ENXIO);
                return (error);
        }
        /*
         * Mediasize might not be set until first access (see g_disk_access()),
         * That's why we check it here and not earlier.
         */
        if (pp->mediasize == 0) {
                (void)g_access(cp, -1, -wr, -wr);
                g_wither_geom(gp, ENXIO);
                return (ENXIO);
        }
        vnode_create_disk_vobject(vp, pp->mediasize, curthread);
        *cpp = cp;
        cp->private = vp;
        cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
        bo->bo_ops = g_vfs_bufops;
        bo->bo_private = cp;
        bo->bo_bsize = pp->sectorsize;

        return (error);
}

/*
 * Undo g_vfs_open(): invalidate the bufobj's buffers, give the bufobj's
 * private pointer back to the value saved in cp->private, and release
 * the softc.
 *
 * cp: consumer obtained from g_vfs_open().
 *
 * The topology lock must be held.
 */
void
g_vfs_close(struct g_consumer *cp)
{
        struct g_vfs_softc *softc;
        struct g_geom *geom;

        g_topology_assert();

        geom = cp->geom;
        softc = geom->softc;

        bufobj_invalbuf(softc->sc_bo, V_SAVE, 0, 0);
        softc->sc_bo->bo_private = cp->private;

        /* From here on the geom no longer has a softc. */
        geom->softc = NULL;
        mtx_destroy(&softc->sc_mtx);

        /*
         * An orphaned consumer that is still attached to its provider is
         * left for g_vfs_destroy(), which withers the geom once it finds
         * the softc gone.  In every other case wither and close here.
         */
        if (!(softc->sc_orphaned && cp->provider != NULL))
                g_wither_geom_close(geom, ENXIO);

        KASSERT(softc->sc_event == NULL,
            ("g_vfs %p event is non-NULL", softc));
        g_free(softc);
}