root/sys/net/altq/altq_rmclass.c
/*-
 * Copyright (c) 1991-1997 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the Network Research
 *      Group at Lawrence Berkeley Laboratory.
 * 4. Neither the name of the University nor of the Laboratory may be used
 *    to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * LBL code modified by speer@eng.sun.com, May 1977.
 * For questions and/or comments, please send mail to cbq@ee.lbl.gov
 * $KAME: altq_rmclass.c,v 1.19 2005/04/13 03:44:25 suz Exp $
 */
#include "opt_altq.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/time.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_private.h>

#include <net/altq/if_altq.h>
#include <net/altq/altq.h>
#include <net/altq/altq_codel.h>
#include <net/altq/altq_rmclass.h>
#include <net/altq/altq_rmclass_debug.h>
#include <net/altq/altq_red.h>
#include <net/altq/altq_rio.h>

/*
 * Local Macros
 */
#define reset_cutoff(ifd)       { ifd->cutoff_ = RM_MAXDEPTH; }

/*
 * Local routines.
 */

static int      rmc_satisfied(struct rm_class *, struct timeval *);
static void     rmc_wrr_set_weights(struct rm_ifdat *);
static void     rmc_depth_compute(struct rm_class *);
static void     rmc_depth_recompute(rm_class_t *);

static mbuf_t   *_rmc_wrr_dequeue_next(struct rm_ifdat *, int);
static mbuf_t   *_rmc_prr_dequeue_next(struct rm_ifdat *, int);

static int      _rmc_addq(rm_class_t *, mbuf_t *);
static void     _rmc_dropq(rm_class_t *);
static mbuf_t   *_rmc_getq(rm_class_t *);
static mbuf_t   *_rmc_pollq(rm_class_t *);

static int      rmc_under_limit(struct rm_class *, struct timeval *);
static void     rmc_tl_satisfied(struct rm_ifdat *, struct timeval *);
static void     rmc_drop_action(struct rm_class *);
static void     rmc_restart(void *);
static void     rmc_root_overlimit(struct rm_class *, struct rm_class *);

#define BORROW_OFFTIME
/*
 * BORROW_OFFTIME (experimental):
 * borrow the offtime of the class borrowing from.
 * the reason is that when its own offtime is set, the class is unable
 * to borrow much, especially when cutoff is taking effect.
 * but when the borrowed class is overloaded (advidle is close to minidle),
 * use the borrowing class's offtime to avoid overload.
 */
#define ADJUST_CUTOFF
/*
 * ADJUST_CUTOFF (experimental):
 * if no underlimit class is found due to cutoff, increase cutoff and
 * retry the scheduling loop.
 * also, don't invoke delay_actions while cutoff is taking effect,
 * since a sleeping class won't have a chance to be scheduled in the
 * next loop.
 *
 * now heuristics for setting the top-level variable (cutoff_) becomes:
 *      1. if a packet arrives for a not-overlimit class, set cutoff
 *         to the depth of the class.
 *      2. if cutoff is i, and a packet arrives for an overlimit class
 *         with an underlimit ancestor at a lower level than i (say j),
 *         then set cutoff to j.
 *      3. at scheduling a packet, if there is no underlimit class
 *         due to the current cutoff level, increase cutoff by 1 and
 *         then try to schedule again.
 */

/*
 * rm_class_t *
 * rmc_newclass(...) - Create a new resource management class at priority
 * 'pri' on the interface given by 'ifd'.
 *
 * nsecPerByte  is the data rate of the interface in nanoseconds/byte.
 *              E.g., 800 for a 10Mb/s ethernet.  If the class gets less
 *              than 100% of the bandwidth, this number should be the
 *              'effective' rate for the class.  Let f be the
 *              bandwidth fraction allocated to this class, and let
 *              nsPerByte be the data rate of the output link in
 *              nanoseconds/byte.  Then nsecPerByte is set to
 *              nsPerByte / f.  E.g., 1600 (= 800 / .5)
 *              for a class that gets 50% of an ethernet's bandwidth.
 *
 * action       the routine to call when the class is over limit.
 *
 * maxq         max allowable queue size for class (in packets).
 *
 * parent       parent class pointer.
 *
 * borrow       class to borrow from (should be either 'parent' or null).
 *
 * maxidle      max value allowed for class 'idle' time estimate (this
 *              parameter determines how large an initial burst of packets
 *              can be before overlimit action is invoked.
 *
 * offtime      how long 'delay' action will delay when class goes over
 *              limit (this parameter determines the steady-state burst
 *              size when a class is running over its limit).
 *
 * Maxidle and offtime have to be computed from the following:  If the
 * average packet size is s, the bandwidth fraction allocated to this
 * class is f, we want to allow b packet bursts, and the gain of the
 * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then:
 *
 *   ptime = s * nsPerByte * (1 - f) / f
 *   maxidle = ptime * (1 - g^b) / g^b
 *   minidle = -ptime * (1 / (f - 1))
 *   offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1)
 *
 * Operationally, it's convenient to specify maxidle & offtime in units
 * independent of the link bandwidth so the maxidle & offtime passed to
 * this routine are the above values multiplied by 8*f/(1000*nsPerByte).
 * (The constant factor is a scale factor needed to make the parameters
 * integers.  This scaling also means that the 'unscaled' values of
 * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds,
 * not nanoseconds.)  Also note that the 'idle' filter computation keeps
 * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of
 * maxidle also must be scaled upward by this value.  Thus, the passed
 * values for maxidle and offtime can be computed as follows:
 *
 * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte)
 * offtime = offtime * 8 / (1000 * nsecPerByte)
 *
 * When USE_HRTIME is employed, then maxidle and offtime become:
 *      maxidle = maxilde * (8.0 / nsecPerByte);
 *      offtime = offtime * (8.0 / nsecPerByte);
 */
struct rm_class *
rmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte,
    void (*action)(rm_class_t *, rm_class_t *), int maxq,
    struct rm_class *parent, struct rm_class *borrow, u_int maxidle,
    int minidle, u_int offtime, int pktsize, int flags)
{
        struct rm_class *cl;
        struct rm_class *peer;
        int              s;

        if (pri >= RM_MAXPRIO)
                return (NULL);
#ifndef ALTQ_RED
        if (flags & RMCF_RED) {
#ifdef ALTQ_DEBUG
                printf("rmc_newclass: RED not configured for CBQ!\n");
#endif
                return (NULL);
        }
#endif
#ifndef ALTQ_RIO
        if (flags & RMCF_RIO) {
#ifdef ALTQ_DEBUG
                printf("rmc_newclass: RIO not configured for CBQ!\n");
#endif
                return (NULL);
        }
#endif
#ifndef ALTQ_CODEL
        if (flags & RMCF_CODEL) {
#ifdef ALTQ_DEBUG
                printf("rmc_newclass: CODEL not configured for CBQ!\n");
#endif
                return (NULL);
        }
#endif

        cl = malloc(sizeof(struct rm_class), M_DEVBUF, M_NOWAIT | M_ZERO);
        if (cl == NULL)
                return (NULL);
        CALLOUT_INIT(&cl->callout_);
        cl->q_ = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO);
        if (cl->q_ == NULL) {
                free(cl, M_DEVBUF);
                return (NULL);
        }

        /*
         * Class initialization.
         */
        cl->children_ = NULL;
        cl->parent_ = parent;
        cl->borrow_ = borrow;
        cl->leaf_ = 1;
        cl->ifdat_ = ifd;
        cl->pri_ = pri;
        cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
        cl->depth_ = 0;
        cl->qthresh_ = 0;
        cl->ns_per_byte_ = nsecPerByte;

        qlimit(cl->q_) = maxq;
        qtype(cl->q_) = Q_DROPHEAD;
        qlen(cl->q_) = 0;
        cl->flags_ = flags;

#if 1 /* minidle is also scaled in ALTQ */
        cl->minidle_ = (minidle * (int)nsecPerByte) / 8;
        if (cl->minidle_ > 0)
                cl->minidle_ = 0;
#else
        cl->minidle_ = minidle;
#endif
        cl->maxidle_ = (maxidle * nsecPerByte) / 8;
        if (cl->maxidle_ == 0)
                cl->maxidle_ = 1;
#if 1 /* offtime is also scaled in ALTQ */
        cl->avgidle_ = cl->maxidle_;
        cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
        if (cl->offtime_ == 0)
                cl->offtime_ = 1;
#else
        cl->avgidle_ = 0;
        cl->offtime_ = (offtime * nsecPerByte) / 8;
#endif
        cl->overlimit = action;

#ifdef ALTQ_RED
        if (flags & (RMCF_RED|RMCF_RIO)) {
                int red_flags, red_pkttime;

                red_flags = 0;
                if (flags & RMCF_ECN)
                        red_flags |= REDF_ECN;
                if (flags & RMCF_FLOWVALVE)
                        red_flags |= REDF_FLOWVALVE;
#ifdef ALTQ_RIO
                if (flags & RMCF_CLEARDSCP)
                        red_flags |= RIOF_CLEARDSCP;
#endif
                red_pkttime = nsecPerByte * pktsize  / 1000;

                if (flags & RMCF_RED) {
                        cl->red_ = red_alloc(0, 0,
                            qlimit(cl->q_) * 10/100,
                            qlimit(cl->q_) * 30/100,
                            red_flags, red_pkttime);
                        if (cl->red_ != NULL)
                                qtype(cl->q_) = Q_RED;
                }
#ifdef ALTQ_RIO
                else {
                        cl->red_ = (red_t *)rio_alloc(0, NULL,
                                                      red_flags, red_pkttime);
                        if (cl->red_ != NULL)
                                qtype(cl->q_) = Q_RIO;
                }
#endif
        }
#endif /* ALTQ_RED */
#ifdef ALTQ_CODEL
        if (flags & RMCF_CODEL) {
                cl->codel_ = codel_alloc(5, 100, 0);
                if (cl->codel_ != NULL)
                        qtype(cl->q_) = Q_CODEL;
        }
#endif

        /*
         * put the class into the class tree
         */
        s = splnet();
        IFQ_LOCK(ifd->ifq_);
        if ((peer = ifd->active_[pri]) != NULL) {
                /* find the last class at this pri */
                cl->peer_ = peer;
                while (peer->peer_ != ifd->active_[pri])
                        peer = peer->peer_;
                peer->peer_ = cl;
        } else {
                ifd->active_[pri] = cl;
                cl->peer_ = cl;
        }

        if (cl->parent_) {
                cl->next_ = parent->children_;
                parent->children_ = cl;
                parent->leaf_ = 0;
        }

        /*
         * Compute the depth of this class and its ancestors in the class
         * hierarchy.
         */
        rmc_depth_compute(cl);

        /*
         * If CBQ's WRR is enabled, then initialize the class WRR state.
         */
        if (ifd->wrr_) {
                ifd->num_[pri]++;
                ifd->alloc_[pri] += cl->allotment_;
                rmc_wrr_set_weights(ifd);
        }
        IFQ_UNLOCK(ifd->ifq_);
        splx(s);
        return (cl);
}

int
rmc_modclass(struct rm_class *cl, u_int nsecPerByte, int maxq, u_int maxidle,
    int minidle, u_int offtime, int pktsize)
{
        struct rm_ifdat *ifd;
        u_int            old_allotment;
        int              s;

        ifd = cl->ifdat_;
        old_allotment = cl->allotment_;

        s = splnet();
        IFQ_LOCK(ifd->ifq_);
        cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
        cl->qthresh_ = 0;
        cl->ns_per_byte_ = nsecPerByte;

        qlimit(cl->q_) = maxq;

#if 1 /* minidle is also scaled in ALTQ */
        cl->minidle_ = (minidle * nsecPerByte) / 8;
        if (cl->minidle_ > 0)
                cl->minidle_ = 0;
#else
        cl->minidle_ = minidle;
#endif
        cl->maxidle_ = (maxidle * nsecPerByte) / 8;
        if (cl->maxidle_ == 0)
                cl->maxidle_ = 1;
#if 1 /* offtime is also scaled in ALTQ */
        cl->avgidle_ = cl->maxidle_;
        cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
        if (cl->offtime_ == 0)
                cl->offtime_ = 1;
#else
        cl->avgidle_ = 0;
        cl->offtime_ = (offtime * nsecPerByte) / 8;
#endif

        /*
         * If CBQ's WRR is enabled, then initialize the class WRR state.
         */
        if (ifd->wrr_) {
                ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment;
                rmc_wrr_set_weights(ifd);
        }
        IFQ_UNLOCK(ifd->ifq_);
        splx(s);
        return (0);
}

/*
 * static void
 * rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes
 *      the appropriate run robin weights for the CBQ weighted round robin
 *      algorithm.
 *
 *      Returns: NONE
 */

static void
rmc_wrr_set_weights(struct rm_ifdat *ifd)
{
        int             i;
        struct rm_class *cl, *clh;

        for (i = 0; i < RM_MAXPRIO; i++) {
                /*
                 * This is inverted from that of the simulator to
                 * maintain precision.
                 */
                if (ifd->num_[i] == 0)
                        ifd->M_[i] = 0;
                else
                        ifd->M_[i] = ifd->alloc_[i] /
                                (ifd->num_[i] * ifd->maxpkt_);
                /*
                 * Compute the weighted allotment for each class.
                 * This takes the expensive div instruction out
                 * of the main loop for the wrr scheduling path.
                 * These only get recomputed when a class comes or
                 * goes.
                 */
                if (ifd->active_[i] != NULL) {
                        clh = cl = ifd->active_[i];
                        do {
                                /* safe-guard for slow link or alloc_ == 0 */
                                if (ifd->M_[i] == 0)
                                        cl->w_allotment_ = 0;
                                else
                                        cl->w_allotment_ = cl->allotment_ /
                                                ifd->M_[i];
                                cl = cl->peer_;
                        } while ((cl != NULL) && (cl != clh));
                }
        }
}

int
rmc_get_weight(struct rm_ifdat *ifd, int pri)
{
        if ((pri >= 0) && (pri < RM_MAXPRIO))
                return (ifd->M_[pri]);
        else
                return (0);
}

/*
 * static void
 * rmc_depth_compute(struct rm_class *cl) - This function computes the
 *      appropriate depth of class 'cl' and its ancestors.
 *
 *      Returns:        NONE
 */

static void
rmc_depth_compute(struct rm_class *cl)
{
        rm_class_t      *t = cl, *p;

        /*
         * Recompute the depth for the branch of the tree.
         */
        while (t != NULL) {
                p = t->parent_;
                if (p && (t->depth_ >= p->depth_)) {
                        p->depth_ = t->depth_ + 1;
                        t = p;
                } else
                        t = NULL;
        }
}

/*
 * static void
 * rmc_depth_recompute(struct rm_class *cl) - This function re-computes
 *      the depth of the tree after a class has been deleted.
 *
 *      Returns:        NONE
 */

static void
rmc_depth_recompute(rm_class_t *cl)
{
#if 1 /* ALTQ */
        rm_class_t      *p, *t;

        p = cl;
        while (p != NULL) {
                if ((t = p->children_) == NULL) {
                        p->depth_ = 0;
                } else {
                        int cdepth = 0;

                        while (t != NULL) {
                                if (t->depth_ > cdepth)
                                        cdepth = t->depth_;
                                t = t->next_;
                        }

                        if (p->depth_ == cdepth + 1)
                                /* no change to this parent */
                                return;

                        p->depth_ = cdepth + 1;
                }

                p = p->parent_;
        }
#else
        rm_class_t      *t;

        if (cl->depth_ >= 1) {
                if (cl->children_ == NULL) {
                        cl->depth_ = 0;
                } else if ((t = cl->children_) != NULL) {
                        while (t != NULL) {
                                if (t->children_ != NULL)
                                        rmc_depth_recompute(t);
                                t = t->next_;
                        }
                } else
                        rmc_depth_compute(cl);
        }
#endif
}

/*
 * void
 * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This
 *      function deletes a class from the link-sharing structure and frees
 *      all resources associated with the class.
 *
 *      Returns: NONE
 */

void
rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl)
{
        struct rm_class *p, *head, *previous;
        int              s;

        ASSERT(cl->children_ == NULL);

        if (cl->sleeping_)
                CALLOUT_STOP(&cl->callout_);

        s = splnet();
        IFQ_LOCK(ifd->ifq_);
        /*
         * Free packets in the packet queue.
         * XXX - this may not be a desired behavior.  Packets should be
         *              re-queued.
         */
        rmc_dropall(cl);

        /*
         * If the class has a parent, then remove the class from the
         * class from the parent's children chain.
         */
        if (cl->parent_ != NULL) {
                head = cl->parent_->children_;
                p = previous = head;
                if (head->next_ == NULL) {
                        ASSERT(head == cl);
                        cl->parent_->children_ = NULL;
                        cl->parent_->leaf_ = 1;
                } else while (p != NULL) {
                        if (p == cl) {
                                if (cl == head)
                                        cl->parent_->children_ = cl->next_;
                                else
                                        previous->next_ = cl->next_;
                                cl->next_ = NULL;
                                p = NULL;
                        } else {
                                previous = p;
                                p = p->next_;
                        }
                }
        }

        /*
         * Delete class from class priority peer list.
         */
        if ((p = ifd->active_[cl->pri_]) != NULL) {
                /*
                 * If there is more than one member of this priority
                 * level, then look for class(cl) in the priority level.
                 */
                if (p != p->peer_) {
                        while (p->peer_ != cl)
                                p = p->peer_;
                        p->peer_ = cl->peer_;

                        if (ifd->active_[cl->pri_] == cl)
                                ifd->active_[cl->pri_] = cl->peer_;
                } else {
                        ASSERT(p == cl);
                        ifd->active_[cl->pri_] = NULL;
                }
        }

        /*
         * Recompute the WRR weights.
         */
        if (ifd->wrr_) {
                ifd->alloc_[cl->pri_] -= cl->allotment_;
                ifd->num_[cl->pri_]--;
                rmc_wrr_set_weights(ifd);
        }

        /*
         * Re-compute the depth of the tree.
         */
#if 1 /* ALTQ */
        rmc_depth_recompute(cl->parent_);
#else
        rmc_depth_recompute(ifd->root_);
#endif

        IFQ_UNLOCK(ifd->ifq_);
        splx(s);

        /*
         * Free the class structure.
         */
        if (cl->red_ != NULL) {
#ifdef ALTQ_RIO
                if (q_is_rio(cl->q_))
                        rio_destroy((rio_t *)cl->red_);
#endif
#ifdef ALTQ_RED
                if (q_is_red(cl->q_))
                        red_destroy(cl->red_);
#endif
#ifdef ALTQ_CODEL
                if (q_is_codel(cl->q_))
                        codel_destroy(cl->codel_);
#endif
        }
        free(cl->q_, M_DEVBUF);
        free(cl, M_DEVBUF);
}

/*
 * void
 * rmc_init(...) - Initialize the resource management data structures
 *      associated with the output portion of interface 'ifp'.  'ifd' is
 *      where the structures will be built (for backwards compatibility, the
 *      structures aren't kept in the ifnet struct).  'nsecPerByte'
 *      gives the link speed (inverse of bandwidth) in nanoseconds/byte.
 *      'restart' is the driver-specific routine that the generic 'delay
 *      until under limit' action will call to restart output.  `maxq'
 *      is the queue size of the 'link' & 'default' classes.  'maxqueued'
 *      is the maximum number of packets that the resource management
 *      code will allow to be queued 'downstream' (this is typically 1).
 *
 *      Returns:        NONE
 */

void
rmc_init(struct ifaltq *ifq, struct rm_ifdat *ifd, u_int nsecPerByte,
    void (*restart)(struct ifaltq *), int maxq, int maxqueued, u_int maxidle,
    int minidle, u_int offtime, int flags)
{
        int             i, mtu;

        /*
         * Initialize the CBQ tracing/debug facility.
         */
        CBQTRACEINIT();

        bzero((char *)ifd, sizeof (*ifd));
        mtu = ifq->altq_ifp->if_mtu;
        ifd->ifq_ = ifq;
        ifd->restart = restart;
        ifd->maxqueued_ = maxqueued;
        ifd->ns_per_byte_ = nsecPerByte;
        ifd->maxpkt_ = mtu;
        ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0;
        ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0;
#if 1
        ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16;
        if (mtu * nsecPerByte > 10 * 1000000)
                ifd->maxiftime_ /= 4;
#endif

        reset_cutoff(ifd);
        CBQTRACE(rmc_init, 'INIT', ifd->cutoff_);

        /*
         * Initialize the CBQ's WRR state.
         */
        for (i = 0; i < RM_MAXPRIO; i++) {
                ifd->alloc_[i] = 0;
                ifd->M_[i] = 0;
                ifd->num_[i] = 0;
                ifd->na_[i] = 0;
                ifd->active_[i] = NULL;
        }

        /*
         * Initialize current packet state.
         */
        ifd->qi_ = 0;
        ifd->qo_ = 0;
        for (i = 0; i < RM_MAXQUEUED; i++) {
                ifd->class_[i] = NULL;
                ifd->curlen_[i] = 0;
                ifd->borrowed_[i] = NULL;
        }

        /*
         * Create the root class of the link-sharing structure.
         */
        if ((ifd->root_ = rmc_newclass(0, ifd,
                                       nsecPerByte,
                                       rmc_root_overlimit, maxq, 0, 0,
                                       maxidle, minidle, offtime,
                                       0, 0)) == NULL) {
                printf("rmc_init: root class not allocated\n");
                return ;
        }
        ifd->root_->depth_ = 0;
}

/*
 * void
 * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by
 *      mbuf 'm' to queue for resource class 'cl'.  This routine is called
 *      by a driver's if_output routine.  This routine must be called with
 *      output packet completion interrupts locked out (to avoid racing with
 *      rmc_dequeue_next).
 *
 *      Returns:        0 on successful queueing
 *                      -1 when packet drop occurs
 */
int
rmc_queue_packet(struct rm_class *cl, mbuf_t *m)
{
        struct timeval   now;
        struct rm_ifdat *ifd = cl->ifdat_;
        int              cpri = cl->pri_;
        int              is_empty = qempty(cl->q_);

        RM_GETTIME(now);
        if (ifd->cutoff_ > 0) {
                if (TV_LT(&cl->undertime_, &now)) {
                        if (ifd->cutoff_ > cl->depth_)
                                ifd->cutoff_ = cl->depth_;
                        CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
                }
#if 1 /* ALTQ */
                else {
                        /*
                         * the class is overlimit. if the class has
                         * underlimit ancestors, set cutoff to the lowest
                         * depth among them.
                         */
                        struct rm_class *borrow = cl->borrow_;

                        while (borrow != NULL &&
                               borrow->depth_ < ifd->cutoff_) {
                                if (TV_LT(&borrow->undertime_, &now)) {
                                        ifd->cutoff_ = borrow->depth_;
                                        CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_);
                                        break;
                                }
                                borrow = borrow->borrow_;
                        }
                }
#else /* !ALTQ */
                else if ((ifd->cutoff_ > 1) && cl->borrow_) {
                        if (TV_LT(&cl->borrow_->undertime_, &now)) {
                                ifd->cutoff_ = cl->borrow_->depth_;
                                CBQTRACE(rmc_queue_packet, 'ffob',
                                         cl->borrow_->depth_);
                        }
                }
#endif /* !ALTQ */
        }

        if (_rmc_addq(cl, m) < 0)
                /* failed */
                return (-1);

        if (is_empty) {
                CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle);
                ifd->na_[cpri]++;
        }

        if (qlen(cl->q_) > qlimit(cl->q_)) {
                /* note: qlimit can be set to 0 or 1 */
                rmc_drop_action(cl);
                return (-1);
        }
        return (0);
}

/*
 * void
 * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all
 *      classes to see if there are satified.
 */

static void
rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now)
{
        int              i;
        rm_class_t      *p, *bp;

        for (i = RM_MAXPRIO - 1; i >= 0; i--) {
                if ((bp = ifd->active_[i]) != NULL) {
                        p = bp;
                        do {
                                if (!rmc_satisfied(p, now)) {
                                        ifd->cutoff_ = p->depth_;
                                        return;
                                }
                                p = p->peer_;
                        } while (p != bp);
                }
        }

        reset_cutoff(ifd);
}

/*
 * rmc_satisfied - Return 1 of the class is satisfied.  O, otherwise.
 */

static int
rmc_satisfied(struct rm_class *cl, struct timeval *now)
{
        rm_class_t      *p;

        if (cl == NULL)
                return (1);
        if (TV_LT(now, &cl->undertime_))
                return (1);
        if (cl->depth_ == 0) {
                if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_))
                        return (0);
                else
                        return (1);
        }
        if (cl->children_ != NULL) {
                p = cl->children_;
                while (p != NULL) {
                        if (!rmc_satisfied(p, now))
                                return (0);
                        p = p->next_;
                }
        }

        return (1);
}

/*
 * Return 1 if class 'cl' is under limit or can borrow from a parent,
 * 0 if overlimit.  As a side-effect, this routine will invoke the
 * class overlimit action if the class if overlimit.
 */

static int
rmc_under_limit(struct rm_class *cl, struct timeval *now)
{
        rm_class_t      *p = cl;
        rm_class_t      *top;
        struct rm_ifdat *ifd = cl->ifdat_;

        ifd->borrowed_[ifd->qi_] = NULL;
        /*
         * If cl is the root class, then always return that it is
         * underlimit.  Otherwise, check to see if the class is underlimit.
         */
        if (cl->parent_ == NULL)
                return (1);

        if (cl->sleeping_) {
                if (TV_LT(now, &cl->undertime_))
                        return (0);

                CALLOUT_STOP(&cl->callout_);
                cl->sleeping_ = 0;
                cl->undertime_.tv_sec = 0;
                return (1);
        }

        top = NULL;
        while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) {
                if (((cl = cl->borrow_) == NULL) ||
                    (cl->depth_ > ifd->cutoff_)) {
#ifdef ADJUST_CUTOFF
                        if (cl != NULL)
                                /* cutoff is taking effect, just
                                   return false without calling
                                   the delay action. */
                                return (0);
#endif
#ifdef BORROW_OFFTIME
                        /*
                         * check if the class can borrow offtime too.
                         * borrow offtime from the top of the borrow
                         * chain if the top class is not overloaded.
                         */
                        if (cl != NULL) {
                                /* cutoff is taking effect, use this class as top. */
                                top = cl;
                                CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_);
                        }
                        if (top != NULL && top->avgidle_ == top->minidle_)
                                top = NULL;
                        p->overtime_ = *now;
                        (p->overlimit)(p, top);
#else
                        p->overtime_ = *now;
                        (p->overlimit)(p, NULL);
#endif
                        return (0);
                }
                top = cl;
        }

        if (cl != p)
                ifd->borrowed_[ifd->qi_] = cl;
        return (1);
}

/*
 * _rmc_wrr_dequeue_next() - This is scheduler for WRR as opposed to
 *      Packet-by-packet round robin.
 *
 * The heart of the weighted round-robin scheduler, which decides which
 * class next gets to send a packet.  Highest priority first, then
 * weighted round-robin within priorites.
 *
 * Each able-to-send class gets to send until its byte allocation is
 * exhausted.  Thus, the active pointer is only changed after a class has
 * exhausted its allocation.
 *
 * If the scheduler finds no class that is underlimit or able to borrow,
 * then the first class found that had a nonzero queue and is allowed to
 * borrow gets to send.
 */

static mbuf_t *
_rmc_wrr_dequeue_next(struct rm_ifdat *ifd, int op)
{
        struct rm_class *cl = NULL, *first = NULL;
        u_int            deficit;
        int              cpri;
        mbuf_t          *m;
        struct timeval   now;

        RM_GETTIME(now);

        /*
         * if the driver polls the top of the queue and then removes
         * the polled packet, we must return the same packet.
         */
        if (op == ALTDQ_REMOVE && ifd->pollcache_) {
                cl = ifd->pollcache_;
                cpri = cl->pri_;
                if (ifd->efficient_) {
                        /* check if this class is overlimit */
                        if (cl->undertime_.tv_sec != 0 &&
                            rmc_under_limit(cl, &now) == 0)
                                first = cl;
                }
                ifd->pollcache_ = NULL;
                goto _wrr_out;
        }
        else {
                /* mode == ALTDQ_POLL || pollcache == NULL */
                ifd->pollcache_ = NULL;
                ifd->borrowed_[ifd->qi_] = NULL;
        }
#ifdef ADJUST_CUTOFF
 _again:
#endif
        for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
                if (ifd->na_[cpri] == 0)
                        continue;
                deficit = 0;
                /*
                 * Loop through twice for a priority level, if some class
                 * was unable to send a packet the first round because
                 * of the weighted round-robin mechanism.
                 * During the second loop at this level, deficit==2.
                 * (This second loop is not needed if for every class,
                 * "M[cl->pri_])" times "cl->allotment" is greater than
                 * the byte size for the largest packet in the class.)
                 */
 _wrr_loop:
                cl = ifd->active_[cpri];
                ASSERT(cl != NULL);
                do {
                        if ((deficit < 2) && (cl->bytes_alloc_ <= 0))
                                cl->bytes_alloc_ += cl->w_allotment_;
                        if (!qempty(cl->q_)) {
                                if ((cl->undertime_.tv_sec == 0) ||
                                    rmc_under_limit(cl, &now)) {
                                        if (cl->bytes_alloc_ > 0 || deficit > 1)
                                                goto _wrr_out;

                                        /* underlimit but no alloc */
                                        deficit = 1;
#if 1
                                        ifd->borrowed_[ifd->qi_] = NULL;
#endif
                                }
                                else if (first == NULL && cl->borrow_ != NULL)
                                        first = cl; /* borrowing candidate */
                        }

                        cl->bytes_alloc_ = 0;
                        cl = cl->peer_;
                } while (cl != ifd->active_[cpri]);

                if (deficit == 1) {
                        /* first loop found an underlimit class with deficit */
                        /* Loop on same priority level, with new deficit.  */
                        deficit = 2;
                        goto _wrr_loop;
                }
        }

#ifdef ADJUST_CUTOFF
        /*
         * no underlimit class found.  if cutoff is taking effect,
         * increase cutoff and try again.
         */
        if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
                ifd->cutoff_++;
                CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_);
                goto _again;
        }
#endif /* ADJUST_CUTOFF */
        /*
         * If LINK_EFFICIENCY is turned on, then the first overlimit
         * class we encounter will send a packet if all the classes
         * of the link-sharing structure are overlimit.
         */
        reset_cutoff(ifd);
        CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_);

        if (!ifd->efficient_ || first == NULL)
                return (NULL);

        cl = first;
        cpri = cl->pri_;
#if 0   /* too time-consuming for nothing */
        if (cl->sleeping_)
                CALLOUT_STOP(&cl->callout_);
        cl->sleeping_ = 0;
        cl->undertime_.tv_sec = 0;
#endif
        ifd->borrowed_[ifd->qi_] = cl->borrow_;
        ifd->cutoff_ = cl->borrow_->depth_;

        /*
         * Deque the packet and do the book keeping...
         */
 _wrr_out:
        if (op == ALTDQ_REMOVE) {
                m = _rmc_getq(cl);
                if (m == NULL)
                        panic("_rmc_wrr_dequeue_next");
                if (qempty(cl->q_))
                        ifd->na_[cpri]--;

                /*
                 * Update class statistics and link data.
                 */
                if (cl->bytes_alloc_ > 0)
                        cl->bytes_alloc_ -= m_pktlen(m);

                if ((cl->bytes_alloc_ <= 0) || first == cl)
                        ifd->active_[cl->pri_] = cl->peer_;
                else
                        ifd->active_[cl->pri_] = cl;

                ifd->class_[ifd->qi_] = cl;
                ifd->curlen_[ifd->qi_] = m_pktlen(m);
                ifd->now_[ifd->qi_] = now;
                ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
                ifd->queued_++;
        } else {
                /* mode == ALTDQ_PPOLL */
                m = _rmc_pollq(cl);
                ifd->pollcache_ = cl;
        }
        return (m);
}

/*
 * Dequeue & return next packet from the highest priority class that
 * has a packet to send & has enough allocation to send it.  This
 * routine is called by a driver whenever it needs a new packet to
 * output.
 */
static mbuf_t *
_rmc_prr_dequeue_next(struct rm_ifdat *ifd, int op)
{
        mbuf_t          *m;
        int              cpri;
        struct rm_class *cl, *first = NULL;
        struct timeval   now;

        RM_GETTIME(now);

        /*
         * if the driver polls the top of the queue and then removes
         * the polled packet, we must return the same packet.
         */
        if (op == ALTDQ_REMOVE && ifd->pollcache_) {
                cl = ifd->pollcache_;
                cpri = cl->pri_;
                ifd->pollcache_ = NULL;
                goto _prr_out;
        } else {
                /* mode == ALTDQ_POLL || pollcache == NULL */
                ifd->pollcache_ = NULL;
                ifd->borrowed_[ifd->qi_] = NULL;
        }
#ifdef ADJUST_CUTOFF
 _again:
#endif
        for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
                if (ifd->na_[cpri] == 0)
                        continue;
                cl = ifd->active_[cpri];
                ASSERT(cl != NULL);
                do {
                        if (!qempty(cl->q_)) {
                                if ((cl->undertime_.tv_sec == 0) ||
                                    rmc_under_limit(cl, &now))
                                        goto _prr_out;
                                if (first == NULL && cl->borrow_ != NULL)
                                        first = cl;
                        }
                        cl = cl->peer_;
                } while (cl != ifd->active_[cpri]);
        }

#ifdef ADJUST_CUTOFF
        /*
         * no underlimit class found.  if cutoff is taking effect, increase
         * cutoff and try again.
         */
        if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
                ifd->cutoff_++;
                goto _again;
        }
#endif /* ADJUST_CUTOFF */
        /*
         * If LINK_EFFICIENCY is turned on, then the first overlimit
         * class we encounter will send a packet if all the classes
         * of the link-sharing structure are overlimit.
         */
        reset_cutoff(ifd);
        if (!ifd->efficient_ || first == NULL)
                return (NULL);

        cl = first;
        cpri = cl->pri_;
#if 0   /* too time-consuming for nothing */
        if (cl->sleeping_)
                CALLOUT_STOP(&cl->callout_);
        cl->sleeping_ = 0;
        cl->undertime_.tv_sec = 0;
#endif
        ifd->borrowed_[ifd->qi_] = cl->borrow_;
        ifd->cutoff_ = cl->borrow_->depth_;

        /*
         * Deque the packet and do the book keeping...
         */
 _prr_out:
        if (op == ALTDQ_REMOVE) {
                m = _rmc_getq(cl);
                if (m == NULL)
                        panic("_rmc_prr_dequeue_next");
                if (qempty(cl->q_))
                        ifd->na_[cpri]--;

                ifd->active_[cpri] = cl->peer_;

                ifd->class_[ifd->qi_] = cl;
                ifd->curlen_[ifd->qi_] = m_pktlen(m);
                ifd->now_[ifd->qi_] = now;
                ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
                ifd->queued_++;
        } else {
                /* mode == ALTDQ_POLL */
                m = _rmc_pollq(cl);
                ifd->pollcache_ = cl;
        }
        return (m);
}

/*
 * mbuf_t *
 * rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function
 *      is invoked by the packet driver to get the next packet to be
 *      dequeued and output on the link.  If WRR is enabled, then the
 *      WRR dequeue next routine will determine the next packet to sent.
 *      Otherwise, packet-by-packet round robin is invoked.
 *
 *      Returns:        NULL, if a packet is not available or if all
 *                      classes are overlimit.
 *
 *                      Otherwise, Pointer to the next packet.
 */

mbuf_t *
rmc_dequeue_next(struct rm_ifdat *ifd, int mode)
{
        if (ifd->queued_ >= ifd->maxqueued_)
                return (NULL);
        else if (ifd->wrr_)
                return (_rmc_wrr_dequeue_next(ifd, mode));
        else
                return (_rmc_prr_dequeue_next(ifd, mode));
}

/*
 * Update the utilization estimate for the packet that just completed.
 * The packet's class & the parent(s) of that class all get their
 * estimators updated.  This routine is called by the driver's output-
 * packet-completion interrupt service routine.
 */

/*
 * a macro to approximate "divide by 1000" that gives 0.000999,
 * if a value has enough effective digits.
 * (on pentium, mul takes 9 cycles but div takes 46!)
 */
#define NSEC_TO_USEC(t) (((t) >> 10) + ((t) >> 16) + ((t) >> 17))
void
rmc_update_class_util(struct rm_ifdat *ifd)
{
        int              idle, avgidle, pktlen;
        int              pkt_time, tidle;
        rm_class_t      *cl, *borrowed;
        rm_class_t      *borrows;
        struct timeval  *nowp;

        /*
         * Get the most recent completed class.
         */
        if ((cl = ifd->class_[ifd->qo_]) == NULL)
                return;

        pktlen = ifd->curlen_[ifd->qo_];
        borrowed = ifd->borrowed_[ifd->qo_];
        borrows = borrowed;

        PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);

        /*
         * Run estimator on class and its ancestors.
         */
        /*
         * rm_update_class_util is designed to be called when the
         * transfer is completed from a xmit complete interrupt,
         * but most drivers don't implement an upcall for that.
         * so, just use estimated completion time.
         * as a result, ifd->qi_ and ifd->qo_ are always synced.
         */
        nowp = &ifd->now_[ifd->qo_];
        /* get pkt_time (for link) in usec */
#if 1  /* use approximation */
        pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_;
        pkt_time = NSEC_TO_USEC(pkt_time);
#else
        pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;
#endif
#if 1 /* ALTQ4PPP */
        if (TV_LT(nowp, &ifd->ifnow_)) {
                int iftime;

                /*
                 * make sure the estimated completion time does not go
                 * too far.  it can happen when the link layer supports
                 * data compression or the interface speed is set to
                 * a much lower value.
                 */
                TV_DELTA(&ifd->ifnow_, nowp, iftime);
                if (iftime+pkt_time < ifd->maxiftime_) {
                        TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
                } else {
                        TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
                }
        } else {
                TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
        }
#else
        if (TV_LT(nowp, &ifd->ifnow_)) {
                TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
        } else {
                TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
        }
#endif

        while (cl != NULL) {
                TV_DELTA(&ifd->ifnow_, &cl->last_, idle);
                if (idle >= 2000000)
                        /*
                         * this class is idle enough, reset avgidle.
                         * (TV_DELTA returns 2000000 us when delta is large.)
                         */
                        cl->avgidle_ = cl->maxidle_;

                /* get pkt_time (for class) in usec */
#if 1  /* use approximation */
                pkt_time = pktlen * cl->ns_per_byte_;
                pkt_time = NSEC_TO_USEC(pkt_time);
#else
                pkt_time = pktlen * cl->ns_per_byte_ / 1000;
#endif
                idle -= pkt_time;

                avgidle = cl->avgidle_;
                avgidle += idle - (avgidle >> RM_FILTER_GAIN);
                cl->avgidle_ = avgidle;

                /* Are we overlimit ? */
                if (avgidle <= 0) {
                        CBQTRACE(rmc_update_class_util, 'milo', cl->stats_.handle);
#if 1 /* ALTQ */
                        /*
                         * need some lower bound for avgidle, otherwise
                         * a borrowing class gets unbounded penalty.
                         */
                        if (avgidle < cl->minidle_)
                                avgidle = cl->avgidle_ = cl->minidle_;
#endif
                        /* set next idle to make avgidle 0 */
                        tidle = pkt_time +
                                (((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
                        TV_ADD_DELTA(nowp, tidle, &cl->undertime_);
                        ++cl->stats_.over;
                } else {
                        cl->avgidle_ =
                            (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;
                        cl->undertime_.tv_sec = 0;
                        if (cl->sleeping_) {
                                CALLOUT_STOP(&cl->callout_);
                                cl->sleeping_ = 0;
                        }
                }

                if (borrows != NULL) {
                        if (borrows != cl)
                                ++cl->stats_.borrows;
                        else
                                borrows = NULL;
                }
                cl->last_ = ifd->ifnow_;
                cl->last_pkttime_ = pkt_time;

#if 1
                if (cl->parent_ == NULL) {
                        /* take stats of root class */
                        PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
                }
#endif

                cl = cl->parent_;
        }

        /*
         * Check to see if cutoff needs to set to a new level.
         */
        cl = ifd->class_[ifd->qo_];
        if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {
#if 1 /* ALTQ */
                if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) {
                        rmc_tl_satisfied(ifd, nowp);
                        CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
                } else {
                        ifd->cutoff_ = borrowed->depth_;
                        CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
                }
#else /* !ALTQ */
                if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) {
                        reset_cutoff(ifd);
#ifdef notdef
                        rmc_tl_satisfied(ifd, &now);
#endif
                        CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
                } else {
                        ifd->cutoff_ = borrowed->depth_;
                        CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
                }
#endif /* !ALTQ */
        }

        /*
         * Release class slot
         */
        ifd->borrowed_[ifd->qo_] = NULL;
        ifd->class_[ifd->qo_] = NULL;
        ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_;
        ifd->queued_--;
}

/*
 * void
 * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)
 *      over-limit action routines.  These get invoked by rmc_under_limit()
 *      if a class with packets to send if over its bandwidth limit & can't
 *      borrow from a parent class.
 *
 *      Returns: NONE
 */

static void
rmc_drop_action(struct rm_class *cl)
{
        struct rm_ifdat *ifd = cl->ifdat_;

        ASSERT(qlen(cl->q_) > 0);
        _rmc_dropq(cl);
        if (qempty(cl->q_))
                ifd->na_[cl->pri_]--;
}

void rmc_dropall(struct rm_class *cl)
{
        struct rm_ifdat *ifd = cl->ifdat_;

        if (!qempty(cl->q_)) {
                _flushq(cl->q_);

                ifd->na_[cl->pri_]--;
        }
}

static int
hzto(struct timeval *tv)
{
        struct timeval t2;

        getmicrotime(&t2);
        t2.tv_sec = tv->tv_sec - t2.tv_sec;
        t2.tv_usec = tv->tv_usec - t2.tv_usec;
        return (tvtohz(&t2));
}

/*
 * void
 * rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ
 *      delay action routine.  It is invoked via rmc_under_limit when the
 *      packet is discoverd to be overlimit.
 *
 *      If the delay action is result of borrow class being overlimit, then
 *      delay for the offtime of the borrowing class that is overlimit.
 *
 *      Returns: NONE
 */

void
rmc_delay_action(struct rm_class *cl, struct rm_class *borrow)
{
        int     delay, t, extradelay;

        cl->stats_.overactions++;
        TV_DELTA(&cl->undertime_, &cl->overtime_, delay);
#ifndef BORROW_OFFTIME
        delay += cl->offtime_;
#endif

        if (!cl->sleeping_) {
                CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle);
#ifdef BORROW_OFFTIME
                if (borrow != NULL)
                        extradelay = borrow->offtime_;
                else
#endif
                        extradelay = cl->offtime_;

#ifdef ALTQ
                /*
                 * XXX recalculate suspend time:
                 * current undertime is (tidle + pkt_time) calculated
                 * from the last transmission.
                 *      tidle: time required to bring avgidle back to 0
                 *      pkt_time: target waiting time for this class
                 * we need to replace pkt_time by offtime
                 */
                extradelay -= cl->last_pkttime_;
#endif
                if (extradelay > 0) {
                        TV_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_);
                        delay += extradelay;
                }

                cl->sleeping_ = 1;
                cl->stats_.delays++;

                /*
                 * Since packets are phased randomly with respect to the
                 * clock, 1 tick (the next clock tick) can be an arbitrarily
                 * short time so we have to wait for at least two ticks.
                 * NOTE:  If there's no other traffic, we need the timer as
                 * a 'backstop' to restart this class.
                 */
                if (delay > tick * 2) {
                        /* FreeBSD rounds up the tick */
                        t = hzto(&cl->undertime_);
                } else
                        t = 2;
                CALLOUT_RESET(&cl->callout_, t, rmc_restart, cl);
        }
}

/*
 * void
 * rmc_restart() - is just a helper routine for rmc_delay_action -- it is
 *      called by the system timer code & is responsible checking if the
 *      class is still sleeping (it might have been restarted as a side
 *      effect of the queue scan on a packet arrival) and, if so, restarting
 *      output for the class.  Inspecting the class state & restarting output
 *      require locking the class structure.  In general the driver is
 *      responsible for locking but this is the only routine that is not
 *      called directly or indirectly from the interface driver so it has
 *      know about system locking conventions.  Under bsd, locking is done
 *      by raising IPL to splimp so that's what's implemented here.  On a
 *      different system this would probably need to be changed.
 *
 *      Returns:        NONE
 */

static void
rmc_restart(void *arg)
{
        struct rm_class *cl = arg;
        struct rm_ifdat *ifd = cl->ifdat_;
        struct epoch_tracker et;
        int              s;

        s = splnet();
        NET_EPOCH_ENTER(et);
        IFQ_LOCK(ifd->ifq_);
        CURVNET_SET(ifd->ifq_->altq_ifp->if_vnet);
        if (cl->sleeping_) {
                cl->sleeping_ = 0;
                cl->undertime_.tv_sec = 0;

                if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) {
                        CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);
                        (ifd->restart)(ifd->ifq_);
                }
        }
        CURVNET_RESTORE();
        IFQ_UNLOCK(ifd->ifq_);
        NET_EPOCH_EXIT(et);
        splx(s);
}

/*
 * void
 * rmc_root_overlimit(struct rm_class *cl) - This the generic overlimit
 *      handling routine for the root class of the link sharing structure.
 *
 *      Returns: NONE
 */

static void
rmc_root_overlimit(struct rm_class *cl, struct rm_class *borrow)
{
    panic("rmc_root_overlimit");
}

/*
 * Packet Queue handling routines.  Eventually, this is to localize the
 *      effects on the code whether queues are red queues or droptail
 *      queues.
 */

static int
_rmc_addq(rm_class_t *cl, mbuf_t *m)
{
#ifdef ALTQ_RIO
        if (q_is_rio(cl->q_))
                return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_);
#endif
#ifdef ALTQ_RED
        if (q_is_red(cl->q_))
                return red_addq(cl->red_, cl->q_, m, cl->pktattr_);
#endif /* ALTQ_RED */
#ifdef ALTQ_CODEL
        if (q_is_codel(cl->q_))
                return codel_addq(cl->codel_, cl->q_, m);
#endif

        if (cl->flags_ & RMCF_CLEARDSCP)
                write_dsfield(m, cl->pktattr_, 0);

        _addq(cl->q_, m);
        return (0);
}

/* note: _rmc_dropq is not called for red */
static void
_rmc_dropq(rm_class_t *cl)
{
        mbuf_t  *m;

        if ((m = _getq(cl->q_)) != NULL)
                m_freem(m);
}

static mbuf_t *
_rmc_getq(rm_class_t *cl)
{
#ifdef ALTQ_RIO
        if (q_is_rio(cl->q_))
                return rio_getq((rio_t *)cl->red_, cl->q_);
#endif
#ifdef ALTQ_RED
        if (q_is_red(cl->q_))
                return red_getq(cl->red_, cl->q_);
#endif
#ifdef ALTQ_CODEL
        if (q_is_codel(cl->q_))
                return codel_getq(cl->codel_, cl->q_);
#endif
        return _getq(cl->q_);
}

static mbuf_t *
_rmc_pollq(rm_class_t *cl)
{
        return qhead(cl->q_);
}

#ifdef CBQ_TRACE

struct cbqtrace          cbqtrace_buffer[NCBQTRACE+1];
struct cbqtrace         *cbqtrace_ptr = NULL;
int                      cbqtrace_count;

/*
 * DDB hook to trace cbq events:
 *  the last 1024 events are held in a circular buffer.
 *  use "call cbqtrace_dump(N)" to display 20 events from Nth event.
 */
void cbqtrace_dump(int);
static char *rmc_funcname(void *);

static struct rmc_funcs {
        void    *func;
        char    *name;
} rmc_funcs[] =
{
        rmc_init,               "rmc_init",
        rmc_queue_packet,       "rmc_queue_packet",
        rmc_under_limit,        "rmc_under_limit",
        rmc_update_class_util,  "rmc_update_class_util",
        rmc_delay_action,       "rmc_delay_action",
        rmc_restart,            "rmc_restart",
        _rmc_wrr_dequeue_next,  "_rmc_wrr_dequeue_next",
        NULL,                   NULL
};

static char *rmc_funcname(void *func)
{
        struct rmc_funcs *fp;

        for (fp = rmc_funcs; fp->func != NULL; fp++)
                if (fp->func == func)
                        return (fp->name);
        return ("unknown");
}

void cbqtrace_dump(int counter)
{
        int      i, *p;
        char    *cp;

        counter = counter % NCBQTRACE;
        p = (int *)&cbqtrace_buffer[counter];

        for (i=0; i<20; i++) {
                printf("[0x%x] ", *p++);
                printf("%s: ", rmc_funcname((void *)*p++));
                cp = (char *)p++;
                printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]);
                printf("%d\n",*p++);

                if (p >= (int *)&cbqtrace_buffer[NCBQTRACE])
                        p = (int *)cbqtrace_buffer;
        }
}
#endif /* CBQ_TRACE */
#endif /* ALTQ_CBQ */

#if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || \
    defined(ALTQ_HFSC) || defined(ALTQ_PRIQ) || defined(ALTQ_CODEL)
#if !defined(__GNUC__) || defined(ALTQ_DEBUG)

void
_addq(class_queue_t *q, mbuf_t *m)
{
        mbuf_t  *m0;

        if ((m0 = qtail(q)) != NULL)
                m->m_nextpkt = m0->m_nextpkt;
        else
                m0 = m;
        m0->m_nextpkt = m;
        qtail(q) = m;
        qlen(q)++;
}

mbuf_t *
_getq(class_queue_t *q)
{
        mbuf_t  *m, *m0;

        if ((m = qtail(q)) == NULL)
                return (NULL);
        if ((m0 = m->m_nextpkt) != m)
                m->m_nextpkt = m0->m_nextpkt;
        else {
                ASSERT(qlen(q) == 1);
                qtail(q) = NULL;
        }
        qlen(q)--;
        m0->m_nextpkt = NULL;
        return (m0);
}

/* drop a packet at the tail of the queue */
mbuf_t *
_getq_tail(class_queue_t *q)
{
        mbuf_t  *m, *m0, *prev;

        if ((m = m0 = qtail(q)) == NULL)
                return NULL;
        do {
                prev = m0;
                m0 = m0->m_nextpkt;
        } while (m0 != m);
        prev->m_nextpkt = m->m_nextpkt;
        if (prev == m)  {
                ASSERT(qlen(q) == 1);
                qtail(q) = NULL;
        } else
                qtail(q) = prev;
        qlen(q)--;
        m->m_nextpkt = NULL;
        return (m);
}

/* randomly select a packet in the queue */
mbuf_t *
_getq_random(class_queue_t *q)
{
        struct mbuf     *m;
        int              i, n;

        if ((m = qtail(q)) == NULL)
                return NULL;
        if (m->m_nextpkt == m) {
                ASSERT(qlen(q) == 1);
                qtail(q) = NULL;
        } else {
                struct mbuf *prev = NULL;

                n = arc4random() % qlen(q) + 1;
                for (i = 0; i < n; i++) {
                        prev = m;
                        m = m->m_nextpkt;
                }
                prev->m_nextpkt = m->m_nextpkt;
                if (m == qtail(q))
                        qtail(q) = prev;
        }
        qlen(q)--;
        m->m_nextpkt = NULL;
        return (m);
}

void
_removeq(class_queue_t *q, mbuf_t *m)
{
        mbuf_t  *m0, *prev;

        m0 = qtail(q);
        do {
                prev = m0;
                m0 = m0->m_nextpkt;
        } while (m0 != m);
        prev->m_nextpkt = m->m_nextpkt;
        if (prev == m)
                qtail(q) = NULL;
        else if (qtail(q) == m)
                qtail(q) = prev;
        qlen(q)--;
}

void
_flushq(class_queue_t *q)
{
        mbuf_t *m;

        while ((m = _getq(q)) != NULL)
                m_freem(m);
        ASSERT(qlen(q) == 0);
}

#endif /* !__GNUC__ || ALTQ_DEBUG */
#endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ */