/* root/sys/netpfil/pf/pf_ioctl.c */
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2001 Daniel Hartmeier
 * Copyright (c) 2002,2003 Henning Brauer
 * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *    - Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Effort sponsored in part by the Defense Advanced Research Projects
 * Agency (DARPA) and Air Force Research Laboratory, Air Force
 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
 *
 *      $OpenBSD: pf_ioctl.c,v 1.213 2009/02/15 21:46:12 mbalmer Exp $
 */

#include <sys/cdefs.h>
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_bpf.h"
#include "opt_pf.h"

#include <sys/param.h>
#include <sys/_bitset.h>
#include <sys/bitset.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/endian.h>
#include <sys/fcntl.h>
#include <sys/filio.h>
#include <sys/hash.h>
#include <sys/interrupt.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/nv.h>
#include <sys/proc.h>
#include <sys/sdt.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/md5.h>
#include <sys/ucred.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_private.h>
#include <net/vnet.h>
#include <net/route.h>
#include <net/pfil.h>
#include <net/pfvar.h>
#include <net/if_pfsync.h>
#include <net/if_pflog.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet6/ip6_var.h>
#include <netinet/ip_icmp.h>
#include <netpfil/pf/pf_nl.h>
#include <netpfil/pf/pf_nv.h>

#ifdef INET6
#include <netinet/ip6.h>
#endif /* INET6 */

#ifdef ALTQ
#include <net/altq/altq.h>
#endif

SDT_PROBE_DEFINE3(pf, ioctl, ioctl, error, "int", "int", "int");
SDT_PROBE_DEFINE3(pf, ioctl, function, error, "char *", "int", "int");
SDT_PROBE_DEFINE2(pf, ioctl, addrule, error, "int", "int");
SDT_PROBE_DEFINE2(pf, ioctl, nvchk, error, "int", "int");

static struct pf_kpool  *pf_get_kpool(const char *, u_int32_t, u_int8_t,
                            u_int32_t, u_int8_t, u_int8_t, u_int8_t, int);

static void              pf_mv_kpool(struct pf_kpalist *, struct pf_kpalist *);
static void              pf_empty_kpool(struct pf_kpalist *);
static int               pfioctl(struct cdev *, u_long, caddr_t, int,
                            struct thread *);
static int               pf_begin_eth(uint32_t *, const char *);
static int               pf_rollback_eth(uint32_t, const char *);
static int               pf_commit_eth(uint32_t, const char *);
static void              pf_free_eth_rule(struct pf_keth_rule *);
#ifdef ALTQ
static int               pf_begin_altq(u_int32_t *);
static int               pf_rollback_altq(u_int32_t);
static int               pf_commit_altq(u_int32_t);
static int               pf_enable_altq(struct pf_altq *);
static int               pf_disable_altq(struct pf_altq *);
static void              pf_qid_unref(uint16_t);
#endif /* ALTQ */
static int               pf_begin_rules(u_int32_t *, int, const char *);
static int               pf_rollback_rules(u_int32_t, int, char *);
static int               pf_setup_pfsync_matching(struct pf_kruleset *);
static void              pf_hash_rule_rolling(MD5_CTX *, struct pf_krule *);
static void              pf_hash_rule(struct pf_krule *);
static void              pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *);
static int               pf_commit_rules(u_int32_t, int, char *);
static int               pf_addr_setup(struct pf_kruleset *,
                            struct pf_addr_wrap *, sa_family_t);
static void              pf_src_node_copy(const struct pf_ksrc_node *,
                            struct pf_src_node *);
#ifdef ALTQ
static int               pf_export_kaltq(struct pf_altq *,
                            struct pfioc_altq_v1 *, size_t);
static int               pf_import_kaltq(struct pfioc_altq_v1 *,
                            struct pf_altq *, size_t);
#endif /* ALTQ */

static void              pf_statelim_commit(void);
static void              pf_statelim_rollback(void);
static int               pf_sourcelim_check(void);
static void              pf_sourcelim_commit(void);
static void              pf_sourcelim_rollback(void);

VNET_DEFINE(struct pf_krule,    pf_default_rule);

static __inline int             pf_krule_compare(struct pf_krule *,
                                    struct pf_krule *);

RB_GENERATE(pf_krule_global, pf_krule, entry_global, pf_krule_compare);

#ifdef ALTQ
VNET_DEFINE_STATIC(int,         pf_altq_running);
#define V_pf_altq_running       VNET(pf_altq_running)
#endif

#define TAGID_MAX        50000
/*
 * A single tag name <-> numeric id binding, reference-counted.  Each
 * entry is linked into both hash tables of its owning pf_tagset.
 */
struct pf_tagname {
        TAILQ_ENTRY(pf_tagname) namehash_entries;       /* name hash linkage */
        TAILQ_ENTRY(pf_tagname) taghash_entries;        /* tag hash linkage */
        char                    name[PF_TAG_NAME_SIZE];
        uint16_t                tag;    /* tag id, valid range [1..TAGID_MAX] */
        int                     ref;    /* reference count */
};

/*
 * A set of tag bindings: two hash tables over the same entries (keyed
 * by name and by tag id) plus a bitmap of still-unassigned tag ids.
 */
struct pf_tagset {
        TAILQ_HEAD(, pf_tagname)        *namehash;
        TAILQ_HEAD(, pf_tagname)        *taghash;
        unsigned int                     mask;  /* hash table size - 1 */
        uint32_t                         seed;  /* seed for the name hash */
        BITSET_DEFINE(, TAGID_MAX)       avail; /* ids still available */
};

VNET_DEFINE(struct pf_tagset, pf_tags);
#define V_pf_tags       VNET(pf_tags)
static unsigned int     pf_rule_tag_hashsize;
#define PF_RULE_TAG_HASH_SIZE_DEFAULT   128
SYSCTL_UINT(_net_pf, OID_AUTO, rule_tag_hashsize, CTLFLAG_RDTUN,
    &pf_rule_tag_hashsize, PF_RULE_TAG_HASH_SIZE_DEFAULT,
    "Size of pf(4) rule tag hashtable");

#ifdef ALTQ
VNET_DEFINE(struct pf_tagset, pf_qids);
#define V_pf_qids       VNET(pf_qids)
static unsigned int     pf_queue_tag_hashsize;
#define PF_QUEUE_TAG_HASH_SIZE_DEFAULT  128
SYSCTL_UINT(_net_pf, OID_AUTO, queue_tag_hashsize, CTLFLAG_RDTUN,
    &pf_queue_tag_hashsize, PF_QUEUE_TAG_HASH_SIZE_DEFAULT,
    "Size of pf(4) queue tag hashtable");
#endif
VNET_DEFINE(uma_zone_t,  pf_tag_z);
#define V_pf_tag_z               VNET(pf_tag_z)
static MALLOC_DEFINE(M_PFALTQ, "pf_altq", "pf(4) altq configuration db");
static MALLOC_DEFINE(M_PFRULE, "pf_rule", "pf(4) rules");
MALLOC_DEFINE(M_PF, "pf", "pf(4)");
MALLOC_DEFINE(M_PF_STATE_LIM, "pf_state_lim", "pf(4) state limiter");

#if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE)
#error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE
#endif

VNET_DEFINE_STATIC(bool, pf_filter_local) = false;
#define V_pf_filter_local       VNET(pf_filter_local)
SYSCTL_BOOL(_net_pf, OID_AUTO, filter_local, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(pf_filter_local), false,
    "Enable filtering for packets delivered to local network stack");

#ifdef PF_DEFAULT_TO_DROP
VNET_DEFINE_STATIC(bool, default_to_drop) = true;
#else
VNET_DEFINE_STATIC(bool, default_to_drop);
#endif
#define V_default_to_drop VNET(default_to_drop)
SYSCTL_BOOL(_net_pf, OID_AUTO, default_to_drop, CTLFLAG_RDTUN | CTLFLAG_VNET,
    &VNET_NAME(default_to_drop), false,
    "Make the default rule drop all packets.");

static void              pf_init_tagset(struct pf_tagset *, unsigned int *,
                            unsigned int);
static void              pf_cleanup_tagset(struct pf_tagset *);
static uint16_t          tagname2hashindex(const struct pf_tagset *, const char *);
static uint16_t          tag2hashindex(const struct pf_tagset *, uint16_t);
static u_int16_t         tagname2tag(struct pf_tagset *, const char *, bool);
static void              tag_unref(struct pf_tagset *, u_int16_t);

struct cdev *pf_dev;

/*
 * XXX - These are new and need to be checked when moving to a new version
 */
static void              pf_clear_all_states(void);
static int               pf_killstates_row(struct pf_kstate_kill *,
                            struct pf_idhash *);
static int               pf_killstates_nv(struct pfioc_nv *);
static int               pf_clearstates_nv(struct pfioc_nv *);
static int               pf_getstate(struct pfioc_nv *);
static int               pf_getstatus(struct pfioc_nv *);
static int               pf_clear_tables(void);
static void              pf_kill_srcnodes(struct pfioc_src_node_kill *);
static int               pf_keepcounters(struct pfioc_nv *);
static void              pf_tbladdr_copyout(struct pf_addr_wrap *);

/*
 * Wrapper functions for pfil(9) hooks
 */
static pfil_return_t pf_eth_check_in(struct mbuf **m, struct ifnet *ifp,
    int flags, void *ruleset __unused, struct inpcb *inp);
static pfil_return_t pf_eth_check_out(struct mbuf **m, struct ifnet *ifp,
    int flags, void *ruleset __unused, struct inpcb *inp);
#ifdef INET
static pfil_return_t pf_check_in(struct mbuf **m, struct ifnet *ifp,
    int flags, void *ruleset __unused, struct inpcb *inp);
static pfil_return_t pf_check_out(struct mbuf **m, struct ifnet *ifp,
    int flags, void *ruleset __unused, struct inpcb *inp);
#endif
#ifdef INET6
static pfil_return_t pf_check6_in(struct mbuf **m, struct ifnet *ifp,
    int flags, void *ruleset __unused, struct inpcb *inp);
static pfil_return_t pf_check6_out(struct mbuf **m, struct ifnet *ifp,
    int flags, void *ruleset __unused, struct inpcb *inp);
#endif

static void             hook_pf_eth(void);
static void             hook_pf(void);
static void             dehook_pf_eth(void);
static void             dehook_pf(void);
static int              shutdown_pf(void);
static int              pf_load(void);
static void             pf_unload(void *);

static struct cdevsw pf_cdevsw = {
        .d_ioctl =      pfioctl,
        .d_name =       PF_NAME,
        .d_version =    D_VERSION,
};

VNET_DEFINE_STATIC(bool, pf_pfil_hooked);
#define V_pf_pfil_hooked        VNET(pf_pfil_hooked)
VNET_DEFINE_STATIC(bool, pf_pfil_eth_hooked);
#define V_pf_pfil_eth_hooked    VNET(pf_pfil_eth_hooked)

/*
 * We need a flag that is neither hooked nor running to know when
 * the VNET is "valid".  We primarily need this to control (global)
 * external event, e.g., eventhandlers.
 */
VNET_DEFINE(int, pf_vnet_active);
#define V_pf_vnet_active        VNET(pf_vnet_active)

int pf_end_threads;
struct proc *pf_purge_proc;

VNET_DEFINE(struct rmlock, pf_rules_lock);
VNET_DEFINE(struct rmlock, pf_tags_lock);
VNET_DEFINE_STATIC(struct sx, pf_ioctl_lock);
#define V_pf_ioctl_lock         VNET(pf_ioctl_lock)
struct sx                       pf_end_lock;

/* pfsync */
VNET_DEFINE(pfsync_state_import_t *, pfsync_state_import_ptr);
VNET_DEFINE(pfsync_insert_state_t *, pfsync_insert_state_ptr);
VNET_DEFINE(pfsync_update_state_t *, pfsync_update_state_ptr);
VNET_DEFINE(pfsync_delete_state_t *, pfsync_delete_state_ptr);
VNET_DEFINE(pfsync_clear_states_t *, pfsync_clear_states_ptr);
VNET_DEFINE(pfsync_defer_t *, pfsync_defer_ptr);
VNET_DEFINE(pflow_export_state_t *, pflow_export_state_ptr);
pfsync_detach_ifnet_t *pfsync_detach_ifnet_ptr;

/* pflog */
pflog_packet_t                  *pflog_packet_ptr = NULL;

/*
 * Copy a user-provided string, returning an error if truncation would occur.
 * Avoid scanning past "sz" bytes in the source string since there's no
 * guarantee that it's nul-terminated.
 */
static int
pf_user_strcpy(char *dst, const char *src, size_t sz)
{
        size_t len;

        /*
         * Reject sources that are not nul-terminated within sz bytes;
         * strnlen() guarantees we never scan past the caller's bound.
         */
        len = strnlen(src, sz);
        if (len == sz)
                return (EINVAL);
        (void)strlcpy(dst, src, sz);
        return (0);
}

/*
 * Per-VNET attach: initialize pf's data structures, the default rule
 * and its counters, the default state timeouts, and register the
 * "pf send" software interrupt handler for the current vnet.
 */
static void
pfattach_vnet(void)
{
        u_int32_t *my_timeout = V_pf_default_rule.timeout;

        bzero(&V_pf_status, sizeof(V_pf_status));

        /* Bring up the pf subsystems for this vnet. */
        pf_initialize();
        pfr_initialize();
        pfi_initialize_vnet();
        pf_normalize_init();
        pf_syncookies_init();

        /* Default resource limits. */
        V_pf_limits[PF_LIMIT_STATES].limit = PFSTATE_HIWAT;
        V_pf_limits[PF_LIMIT_SRC_NODES].limit = PFSNODE_HIWAT;
        V_pf_limits[PF_LIMIT_ANCHORS].limit = PF_ANCHOR_HIWAT;
        V_pf_limits[PF_LIMIT_ETH_ANCHORS].limit = PF_ANCHOR_HIWAT;

        RB_INIT(&V_pf_anchors);
        pf_init_kruleset(&pf_main_ruleset);

        pf_init_keth(V_pf_keth);

        /* default rule should never be garbage collected */
        V_pf_default_rule.entries.tqe_prev = &V_pf_default_rule.entries.tqe_next;
        V_pf_default_rule.action = V_default_to_drop ? PF_DROP : PF_PASS;
        V_pf_default_rule.nr = (uint32_t)-1;
        V_pf_default_rule.rtableid = -1;

        /* Statistics counters for the default rule. */
        pf_counter_u64_init(&V_pf_default_rule.evaluations, M_WAITOK);
        for (int i = 0; i < 2; i++) {
                pf_counter_u64_init(&V_pf_default_rule.packets[i], M_WAITOK);
                pf_counter_u64_init(&V_pf_default_rule.bytes[i], M_WAITOK);
        }
        V_pf_default_rule.states_cur = counter_u64_alloc(M_WAITOK);
        V_pf_default_rule.states_tot = counter_u64_alloc(M_WAITOK);
        for (pf_sn_types_t sn_type = 0; sn_type<PF_SN_MAX; sn_type++)
                V_pf_default_rule.src_nodes[sn_type] = counter_u64_alloc(M_WAITOK);

        V_pf_default_rule.timestamp = uma_zalloc_pcpu(pf_timestamp_pcpu_zone,
            M_WAITOK | M_ZERO);

#ifdef PF_WANT_32_TO_64_COUNTER
        /* Marker entries used by the 32->64 bit counter rollover walker. */
        V_pf_kifmarker = malloc(sizeof(*V_pf_kifmarker), PFI_MTYPE, M_WAITOK | M_ZERO);
        V_pf_rulemarker = malloc(sizeof(*V_pf_rulemarker), M_PFRULE, M_WAITOK | M_ZERO);
        PF_RULES_WLOCK();
        LIST_INSERT_HEAD(&V_pf_allkiflist, V_pf_kifmarker, pfik_allkiflist);
        LIST_INSERT_HEAD(&V_pf_allrulelist, &V_pf_default_rule, allrulelist);
        V_pf_allrulecount++;
        LIST_INSERT_HEAD(&V_pf_allrulelist, V_pf_rulemarker, allrulelist);
        PF_RULES_WUNLOCK();
#endif

        /* initialize default timeouts */
        my_timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL;
        my_timeout[PFTM_TCP_OPENING] = PFTM_TCP_OPENING_VAL;
        my_timeout[PFTM_TCP_ESTABLISHED] = PFTM_TCP_ESTABLISHED_VAL;
        my_timeout[PFTM_TCP_CLOSING] = PFTM_TCP_CLOSING_VAL;
        my_timeout[PFTM_TCP_FIN_WAIT] = PFTM_TCP_FIN_WAIT_VAL;
        my_timeout[PFTM_TCP_CLOSED] = PFTM_TCP_CLOSED_VAL;
        /* SCTP reuses the TCP timeout values. */
        my_timeout[PFTM_SCTP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL;
        my_timeout[PFTM_SCTP_OPENING] = PFTM_TCP_OPENING_VAL;
        my_timeout[PFTM_SCTP_ESTABLISHED] = PFTM_TCP_ESTABLISHED_VAL;
        my_timeout[PFTM_SCTP_CLOSING] = PFTM_TCP_CLOSING_VAL;
        my_timeout[PFTM_SCTP_CLOSED] = PFTM_TCP_CLOSED_VAL;
        my_timeout[PFTM_UDP_FIRST_PACKET] = PFTM_UDP_FIRST_PACKET_VAL;
        my_timeout[PFTM_UDP_SINGLE] = PFTM_UDP_SINGLE_VAL;
        my_timeout[PFTM_UDP_MULTIPLE] = PFTM_UDP_MULTIPLE_VAL;
        my_timeout[PFTM_ICMP_FIRST_PACKET] = PFTM_ICMP_FIRST_PACKET_VAL;
        my_timeout[PFTM_ICMP_ERROR_REPLY] = PFTM_ICMP_ERROR_REPLY_VAL;
        my_timeout[PFTM_OTHER_FIRST_PACKET] = PFTM_OTHER_FIRST_PACKET_VAL;
        my_timeout[PFTM_OTHER_SINGLE] = PFTM_OTHER_SINGLE_VAL;
        my_timeout[PFTM_OTHER_MULTIPLE] = PFTM_OTHER_MULTIPLE_VAL;
        my_timeout[PFTM_FRAG] = PFTM_FRAG_VAL;
        my_timeout[PFTM_INTERVAL] = PFTM_INTERVAL_VAL;
        my_timeout[PFTM_SRC_NODE] = PFTM_SRC_NODE_VAL;
        my_timeout[PFTM_TS_DIFF] = PFTM_TS_DIFF_VAL;
        my_timeout[PFTM_ADAPTIVE_START] = PFSTATE_ADAPT_START;
        my_timeout[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END;

        V_pf_status.debug = PF_DEBUG_URGENT;
        /*
         * XXX This is different than in OpenBSD where reassembly is enabled
         * by default. In FreeBSD we expect people to still use scrub rules
         * and switch to the new syntax later. Only when they switch they
         * must explicitly enable reassembly. We could change the default
         * once the scrub rule functionality is hopefully removed some day
         * in future.
         */
        V_pf_status.reass = 0;

        V_pf_pfil_hooked = false;
        V_pf_pfil_eth_hooked = false;

        /* XXX do our best to avoid a conflict */
        V_pf_status.hostid = arc4random();

        /* Per-category statistics counters. */
        for (int i = 0; i < PFRES_MAX; i++)
                V_pf_status.counters[i] = counter_u64_alloc(M_WAITOK);
        for (int i = 0; i < KLCNT_MAX; i++)
                V_pf_status.lcounters[i] = counter_u64_alloc(M_WAITOK);
        for (int i = 0; i < FCNT_MAX; i++)
                pf_counter_u64_init(&V_pf_status.fcounters[i], M_WAITOK);
        for (int i = 0; i < SCNT_MAX; i++)
                V_pf_status.scounters[i] = counter_u64_alloc(M_WAITOK);
        for (int i = 0; i < NCNT_MAX; i++)
                V_pf_status.ncounters[i] = counter_u64_alloc(M_WAITOK);

        if (swi_add(&V_pf_swi_ie, "pf send", pf_intr, curvnet, SWI_NET,
            INTR_MPSAFE, &V_pf_swi_cookie) != 0)
                /* XXXGL: leaked all above. */
                return;
}

/*
 * Look up one of the three address pools (NAT, redirect or route-to)
 * of a rule identified by anchor path, rule action and rule number.
 * When "r_last" is set the last rule of the queue is used instead of
 * searching by rule number.  Returns NULL when the anchor, ruleset,
 * ticket (if "check_ticket") or rule cannot be found.
 */
static struct pf_kpool *
pf_get_kpool(const char *anchor, u_int32_t ticket, u_int8_t rule_action,
    u_int32_t rule_number, u_int8_t r_last, u_int8_t active,
    u_int8_t check_ticket, int which)
{
        struct pf_kruleset      *ruleset;
        struct pf_krule         *rule;
        int                      rs_num;

        MPASS(which == PF_RDR || which == PF_NAT || which == PF_RT);

        ruleset = pf_find_kruleset(anchor);
        if (ruleset == NULL)
                return (NULL);
        rs_num = pf_get_ruleset_number(rule_action);
        if (rs_num >= PF_RULESET_MAX)
                return (NULL);
        if (active) {
                if (check_ticket && ticket !=
                    ruleset->rules[rs_num].active.ticket)
                        return (NULL);
                if (r_last)
                        rule = TAILQ_LAST(ruleset->rules[rs_num].active.ptr,
                            pf_krulequeue);
                else
                        rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr);
        } else {
                if (check_ticket && ticket !=
                    ruleset->rules[rs_num].inactive.ticket)
                        return (NULL);
                if (r_last)
                        rule = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr,
                            pf_krulequeue);
                else
                        rule = TAILQ_FIRST(ruleset->rules[rs_num].inactive.ptr);
        }
        if (!r_last) {
                /* Walk forward to the rule with the requested number. */
                while ((rule != NULL) && (rule->nr != rule_number))
                        rule = TAILQ_NEXT(rule, entries);
        }
        if (rule == NULL)
                return (NULL);

        switch (which) {
        case PF_RDR:
                return (&rule->rdr);
        case PF_NAT:
                return (&rule->nat);
        case PF_RT:
                return (&rule->route);
        default:
                /* Unreachable: "which" was validated by the MPASS above. */
                panic("Unknown pool type %d", which);
        }
}

/*
 * Move all pool addresses from poola to the tail of poolb, leaving
 * poola empty.  TAILQ_CONCAT() does this in constant time instead of
 * relinking each entry individually.
 */
static void
pf_mv_kpool(struct pf_kpalist *poola, struct pf_kpalist *poolb)
{

        TAILQ_CONCAT(poolb, poola, entries);
}

static void
pf_empty_kpool(struct pf_kpalist *poola)
{
        struct pf_kpooladdr *pa;

        while ((pa = TAILQ_FIRST(poola)) != NULL) {
                switch (pa->addr.type) {
                case PF_ADDR_DYNIFTL:
                        pfi_dynaddr_remove(pa->addr.p.dyn);
                        break;
                case PF_ADDR_TABLE:
                        /* XXX: this could be unfinished pooladdr on pabuf */
                        if (pa->addr.p.tbl != NULL)
                                pfr_detach_table(pa->addr.p.tbl);
                        break;
                }
                if (pa->kif)
                        pfi_kkif_unref(pa->kif);
                TAILQ_REMOVE(poola, pa, entries);
                free(pa, M_PFRULE);
        }
}

/*
 * Remove a rule from its ruleset queue and append it to the global
 * unlinked-rules list for deferred destruction.  Caller must hold the
 * rules write lock and the unlinked-rules lock.
 */
static void
pf_unlink_rule_locked(struct pf_krulequeue *rulequeue, struct pf_krule *rule)
{

        PF_RULES_WASSERT();
        PF_UNLNKDRULES_ASSERT();

        TAILQ_REMOVE(rulequeue, rule, entries);

        /* Flag the rule as moved to the unlinked list. */
        rule->rule_ref |= PFRULE_REFS;
        TAILQ_INSERT_TAIL(&V_pf_unlinked_rules, rule, entries);
}

/*
 * Convenience wrapper around pf_unlink_rule_locked() that takes the
 * unlinked-rules lock itself.  Caller still holds the rules write lock.
 */
static void
pf_unlink_rule(struct pf_krulequeue *rulequeue, struct pf_krule *rule)
{

        PF_RULES_WASSERT();

        PF_UNLNKDRULES_LOCK();
        pf_unlink_rule_locked(rulequeue, rule);
        PF_UNLNKDRULES_UNLOCK();
}

/*
 * Release all resources held by an Ethernet rule (tags, queue id,
 * interface and table references, counters) and free it.  NULL is
 * accepted and ignored.  Caller must hold the rules write lock.
 */
static void
pf_free_eth_rule(struct pf_keth_rule *rule)
{
        PF_RULES_WASSERT();

        if (rule == NULL)
                return;

        /* Drop tag references. */
        if (rule->tag)
                tag_unref(&V_pf_tags, rule->tag);
        if (rule->match_tag)
                tag_unref(&V_pf_tags, rule->match_tag);
#ifdef ALTQ
        pf_qid_unref(rule->qid);
#endif

        /* Drop interface references. */
        if (rule->bridge_to)
                pfi_kkif_unref(rule->bridge_to);
        if (rule->kif)
                pfi_kkif_unref(rule->kif);

        /* Detach any tables referenced by the address matches. */
        if (rule->ipsrc.addr.type == PF_ADDR_TABLE)
                pfr_detach_table(rule->ipsrc.addr.p.tbl);
        if (rule->ipdst.addr.type == PF_ADDR_TABLE)
                pfr_detach_table(rule->ipdst.addr.p.tbl);

        /* Free statistics counters. */
        counter_u64_free(rule->evaluations);
        for (int i = 0; i < 2; i++) {
                counter_u64_free(rule->packets[i]);
                counter_u64_free(rule->bytes[i]);
        }
        uma_zfree_pcpu(pf_timestamp_pcpu_zone, rule->timestamp);
        pf_keth_anchor_remove(rule);

        free(rule, M_PFRULE);
}

/*
 * Release everything attached to a pf rule -- tags, queue ids, dynamic
 * addresses, tables, interface references, the anchor linkage and the
 * three address pools -- then free the rule itself.  Caller must hold
 * the rules write lock and the config lock.
 */
void
pf_free_rule(struct pf_krule *rule)
{

        PF_RULES_WASSERT();
        PF_CONFIG_ASSERT();

        if (rule->tag)
                tag_unref(&V_pf_tags, rule->tag);
        if (rule->match_tag)
                tag_unref(&V_pf_tags, rule->match_tag);
#ifdef ALTQ
        /* pqid may alias qid; avoid dropping the same reference twice. */
        if (rule->pqid != rule->qid)
                pf_qid_unref(rule->pqid);
        pf_qid_unref(rule->qid);
#endif
        switch (rule->src.addr.type) {
        case PF_ADDR_DYNIFTL:
                pfi_dynaddr_remove(rule->src.addr.p.dyn);
                break;
        case PF_ADDR_TABLE:
                pfr_detach_table(rule->src.addr.p.tbl);
                break;
        }
        switch (rule->dst.addr.type) {
        case PF_ADDR_DYNIFTL:
                pfi_dynaddr_remove(rule->dst.addr.p.dyn);
                break;
        case PF_ADDR_TABLE:
                pfr_detach_table(rule->dst.addr.p.tbl);
                break;
        }
        if (rule->overload_tbl)
                pfr_detach_table(rule->overload_tbl);
        if (rule->kif)
                pfi_kkif_unref(rule->kif);
        if (rule->rcv_kif)
                pfi_kkif_unref(rule->rcv_kif);
        pf_remove_kanchor(rule);
        pf_empty_kpool(&rule->rdr.list);
        pf_empty_kpool(&rule->nat.list);
        pf_empty_kpool(&rule->route.list);

        pf_krule_free(rule);
}

/*
 * Initialize a tag set: allocate both hash tables (size taken from the
 * tunable, falling back to the default when unset or not a power of
 * two) and mark every tag id as available.
 */
static void
pf_init_tagset(struct pf_tagset *ts, unsigned int *tunable_size,
    unsigned int default_size)
{
        unsigned int hashsize, idx;

        if (*tunable_size == 0 || !powerof2(*tunable_size))
                *tunable_size = default_size;
        hashsize = *tunable_size;

        ts->namehash = mallocarray(hashsize, sizeof(*ts->namehash), M_PFHASH,
            M_WAITOK);
        ts->taghash = mallocarray(hashsize, sizeof(*ts->taghash), M_PFHASH,
            M_WAITOK);
        ts->mask = hashsize - 1;
        ts->seed = arc4random();
        for (idx = 0; idx < hashsize; idx++) {
                TAILQ_INIT(&ts->namehash[idx]);
                TAILQ_INIT(&ts->taghash[idx]);
        }
        /* All tag ids start out available. */
        BIT_FILL(TAGID_MAX, &ts->avail);
}

/*
 * Tear down a tag set: free every tag entry and both hash tables.
 */
static void
pf_cleanup_tagset(struct pf_tagset *ts)
{
        struct pf_tagname *t, *tnext;
        unsigned int idx;

        /*
         * Only need to clean up one of the hashes as each tag is hashed
         * into each table.
         */
        for (idx = 0; idx <= ts->mask; idx++)
                TAILQ_FOREACH_SAFE(t, &ts->namehash[idx], namehash_entries,
                    tnext)
                        uma_zfree(V_pf_tag_z, t);

        free(ts->namehash, M_PFHASH);
        free(ts->taghash, M_PFHASH);
}

/*
 * Hash a tag name into a bucket index of the name hash table.
 */
static uint16_t
tagname2hashindex(const struct pf_tagset *ts, const char *tagname)
{
        size_t namelen;

        /* Hash at most the bytes that can form a valid tag name. */
        namelen = strnlen(tagname, PF_TAG_NAME_SIZE - 1);
        return (murmur3_32_hash(tagname, namelen, ts->seed) & ts->mask);
}

/*
 * Map a tag id to its bucket in the tag hash table; the table size is
 * a power of two, so masking suffices.
 */
static uint16_t
tag2hashindex(const struct pf_tagset *ts, uint16_t tag)
{

        return (tag & ts->mask);
}

/*
 * Return the numeric tag id for "tagname", taking a new reference.
 * When the name is unknown a new id is allocated, unless "add_new" is
 * false (pfsync queue import case).  Returns 0 on failure (no free id,
 * allocation failure, or add_new disallowed).
 */
static u_int16_t
tagname2tag(struct pf_tagset *ts, const char *tagname, bool add_new)
{
        struct pf_tagname       *tag;
        u_int32_t                index;
        u_int16_t                new_tagid;

        PF_TAGS_RLOCK_TRACKER;

        PF_TAGS_RLOCK();

        /* Fast path: the tag already exists; bump its refcount. */
        index = tagname2hashindex(ts, tagname);
        TAILQ_FOREACH(tag, &ts->namehash[index], namehash_entries)
                if (strcmp(tagname, tag->name) == 0) {
                        tag->ref++;
                        new_tagid = tag->tag;
                        PF_TAGS_RUNLOCK();
                        return (new_tagid);
                }

        /*
         * When used for pfsync with queues we must not create new entries.
         * Pf tags can be created just fine by this function, but queues
         * require additional configuration. If they are missing on the target
         * system we just ignore them
         */
        if (add_new == false) {
                printf("%s: Not creating a new tag\n", __func__);
                PF_TAGS_RUNLOCK();
                return (0);
        }

        /*
         * If a new entry must be created do it under a write lock.
         * But first search again, somebody could have created the tag
         * between unlocking the read lock and locking the write lock.
         */
        PF_TAGS_RUNLOCK();
        PF_TAGS_WLOCK();
        TAILQ_FOREACH(tag, &ts->namehash[index], namehash_entries)
                if (strcmp(tagname, tag->name) == 0) {
                        tag->ref++;
                        new_tagid = tag->tag;
                        PF_TAGS_WUNLOCK();
                        return (new_tagid);
                }

        /*
         * new entry
         *
         * to avoid fragmentation, we do a linear search from the beginning
         * and take the first free slot we find.
         */
        new_tagid = BIT_FFS(TAGID_MAX, &ts->avail);
        /*
         * Tags are 1-based, with valid tags in the range [1..TAGID_MAX].
         * BIT_FFS() returns a 1-based bit number, with 0 indicating no bits
         * set.  It may also return a bit number greater than TAGID_MAX due
         * to rounding of the number of bits in the vector up to a multiple
         * of the vector word size at declaration/allocation time.
         */
        if ((new_tagid == 0) || (new_tagid > TAGID_MAX)) {
                PF_TAGS_WUNLOCK();
                return (0);
        }

        /* Mark the tag as in use.  Bits are 0-based for BIT_CLR() */
        BIT_CLR(TAGID_MAX, new_tagid - 1, &ts->avail);

        /* allocate and fill new struct pf_tagname */
        tag = uma_zalloc(V_pf_tag_z, M_NOWAIT);
        if (tag == NULL) {
                PF_TAGS_WUNLOCK();
                return (0);
        }
        strlcpy(tag->name, tagname, sizeof(tag->name));
        tag->tag = new_tagid;
        tag->ref = 1;

        /* Insert into namehash */
        TAILQ_INSERT_TAIL(&ts->namehash[index], tag, namehash_entries);

        /* Insert into taghash */
        index = tag2hashindex(ts, new_tagid);
        TAILQ_INSERT_TAIL(&ts->taghash[index], tag, taghash_entries);

        PF_TAGS_WUNLOCK();
        return (new_tagid);
}

/*
 * Resolve a tag id back to its name, or NULL when no such tag exists.
 */
static char *
tag2tagname(struct pf_tagset *ts, u_int16_t tag)
{
        struct pf_tagname       *t;
        char                    *name = NULL;
        uint16_t                 index;

        PF_TAGS_RLOCK_TRACKER;

        PF_TAGS_RLOCK();

        index = tag2hashindex(ts, tag);
        TAILQ_FOREACH(t, &ts->taghash[index], taghash_entries)
                if (t->tag == tag) {
                        name = t->name;
                        break;
                }

        PF_TAGS_RUNLOCK();
        return (name);
}

/*
 * Drop one reference on "tag".  When the last reference goes away the
 * entry is unlinked from both hash tables, its id is returned to the
 * available bitmap, and the entry is freed.
 */
static void
tag_unref(struct pf_tagset *ts, u_int16_t tag)
{
        struct pf_tagname       *t;
        uint16_t                 index;

        PF_TAGS_WLOCK();

        index = tag2hashindex(ts, tag);
        TAILQ_FOREACH(t, &ts->taghash[index], taghash_entries)
                if (tag == t->tag) {
                        if (--t->ref == 0) {
                                /* Last reference: remove from both hashes. */
                                TAILQ_REMOVE(&ts->taghash[index], t,
                                    taghash_entries);
                                index = tagname2hashindex(ts, t->name);
                                TAILQ_REMOVE(&ts->namehash[index], t,
                                    namehash_entries);
                                /* Bits are 0-based for BIT_SET() */
                                BIT_SET(TAGID_MAX, tag - 1, &ts->avail);
                                uma_zfree(V_pf_tag_z, t);
                        }
                        break;
                }

        PF_TAGS_WUNLOCK();
}

/*
 * Public wrapper: map a rule tag name to its id in the per-vnet rule
 * tag set, creating the tag if it does not exist yet.
 */
uint16_t
pf_tagname2tag(const char *tagname)
{
        return (tagname2tag(&V_pf_tags, tagname, true));
}

/*
 * Map a rule tag id back to its name; NULL when the tag is unknown.
 */
static const char *
pf_tag2tagname(uint16_t tag)
{
        return (tag2tagname(&V_pf_tags, tag));
}

/*
 * Open an Ethernet ruleset transaction on "anchor": discard any stale
 * inactive rules and hand back a fresh ticket.  Caller must hold the
 * rules write lock.
 */
static int
pf_begin_eth(uint32_t *ticket, const char *anchor)
{
        struct pf_keth_ruleset *rs;
        struct pf_keth_rule *rule;

        PF_RULES_WASSERT();

        rs = pf_find_or_create_keth_ruleset(anchor);
        if (rs == NULL)
                return (EINVAL);

        /* Purge old inactive rules. */
        while ((rule = TAILQ_FIRST(rs->inactive.rules)) != NULL) {
                TAILQ_REMOVE(rs->inactive.rules, rule, entries);
                pf_free_eth_rule(rule);
        }

        *ticket = ++rs->inactive.ticket;
        rs->inactive.open = 1;

        return (0);
}

/*
 * Abort an Ethernet ruleset transaction: drop all staged inactive
 * rules and close the transaction.  A stale ticket is silently
 * ignored.  Caller must hold the rules write lock.
 */
static int
pf_rollback_eth(uint32_t ticket, const char *anchor)
{
        struct pf_keth_ruleset *rs;
        struct pf_keth_rule *rule;

        PF_RULES_WASSERT();

        rs = pf_find_keth_ruleset(anchor);
        if (rs == NULL)
                return (EINVAL);

        /* Nothing to roll back unless this ticket opened the transaction. */
        if (!rs->inactive.open || ticket != rs->inactive.ticket)
                return (0);

        /* Purge old inactive rules. */
        while ((rule = TAILQ_FIRST(rs->inactive.rules)) != NULL) {
                TAILQ_REMOVE(rs->inactive.rules, rule, entries);
                pf_free_eth_rule(rule);
        }

        rs->inactive.open = 0;

        pf_remove_if_empty_keth_ruleset(rs);

        return (0);
}

/*
 * Advance the per-field head pointer "head[i]" up to "cur", pointing
 * each rule passed over at "cur" so evaluation can jump directly to
 * the next rule that differs in field "i".  Relies on the "head" and
 * "cur" locals of the calling function.
 */
#define PF_SET_SKIP_STEPS(i)                                    \
        do {                                                    \
                while (head[i] != cur) {                        \
                        head[i]->skip[i].ptr = cur;             \
                        head[i] = TAILQ_NEXT(head[i], entries); \
                }                                               \
        } while (0)

/*
 * Recompute the skip steps of an Ethernet ruleset: for every
 * comparable field, runs of consecutive rules with identical values
 * are linked so that the matcher can skip the whole run in one hop.
 */
static void
pf_eth_calc_skip_steps(struct pf_keth_ruleq *rules)
{
        struct pf_keth_rule *cur, *prev, *head[PFE_SKIP_COUNT];
        int i;

        cur = TAILQ_FIRST(rules);
        prev = cur;
        for (i = 0; i < PFE_SKIP_COUNT; ++i)
                head[i] = cur;
        while (cur != NULL) {
                /* Whenever a field changes, close the run for it. */
                if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
                        PF_SET_SKIP_STEPS(PFE_SKIP_IFP);
                if (cur->direction != prev->direction)
                        PF_SET_SKIP_STEPS(PFE_SKIP_DIR);
                if (cur->proto != prev->proto)
                        PF_SET_SKIP_STEPS(PFE_SKIP_PROTO);
                if (memcmp(&cur->src, &prev->src, sizeof(cur->src)) != 0)
                        PF_SET_SKIP_STEPS(PFE_SKIP_SRC_ADDR);
                if (memcmp(&cur->dst, &prev->dst, sizeof(cur->dst)) != 0)
                        PF_SET_SKIP_STEPS(PFE_SKIP_DST_ADDR);
                if (cur->ipsrc.neg != prev->ipsrc.neg ||
                    pf_addr_wrap_neq(&cur->ipsrc.addr, &prev->ipsrc.addr))
                        PF_SET_SKIP_STEPS(PFE_SKIP_SRC_IP_ADDR);
                if (cur->ipdst.neg != prev->ipdst.neg ||
                    pf_addr_wrap_neq(&cur->ipdst.addr, &prev->ipdst.addr))
                        PF_SET_SKIP_STEPS(PFE_SKIP_DST_IP_ADDR);

                prev = cur;
                cur = TAILQ_NEXT(cur, entries);
        }
        /* Terminate every remaining open run at the end of the list. */
        for (i = 0; i < PFE_SKIP_COUNT; ++i)
                PF_SET_SKIP_STEPS(i);
}

/*
 * Commit an ethernet rules transaction: swap the staged inactive rules
 * into place and free the previous active set.  Returns EINVAL if the
 * anchor does not exist, EBUSY if the ticket does not match the open
 * transaction.
 */
static int
pf_commit_eth(uint32_t ticket, const char *anchor)
{
        struct pf_keth_ruleq *rules;
        struct pf_keth_ruleset *rs;

        rs = pf_find_keth_ruleset(anchor);
        if (rs == NULL) {
                return (EINVAL);
        }

        if (!rs->inactive.open ||
            ticket != rs->inactive.ticket)
                return (EBUSY);

        PF_RULES_WASSERT();

        /* Precompute skip steps before the rules become visible. */
        pf_eth_calc_skip_steps(rs->inactive.rules);

        /*
         * Swap the lists; the atomic store publishes the new active
         * list pointer for readers.
         */
        rules = rs->active.rules;
        atomic_store_ptr(&rs->active.rules, rs->inactive.rules);
        rs->inactive.rules = rules;
        rs->inactive.ticket = rs->active.ticket;

        /*
         * The ticket was synchronized above, so this rollback matches
         * and frees the now-old rules on the inactive list.
         */
        return (pf_rollback_eth(rs->inactive.ticket,
            rs->anchor ? rs->anchor->path : ""));
}

#ifdef ALTQ
uint16_t
pf_qname2qid(const char *qname, bool add_new)
{
        return (tagname2tag(&V_pf_qids, qname, add_new));
}

/* Release one reference on a queue id obtained via pf_qname2qid(). */
static void
pf_qid_unref(uint16_t qid)
{
        tag_unref(&V_pf_qids, qid);
}

/*
 * Open an ALTQ transaction: discard leftover inactive queues and hand
 * out a fresh ticket.  A failing altq_remove() aborts the transaction
 * after all leftover entries have been freed.
 */
static int
pf_begin_altq(u_int32_t *ticket)
{
        struct pf_altq  *altq;
        int              error = 0;

        PF_RULES_WASSERT();

        /* Purge the old altq lists */
        while ((altq = TAILQ_FIRST(V_pf_altq_ifs_inactive)) != NULL) {
                TAILQ_REMOVE(V_pf_altq_ifs_inactive, altq, entries);
                if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
                        /* detach and destroy the discipline */
                        error = altq_remove(altq);
                }
                free(altq, M_PFALTQ);
        }
        TAILQ_INIT(V_pf_altq_ifs_inactive);
        while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) {
                TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries);
                pf_qid_unref(altq->qid);
                free(altq, M_PFALTQ);
        }
        TAILQ_INIT(V_pf_altqs_inactive);
        if (error)
                return (error);
        *ticket = ++V_ticket_altqs_inactive;
        V_altqs_inactive_open = 1;
        return (0);
}

/*
 * Abort an ALTQ transaction: free the staged inactive queues if the
 * ticket still matches the open transaction.  A stale ticket is not
 * an error.
 */
static int
pf_rollback_altq(u_int32_t ticket)
{
        struct pf_altq  *altq;
        int              error = 0;

        PF_RULES_WASSERT();

        if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive)
                return (0);
        /* Purge the old altq lists */
        while ((altq = TAILQ_FIRST(V_pf_altq_ifs_inactive)) != NULL) {
                TAILQ_REMOVE(V_pf_altq_ifs_inactive, altq, entries);
                if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
                        /* detach and destroy the discipline */
                        error = altq_remove(altq);
                }
                free(altq, M_PFALTQ);
        }
        TAILQ_INIT(V_pf_altq_ifs_inactive);
        while ((altq = TAILQ_FIRST(V_pf_altqs_inactive)) != NULL) {
                TAILQ_REMOVE(V_pf_altqs_inactive, altq, entries);
                pf_qid_unref(altq->qid);
                free(altq, M_PFALTQ);
        }
        TAILQ_INIT(V_pf_altqs_inactive);
        V_altqs_inactive_open = 0;
        return (error);
}

/*
 * Commit an ALTQ transaction: swap the active and inactive queue
 * lists, attach/enable the new disciplines and tear down the old
 * ones.  Returns EBUSY on a stale ticket.  Note that on a mid-attach
 * failure the lists remain swapped and the transaction is left open.
 */
static int
pf_commit_altq(u_int32_t ticket)
{
        struct pf_altqqueue     *old_altqs, *old_altq_ifs;
        struct pf_altq          *altq, *tmp;
        int                      err, error = 0;

        PF_RULES_WASSERT();

        if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive)
                return (EBUSY);

        /* swap altqs, keep the old. */
        old_altqs = V_pf_altqs_active;
        old_altq_ifs = V_pf_altq_ifs_active;
        V_pf_altqs_active = V_pf_altqs_inactive;
        V_pf_altq_ifs_active = V_pf_altq_ifs_inactive;
        V_pf_altqs_inactive = old_altqs;
        V_pf_altq_ifs_inactive = old_altq_ifs;
        V_ticket_altqs_active = V_ticket_altqs_inactive;

        /* Attach new disciplines */
        TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) {
                if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
                        /* attach the discipline */
                        error = altq_pfattach(altq);
                        if (error == 0 && V_pf_altq_running)
                                error = pf_enable_altq(altq);
                        if (error != 0)
                                return (error);
                }
        }

        /* Purge the old altq lists */
        TAILQ_FOREACH_SAFE(altq, V_pf_altq_ifs_inactive, entries, tmp) {
                if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
                        /* detach and destroy the discipline */
                        if (V_pf_altq_running)
                                error = pf_disable_altq(altq);
                        /* remember the first error but keep tearing down */
                        err = altq_pfdetach(altq);
                        if (err != 0 && error == 0)
                                error = err;
                        err = altq_remove(altq);
                        if (err != 0 && error == 0)
                                error = err;
                }
                free(altq, M_PFALTQ);
        }
        TAILQ_INIT(V_pf_altq_ifs_inactive);
        TAILQ_FOREACH_SAFE(altq, V_pf_altqs_inactive, entries, tmp) {
                pf_qid_unref(altq->qid);
                free(altq, M_PFALTQ);
        }
        TAILQ_INIT(V_pf_altqs_inactive);

        V_altqs_inactive_open = 0;
        return (error);
}

static int
pf_enable_altq(struct pf_altq *altq)
{
        struct ifnet            *ifp;
        struct tb_profile        tb;
        int                      error = 0;

        if ((ifp = ifunit(altq->ifname)) == NULL)
                return (EINVAL);

        if (ifp->if_snd.altq_type != ALTQT_NONE)
                error = altq_enable(&ifp->if_snd);

        /* set tokenbucket regulator */
        if (error == 0 && ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
                tb.rate = altq->ifbandwidth;
                tb.depth = altq->tbrsize;
                error = tbr_set(&ifp->if_snd, &tb);
        }

        return (error);
}

/*
 * Disable queueing on the interface named by the altq entry and clear
 * its token bucket regulator.  Returns EINVAL if the interface does
 * not exist, 0 if the discipline was already replaced by another one.
 */
static int
pf_disable_altq(struct pf_altq *altq)
{
        struct ifnet            *ifp;
        struct tb_profile        tb;
        int                      error;

        if ((ifp = ifunit(altq->ifname)) == NULL)
                return (EINVAL);

        /*
         * when the discipline is no longer referenced, it was overridden
         * by a new one.  if so, just return.
         */
        if (altq->altq_disc != ifp->if_snd.altq_disc)
                return (0);

        error = altq_disable(&ifp->if_snd);

        if (error == 0) {
                /*
                 * Clear the tokenbucket regulator.  Zero the whole
                 * profile rather than passing an uninitialized depth.
                 */
                tb.rate = 0;
                tb.depth = 0;
                error = tbr_set(&ifp->if_snd, &tb);
        }

        return (error);
}

/*
 * Re-instantiate one copied altq entry during an interface event.  If
 * the interface is gone (or is the one being removed), the entry is
 * merely marked PFALTQ_FLAG_IF_REMOVED; otherwise the discipline is
 * added on the interface.  On error the altq is freed here.
 */
static int
pf_altq_ifnet_event_add(struct ifnet *ifp, int remove, u_int32_t ticket,
    struct pf_altq *altq)
{
        struct ifnet    *ifp1;
        int              error = 0;

        /* Deactivate the interface in question */
        altq->local_flags &= ~PFALTQ_FLAG_IF_REMOVED;
        if ((ifp1 = ifunit(altq->ifname)) == NULL ||
            (remove && ifp1 == ifp)) {
                altq->local_flags |= PFALTQ_FLAG_IF_REMOVED;
        } else {
                error = altq_add(ifp1, altq);

                /*
                 * NOTE(review): checked only after altq_add(); a stale
                 * ticket frees the altq below even though altq_add()
                 * succeeded — confirm no altq_remove() is needed here.
                 */
                if (ticket != V_ticket_altqs_inactive)
                        error = EBUSY;

                if (error)
                        free(altq, M_PFALTQ);
        }

        return (error);
}

/*
 * Interface arrival/departure hook: rebuild the ALTQ configuration by
 * copying the current active queue sets into a fresh inactive set,
 * re-evaluating each entry against the new interface situation, and
 * committing (or, on error, rolling back) the result.
 */
void
pf_altq_ifnet_event(struct ifnet *ifp, int remove)
{
        struct pf_altq  *a1, *a2, *a3;
        u_int32_t        ticket;
        int              error = 0;

        /*
         * No need to re-evaluate the configuration for events on interfaces
         * that do not support ALTQ, as it's not possible for such
         * interfaces to be part of the configuration.
         */
        if (!ALTQ_IS_READY(&ifp->if_snd))
                return;

        /* Interrupt userland queue modifications */
        if (V_altqs_inactive_open)
                pf_rollback_altq(V_ticket_altqs_inactive);

        /* Start new altq ruleset */
        if (pf_begin_altq(&ticket))
                return;

        /* Copy the current active set */
        TAILQ_FOREACH(a1, V_pf_altq_ifs_active, entries) {
                /* M_NOWAIT: called from an event context, may not sleep */
                a2 = malloc(sizeof(*a2), M_PFALTQ, M_NOWAIT);
                if (a2 == NULL) {
                        error = ENOMEM;
                        break;
                }
                bcopy(a1, a2, sizeof(struct pf_altq));

                error = pf_altq_ifnet_event_add(ifp, remove, ticket, a2);
                if (error)
                        break;

                TAILQ_INSERT_TAIL(V_pf_altq_ifs_inactive, a2, entries);
        }
        if (error)
                goto out;
        TAILQ_FOREACH(a1, V_pf_altqs_active, entries) {
                a2 = malloc(sizeof(*a2), M_PFALTQ, M_NOWAIT);
                if (a2 == NULL) {
                        error = ENOMEM;
                        break;
                }
                bcopy(a1, a2, sizeof(struct pf_altq));

                /* Take a fresh reference on the queue id for the copy. */
                if ((a2->qid = pf_qname2qid(a2->qname, true)) == 0) {
                        error = EBUSY;
                        free(a2, M_PFALTQ);
                        break;
                }
                a2->altq_disc = NULL;
                /* Link the queue to its interface's discipline, if any. */
                TAILQ_FOREACH(a3, V_pf_altq_ifs_inactive, entries) {
                        if (strncmp(a3->ifname, a2->ifname,
                                IFNAMSIZ) == 0) {
                                a2->altq_disc = a3->altq_disc;
                                break;
                        }
                }
                error = pf_altq_ifnet_event_add(ifp, remove, ticket, a2);
                if (error)
                        break;

                TAILQ_INSERT_TAIL(V_pf_altqs_inactive, a2, entries);
        }

out:
        if (error != 0)
                pf_rollback_altq(ticket);
        else
                pf_commit_altq(ticket);
}
#endif /* ALTQ */

/*
 * Allocate an empty global rule tree.  Returns NULL if the allocation
 * fails (e.g. with M_NOWAIT).
 */
static struct pf_krule_global *
pf_rule_tree_alloc(int flags)
{
        struct pf_krule_global *tree;

        tree = malloc(sizeof(*tree), M_PF, flags);
        if (tree != NULL)
                RB_INIT(tree);
        return (tree);
}

/*
 * Free the tree container allocated by pf_rule_tree_alloc().  Nodes
 * are not freed here; free(9) accepts NULL.
 */
void
pf_rule_tree_free(struct pf_krule_global *tree)
{

        free(tree, M_PF);
}

/*
 * Open a rules transaction for (anchor, rs_num): flush any previously
 * staged inactive rules, install a fresh lookup tree and hand the
 * caller a ticket identifying the transaction.
 */
static int
pf_begin_rules(u_int32_t *ticket, int rs_num, const char *anchor)
{
        struct pf_krule_global *tree;
        struct pf_kruleset      *rs;
        struct pf_krule         *rule;

        PF_RULES_WASSERT();

        if (rs_num < 0 || rs_num >= PF_RULESET_MAX)
                return (EINVAL);
        /* Allocate the tree first so failure leaves the ruleset untouched. */
        tree = pf_rule_tree_alloc(M_NOWAIT);
        if (tree == NULL)
                return (ENOMEM);
        rs = pf_find_or_create_kruleset(anchor);
        if (rs == NULL) {
                pf_rule_tree_free(tree);
                return (EINVAL);
        }
        /* Replace any stale inactive tree from an aborted transaction. */
        pf_rule_tree_free(rs->rules[rs_num].inactive.tree);
        rs->rules[rs_num].inactive.tree = tree;

        while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) {
                pf_unlink_rule(rs->rules[rs_num].inactive.ptr, rule);
                rs->rules[rs_num].inactive.rcount--;
        }
        *ticket = ++rs->rules[rs_num].inactive.ticket;
        rs->rules[rs_num].inactive.open = 1;
        return (0);
}

/*
 * Abort a rules transaction: drop the staged inactive rules if the
 * ticket still matches.  A stale ticket or unknown anchor is not an
 * error.  For the main ruleset (empty anchor) also roll back staged
 * state/source limit definitions.
 */
static int
pf_rollback_rules(u_int32_t ticket, int rs_num, char *anchor)
{
        struct pf_kruleset      *rs;
        struct pf_krule         *rule;

        PF_RULES_WASSERT();

        if (rs_num < 0 || rs_num >= PF_RULESET_MAX)
                return (EINVAL);
        rs = pf_find_kruleset(anchor);
        if (rs == NULL || !rs->rules[rs_num].inactive.open ||
            rs->rules[rs_num].inactive.ticket != ticket)
                return (0);
        while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) {
                pf_unlink_rule(rs->rules[rs_num].inactive.ptr, rule);
                rs->rules[rs_num].inactive.rcount--;
        }
        rs->rules[rs_num].inactive.open = 0;

        /* Limit definitions live only in the main ruleset. */
        if (anchor[0])
                return (0);

        pf_statelim_rollback();
        pf_sourcelim_rollback();
        return (0);
}

#define PF_MD5_UPD(st, elm)                                             \
                MD5Update(ctx, (u_int8_t *) &(st)->elm, sizeof((st)->elm))

#define PF_MD5_UPD_STR(st, elm)                                         \
                MD5Update(ctx, (u_int8_t *) (st)->elm, strlen((st)->elm))

#define PF_MD5_UPD_HTONL(st, elm, stor) do {                            \
                (stor) = htonl((st)->elm);                              \
                MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int32_t));\
} while (0)

#define PF_MD5_UPD_HTONS(st, elm, stor) do {                            \
                (stor) = htons((st)->elm);                              \
                MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int16_t));\
} while (0)

/*
 * Hash the matching-relevant parts of a rule address into ctx.  Table
 * names generated by the ruleset optimizer are skipped so that
 * equivalent configurations produce identical checksums.
 */
static void
pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr)
{
        PF_MD5_UPD(pfr, addr.type);
        switch (pfr->addr.type) {
                case PF_ADDR_DYNIFTL:
                        PF_MD5_UPD(pfr, addr.v.ifname);
                        PF_MD5_UPD(pfr, addr.iflags);
                        break;
                case PF_ADDR_TABLE:
                        /* only hash user-chosen table names */
                        if (strncmp(pfr->addr.v.tblname, PF_OPTIMIZER_TABLE_PFX,
                            strlen(PF_OPTIMIZER_TABLE_PFX)))
                                PF_MD5_UPD(pfr, addr.v.tblname);
                        break;
                case PF_ADDR_ADDRMASK:
                case PF_ADDR_RANGE:
                        /* XXX ignore af? */
                        PF_MD5_UPD(pfr, addr.v.a.addr.addr32);
                        PF_MD5_UPD(pfr, addr.v.a.mask.addr32);
                        break;
                case PF_ADDR_NONE:
                case PF_ADDR_NOROUTE:
                case PF_ADDR_URPFFAILED:
                        /* These do not use any address data. */
                        break;
                default:
                        panic("Unknown address type %d", pfr->addr.type);
        }

        PF_MD5_UPD(pfr, port[0]);
        PF_MD5_UPD(pfr, port[1]);
        PF_MD5_UPD(pfr, neg);
        PF_MD5_UPD(pfr, port_op);
}

/* Hash the matching-relevant fields of a rule's address pool into ctx. */
static void
pf_hash_pool(MD5_CTX *ctx, struct pf_kpool *pool)
{
        uint16_t x;
        int y;

        /* cur is NULL for an empty pool */
        if (pool->cur) {
                PF_MD5_UPD(pool, cur->addr);
                PF_MD5_UPD_STR(pool, cur->ifname);
                PF_MD5_UPD(pool, cur->af);
        }
        PF_MD5_UPD(pool, key);
        PF_MD5_UPD(pool, counter);

        PF_MD5_UPD(pool, mape.offset);
        PF_MD5_UPD(pool, mape.psidlen);
        PF_MD5_UPD_HTONS(pool, mape.psid, x);
        PF_MD5_UPD_HTONL(pool, tblidx, y);
        PF_MD5_UPD_HTONS(pool, proxy_port[0], x);
        PF_MD5_UPD_HTONS(pool, proxy_port[1], x);
        PF_MD5_UPD(pool, opts);
        PF_MD5_UPD(pool, ipv6_nexthop_af);
}

/*
 * Roll every configuration-relevant field of a rule into the MD5
 * context.  Used for both the per-rule md5sum and the overall pfsync
 * ruleset checksum, so multi-byte fields are hashed in network byte
 * order to keep the result endian-independent.
 */
static void
pf_hash_rule_rolling(MD5_CTX *ctx, struct pf_krule *rule)
{
        u_int16_t x;
        u_int32_t y;

        pf_hash_rule_addr(ctx, &rule->src);
        pf_hash_rule_addr(ctx, &rule->dst);
        for (int i = 0; i < PF_RULE_MAX_LABEL_COUNT; i++)
                PF_MD5_UPD_STR(rule, label[i]);
        PF_MD5_UPD_HTONL(rule, ridentifier, y);
        PF_MD5_UPD_STR(rule, ifname);
        PF_MD5_UPD_STR(rule, rcv_ifname);
        PF_MD5_UPD_STR(rule, qname);
        PF_MD5_UPD_STR(rule, pqname);
        PF_MD5_UPD_STR(rule, tagname);
        PF_MD5_UPD_STR(rule, match_tagname);

        PF_MD5_UPD_STR(rule, overload_tblname);

        pf_hash_pool(ctx, &rule->nat);
        pf_hash_pool(ctx, &rule->rdr);
        pf_hash_pool(ctx, &rule->route);
        PF_MD5_UPD_HTONL(rule, pktrate.limit, y);
        PF_MD5_UPD_HTONL(rule, pktrate.seconds, y);

        PF_MD5_UPD_HTONL(rule, os_fingerprint, y);

        PF_MD5_UPD_HTONL(rule, rtableid, y);
        for (int i = 0; i < PFTM_MAX; i++)
                PF_MD5_UPD_HTONL(rule, timeout[i], y);
        PF_MD5_UPD_HTONL(rule, max_states, y);
        PF_MD5_UPD_HTONL(rule, max_src_nodes, y);
        PF_MD5_UPD_HTONL(rule, max_src_states, y);
        PF_MD5_UPD_HTONL(rule, max_src_conn, y);
        PF_MD5_UPD_HTONL(rule, max_src_conn_rate.limit, y);
        PF_MD5_UPD_HTONL(rule, max_src_conn_rate.seconds, y);
        /*
         * Use the 16-bit scratch variable: PF_MD5_UPD_HTONS hashes
         * sizeof(u_int16_t) bytes of the temporary, so a 32-bit
         * temporary would hash its (endian-dependent) padding bytes
         * on big-endian machines instead of the value.
         */
        PF_MD5_UPD_HTONS(rule, max_pkt_size, x);
        PF_MD5_UPD_HTONS(rule, qid, x);
        PF_MD5_UPD_HTONS(rule, pqid, x);
        PF_MD5_UPD_HTONS(rule, dnpipe, x);
        PF_MD5_UPD_HTONS(rule, dnrpipe, x);
        PF_MD5_UPD_HTONL(rule, free_flags, y);
        PF_MD5_UPD_HTONL(rule, prob, y);

        PF_MD5_UPD_HTONS(rule, return_icmp, x);
        PF_MD5_UPD_HTONS(rule, return_icmp6, x);
        PF_MD5_UPD_HTONS(rule, max_mss, x);
        PF_MD5_UPD_HTONS(rule, tag, x); /* dup? */
        PF_MD5_UPD_HTONS(rule, match_tag, x); /* dup? */
        PF_MD5_UPD_HTONS(rule, scrub_flags, x);

        PF_MD5_UPD(rule, uid.op);
        PF_MD5_UPD_HTONL(rule, uid.uid[0], y);
        PF_MD5_UPD_HTONL(rule, uid.uid[1], y);
        PF_MD5_UPD(rule, gid.op);
        PF_MD5_UPD_HTONL(rule, gid.gid[0], y);
        PF_MD5_UPD_HTONL(rule, gid.gid[1], y);

        PF_MD5_UPD_HTONL(rule, rule_flag, y);
        PF_MD5_UPD_HTONL(rule, rule_ref, y);
        PF_MD5_UPD(rule, action);
        PF_MD5_UPD(rule, direction);
        PF_MD5_UPD(rule, log);
        PF_MD5_UPD(rule, logif);
        PF_MD5_UPD(rule, quick);
        PF_MD5_UPD(rule, ifnot);
        PF_MD5_UPD(rule, match_tag_not);
        PF_MD5_UPD(rule, natpass);

        PF_MD5_UPD(rule, keep_state);
        PF_MD5_UPD(rule, af);
        PF_MD5_UPD(rule, proto);
        PF_MD5_UPD_HTONS(rule, type, x);
        PF_MD5_UPD_HTONS(rule, code, x);
        PF_MD5_UPD(rule, flags);
        PF_MD5_UPD(rule, flagset);
        PF_MD5_UPD(rule, min_ttl);
        PF_MD5_UPD(rule, allow_opts);
        PF_MD5_UPD(rule, rt);
        PF_MD5_UPD(rule, return_ttl);
        PF_MD5_UPD(rule, tos);
        PF_MD5_UPD(rule, set_tos);
        PF_MD5_UPD(rule, anchor_relative);
        PF_MD5_UPD(rule, anchor_wildcard);

        PF_MD5_UPD(rule, flush);
        PF_MD5_UPD(rule, prio);
        PF_MD5_UPD(rule, set_prio[0]);
        PF_MD5_UPD(rule, set_prio[1]);
        PF_MD5_UPD(rule, naf);
        PF_MD5_UPD(rule, rcvifnot);
        PF_MD5_UPD(rule, statelim.id);
        PF_MD5_UPD_HTONL(rule, statelim.limiter_action, y);
        PF_MD5_UPD(rule, sourcelim.id);
        PF_MD5_UPD_HTONL(rule, sourcelim.limiter_action, y);

        PF_MD5_UPD(rule, divert.addr);
        PF_MD5_UPD_HTONS(rule, divert.port, x);

        if (rule->anchor != NULL)
                PF_MD5_UPD_STR(rule, anchor->path);
}

/* Compute and store the rule's MD5 checksum over its config fields. */
static void
pf_hash_rule(struct pf_krule *rule)
{
        MD5_CTX         ctx;

        MD5Init(&ctx);
        pf_hash_rule_rolling(&ctx, rule);
        MD5Final(rule->md5sum, &ctx);
}

/* Tree comparator: order rules by their precomputed MD5 checksum. */
static int
pf_krule_compare(struct pf_krule *a, struct pf_krule *b)
{

        return (memcmp(a->md5sum, b->md5sum, PF_MD5_DIGEST_LENGTH));
}

/*
 * Commit a rules transaction: swap the staged inactive rules of
 * (anchor, rs_num) into place and purge the previous active rules.
 * Returns EBUSY if the ticket does not match the open transaction.
 * For the main ruleset, the pfsync checksum is recomputed before the
 * swap; for the main filter ruleset, staged state/source limits are
 * committed afterwards.
 */
static int
pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor)
{
        struct pf_kruleset      *rs;
        struct pf_krule         *rule, *old_rule;
        struct pf_krulequeue    *old_rules;
        struct pf_krule_global  *old_tree;
        int                      error;
        u_int32_t                old_rcount;
        bool                     is_main_ruleset = anchor[0] == '\0';

        PF_RULES_WASSERT();

        if (rs_num < 0 || rs_num >= PF_RULESET_MAX)
                return (EINVAL);
        rs = pf_find_kruleset(anchor);
        if (rs == NULL || !rs->rules[rs_num].inactive.open ||
            ticket != rs->rules[rs_num].inactive.ticket)
                return (EBUSY);

        /* Calculate checksum for the main ruleset */
        if (rs == &pf_main_ruleset) {
                error = pf_sourcelim_check();
                if (error != 0)
                        return (error);
                error = pf_setup_pfsync_matching(rs);
                if (error != 0)
                        return (error);
        }

        /* Swap rules, keep the old. */
        old_rules = rs->rules[rs_num].active.ptr;
        old_rcount = rs->rules[rs_num].active.rcount;
        old_tree = rs->rules[rs_num].active.tree;

        rs->rules[rs_num].active.ptr =
            rs->rules[rs_num].inactive.ptr;
        rs->rules[rs_num].active.tree =
            rs->rules[rs_num].inactive.tree;
        rs->rules[rs_num].active.rcount =
            rs->rules[rs_num].inactive.rcount;

        /*
         * Attempt to preserve counter information: for each new rule,
         * look up its md5sum-identical predecessor in the old tree and
         * roll its counters into the new rule.
         */
        if (V_pf_status.keep_counters && old_tree != NULL) {
                TAILQ_FOREACH(rule, rs->rules[rs_num].active.ptr,
                    entries) {
                        old_rule = RB_FIND(pf_krule_global, old_tree, rule);
                        if (old_rule == NULL) {
                                continue;
                        }
                        pf_counter_u64_critical_enter();
                        pf_counter_u64_rollup_protected(&rule->evaluations,
                            pf_counter_u64_fetch(&old_rule->evaluations));
                        pf_counter_u64_rollup_protected(&rule->packets[0],
                            pf_counter_u64_fetch(&old_rule->packets[0]));
                        pf_counter_u64_rollup_protected(&rule->packets[1],
                            pf_counter_u64_fetch(&old_rule->packets[1]));
                        pf_counter_u64_rollup_protected(&rule->bytes[0],
                            pf_counter_u64_fetch(&old_rule->bytes[0]));
                        pf_counter_u64_rollup_protected(&rule->bytes[1],
                            pf_counter_u64_fetch(&old_rule->bytes[1]));
                        pf_counter_u64_critical_exit();
                }
        }

        rs->rules[rs_num].inactive.ptr = old_rules;
        rs->rules[rs_num].inactive.tree = NULL; /* important for pf_ioctl_addrule */
        rs->rules[rs_num].inactive.rcount = old_rcount;

        rs->rules[rs_num].active.ticket =
            rs->rules[rs_num].inactive.ticket;
        pf_calc_skip_steps(rs->rules[rs_num].active.ptr);

        /* Purge the old rule list. */
        PF_UNLNKDRULES_LOCK();
        while ((rule = TAILQ_FIRST(old_rules)) != NULL)
                pf_unlink_rule_locked(old_rules, rule);
        PF_UNLNKDRULES_UNLOCK();
        rs->rules[rs_num].inactive.rcount = 0;
        rs->rules[rs_num].inactive.open = 0;
        pf_remove_if_empty_kruleset(rs);
        pf_rule_tree_free(old_tree);

        /* statelim/sourcelim/queue defs only in the main ruleset */
        if (! is_main_ruleset || rs_num != PF_RULESET_FILTER)
                return (0);

        pf_statelim_commit();
        pf_sourcelim_commit();

        return (0);
}

static int
pf_setup_pfsync_matching(struct pf_kruleset *rs)
{
        MD5_CTX                  ctx;
        struct pf_krule         *rule;
        int                      rs_cnt;
        u_int8_t                 digest[PF_MD5_DIGEST_LENGTH];

        MD5Init(&ctx);
        for (rs_cnt = 0; rs_cnt < PF_RULESET_MAX; rs_cnt++) {
                /* XXX PF_RULESET_SCRUB as well? */
                if (rs_cnt == PF_RULESET_SCRUB)
                        continue;

                if (rs->rules[rs_cnt].inactive.rcount) {
                        TAILQ_FOREACH(rule, rs->rules[rs_cnt].inactive.ptr,
                            entries) {
                                pf_hash_rule_rolling(&ctx, rule);
                        }
                }
        }

        MD5Final(digest, &ctx);
        memcpy(V_pf_status.pf_chksum, digest, sizeof(V_pf_status.pf_chksum));
        return (0);
}

/*
 * Prepare a rule address for use in an ethernet ruleset.  Only table
 * addresses are supported; attaching the table may fail with ENOMEM.
 */
static int
pf_eth_addr_setup(struct pf_keth_ruleset *ruleset, struct pf_addr_wrap *addr)
{
        if (addr->type != PF_ADDR_TABLE)
                return (EINVAL);

        addr->p.tbl = pfr_eth_attach_table(ruleset, addr->v.tblname);
        if (addr->p.tbl == NULL)
                return (ENOMEM);

        return (0);
}

/*
 * Prepare a rule address for use: attach the referenced table or set
 * up dynamic interface tracking.  Address types that need no runtime
 * setup succeed trivially.
 */
static int
pf_addr_setup(struct pf_kruleset *ruleset, struct pf_addr_wrap *addr,
    sa_family_t af)
{
        int error;

        switch (addr->type) {
        case PF_ADDR_TABLE:
                addr->p.tbl = pfr_attach_table(ruleset, addr->v.tblname);
                error = (addr->p.tbl == NULL) ? ENOMEM : 0;
                break;
        case PF_ADDR_DYNIFTL:
                error = pfi_dynaddr_setup(addr, af);
                break;
        default:
                error = 0;
                break;
        }

        return (error);
}

/*
 * Convert kernel-internal address representations (dynamic interface
 * addresses, table references) into their userland export form.
 */
void
pf_addr_copyout(struct pf_addr_wrap *addr)
{
        if (addr->type == PF_ADDR_DYNIFTL)
                pfi_dynaddr_copyout(addr);
        else if (addr->type == PF_ADDR_TABLE)
                pf_tbladdr_copyout(addr);
}

int
pf_statelim_add(const struct pfioc_statelim *ioc)
{
        struct pf_statelim      *pfstlim;
        int                      error;
        size_t                   namelen;

        if (ioc->id < PF_STATELIM_ID_MIN ||
            ioc->id > PF_STATELIM_ID_MAX)
                return (EINVAL);

        if (ioc->limit < PF_STATELIM_LIMIT_MIN ||
            ioc->limit > PF_STATELIM_LIMIT_MAX)
                return (EINVAL);

        if ((ioc->rate.limit == 0) != (ioc->rate.seconds == 0))
                return (EINVAL);

        namelen = strnlen(ioc->name, sizeof(ioc->name));
        /* is the name from userland nul terminated? */
        if (namelen == sizeof(ioc->name))
                return (EINVAL);

        pfstlim = malloc(sizeof(*pfstlim), M_PF_STATE_LIM, M_WAITOK | M_ZERO);
        if (pfstlim == NULL)
                return (ENOMEM);

        pfstlim->pfstlim_id = ioc->id;
        if (strlcpy(pfstlim->pfstlim_nm, ioc->name,
            sizeof(pfstlim->pfstlim_nm)) >= sizeof(pfstlim->pfstlim_nm)) {
                error = EINVAL;
                goto free;
        }
        pfstlim->pfstlim_limit = ioc->limit;
        pfstlim->pfstlim_rate.limit = ioc->rate.limit;
        pfstlim->pfstlim_rate.seconds = ioc->rate.seconds;

        if (pfstlim->pfstlim_rate.limit) {
                uint64_t bucket = SEC_TO_NSEC(pfstlim->pfstlim_rate.seconds);
                struct timespec ts;

                getnanouptime(&ts);

                pfstlim->pfstlim_rate_ts = SEC_TO_NSEC(ts.tv_sec) + ts.tv_nsec -
                    bucket;
                pfstlim->pfstlim_rate_token = bucket /
                    pfstlim->pfstlim_rate.limit;
                pfstlim->pfstlim_rate_bucket = bucket;
        }

        TAILQ_INIT(&pfstlim->pfstlim_states);
        mtx_init(&pfstlim->pfstlim_lock, "pf state limit", NULL, MTX_DEF);

        PF_RULES_WLOCK();
        if (ioc->ticket != pf_main_ruleset.rules[PF_RULESET_FILTER].inactive.ticket) {
                error = EBUSY;
                goto unlock;
        }

        if (RB_INSERT(pf_statelim_id_tree, &V_pf_statelim_id_tree_inactive,
                pfstlim) != NULL) {
                error = EBUSY;
                goto unlock;
        }

        if (RB_INSERT(pf_statelim_nm_tree, &V_pf_statelim_nm_tree_inactive,
                pfstlim) != NULL) {
                RB_REMOVE(pf_statelim_id_tree, &V_pf_statelim_id_tree_inactive,
                    pfstlim);
                error = EBUSY;
                goto unlock;
        }

        TAILQ_INSERT_HEAD(&V_pf_statelim_list_inactive, pfstlim, pfstlim_list);

        PF_RULES_WUNLOCK();

        return (0);

unlock:
        PF_RULES_WUNLOCK();

free:
        free(pfstlim, M_PF_STATE_LIM);

        return (error);
}

/*
 * Detach every state linked to this state limit and reset its usage
 * count.  The link structures themselves are moved onto "garbage" for
 * the caller to free outside the state locks.
 */
static void
pf_statelim_unlink(struct pf_statelim *pfstlim,
    struct pf_state_link_list *garbage)
{
        struct pf_state_link *pfl;


        /* unwire the links */
        TAILQ_FOREACH(pfl, &pfstlim->pfstlim_states, pfl_link) {
                struct pf_kstate *s = pfl->pfl_state;

                /* if !rmst */
                PF_STATE_LOCK(s);
                s->statelim = 0;
                SLIST_REMOVE(&s->linkage, pfl, pf_state_link, pfl_linkage);
                PF_STATE_UNLOCK(s);
        }

        /* take the list away */
        TAILQ_CONCAT(garbage, &pfstlim->pfstlim_states, pfl_link);
        pfstlim->pfstlim_inuse = 0;
}

/*
 * Fold the staged (inactive) state limits into the active set:
 * existing limits with the same id are updated in place, new ones are
 * inserted, and active limits absent from the staged set are torn
 * down.  The inactive trees/list are reset afterwards.
 */
void
pf_statelim_commit(void)
{
        struct pf_statelim *pfstlim, *npfstlim, *opfstlim;
        struct pf_statelim_list l = TAILQ_HEAD_INITIALIZER(l);
        struct pf_state_link_list garbage = TAILQ_HEAD_INITIALIZER(garbage);
        struct pf_state_link *pfl, *npfl;

        PF_RULES_WASSERT();

        /* merge the new statelims into the current set */

        /* start with an empty active list */
        TAILQ_CONCAT(&l, &V_pf_statelim_list_active, pfstlim_list);

        /* beware, the inactive bits gets messed up here */

        /* try putting pending statelims into the active tree */
        TAILQ_FOREACH_SAFE(pfstlim, &V_pf_statelim_list_inactive, pfstlim_list,
            npfstlim) {
                opfstlim = RB_INSERT(pf_statelim_id_tree,
                    &V_pf_statelim_id_tree_active, pfstlim);
                if (opfstlim != NULL) {
                        /* this statelim already exists, merge */
                        opfstlim->pfstlim_limit = pfstlim->pfstlim_limit;
                        opfstlim->pfstlim_rate.limit =
                            pfstlim->pfstlim_rate.limit;
                        opfstlim->pfstlim_rate.seconds =
                            pfstlim->pfstlim_rate.seconds;

                        opfstlim->pfstlim_rate_ts = pfstlim->pfstlim_rate_ts;
                        opfstlim->pfstlim_rate_token =
                            pfstlim->pfstlim_rate_token;
                        opfstlim->pfstlim_rate_bucket =
                            pfstlim->pfstlim_rate_bucket;

                        /*
                         * NOTE(review): the name is rewritten in place;
                         * confirm no active name-keyed tree still holds
                         * this entry under its old name.
                         */
                        memcpy(opfstlim->pfstlim_nm, pfstlim->pfstlim_nm,
                            sizeof(opfstlim->pfstlim_nm));

                        /* use the existing statelim instead */
                        free(pfstlim, M_PF_STATE_LIM);
                        TAILQ_REMOVE(&l, opfstlim, pfstlim_list);
                        pfstlim = opfstlim;
                }

                TAILQ_INSERT_TAIL(&V_pf_statelim_list_active, pfstlim,
                    pfstlim_list);
        }

        /* clean up the now unused statelims from the old set */
        TAILQ_FOREACH_SAFE(pfstlim, &l, pfstlim_list, npfstlim) {
                pf_statelim_unlink(pfstlim, &garbage);

                RB_REMOVE(pf_statelim_id_tree, &V_pf_statelim_id_tree_active,
                    pfstlim);

                free(pfstlim, M_PF_STATE_LIM);
        }

        /* fix up the inactive tree */
        RB_INIT(&V_pf_statelim_id_tree_inactive);
        RB_INIT(&V_pf_statelim_nm_tree_inactive);
        TAILQ_INIT(&V_pf_statelim_list_inactive);

        TAILQ_FOREACH_SAFE(pfl, &garbage, pfl_link, npfl)
                free(pfl, M_PF_STATE_LINK);
}

/*
 * Tear down every pf_source tracked by a sourcelim.  Each source is
 * removed from both lookup trees and from the empty-source GC list,
 * its states are detached from the limiter, and the now-orphaned
 * state links are handed to the caller on "garbage" for freeing
 * outside any state locks.  Called with the rules write lock held.
 */
static void
pf_sourcelim_unlink(struct pf_sourcelim *pfsrlim,
    struct pf_state_link_list *garbage)
{
	extern struct pf_source_list pf_source_gc;
	struct pf_source *pfsr;
	struct pf_state_link *pfl;

	PF_RULES_WASSERT();

	/* pop sources off the tree until it is empty */
	while ((pfsr = RB_ROOT(&pfsrlim->pfsrlim_sources)) != NULL) {
		RB_REMOVE(pf_source_tree, &pfsrlim->pfsrlim_sources, pfsr);
		RB_REMOVE(pf_source_ioc_tree, &pfsrlim->pfsrlim_ioc_sources,
		    pfsr);
		/* idle sources also sit on the GC list; dequeue them */
		if (pfsr->pfsr_inuse == 0)
			TAILQ_REMOVE(&pf_source_gc, pfsr, pfsr_empty_gc);

		/* unwire the links */
		TAILQ_FOREACH(pfl, &pfsr->pfsr_states, pfl_link) {
			struct pf_kstate *s = pfl->pfl_state;

			/* the state lock guards sourcelim and linkage */
			PF_STATE_LOCK(s);
			/* if !rmst */
			s->sourcelim = 0;
			SLIST_REMOVE(&s->linkage, pfl, pf_state_link,
			    pfl_linkage);
			PF_STATE_UNLOCK(s);
		}

		/* take the list away */
		TAILQ_CONCAT(garbage, &pfsr->pfsr_states, pfl_link);

		free(pfsr, M_PF_SOURCE_LIM);
	}
}

/*
 * Verify that the pending (inactive) sourcelims can be merged into
 * the active set without disturbing sources that are already being
 * tracked.  Returns 0 if the commit may proceed, EBUSY otherwise.
 * Called with the rules write lock held.
 */
int
pf_sourcelim_check(void)
{
	struct pf_sourcelim *pending, *active;

	PF_RULES_WASSERT();

	TAILQ_FOREACH(pending, &V_pf_sourcelim_list_inactive, pfsrlim_list) {
		active = RB_FIND(pf_sourcelim_id_tree,
		    &V_pf_sourcelim_id_tree_active, pending);

		/* new config, no conflict */
		if (active == NULL)
			continue;

		/* nothing is tracked at the moment, no conflict */
		if (RB_EMPTY(&active->pfsrlim_sources))
			continue;

		/* the overload table must stay the same while in use */
		if (strcmp(active->pfsrlim_overload.name,
		    pending->pfsrlim_overload.name) != 0)
			return (EBUSY);

		/*
		 * we should allow the prefixlens to get shorter
		 * and merge pf_source entries.
		 */
		if (active->pfsrlim_ipv4_prefix !=
		    pending->pfsrlim_ipv4_prefix)
			return (EBUSY);
		if (active->pfsrlim_ipv6_prefix !=
		    pending->pfsrlim_ipv6_prefix)
			return (EBUSY);
	}

	return (0);
}

/*
 * Swap the pending (inactive) sourcelim set into place.  Sourcelims
 * whose id already exists in the active tree are merged in place so
 * their tracked sources survive; active sourcelims with no pending
 * counterpart are torn down.  Called with the rules write lock held.
 */
void
pf_sourcelim_commit(void)
{
	struct pf_sourcelim *pfsrlim, *npfsrlim, *opfsrlim;
	struct pf_sourcelim_list l = TAILQ_HEAD_INITIALIZER(l);
	struct pf_state_link_list garbage = TAILQ_HEAD_INITIALIZER(garbage);
	struct pf_state_link *pfl, *npfl;

	PF_RULES_WASSERT();

	/* merge the new sourcelims into the current set */

	/* start with an empty active list */
	TAILQ_CONCAT(&l, &V_pf_sourcelim_list_active, pfsrlim_list);

	/* beware, the inactive bits gets messed up here */

	/* try putting pending sourcelims into the active tree */
	TAILQ_FOREACH_SAFE(pfsrlim, &V_pf_sourcelim_list_inactive, pfsrlim_list,
	    npfsrlim) {
		opfsrlim = RB_INSERT(pf_sourcelim_id_tree,
		    &V_pf_sourcelim_id_tree_active, pfsrlim);
		if (opfsrlim != NULL) {
			/* this sourcelim already exists, merge */
			opfsrlim->pfsrlim_entries = pfsrlim->pfsrlim_entries;
			opfsrlim->pfsrlim_limit = pfsrlim->pfsrlim_limit;
			opfsrlim->pfsrlim_ipv4_prefix =
			    pfsrlim->pfsrlim_ipv4_prefix;
			opfsrlim->pfsrlim_ipv6_prefix =
			    pfsrlim->pfsrlim_ipv6_prefix;
			opfsrlim->pfsrlim_rate.limit =
			    pfsrlim->pfsrlim_rate.limit;
			opfsrlim->pfsrlim_rate.seconds =
			    pfsrlim->pfsrlim_rate.seconds;

			opfsrlim->pfsrlim_ipv4_mask =
			    pfsrlim->pfsrlim_ipv4_mask;
			opfsrlim->pfsrlim_ipv6_mask =
			    pfsrlim->pfsrlim_ipv6_mask;

			/* keep the existing pfsrlim_rate_ts */

			opfsrlim->pfsrlim_rate_token =
			    pfsrlim->pfsrlim_rate_token;
			opfsrlim->pfsrlim_rate_bucket =
			    pfsrlim->pfsrlim_rate_bucket;

			/* drop the old table ref before taking the new one */
			if (opfsrlim->pfsrlim_overload.table != NULL) {
				pfr_detach_table(
				    opfsrlim->pfsrlim_overload.table);
			}

			strlcpy(opfsrlim->pfsrlim_overload.name,
			    pfsrlim->pfsrlim_overload.name,
			    sizeof(opfsrlim->pfsrlim_overload.name));
			opfsrlim->pfsrlim_overload.hwm =
			    pfsrlim->pfsrlim_overload.hwm;
			opfsrlim->pfsrlim_overload.lwm =
			    pfsrlim->pfsrlim_overload.lwm;
			/*
			 * The table reference moves over to the merged
			 * sourcelim; was a comma-operator typo that fused
			 * this assignment with the memcpy() below.
			 */
			opfsrlim->pfsrlim_overload.table =
			    pfsrlim->pfsrlim_overload.table;

			memcpy(opfsrlim->pfsrlim_nm, pfsrlim->pfsrlim_nm,
			    sizeof(opfsrlim->pfsrlim_nm));

			/* use the existing sourcelim instead */
			free(pfsrlim, M_PF_SOURCE_LIM);
			TAILQ_REMOVE(&l, opfsrlim, pfsrlim_list);
			pfsrlim = opfsrlim;
		}

		TAILQ_INSERT_TAIL(&V_pf_sourcelim_list_active, pfsrlim,
		    pfsrlim_list);
	}

	/* clean up the now unused sourcelims from the old set */
	TAILQ_FOREACH_SAFE(pfsrlim, &l, pfsrlim_list, npfsrlim) {
		pf_sourcelim_unlink(pfsrlim, &garbage);

		RB_REMOVE(pf_sourcelim_id_tree, &V_pf_sourcelim_id_tree_active,
		    pfsrlim);

		if (pfsrlim->pfsrlim_overload.table != NULL)
			pfr_detach_table(pfsrlim->pfsrlim_overload.table);

		free(pfsrlim, M_PF_SOURCE_LIM);
	}

	/* fix up the inactive tree */
	RB_INIT(&V_pf_sourcelim_id_tree_inactive);
	RB_INIT(&V_pf_sourcelim_nm_tree_inactive);
	TAILQ_INIT(&V_pf_sourcelim_list_inactive);

	TAILQ_FOREACH_SAFE(pfl, &garbage, pfl_link, npfl)
		free(pfl, M_PF_STATE_LINK);
}

/*
 * Abort a pending statelim transaction: free everything staged on the
 * inactive list and reset the inactive trees/list to empty.  Called
 * with the rules write lock held.
 */
void
pf_statelim_rollback(void)
{
	struct pf_statelim *cur, *next;

	PF_RULES_WASSERT();

	TAILQ_FOREACH_SAFE(cur, &V_pf_statelim_list_inactive, pfstlim_list,
	    next)
		free(cur, M_PF_STATE_LIM);

	RB_INIT(&V_pf_statelim_id_tree_inactive);
	RB_INIT(&V_pf_statelim_nm_tree_inactive);
	TAILQ_INIT(&V_pf_statelim_list_inactive);
}

/* Exact-match lookup of a statelim by id; rules lock must be held. */
struct pf_statelim *
pf_statelim_rb_find(struct pf_statelim_id_tree *tree, struct pf_statelim *key)
{
	struct pf_statelim *found;

	PF_RULES_ASSERT();

	found = RB_FIND(pf_statelim_id_tree, tree, key);
	return (found);
}

/* Next-or-equal lookup of a statelim by id; rules lock must be held. */
struct pf_statelim *
pf_statelim_rb_nfind(struct pf_statelim_id_tree *tree, struct pf_statelim *key)
{
	struct pf_statelim *found;

	PF_RULES_ASSERT();

	found = RB_NFIND(pf_statelim_id_tree, tree, key);
	return (found);
}

/*
 * Copy one active statelim's configuration and counters out to the
 * ioctl structure.  rbt_op selects exact (find) or next (nfind)
 * lookup semantics.  Returns ENOENT when no statelim matches.
 */
int
pf_statelim_get(struct pfioc_statelim *ioc,
    struct pf_statelim *(*rbt_op)(struct pf_statelim_id_tree *,
    struct pf_statelim *))
{
	struct pf_statelim key = { .pfstlim_id = ioc->id };
	struct pf_statelim *found;
	int error = 0;
	PF_RULES_RLOCK_TRACKER;

	PF_RULES_RLOCK();

	found = rbt_op(&V_pf_statelim_id_tree_active, &key);
	if (found == NULL) {
		error = ENOENT;
	} else {
		/* configuration */
		ioc->id = found->pfstlim_id;
		ioc->limit = found->pfstlim_limit;
		ioc->rate.limit = found->pfstlim_rate.limit;
		ioc->rate.seconds = found->pfstlim_rate.seconds;
		CTASSERT(sizeof(ioc->name) == sizeof(found->pfstlim_nm));
		memcpy(ioc->name, found->pfstlim_nm, sizeof(ioc->name));

		/* counters */
		ioc->inuse = found->pfstlim_inuse;
		ioc->admitted = found->pfstlim_counters.admitted;
		ioc->hardlimited = found->pfstlim_counters.hardlimited;
		ioc->ratelimited = found->pfstlim_counters.ratelimited;
	}

	PF_RULES_RUNLOCK();

	return (error);
}

int
pf_sourcelim_add(const struct pfioc_sourcelim *ioc)
{
        struct pf_sourcelim     *pfsrlim;
        int                      error;
        size_t                   namelen, tablelen;
        unsigned int             prefix;
        size_t                   i;

        if (ioc->id < PF_SOURCELIM_ID_MIN ||
            ioc->id > PF_SOURCELIM_ID_MAX)
                return (EINVAL);

        if (ioc->entries < 1)
                return (EINVAL);

        if (ioc->limit < 1)
                return (EINVAL);

        if ((ioc->rate.limit == 0) != (ioc->rate.seconds == 0))
                return (EINVAL);

        if (ioc->inet_prefix > 32)
                return (EINVAL);
        if (ioc->inet6_prefix > 128)
                return (EINVAL);

        namelen = strnlen(ioc->name, sizeof(ioc->name));
        /* is the name from userland nul terminated? */
        if (namelen == sizeof(ioc->name))
                return (EINVAL);

        tablelen = strnlen(ioc->overload_tblname,
            sizeof(ioc->overload_tblname));
        /* is the name from userland nul terminated? */
        if (tablelen == sizeof(ioc->overload_tblname))
                return (EINVAL);
        if (tablelen != 0) {
                if (ioc->overload_hwm == 0)
                        return (EINVAL);

                if (ioc->overload_hwm < ioc->overload_lwm)
                        return (EINVAL);
        }

        pfsrlim = malloc(sizeof(*pfsrlim), M_PF_SOURCE_LIM, M_WAITOK | M_ZERO);
        if (pfsrlim == NULL)
                return (ENOMEM);

        pfsrlim->pfsrlim_id = ioc->id;
        pfsrlim->pfsrlim_entries = ioc->entries;
        pfsrlim->pfsrlim_limit = ioc->limit;
        pfsrlim->pfsrlim_ipv4_prefix = ioc->inet_prefix;
        pfsrlim->pfsrlim_ipv6_prefix = ioc->inet6_prefix;
        pfsrlim->pfsrlim_rate.limit = ioc->rate.limit;
        pfsrlim->pfsrlim_rate.seconds = ioc->rate.seconds;
        if (strlcpy(pfsrlim->pfsrlim_overload.name, ioc->overload_tblname,
            sizeof(pfsrlim->pfsrlim_overload.name)) >=
            sizeof(pfsrlim->pfsrlim_overload.name)) {
                error = EINVAL;
                goto free;
        }
        pfsrlim->pfsrlim_overload.hwm = ioc->overload_hwm;
        pfsrlim->pfsrlim_overload.lwm = ioc->overload_lwm;
        if (strlcpy(pfsrlim->pfsrlim_nm, ioc->name,
            sizeof(pfsrlim->pfsrlim_nm)) >= sizeof(pfsrlim->pfsrlim_nm)) {
                error = EINVAL;
                goto free;
        }

        if (pfsrlim->pfsrlim_rate.limit) {
                uint64_t bucket = pfsrlim->pfsrlim_rate.seconds * 1000000000ULL;

                pfsrlim->pfsrlim_rate_token = bucket /
                    pfsrlim->pfsrlim_rate.limit;
                pfsrlim->pfsrlim_rate_bucket = bucket;
        }

        pfsrlim->pfsrlim_ipv4_mask.v4.s_addr = htonl(
            0xffffffff << (32 - pfsrlim->pfsrlim_ipv4_prefix));

        prefix = pfsrlim->pfsrlim_ipv6_prefix;
        for (i = 0; i < nitems(pfsrlim->pfsrlim_ipv6_mask.addr32); i++) {
                if (prefix == 0) {
                        /* the memory is already zeroed */
                        break;
                }
                if (prefix < 32) {
                        pfsrlim->pfsrlim_ipv6_mask.addr32[i] = htonl(
                            0xffffffff << (32 - prefix));
                        break;
                }

                pfsrlim->pfsrlim_ipv6_mask.addr32[i] = htonl(0xffffffff);
                prefix -= 32;
        }

        RB_INIT(&pfsrlim->pfsrlim_sources);
        mtx_init(&pfsrlim->pfsrlim_lock, "pf source limit", NULL, MTX_DEF);

        PF_RULES_WLOCK();
        if (ioc->ticket != pf_main_ruleset.rules[PF_RULESET_FILTER].inactive.ticket) {
                error = EBUSY;
                goto unlock;
        }

        if (pfsrlim->pfsrlim_overload.name[0] != '\0') {
                pfsrlim->pfsrlim_overload.table = pfr_attach_table(
                    &pf_main_ruleset, pfsrlim->pfsrlim_overload.name);
                if (pfsrlim->pfsrlim_overload.table == NULL) {
                        error = EINVAL;
                        goto unlock;
                }
        }

        if (RB_INSERT(pf_sourcelim_id_tree, &V_pf_sourcelim_id_tree_inactive,
                pfsrlim) != NULL) {
                error = EBUSY;
                goto unlock;
        }

        if (RB_INSERT(pf_sourcelim_nm_tree, &V_pf_sourcelim_nm_tree_inactive,
                pfsrlim) != NULL) {
                RB_INSERT(pf_sourcelim_nm_tree, &V_pf_sourcelim_nm_tree_inactive,
                    pfsrlim);
                error = EBUSY;
                goto unlock;
        }

        TAILQ_INSERT_HEAD(&V_pf_sourcelim_list_inactive, pfsrlim, pfsrlim_list);

        PF_RULES_WUNLOCK();

        return (0);

unlock:
        PF_RULES_WUNLOCK();

free:
        free(pfsrlim, M_PF_SOURCE_LIM);

        return (error);
}

/*
 * Abort a pending sourcelim transaction: drop overload table
 * references, free everything staged on the inactive list, and reset
 * the inactive trees/list.  Called with the rules write lock held.
 */
void
pf_sourcelim_rollback(void)
{
	struct pf_sourcelim *cur, *next;

	PF_RULES_WASSERT();

	TAILQ_FOREACH_SAFE(cur, &V_pf_sourcelim_list_inactive, pfsrlim_list,
	    next) {
		/* release the overload table ref, if one was taken */
		if (cur->pfsrlim_overload.table != NULL)
			pfr_detach_table(cur->pfsrlim_overload.table);

		free(cur, M_PF_SOURCE_LIM);
	}

	RB_INIT(&V_pf_sourcelim_id_tree_inactive);
	RB_INIT(&V_pf_sourcelim_nm_tree_inactive);
	TAILQ_INIT(&V_pf_sourcelim_list_inactive);
}

/* Exact-match lookup of a sourcelim by id; rules lock must be held. */
struct pf_sourcelim *
pf_sourcelim_rb_find(struct pf_sourcelim_id_tree *tree,
    struct pf_sourcelim *key)
{
	struct pf_sourcelim *found;

	PF_RULES_ASSERT();

	found = RB_FIND(pf_sourcelim_id_tree, tree, key);
	return (found);
}

/* Next-or-equal lookup of a sourcelim by id; rules lock must be held. */
struct pf_sourcelim *
pf_sourcelim_rb_nfind(struct pf_sourcelim_id_tree *tree,
    struct pf_sourcelim *key)
{
	struct pf_sourcelim *found;

	PF_RULES_ASSERT();

	found = RB_NFIND(pf_sourcelim_id_tree, tree, key);
	return (found);
}

/*
 * Copy one active sourcelim's configuration and counters out to the
 * ioctl structure.  rbt_op selects exact (find) or next (nfind)
 * lookup semantics.  Returns ESRCH when no sourcelim matches.
 */
int
pf_sourcelim_get(struct pfioc_sourcelim *ioc,
    struct pf_sourcelim *(*rbt_op)(struct pf_sourcelim_id_tree *,
    struct pf_sourcelim *))
{
	struct pf_sourcelim key = { .pfsrlim_id = ioc->id };
	struct pf_sourcelim *found;
	int error = 0;
	PF_RULES_RLOCK_TRACKER;

	PF_RULES_RLOCK();

	found = rbt_op(&V_pf_sourcelim_id_tree_active, &key);
	if (found == NULL) {
		error = ESRCH;
	} else {
		/* configuration */
		ioc->id = found->pfsrlim_id;
		ioc->entries = found->pfsrlim_entries;
		ioc->limit = found->pfsrlim_limit;
		ioc->inet_prefix = found->pfsrlim_ipv4_prefix;
		ioc->inet6_prefix = found->pfsrlim_ipv6_prefix;
		ioc->rate.limit = found->pfsrlim_rate.limit;
		ioc->rate.seconds = found->pfsrlim_rate.seconds;

		CTASSERT(sizeof(ioc->overload_tblname) ==
		    sizeof(found->pfsrlim_overload.name));
		memcpy(ioc->overload_tblname, found->pfsrlim_overload.name,
		    sizeof(found->pfsrlim_overload.name));
		ioc->overload_hwm = found->pfsrlim_overload.hwm;
		ioc->overload_lwm = found->pfsrlim_overload.lwm;

		CTASSERT(sizeof(ioc->name) == sizeof(found->pfsrlim_nm));
		memcpy(ioc->name, found->pfsrlim_nm, sizeof(ioc->name));
		/* XXX overload table thing */

		/* counters */
		ioc->nentries = found->pfsrlim_nsources;
		ioc->inuse = found->pfsrlim_counters.inuse;
		ioc->addrallocs = found->pfsrlim_counters.addrallocs;
		ioc->addrnomem = found->pfsrlim_counters.addrnomem;
		ioc->admitted = found->pfsrlim_counters.admitted;
		ioc->addrlimited = found->pfsrlim_counters.addrlimited;
		ioc->hardlimited = found->pfsrlim_counters.hardlimited;
		ioc->ratelimited = found->pfsrlim_counters.ratelimited;
	}

	PF_RULES_RUNLOCK();

	return (error);
}

/* Exact-match lookup of a tracked source; rules lock must be held. */
struct pf_source *
pf_source_rb_find(struct pf_source_ioc_tree *tree,
    struct pf_source *key)
{
	struct pf_source *found;

	PF_RULES_ASSERT();

	found = RB_FIND(pf_source_ioc_tree, tree, key);
	return (found);
}

/* Next-or-equal lookup of a tracked source; rules lock must be held. */
struct pf_source *
pf_source_rb_nfind(struct pf_source_ioc_tree *tree,
    struct pf_source *key)
{
	struct pf_source *found;

	PF_RULES_ASSERT();

	found = RB_NFIND(pf_source_ioc_tree, tree, key);
	return (found);
}

/*
 * Remove a single tracked source from a sourcelim, detaching all of
 * its states from the limiter.  Returns ESRCH when the sourcelim id
 * does not exist, ENOENT when the source is not tracked, EOPNOTSUPP
 * when state removal is requested (not yet implemented).
 */
int
pf_source_clr(struct pfioc_source_kill *ioc)
{
	extern struct pf_source_list pf_source_gc;
	struct pf_sourcelim plkey = {
		.pfsrlim_id = ioc->id,
	};
	struct pf_source skey = {
		.pfsr_af = ioc->af,
		.pfsr_rdomain = ioc->rdomain,
		.pfsr_addr = ioc->addr,
	};
	struct pf_sourcelim *pfsrlim;
	struct pf_source *pfsr;
	struct pf_state_link *pfl, *npfl;
	int error = 0;
	unsigned int gen;

	if (ioc->rmstates) {
		/* XXX userland wants the states removed too */
		return (EOPNOTSUPP);
	}

	PF_RULES_WLOCK();

	pfsrlim = pf_sourcelim_rb_find(&V_pf_sourcelim_id_tree_active, &plkey);
	if (pfsrlim == NULL) {
		error = ESRCH;
		goto unlock;
	}

	pfsr = pf_source_rb_find(&pfsrlim->pfsrlim_ioc_sources, &skey);
	if (pfsr == NULL) {
		error = ENOENT;
		goto unlock;
	}

	RB_REMOVE(pf_source_tree, &pfsrlim->pfsrlim_sources, pfsr);
	RB_REMOVE(pf_source_ioc_tree, &pfsrlim->pfsrlim_ioc_sources, pfsr);
	/* idle sources also sit on the GC list; dequeue them */
	if (pfsr->pfsr_inuse == 0)
		TAILQ_REMOVE(&pf_source_gc, pfsr, pfsr_empty_gc);

	gen = pf_sourcelim_enter(pfsrlim);
	pfsrlim->pfsrlim_nsources--;
	pfsrlim->pfsrlim_counters.inuse -= pfsr->pfsr_inuse;
	pf_sourcelim_leave(pfsrlim, gen);

	/* unwire the links */
	TAILQ_FOREACH(pfl, &pfsr->pfsr_states, pfl_link) {
		struct pf_kstate *st = pfl->pfl_state;

		/*
		 * Take the state lock while mutating sourcelim and
		 * linkage, matching pf_sourcelim_unlink().
		 */
		PF_STATE_LOCK(st);
		/* if !rmst */
		st->sourcelim = 0;
		SLIST_REMOVE(&st->linkage, pfl, pf_state_link, pfl_linkage);
		PF_STATE_UNLOCK(st);
	}

	PF_RULES_WUNLOCK();

	/* the links are no longer reachable; free them unlocked */
	TAILQ_FOREACH_SAFE(pfl, &pfsr->pfsr_states, pfl_link, npfl)
		free(pfl, M_PF_STATE_LINK);

	free(pfsr, M_PF_SOURCE_LIM);

	return (0);

unlock:
	PF_RULES_WUNLOCK();

	return (error);
}

/*
 * Export a kernel source node to the userland representation,
 * converting absolute uptime stamps into relative seconds and
 * fetching the per-CPU counters.
 */
static void
pf_src_node_copy(const struct pf_ksrc_node *in, struct pf_src_node *out)
{
	int	secs = time_uptime;

	bzero(out, sizeof(struct pf_src_node));

	bcopy(&in->addr, &out->addr, sizeof(struct pf_addr));
	bcopy(&in->raddr, &out->raddr, sizeof(struct pf_addr));

	if (in->rule != NULL)
		out->rule.nr = in->rule->nr;

	for (int i = 0; i < 2; i++) {
		out->bytes[i] = counter_u64_fetch(in->bytes[i]);
		out->packets[i] = counter_u64_fetch(in->packets[i]);
	}

	out->states = in->states;
	out->conn = in->conn;
	out->af = in->af;
	out->ruletype = in->ruletype;

	out->creation = secs - in->creation;
	/*
	 * Convert the absolute expiry into seconds remaining.  The
	 * load from in->expire was missing, so out->expire (zeroed
	 * above) always exported as 0.
	 */
	out->expire = in->expire;
	if (out->expire > secs)
		out->expire -= secs;
	else
		out->expire = 0;

	/* Adjust the connection rate estimate. */
	out->conn_rate.limit = in->conn_rate.limit;
	out->conn_rate.seconds = in->conn_rate.seconds;
	/* If there's no limit there's no counter_rate. */
	if (in->conn_rate.cr != NULL)
		out->conn_rate.count = counter_rate_get(in->conn_rate.cr);
}

#ifdef ALTQ
/*
 * Handle export of struct pf_kaltq to user binaries that may be using any
 * version of struct pf_altq.
 */
/*
 * Copy a kernel altq definition into whichever versioned layout the
 * userland binary is using.  Returns EINVAL for versions newer than
 * the kernel supports.  ioc_size distinguishes the unversioned v0
 * structure, which predates the version field.
 */
static int
pf_export_kaltq(struct pf_altq *q, struct pfioc_altq_v1 *pa, size_t ioc_size)
{
	u_int32_t version;

	if (ioc_size == sizeof(struct pfioc_altq_v0))
		version = 0;
	else
		version = pa->version;

	if (version > PFIOC_ALTQ_VERSION)
		return (EINVAL);

#define ASSIGN(x) exported_q->x = q->x
#define COPY(x) \
	bcopy(&q->x, &exported_q->x, min(sizeof(q->x), sizeof(exported_q->x)))
#define SATU16(x) (u_int32_t)uqmin((x), USHRT_MAX)
#define SATU32(x) (u_int32_t)uqmin((x), UINT_MAX)

	switch (version) {
	case 0: {
		struct pf_altq_v0 *exported_q =
		    &((struct pfioc_altq_v0 *)pa)->altq;

		COPY(ifname);

		ASSIGN(scheduler);
		/*
		 * v0 narrows these fields; saturate instead of
		 * truncating.  (A redundant ASSIGN(tbrsize), dead
		 * store overwritten below, was removed.)
		 */
		exported_q->tbrsize = SATU16(q->tbrsize);
		exported_q->ifbandwidth = SATU32(q->ifbandwidth);

		COPY(qname);
		COPY(parent);
		ASSIGN(parent_qid);
		exported_q->bandwidth = SATU32(q->bandwidth);
		ASSIGN(priority);
		ASSIGN(local_flags);

		ASSIGN(qlimit);
		ASSIGN(flags);

		if (q->scheduler == ALTQT_HFSC) {
#define ASSIGN_OPT(x) exported_q->pq_u.hfsc_opts.x = q->pq_u.hfsc_opts.x
#define ASSIGN_OPT_SATU32(x) exported_q->pq_u.hfsc_opts.x = \
			    SATU32(q->pq_u.hfsc_opts.x)

			/* HFSC service-curve m1/m2 are 64-bit in-kernel */
			ASSIGN_OPT_SATU32(rtsc_m1);
			ASSIGN_OPT(rtsc_d);
			ASSIGN_OPT_SATU32(rtsc_m2);

			ASSIGN_OPT_SATU32(lssc_m1);
			ASSIGN_OPT(lssc_d);
			ASSIGN_OPT_SATU32(lssc_m2);

			ASSIGN_OPT_SATU32(ulsc_m1);
			ASSIGN_OPT(ulsc_d);
			ASSIGN_OPT_SATU32(ulsc_m2);

			ASSIGN_OPT(flags);

#undef ASSIGN_OPT
#undef ASSIGN_OPT_SATU32
		} else
			COPY(pq_u);

		ASSIGN(qid);
		break;
	}
	case 1: {
		struct pf_altq_v1 *exported_q =
		    &((struct pfioc_altq_v1 *)pa)->altq;

		COPY(ifname);

		ASSIGN(scheduler);
		ASSIGN(tbrsize);
		ASSIGN(ifbandwidth);

		COPY(qname);
		COPY(parent);
		ASSIGN(parent_qid);
		ASSIGN(bandwidth);
		ASSIGN(priority);
		ASSIGN(local_flags);

		ASSIGN(qlimit);
		ASSIGN(flags);
		COPY(pq_u);

		ASSIGN(qid);
		break;
	}
	default:
		panic("%s: unhandled struct pfioc_altq version", __func__);
		break;
	}

#undef ASSIGN
#undef COPY
#undef SATU16
#undef SATU32

	return (0);
}

/*
 * Handle import to struct pf_kaltq of struct pf_altq from user binaries
 * that may be using any version of it.
 */
static int
pf_import_kaltq(struct pfioc_altq_v1 *pa, struct pf_altq *q, size_t ioc_size)
{
	u_int32_t version;

	/* the unversioned v0 structure predates the version field */
	if (ioc_size == sizeof(struct pfioc_altq_v0))
		version = 0;
	else
		version = pa->version;

	if (version > PFIOC_ALTQ_VERSION)
		return (EINVAL);

#define ASSIGN(x) q->x = imported_q->x
#define COPY(x) \
	bcopy(&imported_q->x, &q->x, min(sizeof(imported_q->x), sizeof(q->x)))

	switch (version) {
	case 0: {
		struct pf_altq_v0 *imported_q =
		    &((struct pfioc_altq_v0 *)pa)->altq;

		COPY(ifname);

		ASSIGN(scheduler);
		ASSIGN(tbrsize); /* 16-bit -> 32-bit */
		ASSIGN(ifbandwidth); /* 32-bit -> 64-bit */

		COPY(qname);
		COPY(parent);
		ASSIGN(parent_qid);
		ASSIGN(bandwidth); /* 32-bit -> 64-bit */
		ASSIGN(priority);
		ASSIGN(local_flags);

		ASSIGN(qlimit);
		ASSIGN(flags);

		/* HFSC options need field-wise widening; others copy raw */
		if (imported_q->scheduler == ALTQT_HFSC) {
#define ASSIGN_OPT(x) q->pq_u.hfsc_opts.x = imported_q->pq_u.hfsc_opts.x

			/*
			 * The m1 and m2 parameters are being copied from
			 * 32-bit to 64-bit.
			 */
			ASSIGN_OPT(rtsc_m1);
			ASSIGN_OPT(rtsc_d);
			ASSIGN_OPT(rtsc_m2);

			ASSIGN_OPT(lssc_m1);
			ASSIGN_OPT(lssc_d);
			ASSIGN_OPT(lssc_m2);

			ASSIGN_OPT(ulsc_m1);
			ASSIGN_OPT(ulsc_d);
			ASSIGN_OPT(ulsc_m2);

			ASSIGN_OPT(flags);

#undef ASSIGN_OPT
		} else
			COPY(pq_u);

		ASSIGN(qid);
		break;
	}
	case 1: {
		/* v1 matches the kernel field widths; straight copy */
		struct pf_altq_v1 *imported_q =
		    &((struct pfioc_altq_v1 *)pa)->altq;

		COPY(ifname);

		ASSIGN(scheduler);
		ASSIGN(tbrsize);
		ASSIGN(ifbandwidth);

		COPY(qname);
		COPY(parent);
		ASSIGN(parent_qid);
		ASSIGN(bandwidth);
		ASSIGN(priority);
		ASSIGN(local_flags);

		ASSIGN(qlimit);
		ASSIGN(flags);
		COPY(pq_u);

		ASSIGN(qid);
		break;
	}
	default:
		panic("%s: unhandled struct pfioc_altq version", __func__);
		break;
	}

#undef ASSIGN
#undef COPY

	return (0);
}

/*
 * Return the n'th active altq, counting interface queues first and
 * regular queues after, or NULL when n is out of range.
 */
static struct pf_altq *
pf_altq_get_nth_active(u_int32_t n)
{
	struct pf_altq	*altq;
	u_int32_t	 idx = 0;

	TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) {
		if (idx++ == n)
			return (altq);
	}

	TAILQ_FOREACH(altq, V_pf_altqs_active, entries) {
		if (idx++ == n)
			return (altq);
	}

	return (NULL);
}
#endif /* ALTQ */

/*
 * Allocate and initialize an empty pf_krule: zeroed body, pool
 * mutexes set up, and a per-CPU timestamp.  Cannot fail (M_WAITOK).
 * The counterpart is pf_krule_free().
 */
struct pf_krule *
pf_krule_alloc(void)
{
	struct pf_krule *krule;

	krule = malloc(sizeof(*krule), M_PFRULE, M_WAITOK | M_ZERO);
	krule->timestamp = uma_zalloc_pcpu(pf_timestamp_pcpu_zone,
	    M_WAITOK | M_ZERO);
	mtx_init(&krule->nat.mtx, "pf_krule_nat_pool", NULL, MTX_DEF);
	mtx_init(&krule->rdr.mtx, "pf_krule_rdr_pool", NULL, MTX_DEF);
	mtx_init(&krule->route.mtx, "pf_krule_route_pool", NULL, MTX_DEF);

	return (krule);
}

/*
 * Release a rule allocated with pf_krule_alloc(), tearing down its
 * counters, pool mutexes, and per-CPU timestamp.  NULL is a no-op.
 */
void
pf_krule_free(struct pf_krule *rule)
{
#ifdef PF_WANT_32_TO_64_COUNTER
	bool wowned;
#endif

	if (rule == NULL)
		return;

#ifdef PF_WANT_32_TO_64_COUNTER
	/*
	 * Unhook the rule from the global list; take the rules write
	 * lock only if the caller does not already hold it.
	 */
	if (rule->allrulelinked) {
		wowned = PF_RULES_WOWNED();
		if (!wowned)
			PF_RULES_WLOCK();
		LIST_REMOVE(rule, allrulelist);
		V_pf_allrulecount--;
		if (!wowned)
			PF_RULES_WUNLOCK();
	}
#endif

	pf_counter_u64_deinit(&rule->evaluations);
	/* per-direction (in/out) traffic counters */
	for (int i = 0; i < 2; i++) {
		pf_counter_u64_deinit(&rule->packets[i]);
		pf_counter_u64_deinit(&rule->bytes[i]);
	}
	counter_u64_free(rule->states_cur);
	counter_u64_free(rule->states_tot);
	for (pf_sn_types_t sn_type=0; sn_type<PF_SN_MAX; sn_type++)
		counter_u64_free(rule->src_nodes[sn_type]);
	uma_zfree_pcpu(pf_timestamp_pcpu_zone, rule->timestamp);

	mtx_destroy(&rule->nat.mtx);
	mtx_destroy(&rule->rdr.mtx);
	mtx_destroy(&rule->route.mtx);
	free(rule, M_PFRULE);
}

/*
 * Reset a rule's accounting counters.  states_cur is left alone: it
 * tracks the number of currently live states, not a statistic.
 */
void
pf_krule_clear_counters(struct pf_krule *rule)
{
	int dir;

	pf_counter_u64_zero(&rule->evaluations);
	for (dir = 0; dir < 2; dir++) {
		pf_counter_u64_zero(&rule->bytes[dir]);
		pf_counter_u64_zero(&rule->packets[dir]);
	}
	counter_u64_zero(rule->states_tot);
}

/*
 * Export a kernel pool address to the userland layout.  Only the
 * address and interface name cross the boundary; kernel-only members
 * are zeroed out.
 */
static void
pf_kpooladdr_to_pooladdr(const struct pf_kpooladdr *kpool,
    struct pf_pooladdr *pool)
{
	memset(pool, 0, sizeof(*pool));
	memcpy(&pool->addr, &kpool->addr, sizeof(pool->addr));
	strlcpy(pool->ifname, kpool->ifname, sizeof(pool->ifname));
}

/*
 * Import a userland pool address.  Returns an error if the
 * userland-supplied ifname is not properly nul terminated.
 */
static int
pf_pooladdr_to_kpooladdr(const struct pf_pooladdr *pool,
    struct pf_kpooladdr *kpool)
{
	memset(kpool, 0, sizeof(*kpool));
	memcpy(&kpool->addr, &pool->addr, sizeof(kpool->addr));
	return (pf_user_strcpy(kpool->ifname, pool->ifname,
	    sizeof(kpool->ifname)));
}

/*
 * Import the userland-visible members of a pf_pool into the kernel
 * representation; kernel-only members are untouched.
 */
static void
pf_pool_to_kpool(const struct pf_pool *pool, struct pf_kpool *kpool)
{
	_Static_assert(sizeof(pool->key) == sizeof(kpool->key), "");
	_Static_assert(sizeof(pool->counter) == sizeof(kpool->counter), "");

	memcpy(&kpool->key, &pool->key, sizeof(kpool->key));
	memcpy(&kpool->counter, &pool->counter, sizeof(kpool->counter));

	kpool->tblidx = pool->tblidx;
	kpool->opts = pool->opts;
	kpool->proxy_port[0] = pool->proxy_port[0];
	kpool->proxy_port[1] = pool->proxy_port[1];
}

/*
 * Convert a userland pf_rule (old ioctl ABI) into a kernel pf_krule.
 *
 * The address wrappers are validated first, and every string field is
 * copied with pf_user_strcpy() so an unterminated userland buffer is
 * rejected instead of overread.  Counters, kif, anchor and overload_tbl
 * are intentionally not populated here; the caller sets those up.
 *
 * Returns 0 on success or an errno value.
 */
static int
pf_rule_to_krule(const struct pf_rule *rule, struct pf_krule *krule)
{
        int ret;

#ifndef INET
        /* Reject IPv4 rules on kernels built without INET. */
        if (rule->af == AF_INET) {
                return (EAFNOSUPPORT);
        }
#endif /* INET */
#ifndef INET6
        /* Reject IPv6 rules on kernels built without INET6. */
        if (rule->af == AF_INET6) {
                return (EAFNOSUPPORT);
        }
#endif /* INET6 */

        ret = pf_check_rule_addr(&rule->src);
        if (ret != 0)
                return (ret);
        ret = pf_check_rule_addr(&rule->dst);
        if (ret != 0)
                return (ret);

        bcopy(&rule->src, &krule->src, sizeof(rule->src));
        bcopy(&rule->dst, &krule->dst, sizeof(rule->dst));

        /* The old ABI carries a single label; it becomes label[0]. */
        ret = pf_user_strcpy(krule->label[0], rule->label, sizeof(rule->label));
        if (ret != 0)
                return (ret);
        ret = pf_user_strcpy(krule->ifname, rule->ifname, sizeof(rule->ifname));
        if (ret != 0)
                return (ret);
        ret = pf_user_strcpy(krule->qname, rule->qname, sizeof(rule->qname));
        if (ret != 0)
                return (ret);
        ret = pf_user_strcpy(krule->pqname, rule->pqname, sizeof(rule->pqname));
        if (ret != 0)
                return (ret);
        ret = pf_user_strcpy(krule->tagname, rule->tagname,
            sizeof(rule->tagname));
        if (ret != 0)
                return (ret);
        ret = pf_user_strcpy(krule->match_tagname, rule->match_tagname,
            sizeof(rule->match_tagname));
        if (ret != 0)
                return (ret);
        ret = pf_user_strcpy(krule->overload_tblname, rule->overload_tblname,
            sizeof(rule->overload_tblname));
        if (ret != 0)
                return (ret);

        /* The old ABI has a single pool; it maps to the rdr pool. */
        pf_pool_to_kpool(&rule->rpool, &krule->rdr);

        /* Don't allow userspace to set evaluations, packets or bytes. */
        /* kif, anchor, overload_tbl are not copied over. */

        krule->os_fingerprint = rule->os_fingerprint;

        krule->rtableid = rule->rtableid;
        /* pf_rule->timeout is smaller than pf_krule->timeout */
        bcopy(rule->timeout, krule->timeout, sizeof(rule->timeout));
        krule->max_states = rule->max_states;
        krule->max_src_nodes = rule->max_src_nodes;
        krule->max_src_states = rule->max_src_states;
        krule->max_src_conn = rule->max_src_conn;
        krule->max_src_conn_rate.limit = rule->max_src_conn_rate.limit;
        krule->max_src_conn_rate.seconds = rule->max_src_conn_rate.seconds;
        krule->qid = rule->qid;
        krule->pqid = rule->pqid;
        krule->nr = rule->nr;
        krule->prob = rule->prob;
        krule->cuid = rule->cuid;
        krule->cpid = rule->cpid;

        krule->return_icmp = rule->return_icmp;
        krule->return_icmp6 = rule->return_icmp6;
        krule->max_mss = rule->max_mss;
        krule->tag = rule->tag;
        krule->match_tag = rule->match_tag;
        krule->scrub_flags = rule->scrub_flags;

        bcopy(&rule->uid, &krule->uid, sizeof(krule->uid));
        bcopy(&rule->gid, &krule->gid, sizeof(krule->gid));

        krule->rule_flag = rule->rule_flag;
        krule->action = rule->action;
        krule->direction = rule->direction;
        krule->log = rule->log;
        krule->logif = rule->logif;
        krule->quick = rule->quick;
        krule->ifnot = rule->ifnot;
        krule->match_tag_not = rule->match_tag_not;
        krule->natpass = rule->natpass;

        krule->keep_state = rule->keep_state;
        krule->af = rule->af;
        krule->proto = rule->proto;
        krule->type = rule->type;
        krule->code = rule->code;
        krule->flags = rule->flags;
        krule->flagset = rule->flagset;
        krule->min_ttl = rule->min_ttl;
        krule->allow_opts = rule->allow_opts;
        krule->rt = rule->rt;
        krule->return_ttl = rule->return_ttl;
        krule->tos = rule->tos;
        krule->set_tos = rule->set_tos;

        krule->flush = rule->flush;
        krule->prio = rule->prio;
        krule->set_prio[0] = rule->set_prio[0];
        krule->set_prio[1] = rule->set_prio[1];

        bcopy(&rule->divert, &krule->divert, sizeof(krule->divert));

        return (0);
}

/*
 * DIOCGETRULES: report the number of rules in the active ruleset named
 * by pr->anchor/pr->rule.action (as pr->nr) along with the ruleset's
 * active ticket, for subsequent DIOCGETRULE iteration.
 */
int
pf_ioctl_getrules(struct pfioc_rule *pr)
{
        PF_RULES_RLOCK_TRACKER;
        struct pf_kruleset      *ruleset;
        struct pf_krule         *last;
        int                      rs_num;

        PF_RULES_RLOCK();
        ruleset = pf_find_kruleset(pr->anchor);
        if (ruleset == NULL) {
                PF_RULES_RUNLOCK();
                return (EINVAL);
        }
        rs_num = pf_get_ruleset_number(pr->rule.action);
        if (rs_num >= PF_RULESET_MAX) {
                PF_RULES_RUNLOCK();
                return (EINVAL);
        }
        /* Rule numbers are dense, so the count is last->nr + 1. */
        last = TAILQ_LAST(ruleset->rules[rs_num].active.ptr,
            pf_krulequeue);
        pr->nr = (last != NULL) ? last->nr + 1 : 0;
        pr->ticket = ruleset->rules[rs_num].active.ticket;
        PF_RULES_RUNLOCK();

        return (0);
}

/*
 * Validate the rule's address family against its af-to translation
 * settings: PFRULE_AFTO requires an explicit af and a naf of the
 * opposite family, and naf may only be set when PFRULE_AFTO is.
 * Returns 0 if consistent, EPFNOSUPPORT otherwise.
 */
static int
pf_rule_checkaf(struct pf_krule *r)
{
        int afto = (r->rule_flag & PFRULE_AFTO) != 0;

        if (r->af == 0) {
                /* "af any" rules cannot translate the family. */
                if (afto)
                        return (EPFNOSUPPORT);
        } else if (r->af == AF_INET) {
                if (afto && r->naf != AF_INET6)
                        return (EPFNOSUPPORT);
#ifdef INET6
        } else if (r->af == AF_INET6) {
                if (afto && r->naf != AF_INET)
                        return (EPFNOSUPPORT);
#endif /* INET6 */
        } else {
                return (EPFNOSUPPORT);
        }

        if (!afto && r->naf != 0)
                return (EPFNOSUPPORT);

        return (0);
}

/*
 * Reject degenerate port ranges: a ":" or "<>" range whose low end is
 * above the high end, or a "><" range with nothing strictly between
 * the endpoints.  Returns nonzero if the range is invalid.
 */
static int
pf_validate_range(uint8_t op, uint16_t port[2])
{
        uint16_t lo = ntohs(port[0]);
        uint16_t hi = ntohs(port[1]);

        switch (op) {
        case PF_OP_RRG:         /* 34:12,  i.e. none */
        case PF_OP_XRG:         /* 34<>22, i.e. all */
                if (lo > hi)
                        return (1);
                break;
        case PF_OP_IRG:         /* 34><12, i.e. none */
                if (lo >= hi)
                        return (1);
                break;
        }
        return (0);
}

/*
 * Check that a limiter action is one of the recognized values.
 * Returns 0 if valid, nonzero otherwise.
 */
static int
pf_chk_limiter_action(int limiter_action)
{

        switch (limiter_action) {
        case PF_LIMITER_NOMATCH:
        case PF_LIMITER_BLOCK:
                return (0);
        default:
                return (1);
        }
}

/*
 * Append a fully constructed rule to the inactive ruleset identified
 * by ticket/anchor (part of a DIOCXBEGIN/DIOCXCOMMIT transaction).
 *
 * Takes ownership of "rule": on success it is linked into the ruleset;
 * on failure it is freed here along with any pre-allocated kifs.
 * pool_ticket must match the pool-buffer transaction opened by
 * pf_ioctl_begin_addrs(); the buffered pool addresses are moved into
 * the rule's nat/rdr/route pools.
 *
 * Returns 0 on success or an errno value.
 */
int
pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket,
    uint32_t pool_ticket, const char *anchor, const char *anchor_call,
    uid_t uid, pid_t pid)
{
        struct pf_kruleset      *ruleset;
        struct pf_krule         *tail;
        struct pf_kpooladdr     *pa;
        struct pfi_kkif         *kif = NULL, *rcv_kif = NULL;
        int                      rs_num;
        int                      error = 0;

#define ERROUT(x)               ERROUT_FUNCTION(errout, x)
#define ERROUT_UNLOCKED(x)      ERROUT_FUNCTION(errout_unlocked, x)

        /* ICMP type lives in the high byte of return_icmp. */
        if ((rule->return_icmp >> 8) > ICMP_MAXTYPE)
                ERROUT_UNLOCKED(EINVAL);

        if ((error = pf_rule_checkaf(rule)))
                ERROUT_UNLOCKED(error);
        if (pf_validate_range(rule->src.port_op, rule->src.port))
                ERROUT_UNLOCKED(EINVAL);
        if (pf_validate_range(rule->dst.port_op, rule->dst.port))
                ERROUT_UNLOCKED(EINVAL);
        if (pf_chk_limiter_action(rule->statelim.limiter_action) ||
            pf_chk_limiter_action(rule->sourcelim.limiter_action))
                ERROUT_UNLOCKED(EINVAL);

        /*
         * Allocate kifs and counters before taking any locks:
         * M_WAITOK allocations may sleep.
         */
        if (rule->ifname[0])
                kif = pf_kkif_create(M_WAITOK);
        if (rule->rcv_ifname[0])
                rcv_kif = pf_kkif_create(M_WAITOK);
        pf_counter_u64_init(&rule->evaluations, M_WAITOK);
        for (int i = 0; i < 2; i++) {
                pf_counter_u64_init(&rule->packets[i], M_WAITOK);
                pf_counter_u64_init(&rule->bytes[i], M_WAITOK);
        }
        rule->states_cur = counter_u64_alloc(M_WAITOK);
        rule->states_tot = counter_u64_alloc(M_WAITOK);
        for (pf_sn_types_t sn_type=0; sn_type<PF_SN_MAX; sn_type++)
                rule->src_nodes[sn_type] = counter_u64_alloc(M_WAITOK);
        rule->cuid = uid;
        rule->cpid = pid;
        TAILQ_INIT(&rule->rdr.list);
        TAILQ_INIT(&rule->nat.list);
        TAILQ_INIT(&rule->route.list);

        PF_CONFIG_LOCK();
        PF_RULES_WLOCK();
#ifdef PF_WANT_32_TO_64_COUNTER
        LIST_INSERT_HEAD(&V_pf_allrulelist, rule, allrulelist);
        MPASS(!rule->allrulelinked);
        rule->allrulelinked = true;
        V_pf_allrulecount++;
#endif
        ruleset = pf_find_kruleset(anchor);
        if (ruleset == NULL)
                ERROUT(EINVAL);
        rs_num = pf_get_ruleset_number(rule->action);
        if (rs_num >= PF_RULESET_MAX)
                ERROUT(EINVAL);
        /* The caller must be inside the transaction that owns the ticket. */
        if (ticket != ruleset->rules[rs_num].inactive.ticket) {
                DPFPRINTF(PF_DEBUG_MISC,
                    "ticket: %d != [%d]%d", ticket, rs_num,
                    ruleset->rules[rs_num].inactive.ticket);
                ERROUT(EBUSY);
        }
        if (pool_ticket != V_ticket_pabuf) {
                DPFPRINTF(PF_DEBUG_MISC,
                    "pool_ticket: %d != %d", pool_ticket,
                    V_ticket_pabuf);
                ERROUT(EBUSY);
        }
        /*
         * XXXMJG hack: there is no mechanism to ensure they started the
         * transaction. Ticket checked above may happen to match by accident,
         * even if nobody called DIOCXBEGIN, let alone this process.
         * Partially work around it by checking if the RB tree got allocated,
         * see pf_begin_rules.
         */
        if (ruleset->rules[rs_num].inactive.tree == NULL) {
                ERROUT(EINVAL);
        }

        /* Assign the next rule number in the inactive queue. */
        tail = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr,
            pf_krulequeue);
        if (tail)
                rule->nr = tail->nr + 1;
        else
                rule->nr = 0;
        if (rule->ifname[0]) {
                /* Attach consumes kif; NULL it so errout doesn't free it. */
                rule->kif = pfi_kkif_attach(kif, rule->ifname);
                kif = NULL;
                pfi_kkif_ref(rule->kif);
        } else
                rule->kif = NULL;

        if (rule->rcv_ifname[0]) {
                rule->rcv_kif = pfi_kkif_attach(rcv_kif, rule->rcv_ifname);
                rcv_kif = NULL;
                pfi_kkif_ref(rule->rcv_kif);
        } else
                rule->rcv_kif = NULL;

        if (rule->rtableid > 0 && rule->rtableid >= rt_numfibs)
                ERROUT(EBUSY);
#ifdef ALTQ
        /* set queue IDs */
        if (rule->qname[0] != 0) {
                if ((rule->qid = pf_qname2qid(rule->qname, true)) == 0)
                        ERROUT(EBUSY);
                else if (rule->pqname[0] != 0) {
                        if ((rule->pqid =
                            pf_qname2qid(rule->pqname, true)) == 0)
                                ERROUT(EBUSY);
                } else
                        rule->pqid = rule->qid;
        }
#endif
        if (rule->tagname[0])
                if ((rule->tag = pf_tagname2tag(rule->tagname)) == 0)
                        ERROUT(EBUSY);
        if (rule->match_tagname[0])
                if ((rule->match_tag =
                    pf_tagname2tag(rule->match_tagname)) == 0)
                        ERROUT(EBUSY);
        /* route-to et al. require an explicit direction. */
        if (rule->rt && !rule->direction)
                ERROUT(EINVAL);
        if (!rule->log)
                rule->logif = 0;
        if (! pf_init_threshold(&rule->pktrate, rule->pktrate.limit,
           rule->pktrate.seconds))
                ERROUT(ENOMEM);
        if (pf_addr_setup(ruleset, &rule->src.addr, rule->af))
                ERROUT(ENOMEM);
        if (pf_addr_setup(ruleset, &rule->dst.addr, rule->af))
                ERROUT(ENOMEM);
        if (pf_kanchor_setup(rule, ruleset, anchor_call))
                ERROUT(EINVAL);
        if (rule->scrub_flags & PFSTATE_SETPRIO &&
            (rule->set_prio[0] > PF_PRIO_MAX ||
            rule->set_prio[1] > PF_PRIO_MAX))
                ERROUT(EINVAL);
        /* Resolve table references in all three buffered pools. */
        for (int i = 0; i < 3; i++) {
                TAILQ_FOREACH(pa, &V_pf_pabuf[i], entries)
                        if (pa->addr.type == PF_ADDR_TABLE) {
                                pa->addr.p.tbl = pfr_attach_table(ruleset,
                                    pa->addr.v.tblname);
                                if (pa->addr.p.tbl == NULL)
                                        ERROUT(ENOMEM);
                        }
        }

        rule->overload_tbl = NULL;
        if (rule->overload_tblname[0]) {
                if ((rule->overload_tbl = pfr_attach_table(ruleset,
                    rule->overload_tblname)) == NULL)
                        ERROUT(EINVAL);
                else
                        rule->overload_tbl->pfrkt_flags |=
                            PFR_TFLAG_ACTIVE;
        }

        pf_mv_kpool(&V_pf_pabuf[0], &rule->nat.list);

        /*
         * Old version of pfctl provide route redirection pools in single
         * common redirection pool rdr. New versions use rdr only for
         * rdr-to rules.
         */
        if (rule->rt > PF_NOPFROUTE && TAILQ_EMPTY(&V_pf_pabuf[2])) {
                pf_mv_kpool(&V_pf_pabuf[1], &rule->route.list);
        } else {
                pf_mv_kpool(&V_pf_pabuf[1], &rule->rdr.list);
                pf_mv_kpool(&V_pf_pabuf[2], &rule->route.list);
        }

        /* Translation rules outside an anchor need a redirection pool. */
        if (((rule->action == PF_NAT) || (rule->action == PF_RDR) ||
            (rule->action == PF_BINAT)) && rule->anchor == NULL &&
            TAILQ_FIRST(&rule->rdr.list) == NULL) {
                ERROUT(EINVAL);
        }

        if (rule->rt > PF_NOPFROUTE && (TAILQ_FIRST(&rule->route.list) == NULL)) {
                ERROUT(EINVAL);
        }

        /* sticky-address only makes sense with state tracking. */
        if (rule->action == PF_PASS && (rule->rdr.opts & PF_POOL_STICKYADDR ||
            rule->nat.opts & PF_POOL_STICKYADDR) && !rule->keep_state) {
                ERROUT(EINVAL);
        }

        MPASS(error == 0);

        rule->nat.cur = TAILQ_FIRST(&rule->nat.list);
        rule->rdr.cur = TAILQ_FIRST(&rule->rdr.list);
        rule->route.cur = TAILQ_FIRST(&rule->route.list);
        rule->route.ipv6_nexthop_af = AF_INET6;
        TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr,
            rule, entries);
        ruleset->rules[rs_num].inactive.rcount++;

        PF_RULES_WUNLOCK();
        pf_hash_rule(rule);
        /* A duplicate of an already-inserted rule is rejected as EEXIST. */
        if (RB_INSERT(pf_krule_global, ruleset->rules[rs_num].inactive.tree, rule) != NULL) {
                PF_RULES_WLOCK();
                TAILQ_REMOVE(ruleset->rules[rs_num].inactive.ptr, rule, entries);
                ruleset->rules[rs_num].inactive.rcount--;
                pf_free_rule(rule);
                rule = NULL;
                ERROUT(EEXIST);
        }
        PF_CONFIG_UNLOCK();

        return (0);

#undef ERROUT
#undef ERROUT_UNLOCKED
errout:
        PF_RULES_WUNLOCK();
        PF_CONFIG_UNLOCK();
errout_unlocked:
        pf_kkif_free(rcv_kif);
        pf_kkif_free(kif);
        pf_krule_free(rule);
        return (error);
}

/*
 * Return true if any of the rule's labels equals "label".  The label
 * array is scanned until the first empty slot.
 */
static bool
pf_label_match(const struct pf_krule *rule, const char *label)
{
        int idx;

        for (idx = 0; *rule->label[idx] != '\0'; idx++) {
                if (strcmp(rule->label[idx], label) == 0)
                        return (true);
        }

        return (false);
}

/*
 * Kill the state matching "key" in the given direction, but only if the
 * match is unambiguous (exactly one state).  Returns the number of
 * states removed (0 or 1).
 */
static unsigned int
pf_kill_matching_state(struct pf_state_key_cmp *key, int dir)
{
        struct pf_kstate        *st;
        int                      more = 0;

        st = pf_find_state_all(key, dir, &more);
        if (st == NULL)
                return (0);
        if (more) {
                /* Ambiguous match: leave the state alone. */
                PF_STATE_UNLOCK(st);
                return (0);
        }

        pf_remove_state(st);
        return (1);
}

/*
 * Kill all states in one id-hash row that match the filter criteria in
 * psk (af, proto, src/dst address+port, route address, label, ifname).
 *
 * After each removal the scan restarts from the top of the row (the
 * relock label), since removing a state invalidates the list iterator.
 * With psk_kill_match set, the reverse-direction counterpart state is
 * killed as well.  Returns the number of states killed.
 */
static int
pf_killstates_row(struct pf_kstate_kill *psk, struct pf_idhash *ih)
{
        struct pf_kstate        *s;
        struct pf_state_key     *sk;
        struct pf_addr          *srcaddr, *dstaddr;
        struct pf_state_key_cmp  match_key;
        int                      idx, killed = 0;
        unsigned int             dir;
        u_int16_t                srcport, dstport;
        struct pfi_kkif         *kif;

relock_DIOCKILLSTATES:
        PF_HASHROW_LOCK(ih);
        LIST_FOREACH(s, &ih->states, entry) {
                /* For floating states look at the original kif. */
                kif = s->kif == V_pfi_all ? s->orig_kif : s->kif;

                /* With psk_nat, match against the translated (stack) key. */
                sk = s->key[psk->psk_nat ? PF_SK_STACK : PF_SK_WIRE];
                if (s->direction == PF_OUT) {
                        srcaddr = &sk->addr[1];
                        dstaddr = &sk->addr[0];
                        srcport = sk->port[1];
                        dstport = sk->port[0];
                } else {
                        srcaddr = &sk->addr[0];
                        dstaddr = &sk->addr[1];
                        srcport = sk->port[0];
                        dstport = sk->port[1];
                }

                /* Unset filter fields match everything. */
                if (psk->psk_af && sk->af != psk->psk_af)
                        continue;

                if (psk->psk_proto && psk->psk_proto != sk->proto)
                        continue;

                if (! pf_match_addr(psk->psk_src.neg,
                    &psk->psk_src.addr.v.a.addr,
                    &psk->psk_src.addr.v.a.mask, srcaddr, sk->af))
                        continue;

                if (! pf_match_addr(psk->psk_dst.neg,
                    &psk->psk_dst.addr.v.a.addr,
                    &psk->psk_dst.addr.v.a.mask, dstaddr, sk->af))
                        continue;

                if (!  pf_match_addr(psk->psk_rt_addr.neg,
                    &psk->psk_rt_addr.addr.v.a.addr,
                    &psk->psk_rt_addr.addr.v.a.mask,
                    &s->act.rt_addr, sk->af))
                        continue;

                if (psk->psk_src.port_op != 0 &&
                    ! pf_match_port(psk->psk_src.port_op,
                    psk->psk_src.port[0], psk->psk_src.port[1], srcport))
                        continue;

                if (psk->psk_dst.port_op != 0 &&
                    ! pf_match_port(psk->psk_dst.port_op,
                    psk->psk_dst.port[0], psk->psk_dst.port[1], dstport))
                        continue;

                if (psk->psk_label[0] &&
                    ! pf_label_match(s->rule, psk->psk_label))
                        continue;

                if (psk->psk_ifname[0] && strcmp(psk->psk_ifname,
                    kif->pfik_name))
                        continue;

                if (psk->psk_kill_match) {
                        /* Create the key to find matching states, with lock
                         * held. */

                        bzero(&match_key, sizeof(match_key));

                        if (s->direction == PF_OUT) {
                                dir = PF_IN;
                                idx = psk->psk_nat ? PF_SK_WIRE : PF_SK_STACK;
                        } else {
                                dir = PF_OUT;
                                idx = psk->psk_nat ? PF_SK_STACK : PF_SK_WIRE;
                        }

                        /* Reverse the address/port pairs for the lookup. */
                        match_key.af = s->key[idx]->af;
                        match_key.proto = s->key[idx]->proto;
                        pf_addrcpy(&match_key.addr[0],
                            &s->key[idx]->addr[1], match_key.af);
                        match_key.port[0] = s->key[idx]->port[1];
                        pf_addrcpy(&match_key.addr[1],
                            &s->key[idx]->addr[0], match_key.af);
                        match_key.port[1] = s->key[idx]->port[0];
                }

                pf_remove_state(s);
                killed++;

                if (psk->psk_kill_match)
                        killed += pf_kill_matching_state(&match_key, dir);

                /* The iterator is stale after removal; rescan the row. */
                goto relock_DIOCKILLSTATES;
        }
        PF_HASHROW_UNLOCK(ih);

        return (killed);
}

/*
 * Start packet filtering: hook pf into the network stack (and the
 * Ethernet layer if Ethernet rules are loaded) and mark it running.
 * Returns EEXIST if pf is already running.
 */
int
pf_start(void)
{

        sx_xlock(&V_pf_ioctl_lock);
        if (V_pf_status.running) {
                sx_xunlock(&V_pf_ioctl_lock);
                return (EEXIST);
        }

        hook_pf();
        if (! TAILQ_EMPTY(V_pf_keth->active.rules))
                hook_pf_eth();
        V_pf_status.running = 1;
        V_pf_status.since = time_uptime;
        new_unrhdr64(&V_pf_stateid, time_second);

        DPFPRINTF(PF_DEBUG_MISC, "pf: started");
        sx_xunlock(&V_pf_ioctl_lock);

        return (0);
}

/*
 * Stop packet filtering: unhook pf from the network stack and the
 * Ethernet layer and mark it stopped.  Returns ENOENT if pf is not
 * running.
 */
int
pf_stop(void)
{

        sx_xlock(&V_pf_ioctl_lock);
        if (!V_pf_status.running) {
                sx_xunlock(&V_pf_ioctl_lock);
                return (ENOENT);
        }

        V_pf_status.running = 0;
        dehook_pf();
        dehook_pf_eth();
        V_pf_status.since = time_uptime;
        DPFPRINTF(PF_DEBUG_MISC, "pf: stopped");
        sx_xunlock(&V_pf_ioctl_lock);

        return (0);
}

/*
 * DIOCCLRSTATUS: zero all global pf status counters (reasons, fetches,
 * states, nat, limiter) and reset the "since" timestamp.  If a status
 * interface is configured its statistics are cleared as well.
 */
void
pf_ioctl_clear_status(void)
{
        PF_RULES_WLOCK();
        for (int i = 0; i < PFRES_MAX; i++)
                counter_u64_zero(V_pf_status.counters[i]);
        for (int i = 0; i < FCNT_MAX; i++)
                pf_counter_u64_zero(&V_pf_status.fcounters[i]);
        for (int i = 0; i < SCNT_MAX; i++)
                counter_u64_zero(V_pf_status.scounters[i]);
        for (int i = 0; i < NCNT_MAX; i++)
                counter_u64_zero(V_pf_status.ncounters[i]);
        for (int i = 0; i < KLCNT_MAX; i++)
                counter_u64_zero(V_pf_status.lcounters[i]);
        V_pf_status.since = time_uptime;
        /* NULL clears (rather than changes) the status interface stats. */
        if (*V_pf_status.ifname)
                pfi_update_status(V_pf_status.ifname, NULL);
        PF_RULES_WUNLOCK();
}

/*
 * DIOCSETTIMEOUT: set a default-rule timeout value, optionally
 * returning the previous value via prev_seconds.  Lowering the purge
 * interval wakes the purge thread so the new value takes effect
 * immediately.
 */
int
pf_ioctl_set_timeout(int timeout, int seconds, int *prev_seconds)
{
        uint32_t old;

        if (timeout < 0 || timeout >= PFTM_MAX || seconds < 0)
                return (EINVAL);

        /* A zero purge interval would spin; clamp it to one second. */
        if (timeout == PFTM_INTERVAL && seconds == 0)
                seconds = 1;

        PF_RULES_WLOCK();
        old = V_pf_default_rule.timeout[timeout];
        V_pf_default_rule.timeout[timeout] = seconds;
        if (timeout == PFTM_INTERVAL && seconds < old)
                wakeup(pf_purge_thread);

        if (prev_seconds != NULL)
                *prev_seconds = old;
        PF_RULES_WUNLOCK();

        return (0);
}

/*
 * DIOCGETTIMEOUT: read a default-rule timeout value into *seconds.
 * Returns EINVAL for an out-of-range timeout index.
 */
int
pf_ioctl_get_timeout(int timeout, int *seconds)
{
        PF_RULES_RLOCK_TRACKER;
        int error = 0;

        if (timeout < 0 || timeout >= PFTM_MAX) {
                error = EINVAL;
        } else {
                PF_RULES_RLOCK();
                *seconds = V_pf_default_rule.timeout[timeout];
                PF_RULES_RUNLOCK();
        }

        return (error);
}

int
pf_ioctl_set_limit(int index, unsigned int limit, unsigned int *old_limit)
{

        PF_RULES_WLOCK();
        if (index < 0 || index >= PF_LIMIT_MAX ||
            V_pf_limits[index].zone == NULL) {
                PF_RULES_WUNLOCK();
                return (EINVAL);
        }
        uma_zone_set_max(V_pf_limits[index].zone,
            limit == 0 ? INT_MAX : limit);
        if (old_limit != NULL)
                *old_limit = V_pf_limits[index].limit;
        V_pf_limits[index].limit = limit;
        PF_RULES_WUNLOCK();

        return (0);
}

int
pf_ioctl_get_limit(int index, unsigned int *limit)
{
        PF_RULES_RLOCK_TRACKER;

        if (index < 0 || index >= PF_LIMIT_MAX)
                return (EINVAL);

        PF_RULES_RLOCK();
        *limit = V_pf_limits[index].limit;
        PF_RULES_RUNLOCK();

        return (0);
}

/*
 * DIOCBEGINADDRS: start a new pool-address transaction.  All three
 * buffered pools (nat, rdr, route) are emptied and a fresh ticket is
 * issued for subsequent DIOCADDADDR calls.
 */
int
pf_ioctl_begin_addrs(uint32_t *ticket)
{
        PF_RULES_WLOCK();
        for (int i = 0; i < 3; i++)
                pf_empty_kpool(&V_pf_pabuf[i]);
        *ticket = ++V_ticket_pabuf;
        PF_RULES_WUNLOCK();

        return (0);
}

/*
 * DIOCADDADDR: validate one pool address and append it to the
 * per-transaction pool buffer (V_pf_pabuf[]) selected by pp->which
 * (NAT, RDR or RT).  The buffered addresses are later attached to a
 * rule by pf_ioctl_addrule().  pp->ticket must match the transaction
 * opened by pf_ioctl_begin_addrs(), otherwise EBUSY is returned.
 */
int
pf_ioctl_add_addr(struct pf_nl_pooladdr *pp)
{
        struct pf_kpooladdr     *pa = NULL;
        struct pfi_kkif         *kif = NULL;
        int error;

        if (pp->which != PF_RDR && pp->which != PF_NAT &&
            pp->which != PF_RT)
                return (EINVAL);

        switch (pp->af) {
#ifdef INET
        case AF_INET:
                /* FALLTHROUGH */
#endif /* INET */
#ifdef INET6
        case AF_INET6:
                /* FALLTHROUGH */
#endif /* INET6 */
        case AF_UNSPEC:
                break;
        default:
                return (EAFNOSUPPORT);
        }

        if (pp->addr.addr.type != PF_ADDR_ADDRMASK &&
            pp->addr.addr.type != PF_ADDR_DYNIFTL &&
            pp->addr.addr.type != PF_ADDR_TABLE)
                return (EINVAL);

        /* p.dyn is kernel-owned state; a caller-supplied pointer is bogus. */
        if (pp->addr.addr.p.dyn != NULL)
                return (EINVAL);

        pa = malloc(sizeof(*pa), M_PFRULE, M_WAITOK);
        error = pf_pooladdr_to_kpooladdr(&pp->addr, pa);
        if (error != 0)
                goto out;
        /* Allocate the kif before taking the lock; M_WAITOK may sleep. */
        if (pa->ifname[0])
                kif = pf_kkif_create(M_WAITOK);
        PF_RULES_WLOCK();
        if (pp->ticket != V_ticket_pabuf) {
                PF_RULES_WUNLOCK();
                if (pa->ifname[0])
                        pf_kkif_free(kif);
                error = EBUSY;
                goto out;
        }
        if (pa->ifname[0]) {
                /* Attach consumes kif; clear it so it isn't freed twice. */
                pa->kif = pfi_kkif_attach(kif, pa->ifname);
                kif = NULL;
                pfi_kkif_ref(pa->kif);
        } else
                pa->kif = NULL;
        if (pa->addr.type == PF_ADDR_DYNIFTL && ((error =
            pfi_dynaddr_setup(&pa->addr, pp->af)) != 0)) {
                if (pa->ifname[0])
                        pfi_kkif_unref(pa->kif);
                PF_RULES_WUNLOCK();
                goto out;
        }
        pa->af = pp->af;
        /* Queue onto the buffer matching the requested pool kind. */
        switch (pp->which) {
        case PF_NAT:
                TAILQ_INSERT_TAIL(&V_pf_pabuf[0], pa, entries);
                break;
        case PF_RDR:
                TAILQ_INSERT_TAIL(&V_pf_pabuf[1], pa, entries);
                break;
        case PF_RT:
                TAILQ_INSERT_TAIL(&V_pf_pabuf[2], pa, entries);
                break;
        }
        PF_RULES_WUNLOCK();

        return (0);

out:
        free(pa, M_PFRULE);
        return (error);
}

int
pf_ioctl_get_addrs(struct pf_nl_pooladdr *pp)
{
        struct pf_kpool         *pool;
        struct pf_kpooladdr     *pa;

        PF_RULES_RLOCK_TRACKER;

        if (pp->which != PF_RDR && pp->which != PF_NAT &&
            pp->which != PF_RT)
                return (EINVAL);

        pp->anchor[sizeof(pp->anchor) - 1] = 0;
        pp->nr = 0;

        PF_RULES_RLOCK();
        pool = pf_get_kpool(pp->anchor, pp->ticket, pp->r_action,
            pp->r_num, 0, 1, 0, pp->which);
        if (pool == NULL) {
                PF_RULES_RUNLOCK();
                return (EBUSY);
        }
        TAILQ_FOREACH(pa, &pool->list, entries)
                pp->nr++;
        PF_RULES_RUNLOCK();

        return (0);
}

/*
 * DIOCGETADDR: copy out the pp->nr'th address of the pool identified
 * by anchor/ticket/r_action/r_num/which.  Returns EBUSY if the pool
 * cannot be found or the index is out of range.
 */
int
pf_ioctl_get_addr(struct pf_nl_pooladdr *pp)
{
        PF_RULES_RLOCK_TRACKER;
        struct pf_kpool         *pool;
        struct pf_kpooladdr     *pa;
        u_int32_t                idx = 0;

        if (pp->which != PF_RDR && pp->which != PF_NAT &&
            pp->which != PF_RT)
                return (EINVAL);

        /* pp->anchor comes from userland; force NUL termination. */
        pp->anchor[sizeof(pp->anchor) - 1] = '\0';

        PF_RULES_RLOCK();
        pool = pf_get_kpool(pp->anchor, pp->ticket, pp->r_action,
            pp->r_num, 0, 1, 1, pp->which);
        if (pool == NULL) {
                PF_RULES_RUNLOCK();
                return (EBUSY);
        }
        /* Walk to the requested index. */
        for (pa = TAILQ_FIRST(&pool->list);
            pa != NULL && idx < pp->nr;
            pa = TAILQ_NEXT(pa, entries))
                idx++;
        if (pa == NULL) {
                PF_RULES_RUNLOCK();
                return (EBUSY);
        }
        pf_kpooladdr_to_pooladdr(pa, &pp->addr);
        pp->af = pa->af;
        pf_addr_copyout(&pp->addr.addr);
        PF_RULES_RUNLOCK();

        return (0);
}

int
pf_ioctl_get_rulesets(struct pfioc_ruleset *pr)
{
        struct pf_kruleset      *ruleset;
        struct pf_kanchor       *anchor;

        PF_RULES_RLOCK_TRACKER;

        pr->path[sizeof(pr->path) - 1] = '\0';

        PF_RULES_RLOCK();
        if ((ruleset = pf_find_kruleset(pr->path)) == NULL) {
                PF_RULES_RUNLOCK();
                return (ENOENT);
        }
        pr->nr = 0;
        if (ruleset == &pf_main_ruleset) {
                /* XXX kludge for pf_main_ruleset */
                RB_FOREACH(anchor, pf_kanchor_global, &V_pf_anchors)
                        if (anchor->parent == NULL)
                                pr->nr++;
        } else {
                RB_FOREACH(anchor, pf_kanchor_node,
                    &ruleset->anchor->children)
                        pr->nr++;
        }
        PF_RULES_RUNLOCK();

        return (0);
}

/*
 * DIOCGETRULESET: look up the name of the pr->nr'th child anchor of
 * the ruleset named by pr->path.  Returns ENOENT if the ruleset does
 * not exist and EBUSY if the index is out of range.
 */
int
pf_ioctl_get_ruleset(struct pfioc_ruleset *pr)
{
        struct pf_kruleset      *ruleset;
        struct pf_kanchor       *anchor;
        u_int32_t                nr = 0;
        int                      error = 0;

        PF_RULES_RLOCK_TRACKER;

        /*
         * pr->path comes from userland; make sure it is NUL-terminated
         * before using it as a string, as pf_ioctl_get_rulesets() does.
         */
        pr->path[sizeof(pr->path) - 1] = '\0';

        PF_RULES_RLOCK();
        if ((ruleset = pf_find_kruleset(pr->path)) == NULL) {
                PF_RULES_RUNLOCK();
                return (ENOENT);
        }

        pr->name[0] = '\0';
        if (ruleset == &pf_main_ruleset) {
                /* XXX kludge for pf_main_ruleset */
                RB_FOREACH(anchor, pf_kanchor_global, &V_pf_anchors)
                        if (anchor->parent == NULL && nr++ == pr->nr) {
                                strlcpy(pr->name, anchor->name,
                                    sizeof(pr->name));
                                break;
                        }
        } else {
                RB_FOREACH(anchor, pf_kanchor_node,
                    &ruleset->anchor->children)
                        if (nr++ == pr->nr) {
                                strlcpy(pr->name, anchor->name,
                                    sizeof(pr->name));
                                break;
                        }
        }
        /* An empty name means no anchor exists at that index. */
        if (!pr->name[0])
                error = EBUSY;
        PF_RULES_RUNLOCK();

        return (error);
}

/*
 * DIOCNATLOOK: given one side of an active connection, find the
 * matching state and report the translated addresses and ports
 * (rsaddr/rsport, rdaddr/rdport).  Returns ENOENT if no state matches
 * and E2BIG if more than one state matches the key.
 */
int
pf_ioctl_natlook(struct pfioc_natlook *pnl)
{
        struct pf_state_key     *sk;
        struct pf_kstate        *state;
        struct pf_state_key_cmp  key;
        int                      m = 0, direction = pnl->direction;
        int                      sidx, didx;

        /* NATLOOK src and dst are reversed, so reverse sidx/didx */
        sidx = (direction == PF_IN) ? 1 : 0;
        didx = (direction == PF_IN) ? 0 : 1;

        /* TCP and UDP lookups additionally require both ports. */
        if (!pnl->proto ||
            PF_AZERO(&pnl->saddr, pnl->af) ||
            PF_AZERO(&pnl->daddr, pnl->af) ||
            ((pnl->proto == IPPROTO_TCP ||
            pnl->proto == IPPROTO_UDP) &&
            (!pnl->dport || !pnl->sport)))
                return (EINVAL);

        switch (pnl->direction) {
        case PF_IN:
        case PF_OUT:
        case PF_INOUT:
                break;
        default:
                return (EINVAL);
        }

        switch (pnl->af) {
#ifdef INET
        case AF_INET:
                break;
#endif /* INET */
#ifdef INET6
        case AF_INET6:
                break;
#endif /* INET6 */
        default:
                return (EAFNOSUPPORT);
        }

        /* Build the state key from the caller's view of the connection. */
        bzero(&key, sizeof(key));
        key.af = pnl->af;
        key.proto = pnl->proto;
        pf_addrcpy(&key.addr[sidx], &pnl->saddr, pnl->af);
        key.port[sidx] = pnl->sport;
        pf_addrcpy(&key.addr[didx], &pnl->daddr, pnl->af);
        key.port[didx] = pnl->dport;

        state = pf_find_state_all(&key, direction, &m);
        if (state == NULL)
                return (ENOENT);

        if (m > 1) {
                PF_STATE_UNLOCK(state);
                return (E2BIG); /* more than one state */
        }

        sk = state->key[sidx];
        pf_addrcpy(&pnl->rsaddr,
            &sk->addr[sidx], sk->af);
        pnl->rsport = sk->port[sidx];
        pf_addrcpy(&pnl->rdaddr,
            &sk->addr[didx], sk->af);
        pnl->rdport = sk->port[didx];
        PF_STATE_UNLOCK(state);

        return (0);
}

/*
 * Main pf(4) ioctl dispatcher for /dev/pf.  Performs the securelevel and
 * open-mode permission checks up front, then dispatches on 'cmd'.
 */
static int
pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td)
{
        int                      error = 0;
        PF_RULES_RLOCK_TRACKER;

/* Record the error in the SDT probe and jump to the per-command cleanup. */
#define ERROUT_IOCTL(target, x)                                 \
    do {                                                                \
            error = (x);                                                \
            SDT_PROBE3(pf, ioctl, ioctl, error, cmd, error, __LINE__);  \
            goto target;                                                \
    } while (0)


        /* XXX keep in sync with switch() below */
        /*
         * At securelevel > 2 only read-only/query commands are allowed;
         * everything else fails with EPERM.
         */
        if (securelevel_gt(td->td_ucred, 2))
                switch (cmd) {
                case DIOCGETRULES:
                case DIOCGETRULENV:
                case DIOCGETADDRS:
                case DIOCGETADDR:
                case DIOCGETSTATE:
                case DIOCGETSTATENV:
                case DIOCSETSTATUSIF:
                case DIOCGETSTATUSNV:
                case DIOCCLRSTATUS:
                case DIOCNATLOOK:
                case DIOCSETDEBUG:
#ifdef COMPAT_FREEBSD14
                case DIOCGETSTATES:
                case DIOCGETSTATESV2:
#endif
                case DIOCGETTIMEOUT:
                case DIOCCLRRULECTRS:
                case DIOCGETLIMIT:
                case DIOCGETALTQSV0:
                case DIOCGETALTQSV1:
                case DIOCGETALTQV0:
                case DIOCGETALTQV1:
                case DIOCGETQSTATSV0:
                case DIOCGETQSTATSV1:
                case DIOCGETRULESETS:
                case DIOCGETRULESET:
                case DIOCRGETTABLES:
                case DIOCRGETTSTATS:
                case DIOCRCLRTSTATS:
                case DIOCRCLRADDRS:
                case DIOCRADDADDRS:
                case DIOCRDELADDRS:
                case DIOCRSETADDRS:
                case DIOCRGETADDRS:
                case DIOCRGETASTATS:
                case DIOCRCLRASTATS:
                case DIOCRTSTADDRS:
                case DIOCOSFPGET:
                case DIOCGETSRCNODES:
                case DIOCCLRSRCNODES:
                case DIOCGETSYNCOOKIES:
                case DIOCIGETIFACES:
                case DIOCGIFSPEEDV0:
                case DIOCGIFSPEEDV1:
                case DIOCSETIFFLAG:
                case DIOCCLRIFFLAG:
                case DIOCGETETHRULES:
                case DIOCGETETHRULE:
                case DIOCGETETHRULESETS:
                case DIOCGETETHRULESET:
                        break;
                case DIOCRCLRTABLES:
                case DIOCRADDTABLES:
                case DIOCRDELTABLES:
                case DIOCRSETTFLAGS:
                        /* Table-modifying commands are allowed only as
                         * no-op "dummy" (dry-run) operations. */
                        if (((struct pfioc_table *)addr)->pfrio_flags &
                            PFR_FLAG_DUMMY)
                                break; /* dummy operation ok */
                        return (EPERM);
                default:
                        return (EPERM);
                }

        /*
         * A descriptor opened read-only may only issue query commands;
         * mutating commands require FWRITE.
         */
        if (!(flags & FWRITE))
                switch (cmd) {
                case DIOCGETRULES:
                case DIOCGETADDRS:
                case DIOCGETADDR:
                case DIOCGETSTATE:
                case DIOCGETSTATENV:
                case DIOCGETSTATUSNV:
#ifdef COMPAT_FREEBSD14
                case DIOCGETSTATES:
                case DIOCGETSTATESV2:
#endif
                case DIOCGETTIMEOUT:
                case DIOCGETLIMIT:
                case DIOCGETALTQSV0:
                case DIOCGETALTQSV1:
                case DIOCGETALTQV0:
                case DIOCGETALTQV1:
                case DIOCGETQSTATSV0:
                case DIOCGETQSTATSV1:
                case DIOCGETRULESETS:
                case DIOCGETRULESET:
                case DIOCNATLOOK:
                case DIOCRGETTABLES:
                case DIOCRGETTSTATS:
                case DIOCRGETADDRS:
                case DIOCRGETASTATS:
                case DIOCRTSTADDRS:
                case DIOCOSFPGET:
                case DIOCGETSRCNODES:
                case DIOCGETSYNCOOKIES:
                case DIOCIGETIFACES:
                case DIOCGIFSPEEDV1:
                case DIOCGIFSPEEDV0:
                case DIOCGETRULENV:
                case DIOCGETETHRULES:
                case DIOCGETETHRULE:
                case DIOCGETETHRULESETS:
                case DIOCGETETHRULESET:
                        break;
                case DIOCRCLRTABLES:
                case DIOCRADDTABLES:
                case DIOCRDELTABLES:
                case DIOCRCLRTSTATS:
                case DIOCRCLRADDRS:
                case DIOCRADDADDRS:
                case DIOCRDELADDRS:
                case DIOCRSETADDRS:
                case DIOCRSETTFLAGS:
                        /* Dry-run table ops are permitted read-only, but
                         * still take the write path below. */
                        if (((struct pfioc_table *)addr)->pfrio_flags &
                            PFR_FLAG_DUMMY) {
                                flags |= FWRITE; /* need write lock for dummy */
                                break; /* dummy operation ok */
                        }
                        return (EACCES);
                default:
                        return (EACCES);
                }

        /* All per-command work runs in the caller thread's vnet. */
        CURVNET_SET(TD_TO_VNET(td));

        switch (cmd) {
#ifdef COMPAT_FREEBSD14
        case DIOCSTART:
                error = pf_start();
                break;

        case DIOCSTOP:
                error = pf_stop();
                break;
#endif

        /*
         * Return the active Ethernet ruleset's ticket and rule count for
         * the requested anchor, as a packed nvlist.
         */
        case DIOCGETETHRULES: {
                struct pfioc_nv         *nv = (struct pfioc_nv *)addr;
                nvlist_t                *nvl;
                void                    *packed;
                struct pf_keth_rule     *tail;
                struct pf_keth_ruleset  *rs;
                u_int32_t                ticket, nr;
                const char              *anchor = "";

                nvl = NULL;
                packed = NULL;

#define ERROUT(x)       ERROUT_IOCTL(DIOCGETETHRULES_error, x)

                /* Reject oversized requests before allocating. */
                if (nv->len > pf_ioctl_maxcount)
                        ERROUT(ENOMEM);

                /* Copy the request in */
                packed = malloc(nv->len, M_NVLIST, M_WAITOK);
                error = copyin(nv->data, packed, nv->len);
                if (error)
                        ERROUT(error);

                nvl = nvlist_unpack(packed, nv->len, 0);
                if (nvl == NULL)
                        ERROUT(EBADMSG);

                if (! nvlist_exists_string(nvl, "anchor"))
                        ERROUT(EBADMSG);

                anchor = nvlist_get_string(nvl, "anchor");

                rs = pf_find_keth_ruleset(anchor);

                /* The request nvlist is no longer needed; release it so
                 * the same variables can carry the reply. */
                nvlist_destroy(nvl);
                nvl = NULL;
                free(packed, M_NVLIST);
                packed = NULL;

                if (rs == NULL)
                        ERROUT(ENOENT);

                /* Reply */
                nvl = nvlist_create(0);
                if (nvl == NULL)
                        ERROUT(ENOMEM);

                PF_RULES_RLOCK();

                /* Snapshot the ticket and next rule number under the lock. */
                ticket = rs->active.ticket;
                tail = TAILQ_LAST(rs->active.rules, pf_keth_ruleq);
                if (tail)
                        nr = tail->nr + 1;
                else
                        nr = 0;

                PF_RULES_RUNLOCK();

                nvlist_add_number(nvl, "ticket", ticket);
                nvlist_add_number(nvl, "nr", nr);

                packed = nvlist_pack(nvl, &nv->len);
                if (packed == NULL)
                        ERROUT(ENOMEM);

                /* size == 0 means the caller only probes the needed length;
                 * ERROUT(0) still returns the updated nv->len. */
                if (nv->size == 0)
                        ERROUT(0);
                else if (nv->size < nv->len)
                        ERROUT(ENOSPC);

                error = copyout(packed, nv->data, nv->len);

#undef ERROUT
DIOCGETETHRULES_error:
                free(packed, M_NVLIST);
                nvlist_destroy(nvl);
                break;
        }

        /*
         * Fetch a single Ethernet rule (by anchor, ticket and rule number)
         * as a packed nvlist; optionally zero its counters afterwards.
         */
        case DIOCGETETHRULE: {
                struct epoch_tracker     et;
                struct pfioc_nv         *nv = (struct pfioc_nv *)addr;
                nvlist_t                *nvl = NULL;
                void                    *nvlpacked = NULL;
                struct pf_keth_rule     *rule = NULL;
                struct pf_keth_ruleset  *rs;
                u_int32_t                ticket, nr;
                bool                     clear = false;
                const char              *anchor;

#define ERROUT(x)       ERROUT_IOCTL(DIOCGETETHRULE_error, x)

                if (nv->len > pf_ioctl_maxcount)
                        ERROUT(ENOMEM);

                /* Copy the packed request in from userland. */
                nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
                error = copyin(nv->data, nvlpacked, nv->len);
                if (error)
                        ERROUT(error);

                nvl = nvlist_unpack(nvlpacked, nv->len, 0);
                if (nvl == NULL)
                        ERROUT(EBADMSG);
                if (! nvlist_exists_number(nvl, "ticket"))
                        ERROUT(EBADMSG);
                ticket = nvlist_get_number(nvl, "ticket");
                if (! nvlist_exists_string(nvl, "anchor"))
                        ERROUT(EBADMSG);
                anchor = nvlist_get_string(nvl, "anchor");

                if (nvlist_exists_bool(nvl, "clear"))
                        clear = nvlist_get_bool(nvl, "clear");

                /* Clearing counters mutates state: require write access. */
                if (clear && !(flags & FWRITE))
                        ERROUT(EACCES);

                if (! nvlist_exists_number(nvl, "nr"))
                        ERROUT(EBADMSG);
                nr = nvlist_get_number(nvl, "nr");

                PF_RULES_RLOCK();
                rs = pf_find_keth_ruleset(anchor);
                if (rs == NULL) {
                        PF_RULES_RUNLOCK();
                        ERROUT(ENOENT);
                }
                /* The ruleset must not have changed since the caller's
                 * DIOCGETETHRULES snapshot. */
                if (ticket != rs->active.ticket) {
                        PF_RULES_RUNLOCK();
                        ERROUT(EBUSY);
                }

                /* Request is fully parsed; reuse nvl/nvlpacked for reply. */
                nvlist_destroy(nvl);
                nvl = NULL;
                free(nvlpacked, M_NVLIST);
                nvlpacked = NULL;

                rule = TAILQ_FIRST(rs->active.rules);
                while ((rule != NULL) && (rule->nr != nr))
                        rule = TAILQ_NEXT(rule, entries);
                if (rule == NULL) {
                        PF_RULES_RUNLOCK();
                        ERROUT(ENOENT);
                }
                /* Make sure rule can't go away. */
                NET_EPOCH_ENTER(et);
                PF_RULES_RUNLOCK();
                nvl = pf_keth_rule_to_nveth_rule(rule);
                /*
                 * Check for allocation failure before handing nvl to
                 * pf_keth_anchor_nvcopyout(); previously a NULL nvl was
                 * passed to it and only checked afterwards.
                 */
                if (nvl == NULL) {
                        NET_EPOCH_EXIT(et);
                        ERROUT(ENOMEM);
                }
                if (pf_keth_anchor_nvcopyout(rs, rule, nvl)) {
                        NET_EPOCH_EXIT(et);
                        ERROUT(EBUSY);
                }
                NET_EPOCH_EXIT(et);

                nvlpacked = nvlist_pack(nvl, &nv->len);
                if (nvlpacked == NULL)
                        ERROUT(ENOMEM);

                /* size == 0: caller only probes the required length. */
                if (nv->size == 0)
                        ERROUT(0);
                else if (nv->size < nv->len)
                        ERROUT(ENOSPC);

                error = copyout(nvlpacked, nv->data, nv->len);
                if (error == 0 && clear) {
                        /*
                         * NOTE(review): counters are zeroed after the epoch
                         * section ended; confirm the rule cannot be freed
                         * between NET_EPOCH_EXIT() and here.
                         */
                        counter_u64_zero(rule->evaluations);
                        for (int i = 0; i < 2; i++) {
                                counter_u64_zero(rule->packets[i]);
                                counter_u64_zero(rule->bytes[i]);
                        }
                }

#undef ERROUT
DIOCGETETHRULE_error:
                free(nvlpacked, M_NVLIST);
                nvlist_destroy(nvl);
                break;
        }

        /*
         * Append a new Ethernet rule (supplied as a packed nvlist) to the
         * inactive ruleset of the given anchor, under the inactive ticket.
         */
        case DIOCADDETHRULE: {
                struct pfioc_nv         *nv = (struct pfioc_nv *)addr;
                nvlist_t                *nvl = NULL;
                void                    *nvlpacked = NULL;
                struct pf_keth_rule     *rule = NULL, *tail = NULL;
                struct pf_keth_ruleset  *ruleset = NULL;
                struct pfi_kkif         *kif = NULL, *bridge_to_kif = NULL;
                const char              *anchor = "", *anchor_call = "";

#define ERROUT(x)       ERROUT_IOCTL(DIOCADDETHRULE_error, x)

                if (nv->len > pf_ioctl_maxcount)
                        ERROUT(ENOMEM);

                /* Copy the packed request in from userland. */
                nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
                error = copyin(nv->data, nvlpacked, nv->len);
                if (error)
                        ERROUT(error);

                nvl = nvlist_unpack(nvlpacked, nv->len, 0);
                if (nvl == NULL)
                        ERROUT(EBADMSG);

                if (! nvlist_exists_number(nvl, "ticket"))
                        ERROUT(EBADMSG);

                if (nvlist_exists_string(nvl, "anchor"))
                        anchor = nvlist_get_string(nvl, "anchor");
                if (nvlist_exists_string(nvl, "anchor_call"))
                        anchor_call = nvlist_get_string(nvl, "anchor_call");

                ruleset = pf_find_keth_ruleset(anchor);
                if (ruleset == NULL)
                        ERROUT(EINVAL);

                /* Rules may only be added to the currently open (inactive)
                 * ruleset transaction. */
                if (nvlist_get_number(nvl, "ticket") !=
                    ruleset->inactive.ticket) {
                        DPFPRINTF(PF_DEBUG_MISC,
                            "ticket: %d != %d",
                            (u_int32_t)nvlist_get_number(nvl, "ticket"),
                            ruleset->inactive.ticket);
                        ERROUT(EBUSY);
                }

                rule = malloc(sizeof(*rule), M_PFRULE, M_WAITOK);
                rule->timestamp = NULL;

                error = pf_nveth_rule_to_keth_rule(nvl, rule);
                if (error != 0) {
                        /*
                         * Free the half-constructed rule: no counters, kifs
                         * or anchors are attached to it yet, and the struct
                         * is not zeroed, so pf_free_eth_rule() must not be
                         * used here.  This rule used to leak on this path,
                         * as DIOCADDETHRULE_error only frees nvl/nvlpacked.
                         */
                        free(rule, M_PFRULE);
                        ERROUT(error);
                }

                if (rule->ifname[0])
                        kif = pf_kkif_create(M_WAITOK);
                if (rule->bridge_to_name[0])
                        bridge_to_kif = pf_kkif_create(M_WAITOK);
                rule->evaluations = counter_u64_alloc(M_WAITOK);
                for (int i = 0; i < 2; i++) {
                        rule->packets[i] = counter_u64_alloc(M_WAITOK);
                        rule->bytes[i] = counter_u64_alloc(M_WAITOK);
                }
                rule->timestamp = uma_zalloc_pcpu(pf_timestamp_pcpu_zone,
                    M_WAITOK | M_ZERO);

                PF_RULES_WLOCK();

                if (rule->ifname[0]) {
                        rule->kif = pfi_kkif_attach(kif, rule->ifname);
                        pfi_kkif_ref(rule->kif);
                } else
                        rule->kif = NULL;
                if (rule->bridge_to_name[0]) {
                        rule->bridge_to = pfi_kkif_attach(bridge_to_kif,
                            rule->bridge_to_name);
                        pfi_kkif_ref(rule->bridge_to);
                } else
                        rule->bridge_to = NULL;

#ifdef ALTQ
                /* set queue IDs */
                if (rule->qname[0] != 0) {
                        /* Removed a dead self-assignment in the else branch
                         * (rule->qid = rule->qid). */
                        if ((rule->qid = pf_qname2qid(rule->qname, true)) == 0)
                                error = EBUSY;
                }
#endif
                if (rule->tagname[0])
                        if ((rule->tag = pf_tagname2tag(rule->tagname)) == 0)
                                error = EBUSY;
                if (rule->match_tagname[0])
                        if ((rule->match_tag = pf_tagname2tag(
                            rule->match_tagname)) == 0)
                                error = EBUSY;

                if (error == 0 && rule->ipdst.addr.type == PF_ADDR_TABLE)
                        error = pf_eth_addr_setup(ruleset, &rule->ipdst.addr);
                if (error == 0 && rule->ipsrc.addr.type == PF_ADDR_TABLE)
                        error = pf_eth_addr_setup(ruleset, &rule->ipsrc.addr);

                /* From here on the rule is fully set up, so the error paths
                 * must release everything via pf_free_eth_rule(). */
                if (error) {
                        pf_free_eth_rule(rule);
                        PF_RULES_WUNLOCK();
                        ERROUT(error);
                }

                if (pf_keth_anchor_setup(rule, ruleset, anchor_call)) {
                        pf_free_eth_rule(rule);
                        PF_RULES_WUNLOCK();
                        ERROUT(EINVAL);
                }

                /* Append with the next free rule number. */
                tail = TAILQ_LAST(ruleset->inactive.rules, pf_keth_ruleq);
                if (tail)
                        rule->nr = tail->nr + 1;
                else
                        rule->nr = 0;

                TAILQ_INSERT_TAIL(ruleset->inactive.rules, rule, entries);

                PF_RULES_WUNLOCK();

#undef ERROUT
DIOCADDETHRULE_error:
                nvlist_destroy(nvl);
                free(nvlpacked, M_NVLIST);
                break;
        }

        /*
         * Count the child anchors of the Ethernet ruleset at "path" and
         * return the count as "nr" in a packed nvlist.
         */
        case DIOCGETETHRULESETS: {
                struct epoch_tracker     et;
                struct pfioc_nv         *nv = (struct pfioc_nv *)addr;
                nvlist_t                *nvl = NULL;
                void                    *nvlpacked = NULL;
                struct pf_keth_ruleset  *ruleset;
                struct pf_keth_anchor   *anchor;
                int                      nr = 0;

#define ERROUT(x)       ERROUT_IOCTL(DIOCGETETHRULESETS_error, x)

                if (nv->len > pf_ioctl_maxcount)
                        ERROUT(ENOMEM);

                /* Copy the packed request in from userland. */
                nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
                error = copyin(nv->data, nvlpacked, nv->len);
                if (error)
                        ERROUT(error);

                nvl = nvlist_unpack(nvlpacked, nv->len, 0);
                if (nvl == NULL)
                        ERROUT(EBADMSG);
                if (! nvlist_exists_string(nvl, "path"))
                        ERROUT(EBADMSG);

                /* Walk the anchor trees inside a network epoch section. */
                NET_EPOCH_ENTER(et);

                if ((ruleset = pf_find_keth_ruleset(
                    nvlist_get_string(nvl, "path"))) == NULL) {
                        NET_EPOCH_EXIT(et);
                        ERROUT(ENOENT);
                }

                /* Root ruleset: count top-level anchors; otherwise count
                 * the children of this anchor. */
                if (ruleset->anchor == NULL) {
                        RB_FOREACH(anchor, pf_keth_anchor_global, &V_pf_keth_anchors)
                                if (anchor->parent == NULL)
                                        nr++;
                } else {
                        RB_FOREACH(anchor, pf_keth_anchor_node,
                            &ruleset->anchor->children)
                                nr++;
                }

                NET_EPOCH_EXIT(et);

                /* Request is consumed; reuse nvl/nvlpacked for the reply. */
                nvlist_destroy(nvl);
                nvl = NULL;
                free(nvlpacked, M_NVLIST);
                nvlpacked = NULL;

                nvl = nvlist_create(0);
                if (nvl == NULL)
                        ERROUT(ENOMEM);

                nvlist_add_number(nvl, "nr", nr);

                nvlpacked = nvlist_pack(nvl, &nv->len);
                if (nvlpacked == NULL)
                        ERROUT(ENOMEM);

                /* size == 0: caller only probes the required length. */
                if (nv->size == 0)
                        ERROUT(0);
                else if (nv->size < nv->len)
                        ERROUT(ENOSPC);

                error = copyout(nvlpacked, nv->data, nv->len);

#undef ERROUT
DIOCGETETHRULESETS_error:
                free(nvlpacked, M_NVLIST);
                nvlist_destroy(nvl);
                break;
        }

        /*
         * Look up the req_nr'th child anchor of the Ethernet ruleset at
         * "path" and return its name and path in a packed nvlist.
         */
        case DIOCGETETHRULESET: {
                struct epoch_tracker     et;
                struct pfioc_nv         *nv = (struct pfioc_nv *)addr;
                nvlist_t                *nvl = NULL;
                void                    *nvlpacked = NULL;
                struct pf_keth_ruleset  *ruleset;
                struct pf_keth_anchor   *anchor;
                int                      nr = 0, req_nr = 0;
                bool                     found = false;

#define ERROUT(x)       ERROUT_IOCTL(DIOCGETETHRULESET_error, x)

                if (nv->len > pf_ioctl_maxcount)
                        ERROUT(ENOMEM);

                /* Copy the packed request in from userland. */
                nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
                error = copyin(nv->data, nvlpacked, nv->len);
                if (error)
                        ERROUT(error);

                nvl = nvlist_unpack(nvlpacked, nv->len, 0);
                if (nvl == NULL)
                        ERROUT(EBADMSG);
                if (! nvlist_exists_string(nvl, "path"))
                        ERROUT(EBADMSG);
                if (! nvlist_exists_number(nvl, "nr"))
                        ERROUT(EBADMSG);

                req_nr = nvlist_get_number(nvl, "nr");

                NET_EPOCH_ENTER(et);

                if ((ruleset = pf_find_keth_ruleset(
                    nvlist_get_string(nvl, "path"))) == NULL) {
                        NET_EPOCH_EXIT(et);
                        ERROUT(ENOENT);
                }

                /* Request is consumed; reuse nvl/nvlpacked for the reply. */
                nvlist_destroy(nvl);
                nvl = NULL;
                free(nvlpacked, M_NVLIST);
                nvlpacked = NULL;

                nvl = nvlist_create(0);
                if (nvl == NULL) {
                        NET_EPOCH_EXIT(et);
                        ERROUT(ENOMEM);
                }

                /* Walk the same anchor set as DIOCGETETHRULESETS until the
                 * req_nr'th entry is reached. */
                if (ruleset->anchor == NULL) {
                        RB_FOREACH(anchor, pf_keth_anchor_global,
                            &V_pf_keth_anchors) {
                                if (anchor->parent == NULL && nr++ == req_nr) {
                                        found = true;
                                        break;
                                }
                        }
                } else {
                        RB_FOREACH(anchor, pf_keth_anchor_node,
                             &ruleset->anchor->children) {
                                if (nr++ == req_nr) {
                                        found = true;
                                        break;
                                }
                        }
                }

                NET_EPOCH_EXIT(et);
                if (found) {
                        /* NOTE(review): "nr" here is the post-incremented
                         * counter (req_nr + 1), not req_nr — confirm the
                         * userland consumer expects this. */
                        nvlist_add_number(nvl, "nr", nr);
                        nvlist_add_string(nvl, "name", anchor->name);
                        if (ruleset->anchor)
                                nvlist_add_string(nvl, "path",
                                    ruleset->anchor->path);
                        else
                                nvlist_add_string(nvl, "path", "");
                } else {
                        ERROUT(EBUSY);
                }

                nvlpacked = nvlist_pack(nvl, &nv->len);
                if (nvlpacked == NULL)
                        ERROUT(ENOMEM);

                /* size == 0: caller only probes the required length. */
                if (nv->size == 0)
                        ERROUT(0);
                else if (nv->size < nv->len)
                        ERROUT(ENOSPC);

                error = copyout(nvlpacked, nv->data, nv->len);

#undef ERROUT
DIOCGETETHRULESET_error:
                free(nvlpacked, M_NVLIST);
                nvlist_destroy(nvl);
                break;
        }

        /*
         * nvlist variant of DIOCADDRULE: unpack a rule, its ticket and
         * pool_ticket from a packed nvlist and append it via
         * pf_ioctl_addrule().
         */
        case DIOCADDRULENV: {
                struct pfioc_nv *nv = (struct pfioc_nv *)addr;
                nvlist_t        *nvl = NULL;
                void            *nvlpacked = NULL;
                struct pf_krule *rule = NULL;
                const char      *anchor = "", *anchor_call = "";
                uint32_t         ticket = 0, pool_ticket = 0;

#define ERROUT(x)       ERROUT_IOCTL(DIOCADDRULENV_error, x)

                if (nv->len > pf_ioctl_maxcount)
                        ERROUT(ENOMEM);

                /* Copy the packed request in from userland. */
                nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
                error = copyin(nv->data, nvlpacked, nv->len);
                if (error)
                        ERROUT(error);

                nvl = nvlist_unpack(nvlpacked, nv->len, 0);
                if (nvl == NULL)
                        ERROUT(EBADMSG);

                if (! nvlist_exists_number(nvl, "ticket"))
                        ERROUT(EINVAL);
                ticket = nvlist_get_number(nvl, "ticket");

                if (! nvlist_exists_number(nvl, "pool_ticket"))
                        ERROUT(EINVAL);
                pool_ticket = nvlist_get_number(nvl, "pool_ticket");

                if (! nvlist_exists_nvlist(nvl, "rule"))
                        ERROUT(EINVAL);

                /* On ERROUT after this point the error label frees rule. */
                rule = pf_krule_alloc();
                error = pf_nvrule_to_krule(nvlist_get_nvlist(nvl, "rule"),
                    rule);
                if (error)
                        ERROUT(error);

                if (nvlist_exists_string(nvl, "anchor"))
                        anchor = nvlist_get_string(nvl, "anchor");
                if (nvlist_exists_string(nvl, "anchor_call"))
                        anchor_call = nvlist_get_string(nvl, "anchor_call");

                /* Catch any deferred nvlist allocation/parse errors. */
                if ((error = nvlist_error(nvl)))
                        ERROUT(error);

                /* Frees rule on error */
                error = pf_ioctl_addrule(rule, ticket, pool_ticket, anchor,
                    anchor_call, td->td_ucred->cr_ruid,
                    td->td_proc ? td->td_proc->p_pid : 0);

                nvlist_destroy(nvl);
                free(nvlpacked, M_NVLIST);
                break;
#undef ERROUT
DIOCADDRULENV_error:
                pf_krule_free(rule);
                nvlist_destroy(nvl);
                free(nvlpacked, M_NVLIST);

                break;
        }
        /*
         * Legacy struct-based rule add: convert the pfioc_rule payload to a
         * kernel rule and append it via pf_ioctl_addrule().
         */
        case DIOCADDRULE: {
                struct pfioc_rule       *pr = (struct pfioc_rule *)addr;
                struct pf_krule         *rule;

                rule = pf_krule_alloc();
                error = pf_rule_to_krule(&pr->rule, rule);
                if (error != 0) {
                        pf_krule_free(rule);
                        goto fail;
                }

                /* Force NUL-termination of the user-supplied anchor name. */
                pr->anchor[sizeof(pr->anchor) - 1] = '\0';

                /* Frees rule on error */
                error = pf_ioctl_addrule(rule, pr->ticket, pr->pool_ticket,
                    pr->anchor, pr->anchor_call, td->td_ucred->cr_ruid,
                    td->td_proc ? td->td_proc->p_pid : 0);
                break;
        }

        /*
         * Return rule-list metadata for an anchor; the work is delegated
         * to pf_ioctl_getrules().
         */
        case DIOCGETRULES: {
                struct pfioc_rule       *pr = (struct pfioc_rule *)addr;

                /* Force NUL-termination of the user-supplied anchor name. */
                pr->anchor[sizeof(pr->anchor) - 1] = '\0';

                error = pf_ioctl_getrules(pr);

                break;
        }

        /*
         * Fetch one rule (by anchor, ruleset number, ticket and rule
         * number) as a packed nvlist; optionally clear its counters, in
         * which case the rules write lock is taken instead of the read
         * lock.
         */
        case DIOCGETRULENV: {
                /* Block-scope lock tracker for this command's lock use. */
                PF_RULES_RLOCK_TRACKER;
                struct pfioc_nv         *nv = (struct pfioc_nv *)addr;
                nvlist_t                *nvrule = NULL;
                nvlist_t                *nvl = NULL;
                struct pf_kruleset      *ruleset;
                struct pf_krule         *rule;
                void                    *nvlpacked = NULL;
                int                      rs_num, nr;
                bool                     clear_counter = false;

#define ERROUT(x)       ERROUT_IOCTL(DIOCGETRULENV_error, x)
/* Drop whichever rules lock was taken, then bail out. */
#define ERROUT_LOCKED(x) do {                   \
        if (clear_counter)                      \
                PF_RULES_WUNLOCK();             \
        else                                    \
                PF_RULES_RUNLOCK();             \
        ERROUT(x);                              \
} while (0)

                if (nv->len > pf_ioctl_maxcount)
                        ERROUT(ENOMEM);

                /* Copy the request in */
                nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
                error = copyin(nv->data, nvlpacked, nv->len);
                if (error)
                        ERROUT(error);

                nvl = nvlist_unpack(nvlpacked, nv->len, 0);
                if (nvl == NULL)
                        ERROUT(EBADMSG);

                if (! nvlist_exists_string(nvl, "anchor"))
                        ERROUT(EBADMSG);
                if (! nvlist_exists_number(nvl, "ruleset"))
                        ERROUT(EBADMSG);
                if (! nvlist_exists_number(nvl, "ticket"))
                        ERROUT(EBADMSG);
                if (! nvlist_exists_number(nvl, "nr"))
                        ERROUT(EBADMSG);

                if (nvlist_exists_bool(nvl, "clear_counter"))
                        clear_counter = nvlist_get_bool(nvl, "clear_counter");

                /* Clearing counters mutates state: require write access. */
                if (clear_counter && !(flags & FWRITE))
                        ERROUT(EACCES);

                nr = nvlist_get_number(nvl, "nr");

                /* Write lock only when we intend to zero the counters. */
                if (clear_counter)
                        PF_RULES_WLOCK();
                else
                        PF_RULES_RLOCK();
                ruleset = pf_find_kruleset(nvlist_get_string(nvl, "anchor"));
                if (ruleset == NULL)
                        ERROUT_LOCKED(ENOENT);

                rs_num = pf_get_ruleset_number(nvlist_get_number(nvl, "ruleset"));
                if (rs_num >= PF_RULESET_MAX)
                        ERROUT_LOCKED(EINVAL);

                /* Reject stale tickets from an earlier snapshot. */
                if (nvlist_get_number(nvl, "ticket") !=
                    ruleset->rules[rs_num].active.ticket)
                        ERROUT_LOCKED(EBUSY);

                if ((error = nvlist_error(nvl)))
                        ERROUT_LOCKED(error);

                /* Linear search for the requested rule number. */
                rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr);
                while ((rule != NULL) && (rule->nr != nr))
                        rule = TAILQ_NEXT(rule, entries);
                if (rule == NULL)
                        ERROUT_LOCKED(EBUSY);

                nvrule = pf_krule_to_nvrule(rule);

                /* Request nvlist is consumed; build the reply in its place. */
                nvlist_destroy(nvl);
                nvl = nvlist_create(0);
                if (nvl == NULL)
                        ERROUT_LOCKED(ENOMEM);
                nvlist_add_number(nvl, "nr", nr);
                nvlist_add_nvlist(nvl, "rule", nvrule);
                nvlist_destroy(nvrule);
                nvrule = NULL;
                if (pf_kanchor_nvcopyout(ruleset, rule, nvl))
                        ERROUT_LOCKED(EBUSY);

                free(nvlpacked, M_NVLIST);
                nvlpacked = nvlist_pack(nvl, &nv->len);
                if (nvlpacked == NULL)
                        ERROUT_LOCKED(ENOMEM);

                /* size == 0: caller only probes the required length. */
                if (nv->size == 0)
                        ERROUT_LOCKED(0);
                else if (nv->size < nv->len)
                        ERROUT_LOCKED(ENOSPC);

                /* Drop the lock before the (possibly faulting) copyout. */
                if (clear_counter) {
                        pf_krule_clear_counters(rule);
                        PF_RULES_WUNLOCK();
                } else {
                        PF_RULES_RUNLOCK();
                }

                error = copyout(nvlpacked, nv->data, nv->len);

#undef ERROUT_LOCKED
#undef ERROUT
DIOCGETRULENV_error:
                free(nvlpacked, M_NVLIST);
                nvlist_destroy(nvrule);
                nvlist_destroy(nvl);

                break;
        }

        /*
         * Modify the active ruleset in place: add/remove/replace a rule or
         * fetch a change ticket, per pcr->action.
         */
        case DIOCCHANGERULE: {
                struct pfioc_rule       *pcr = (struct pfioc_rule *)addr;
                struct pf_kruleset      *ruleset;
                struct pf_krule         *oldrule = NULL, *newrule = NULL;
                struct pfi_kkif         *kif = NULL;
                struct pf_kpooladdr     *pa;
                u_int32_t                nr = 0;
                int                      rs_num;

                /* Force NUL-termination of the user-supplied anchor name. */
                pcr->anchor[sizeof(pcr->anchor) - 1] = '\0';

                if (pcr->action < PF_CHANGE_ADD_HEAD ||
                    pcr->action > PF_CHANGE_GET_TICKET) {
                        error = EINVAL;
                        goto fail;
                }
                /* The ICMP type lives in the high byte of return_icmp. */
                if (pcr->rule.return_icmp >> 8 > ICMP_MAXTYPE) {
                        error = EINVAL;
                        goto fail;
                }

                /* Every action except REMOVE carries a replacement rule;
                 * convert and fully allocate it before taking locks. */
                if (pcr->action != PF_CHANGE_REMOVE) {
                        newrule = pf_krule_alloc();
                        error = pf_rule_to_krule(&pcr->rule, newrule);
                        if (error != 0) {
                                pf_krule_free(newrule);
                                goto fail;
                        }

                        if ((error = pf_rule_checkaf(newrule))) {
                                pf_krule_free(newrule);
                                goto fail;
                        }
                        if (newrule->ifname[0])
                                kif = pf_kkif_create(M_WAITOK);
                        pf_counter_u64_init(&newrule->evaluations, M_WAITOK);
                        for (int i = 0; i < 2; i++) {
                                pf_counter_u64_init(&newrule->packets[i], M_WAITOK);
                                pf_counter_u64_init(&newrule->bytes[i], M_WAITOK);
                        }
                        newrule->states_cur = counter_u64_alloc(M_WAITOK);
                        newrule->states_tot = counter_u64_alloc(M_WAITOK);
                        for (pf_sn_types_t sn_type=0; sn_type<PF_SN_MAX; sn_type++)
                                newrule->src_nodes[sn_type] = counter_u64_alloc(M_WAITOK);
                        newrule->cuid = td->td_ucred->cr_ruid;
                        newrule->cpid = td->td_proc ? td->td_proc->p_pid : 0;
                        TAILQ_INIT(&newrule->nat.list);
                        TAILQ_INIT(&newrule->rdr.list);
                        TAILQ_INIT(&newrule->route.list);
                }
#define ERROUT(x)       ERROUT_IOCTL(DIOCCHANGERULE_error, x)

                PF_CONFIG_LOCK();
                PF_RULES_WLOCK();
#ifdef PF_WANT_32_TO_64_COUNTER
                if (newrule != NULL) {
                        LIST_INSERT_HEAD(&V_pf_allrulelist, newrule, allrulelist);
                        newrule->allrulelinked = true;
                        V_pf_allrulecount++;
                }
#endif

                /* REMOVE/GET_TICKET don't reference the address pool, so
                 * they skip the pool ticket check. */
                if (!(pcr->action == PF_CHANGE_REMOVE ||
                    pcr->action == PF_CHANGE_GET_TICKET) &&
                    pcr->pool_ticket != V_ticket_pabuf)
                        ERROUT(EBUSY);

                ruleset = pf_find_kruleset(pcr->anchor);
                if (ruleset == NULL)
                        ERROUT(EINVAL);

                rs_num = pf_get_ruleset_number(pcr->rule.action);
                if (rs_num >= PF_RULESET_MAX)
                        ERROUT(EINVAL);

                /*
                 * XXXMJG: there is no guarantee that the ruleset was
                 * created by the usual route of calling DIOCXBEGIN.
                 * As a result it is possible the rule tree will not
                 * be allocated yet. Hack around it by doing it here.
                 * Note it is fine to let the tree persist in case of
                 * error as it will be freed down the road on future
                 * updates (if need be).
                 */
                if (ruleset->rules[rs_num].active.tree == NULL) {
                        ruleset->rules[rs_num].active.tree = pf_rule_tree_alloc(M_NOWAIT);
                        if (ruleset->rules[rs_num].active.tree == NULL) {
                                ERROUT(ENOMEM);
                        }
                }

                if (pcr->action == PF_CHANGE_GET_TICKET) {
                        pcr->ticket = ++ruleset->rules[rs_num].active.ticket;
                        ERROUT(0);
                } else if (pcr->ticket !=
                            ruleset->rules[rs_num].active.ticket)
                                ERROUT(EINVAL);

                if (pcr->action != PF_CHANGE_REMOVE) {
                        if (newrule->ifname[0]) {
                                newrule->kif = pfi_kkif_attach(kif,
                                    newrule->ifname);
                                kif = NULL;
                                pfi_kkif_ref(newrule->kif);
                        } else
                                newrule->kif = NULL;

                        if (newrule->rtableid > 0 &&
                            newrule->rtableid >= rt_numfibs)
                                error = EBUSY;

#ifdef ALTQ
                        /* set queue IDs */
                        if (newrule->qname[0] != 0) {
                                if ((newrule->qid =
                                    pf_qname2qid(newrule->qname, true)) == 0)
                                        error = EBUSY;
                                else if (newrule->pqname[0] != 0) {
                                        if ((newrule->pqid =
                                            pf_qname2qid(newrule->pqname, true)) == 0)
                                                error = EBUSY;
                                } else
                                        newrule->pqid = newrule->qid;
                        }
#endif /* ALTQ */
                        if (newrule->tagname[0])
                                if ((newrule->tag =
                                    pf_tagname2tag(newrule->tagname)) == 0)
                                        error = EBUSY;
                        if (newrule->match_tagname[0])
                                if ((newrule->match_tag = pf_tagname2tag(
                                    newrule->match_tagname)) == 0)
                                        error = EBUSY;
                        if (newrule->rt && !newrule->direction)
                                error = EINVAL;
                        if (!newrule->log)
                                newrule->logif = 0;
                        if (pf_addr_setup(ruleset, &newrule->src.addr, newrule->af))
                                error = ENOMEM;
                        if (pf_addr_setup(ruleset, &newrule->dst.addr, newrule->af))
                                error = ENOMEM;
                        if (pf_kanchor_setup(newrule, ruleset, pcr->anchor_call))
                                error = EINVAL;
                        for (int i = 0; i < 3; i++) {
                                TAILQ_FOREACH(pa, &V_pf_pabuf[i], entries)
                                        if (pa->addr.type == PF_ADDR_TABLE) {
                                                pa->addr.p.tbl =
                                                    pfr_attach_table(ruleset,
                                                    pa->addr.v.tblname);
                                                if (pa->addr.p.tbl == NULL)
                                                        error = ENOMEM;
                                        }
                        }

                        newrule->overload_tbl = NULL;
                        if (newrule->overload_tblname[0]) {
                                if ((newrule->overload_tbl = pfr_attach_table(
                                    ruleset, newrule->overload_tblname)) ==
                                    NULL)
                                        error = EINVAL;
                                else
                                        newrule->overload_tbl->pfrkt_flags |=
                                            PFR_TFLAG_ACTIVE;
                        }

                        pf_mv_kpool(&V_pf_pabuf[0], &newrule->nat.list);
                        pf_mv_kpool(&V_pf_pabuf[1], &newrule->rdr.list);
                        pf_mv_kpool(&V_pf_pabuf[2], &newrule->route.list);
                        if (((((newrule->action == PF_NAT) ||
                            (newrule->action == PF_RDR) ||
                            (newrule->action == PF_BINAT) ||
                            (newrule->rt > PF_NOPFROUTE)) &&
                            !newrule->anchor)) &&
                            (TAILQ_FIRST(&newrule->rdr.list) == NULL))
                                error = EINVAL;

                        if (error) {
                                pf_free_rule(newrule);
                                PF_RULES_WUNLOCK();
                                PF_CONFIG_UNLOCK();
                                goto fail;
                        }

                        newrule->nat.cur = TAILQ_FIRST(&newrule->nat.list);
                        newrule->rdr.cur = TAILQ_FIRST(&newrule->rdr.list);
                }
                pf_empty_kpool(&V_pf_pabuf[0]);
                pf_empty_kpool(&V_pf_pabuf[1]);
                pf_empty_kpool(&V_pf_pabuf[2]);

                if (pcr->action == PF_CHANGE_ADD_HEAD)
                        oldrule = TAILQ_FIRST(
                            ruleset->rules[rs_num].active.ptr);
                else if (pcr->action == PF_CHANGE_ADD_TAIL)
                        oldrule = TAILQ_LAST(
                            ruleset->rules[rs_num].active.ptr, pf_krulequeue);
                else {
                        oldrule = TAILQ_FIRST(
                            ruleset->rules[rs_num].active.ptr);
                        while ((oldrule != NULL) && (oldrule->nr != pcr->nr))
                                oldrule = TAILQ_NEXT(oldrule, entries);
                        if (oldrule == NULL) {
                                if (newrule != NULL)
                                        pf_free_rule(newrule);
                                PF_RULES_WUNLOCK();
                                PF_CONFIG_UNLOCK();
                                error = EINVAL;
                                goto fail;
                        }
                }

                if (pcr->action == PF_CHANGE_REMOVE) {
                        pf_unlink_rule(ruleset->rules[rs_num].active.ptr,
                            oldrule);
                        RB_REMOVE(pf_krule_global,
                            ruleset->rules[rs_num].active.tree, oldrule);
                        ruleset->rules[rs_num].active.rcount--;
                } else {
                        pf_hash_rule(newrule);
                        if (RB_INSERT(pf_krule_global,
                            ruleset->rules[rs_num].active.tree, newrule) != NULL) {
                                pf_free_rule(newrule);
                                PF_RULES_WUNLOCK();
                                PF_CONFIG_UNLOCK();
                                error = EEXIST;
                                goto fail;
                        }

                        if (oldrule == NULL)
                                TAILQ_INSERT_TAIL(
                                    ruleset->rules[rs_num].active.ptr,
                                    newrule, entries);
                        else if (pcr->action == PF_CHANGE_ADD_HEAD ||
                            pcr->action == PF_CHANGE_ADD_BEFORE)
                                TAILQ_INSERT_BEFORE(oldrule, newrule, entries);
                        else
                                TAILQ_INSERT_AFTER(
                                    ruleset->rules[rs_num].active.ptr,
                                    oldrule, newrule, entries);
                        ruleset->rules[rs_num].active.rcount++;
                }

                nr = 0;
                TAILQ_FOREACH(oldrule,
                    ruleset->rules[rs_num].active.ptr, entries)
                        oldrule->nr = nr++;

                ruleset->rules[rs_num].active.ticket++;

                pf_calc_skip_steps(ruleset->rules[rs_num].active.ptr);
                pf_remove_if_empty_kruleset(ruleset);

                PF_RULES_WUNLOCK();
                PF_CONFIG_UNLOCK();
                break;

#undef ERROUT
DIOCCHANGERULE_error:
                PF_RULES_WUNLOCK();
                PF_CONFIG_UNLOCK();
                pf_krule_free(newrule);
                pf_kkif_free(kif);
                break;
        }

	case DIOCCLRSTATESNV: {
		/* Clear (remove) states matching an nvlist-described filter. */
		error = pf_clearstates_nv((struct pfioc_nv *)addr);
		break;
	}

	case DIOCKILLSTATESNV: {
		/* Kill states matching an nvlist-described filter. */
		error = pf_killstates_nv((struct pfioc_nv *)addr);
		break;
	}

	case DIOCADDSTATE: {
		struct pfioc_state		*ps = (struct pfioc_state *)addr;
		struct pfsync_state_1301	*sp = &ps->state;

		/*
		 * Inject a single state entry supplied in the legacy
		 * v1301 wire layout.
		 */
		if (sp->timeout >= PFTM_MAX) {
			error = EINVAL;
			goto fail;
		}
		/*
		 * Import goes through pfsync's hook; if pfsync is not
		 * loaded there is no importer, so reject the request.
		 */
		if (V_pfsync_state_import_ptr != NULL) {
			PF_RULES_RLOCK();
			error = V_pfsync_state_import_ptr(
			    (union pfsync_state_union *)sp, PFSYNC_SI_IOCTL,
			    PFSYNC_MSG_VERSION_1301);
			PF_RULES_RUNLOCK();
		} else
			error = EOPNOTSUPP;
		break;
	}

	case DIOCGETSTATE: {
		struct pfioc_state	*ps = (struct pfioc_state *)addr;
		struct pf_kstate	*s;

		/* Look up one state by (id, creatorid). */
		s = pf_find_state_byid(ps->state.id, ps->state.creatorid);
		if (s == NULL) {
			error = ENOENT;
			goto fail;
		}

		/*
		 * Export in the legacy v1301 layout, then drop the state
		 * lock taken by pf_find_state_byid().
		 */
		pfsync_state_export_1301(&ps->state, s);
		PF_STATE_UNLOCK(s);
		break;
	}

	case DIOCGETSTATENV: {
		/* nvlist variant of DIOCGETSTATE. */
		error = pf_getstate((struct pfioc_nv *)addr);
		break;
	}

#ifdef COMPAT_FREEBSD14
	case DIOCGETSTATES: {
		struct pfioc_states	*ps = (struct pfioc_states *)addr;
		struct pf_kstate	*s;
		struct pfsync_state_1301	*pstore, *p;
		int			 i, nr;
		size_t			 slice_count = 16, count;
		size_t			 out;
		void			*out;

		/*
		 * Legacy (COMPAT_FREEBSD14) bulk state export in the
		 * v1301 wire layout.  States are snapshotted one hash row
		 * at a time into a kernel bounce buffer under the row
		 * lock, then copied out with no locks held.
		 */
		if (ps->ps_len <= 0) {
			/* Size probe only: report the space needed. */
			nr = uma_zone_get_cur(V_pf_state_z);
			ps->ps_len = sizeof(struct pfsync_state_1301) * nr;
			break;
		}

		out = ps->ps_states;
		pstore = mallocarray(slice_count,
		    sizeof(struct pfsync_state_1301), M_PF, M_WAITOK | M_ZERO);
		nr = 0;

		for (i = 0; i <= V_pf_hashmask; i++) {
			struct pf_idhash *ih = &V_pf_idhash[i];

DIOCGETSTATES_retry:
			p = pstore;

			/* Unlocked peek; rechecked under the row lock. */
			if (LIST_EMPTY(&ih->states))
				continue;

			PF_HASHROW_LOCK(ih);
			count = 0;
			LIST_FOREACH(s, &ih->states, entry) {
				if (s->timeout == PFTM_UNLINKED)
					continue;
				count++;
			}

			/*
			 * Row holds more states than the bounce buffer:
			 * grow it (with headroom) and rescan this row.
			 * M_WAITOK may sleep, hence the unlock first.
			 */
			if (count > slice_count) {
				PF_HASHROW_UNLOCK(ih);
				free(pstore, M_PF);
				slice_count = count * 2;
				pstore = mallocarray(slice_count,
				    sizeof(struct pfsync_state_1301), M_PF,
				    M_WAITOK | M_ZERO);
				goto DIOCGETSTATES_retry;
			}

			/* User buffer exhausted: return the partial set. */
			if ((nr+count) * sizeof(*p) > ps->ps_len) {
				PF_HASHROW_UNLOCK(ih);
				goto DIOCGETSTATES_full;
			}

			LIST_FOREACH(s, &ih->states, entry) {
				if (s->timeout == PFTM_UNLINKED)
					continue;

				pfsync_state_export_1301(p, s);
				p++;
				nr++;
			}
			PF_HASHROW_UNLOCK(ih);
			/* copyout() may sleep; row lock already dropped. */
			error = copyout(pstore, out,
			    sizeof(struct pfsync_state_1301) * count);
			if (error) {
				free(pstore, M_PF);
				goto fail;
			}
			out = ps->ps_states + nr;
		}
DIOCGETSTATES_full:
		ps->ps_len = sizeof(struct pfsync_state_1301) * nr;
		free(pstore, M_PF);

		break;
	}

	case DIOCGETSTATESV2: {
		struct pfioc_states_v2	*ps = (struct pfioc_states_v2 *)addr;
		struct pf_kstate	*s;
		struct pf_state_export	*pstore, *p;
		int i, nr;
		size_t slice_count = 16, count;
		void *out;

		/*
		 * Bulk state export in the versioned pf_state_export
		 * layout.  Same per-row snapshot/retry scheme as
		 * DIOCGETSTATES above.
		 */
		if (ps->ps_req_version > PF_STATE_VERSION) {
			error = ENOTSUP;
			goto fail;
		}

		if (ps->ps_len <= 0) {
			/* Size probe only: report the space needed. */
			nr = uma_zone_get_cur(V_pf_state_z);
			ps->ps_len = sizeof(struct pf_state_export) * nr;
			break;
		}

		out = ps->ps_states;
		pstore = mallocarray(slice_count,
		    sizeof(struct pf_state_export), M_PF, M_WAITOK | M_ZERO);
		nr = 0;

		for (i = 0; i <= V_pf_hashmask; i++) {
			struct pf_idhash *ih = &V_pf_idhash[i];

DIOCGETSTATESV2_retry:
			p = pstore;

			/* Unlocked peek; rechecked under the row lock. */
			if (LIST_EMPTY(&ih->states))
				continue;

			PF_HASHROW_LOCK(ih);
			count = 0;
			LIST_FOREACH(s, &ih->states, entry) {
				if (s->timeout == PFTM_UNLINKED)
					continue;
				count++;
			}

			/* Bounce buffer too small: grow and rescan row. */
			if (count > slice_count) {
				PF_HASHROW_UNLOCK(ih);
				free(pstore, M_PF);
				slice_count = count * 2;
				pstore = mallocarray(slice_count,
				    sizeof(struct pf_state_export), M_PF,
				    M_WAITOK | M_ZERO);
				goto DIOCGETSTATESV2_retry;
			}

			/* User buffer exhausted: return the partial set. */
			if ((nr+count) * sizeof(*p) > ps->ps_len) {
				PF_HASHROW_UNLOCK(ih);
				goto DIOCGETSTATESV2_full;
			}

			LIST_FOREACH(s, &ih->states, entry) {
				if (s->timeout == PFTM_UNLINKED)
					continue;

				pf_state_export(p, s);
				p++;
				nr++;
			}
			PF_HASHROW_UNLOCK(ih);
			/* copyout() may sleep; row lock already dropped. */
			error = copyout(pstore, out,
			    sizeof(struct pf_state_export) * count);
			if (error) {
				free(pstore, M_PF);
				goto fail;
			}
			out = ps->ps_states + nr;
		}
DIOCGETSTATESV2_full:
		ps->ps_len = nr * sizeof(struct pf_state_export);
		free(pstore, M_PF);

		break;
	}
#endif
	case DIOCGETSTATUSNV: {
		/* Export pf status/counters as an nvlist. */
		error = pf_getstatus((struct pfioc_nv *)addr);
		break;
	}

	case DIOCSETSTATUSIF: {
		struct pfioc_if *pi = (struct pfioc_if *)addr;

		/*
		 * Set — or, with an empty name, clear — the interface
		 * name recorded in V_pf_status.ifname.
		 */
		if (pi->ifname[0] == 0) {
			bzero(V_pf_status.ifname, IFNAMSIZ);
			break;
		}
		PF_RULES_WLOCK();
		/* pf_user_strcpy() validates/NUL-terminates user input. */
		error = pf_user_strcpy(V_pf_status.ifname, pi->ifname, IFNAMSIZ);
		PF_RULES_WUNLOCK();
		break;
	}

	case DIOCCLRSTATUS: {
		/* Zero the status counters. */
		pf_ioctl_clear_status();
		break;
	}

	case DIOCNATLOOK: {
		struct pfioc_natlook	*pnl = (struct pfioc_natlook *)addr;

		/* Look up the NAT translation for a connection tuple. */
		error = pf_ioctl_natlook(pnl);
		break;
	}

	case DIOCSETTIMEOUT: {
		struct pfioc_tm *pt = (struct pfioc_tm *)addr;

		/*
		 * Set one timeout value; pt->seconds doubles as input
		 * (new value) and output (previous value).
		 */
		error = pf_ioctl_set_timeout(pt->timeout, pt->seconds,
		    &pt->seconds);
		break;
	}

	case DIOCGETTIMEOUT: {
		struct pfioc_tm *pt = (struct pfioc_tm *)addr;

		/* Read the current value of timeout pt->timeout. */
		error = pf_ioctl_get_timeout(pt->timeout, &pt->seconds);
		break;
	}

	case DIOCGETLIMIT: {
		struct pfioc_limit	*pl = (struct pfioc_limit *)addr;

		/* Read the current hard limit for memory pool pl->index. */
		error = pf_ioctl_get_limit(pl->index, &pl->limit);
		break;
	}

        case DIOCSETLIMIT: {
                struct pfioc_limit      *pl = (struct pfioc_limit *)addr;
                unsigned int old_limit;

                error = pf_ioctl_set_limit(pl->index, pl->limit, &old_limit);
                pl->limit = old_limit;
                break;
        }

	case DIOCSETDEBUG: {
		u_int32_t	*level = (u_int32_t *)addr;

		/* Set the pf debug verbosity level. */
		PF_RULES_WLOCK();
		V_pf_status.debug = *level;
		PF_RULES_WUNLOCK();
		break;
	}

	case DIOCCLRRULECTRS: {
		/* obsoleted by DIOCGETRULE with action=PF_GET_CLR_CNTR */
		struct pf_kruleset	*ruleset = &pf_main_ruleset;
		struct pf_krule		*rule;

		/*
		 * Zero the evaluation, packet, and byte counters of every
		 * active filter rule in the main ruleset.
		 */
		PF_RULES_WLOCK();
		TAILQ_FOREACH(rule,
		    ruleset->rules[PF_RULESET_FILTER].active.ptr, entries) {
			pf_counter_u64_zero(&rule->evaluations);
			for (int i = 0; i < 2; i++) {
				pf_counter_u64_zero(&rule->packets[i]);
				pf_counter_u64_zero(&rule->bytes[i]);
			}
		}
		PF_RULES_WUNLOCK();
		break;
	}

	case DIOCGIFSPEEDV0:
	case DIOCGIFSPEEDV1: {
		struct pf_ifspeed_v1	*psp = (struct pf_ifspeed_v1 *)addr;
		struct pf_ifspeed_v1	ps;
		struct ifnet		*ifp;

		/*
		 * Report the named interface's link speed.  Both versions
		 * fill the 32-bit rate clamped to UINT_MAX; only V1 also
		 * fills the full 64-bit baudrate field.
		 */
		if (psp->ifname[0] == '\0') {
			error = EINVAL;
			goto fail;
		}

		/* Copy via a local to validate/NUL-terminate user input. */
		error = pf_user_strcpy(ps.ifname, psp->ifname, IFNAMSIZ);
		if (error != 0)
			goto fail;
		ifp = ifunit(ps.ifname);
		if (ifp != NULL) {
			psp->baudrate32 =
			    (u_int32_t)uqmin(ifp->if_baudrate, UINT_MAX);
			if (cmd == DIOCGIFSPEEDV1)
				psp->baudrate = ifp->if_baudrate;
		} else {
			error = EINVAL;
		}
		break;
	}

#ifdef ALTQ
	case DIOCSTARTALTQ: {
		struct pf_altq		*altq;

		/*
		 * Enable queueing on every active altq interface entry
		 * that is not marked as removed; stop at the first
		 * failure and only flag altq as running if all succeeded.
		 */
		PF_RULES_WLOCK();
		/* enable all altq interfaces on active list */
		TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) {
			if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
				error = pf_enable_altq(altq);
				if (error != 0)
					break;
			}
		}
		if (error == 0)
			V_pf_altq_running = 1;
		PF_RULES_WUNLOCK();
		DPFPRINTF(PF_DEBUG_MISC, "altq: started");
		break;
	}

	case DIOCSTOPALTQ: {
		struct pf_altq		*altq;

		/* Mirror of DIOCSTARTALTQ: disable queueing everywhere. */
		PF_RULES_WLOCK();
		/* disable all altq interfaces on active list */
		TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) {
			if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
				error = pf_disable_altq(altq);
				if (error != 0)
					break;
			}
		}
		if (error == 0)
			V_pf_altq_running = 0;
		PF_RULES_WUNLOCK();
		DPFPRINTF(PF_DEBUG_MISC, "altq: stopped");
		break;
	}

	case DIOCADDALTQV0:
	case DIOCADDALTQV1: {
		struct pfioc_altq_v1	*pa = (struct pfioc_altq_v1 *)addr;
		struct pf_altq		*altq, *a;
		struct ifnet		*ifp;

		/*
		 * Append one altq (interface or queue) definition to the
		 * inactive lists; it becomes active on commit.  The ioctl
		 * size (IOCPARM_LEN) selects which versioned user struct
		 * pf_import_kaltq() converts from.
		 */
		altq = malloc(sizeof(*altq), M_PFALTQ, M_WAITOK | M_ZERO);
		error = pf_import_kaltq(pa, altq, IOCPARM_LEN(cmd));
		if (error)
			goto fail;
		altq->local_flags = 0;

		PF_RULES_WLOCK();
		/* Must belong to the transaction the caller opened. */
		if (pa->ticket != V_ticket_altqs_inactive) {
			PF_RULES_WUNLOCK();
			free(altq, M_PFALTQ);
			error = EBUSY;
			goto fail;
		}

		/*
		 * if this is for a queue, find the discipline and
		 * copy the necessary fields
		 */
		if (altq->qname[0] != 0) {
			if ((altq->qid = pf_qname2qid(altq->qname, true)) == 0) {
				PF_RULES_WUNLOCK();
				error = EBUSY;
				free(altq, M_PFALTQ);
				goto fail;
			}
			altq->altq_disc = NULL;
			/* Share the discipline of the matching interface
			 * entry on the inactive list, if any. */
			TAILQ_FOREACH(a, V_pf_altq_ifs_inactive, entries) {
				if (strncmp(a->ifname, altq->ifname,
				    IFNAMSIZ) == 0) {
					altq->altq_disc = a->altq_disc;
					break;
				}
			}
		}

		/*
		 * A missing interface is tolerated: the entry is kept but
		 * flagged, rather than failing the whole configuration.
		 */
		if ((ifp = ifunit(altq->ifname)) == NULL)
			altq->local_flags |= PFALTQ_FLAG_IF_REMOVED;
		else
			error = altq_add(ifp, altq);

		if (error) {
			PF_RULES_WUNLOCK();
			free(altq, M_PFALTQ);
			goto fail;
		}

		/* Queues and interface entries live on separate lists. */
		if (altq->qname[0] != 0)
			TAILQ_INSERT_TAIL(V_pf_altqs_inactive, altq, entries);
		else
			TAILQ_INSERT_TAIL(V_pf_altq_ifs_inactive, altq, entries);
		/* version error check done on import above */
		pf_export_kaltq(altq, pa, IOCPARM_LEN(cmd));
		PF_RULES_WUNLOCK();
		break;
	}

	case DIOCGETALTQSV0:
	case DIOCGETALTQSV1: {
		struct pfioc_altq_v1	*pa = (struct pfioc_altq_v1 *)addr;
		struct pf_altq		*altq;

		/*
		 * Count active altq entries (interface entries plus
		 * queues) and hand out the ticket that DIOCGETALTQ
		 * iteration must present.
		 */
		PF_RULES_RLOCK();
		pa->nr = 0;
		TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries)
			pa->nr++;
		TAILQ_FOREACH(altq, V_pf_altqs_active, entries)
			pa->nr++;
		pa->ticket = V_ticket_altqs_active;
		PF_RULES_RUNLOCK();
		break;
	}

	case DIOCGETALTQV0:
	case DIOCGETALTQV1: {
		struct pfioc_altq_v1	*pa = (struct pfioc_altq_v1 *)addr;
		struct pf_altq		*altq;

		/* Fetch the pa->nr'th active altq entry. */
		PF_RULES_RLOCK();
		/* Reject if the active set changed since DIOCGETALTQS. */
		if (pa->ticket != V_ticket_altqs_active) {
			PF_RULES_RUNLOCK();
			error = EBUSY;
			goto fail;
		}
		altq = pf_altq_get_nth_active(pa->nr);
		if (altq == NULL) {
			PF_RULES_RUNLOCK();
			error = EBUSY;
			goto fail;
		}
		pf_export_kaltq(altq, pa, IOCPARM_LEN(cmd));
		PF_RULES_RUNLOCK();
		break;
	}

	case DIOCCHANGEALTQV0:
	case DIOCCHANGEALTQV1:
		/* CHANGEALTQ not supported yet! */
		error = ENODEV;
		break;

	case DIOCGETQSTATSV0:
	case DIOCGETQSTATSV1: {
		struct pfioc_qstats_v1	*pq = (struct pfioc_qstats_v1 *)addr;
		struct pf_altq		*altq;
		int			 nbytes;
		u_int32_t		 version;

		/* Fetch scheduler statistics for the pq->nr'th queue. */
		PF_RULES_RLOCK();
		if (pq->ticket != V_ticket_altqs_active) {
			PF_RULES_RUNLOCK();
			error = EBUSY;
			goto fail;
		}
		nbytes = pq->nbytes;
		altq = pf_altq_get_nth_active(pq->nr);
		if (altq == NULL) {
			PF_RULES_RUNLOCK();
			error = EBUSY;
			goto fail;
		}

		if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) != 0) {
			PF_RULES_RUNLOCK();
			error = ENXIO;
			goto fail;
		}
		/*
		 * NOTE(review): the rules lock is dropped before altq is
		 * dereferenced below — presumably because
		 * altq_getqstats() copies out to userspace and may
		 * sleep, and active altq entries are stable outside a
		 * commit; confirm against the altq lifecycle.
		 */
		PF_RULES_RUNLOCK();
		if (cmd == DIOCGETQSTATSV0)
			version = 0;  /* DIOCGETQSTATSV0 means stats struct v0 */
		else
			version = pq->version;
		error = altq_getqstats(altq, pq->buf, &nbytes, version);
		if (error == 0) {
			pq->scheduler = altq->scheduler;
			pq->nbytes = nbytes;
		}
		break;
	}
#endif /* ALTQ */

	case DIOCBEGINADDRS: {
		struct pfioc_pooladdr	*pp = (struct pfioc_pooladdr *)addr;

		/* Open a pool-address transaction; returns its ticket. */
		error = pf_ioctl_begin_addrs(&pp->ticket);
		break;
	}

	case DIOCADDADDR: {
		struct pfioc_pooladdr	*pp = (struct pfioc_pooladdr *)addr;
		struct pf_nl_pooladdr npp = {};

		/*
		 * Bridge the legacy ioctl struct to the newer helper.
		 * This relies on struct pf_nl_pooladdr starting with the
		 * same layout as struct pfioc_pooladdr, with 'which'
		 * stored past that common prefix so the assignment below
		 * survives the memcpy — presumably; the struct
		 * definitions are not visible here, confirm.
		 */
		npp.which = PF_RDR;
		memcpy(&npp, pp, sizeof(*pp));
		error = pf_ioctl_add_addr(&npp);
		break;
	}

	case DIOCGETADDRS: {
		struct pfioc_pooladdr	*pp = (struct pfioc_pooladdr *)addr;
		struct pf_nl_pooladdr npp = {};

		/* Count pool addresses; same prefix-layout bridging as
		 * DIOCADDADDR, copying the result back afterwards. */
		npp.which = PF_RDR;
		memcpy(&npp, pp, sizeof(*pp));
		error = pf_ioctl_get_addrs(&npp);
		memcpy(pp, &npp, sizeof(*pp));

		break;
	}

	case DIOCGETADDR: {
		struct pfioc_pooladdr	*pp = (struct pfioc_pooladdr *)addr;
		struct pf_nl_pooladdr npp = {};

		/* Fetch the nr'th pool address; bridging as above. */
		npp.which = PF_RDR;
		memcpy(&npp, pp, sizeof(*pp));
		error = pf_ioctl_get_addr(&npp);
		memcpy(pp, &npp, sizeof(*pp));

		break;
	}

        case DIOCCHANGEADDR: {
                struct pfioc_pooladdr   *pca = (struct pfioc_pooladdr *)addr;
                struct pf_kpool         *pool;
                struct pf_kpooladdr     *oldpa = NULL, *newpa = NULL;
                struct pf_kruleset      *ruleset;
                struct pfi_kkif         *kif = NULL;

                /* Defensively NUL-terminate the user-supplied anchor path. */
                pca->anchor[sizeof(pca->anchor) - 1] = '\0';

                /* Validate the requested change action. */
                if (pca->action < PF_CHANGE_ADD_HEAD ||
                    pca->action > PF_CHANGE_REMOVE) {
                        error = EINVAL;
                        goto fail;
                }
                if (pca->addr.addr.type != PF_ADDR_ADDRMASK &&
                    pca->addr.addr.type != PF_ADDR_DYNIFTL &&
                    pca->addr.addr.type != PF_ADDR_TABLE) {
                        error = EINVAL;
                        goto fail;
                }
                /*
                 * addr.p.dyn is a kernel-private pointer; reject requests
                 * that try to smuggle one in from userland.
                 */
                if (pca->addr.addr.p.dyn != NULL) {
                        error = EINVAL;
                        goto fail;
                }

                if (pca->action != PF_CHANGE_REMOVE) {
                        /* Fail early for address families we were built without. */
#ifndef INET
                        if (pca->af == AF_INET) {
                                error = EAFNOSUPPORT;
                                goto fail;
                        }
#endif /* INET */
#ifndef INET6
                        if (pca->af == AF_INET6) {
                                error = EAFNOSUPPORT;
                                goto fail;
                        }
#endif /* INET6 */
                        /*
                         * Sleeping allocations are done up front, before the
                         * rules write lock is taken below.
                         */
                        newpa = malloc(sizeof(*newpa), M_PFRULE, M_WAITOK);
                        /*
                         * Copy only the user-visible part; assumes struct
                         * pf_kpooladdr begins with the pf_pooladdr layout --
                         * TODO confirm against the struct definitions.
                         */
                        bcopy(&pca->addr, newpa, sizeof(struct pf_pooladdr));
                        if (newpa->ifname[0])
                                kif = pf_kkif_create(M_WAITOK);
                        newpa->kif = NULL;
                }
#define ERROUT(x)       ERROUT_IOCTL(DIOCCHANGEADDR_error, x)
                PF_RULES_WLOCK();
                ruleset = pf_find_kruleset(pca->anchor);
                if (ruleset == NULL)
                        ERROUT(EBUSY);

                pool = pf_get_kpool(pca->anchor, pca->ticket, pca->r_action,
                    pca->r_num, pca->r_last, 1, 1, PF_RDR);
                if (pool == NULL)
                        ERROUT(EBUSY);

                if (pca->action != PF_CHANGE_REMOVE) {
                        if (newpa->ifname[0]) {
                                newpa->kif = pfi_kkif_attach(kif, newpa->ifname);
                                pfi_kkif_ref(newpa->kif);
                                /* Ownership of kif passed to newpa->kif. */
                                kif = NULL;
                        }

                        switch (newpa->addr.type) {
                        case PF_ADDR_DYNIFTL:
                                error = pfi_dynaddr_setup(&newpa->addr,
                                    pca->af);
                                break;
                        case PF_ADDR_TABLE:
                                newpa->addr.p.tbl = pfr_attach_table(ruleset,
                                    newpa->addr.v.tblname);
                                if (newpa->addr.p.tbl == NULL)
                                        error = ENOMEM;
                                break;
                        }
                        if (error)
                                goto DIOCCHANGEADDR_error;
                }

                /* Locate the existing entry the change is relative to. */
                switch (pca->action) {
                case PF_CHANGE_ADD_HEAD:
                        oldpa = TAILQ_FIRST(&pool->list);
                        break;
                case PF_CHANGE_ADD_TAIL:
                        oldpa = TAILQ_LAST(&pool->list, pf_kpalist);
                        break;
                default:
                        /* ADD_BEFORE/ADD_AFTER/REMOVE: index by pca->nr. */
                        oldpa = TAILQ_FIRST(&pool->list);
                        for (int i = 0; oldpa && i < pca->nr; i++)
                                oldpa = TAILQ_NEXT(oldpa, entries);

                        if (oldpa == NULL)
                                ERROUT(EINVAL);
                }

                if (pca->action == PF_CHANGE_REMOVE) {
                        TAILQ_REMOVE(&pool->list, oldpa, entries);
                        /* Release resources held by the removed entry. */
                        switch (oldpa->addr.type) {
                        case PF_ADDR_DYNIFTL:
                                pfi_dynaddr_remove(oldpa->addr.p.dyn);
                                break;
                        case PF_ADDR_TABLE:
                                pfr_detach_table(oldpa->addr.p.tbl);
                                break;
                        }
                        if (oldpa->kif)
                                pfi_kkif_unref(oldpa->kif);
                        free(oldpa, M_PFRULE);
                } else {
                        if (oldpa == NULL)
                                TAILQ_INSERT_TAIL(&pool->list, newpa, entries);
                        else if (pca->action == PF_CHANGE_ADD_HEAD ||
                            pca->action == PF_CHANGE_ADD_BEFORE)
                                TAILQ_INSERT_BEFORE(oldpa, newpa, entries);
                        else
                                TAILQ_INSERT_AFTER(&pool->list, oldpa,
                                    newpa, entries);
                }

                /*
                 * Restart round-robin state at the head of the list.
                 * NOTE(review): if the last entry was just removed,
                 * pool->cur is NULL and the pf_addrcpy() below would
                 * dereference it -- confirm this case cannot be reached.
                 */
                pool->cur = TAILQ_FIRST(&pool->list);
                pf_addrcpy(&pool->counter, &pool->cur->addr.v.a.addr, pca->af);
                PF_RULES_WUNLOCK();
                break;

#undef ERROUT
DIOCCHANGEADDR_error:
                /* Error path: free anything not yet linked into the pool. */
                if (newpa != NULL) {
                        if (newpa->kif)
                                pfi_kkif_unref(newpa->kif);
                        free(newpa, M_PFRULE);
                }
                PF_RULES_WUNLOCK();
                pf_kkif_free(kif);
                break;
        }

        case DIOCGETRULESETS: {
                struct pfioc_ruleset    *pr = (struct pfioc_ruleset *)addr;

                /* Defensively NUL-terminate the user-supplied path. */
                pr->path[sizeof(pr->path) - 1] = '\0';

                error = pf_ioctl_get_rulesets(pr);
                break;
        }

        case DIOCGETRULESET: {
                struct pfioc_ruleset    *pr = (struct pfioc_ruleset *)addr;

                /* Defensively NUL-terminate the user-supplied path. */
                pr->path[sizeof(pr->path) - 1] = '\0';

                error = pf_ioctl_get_ruleset(pr);
                break;
        }

        case DIOCRCLRTABLES: {
                struct pfioc_table *io = (struct pfioc_table *)addr;

                /* This op carries no element array, so esize must be 0. */
                if (io->pfrio_esize != 0) {
                        error = ENODEV;
                        goto fail;
                }
                /* Reject unterminated anchor/table-name strings. */
                if (strnlen(io->pfrio_table.pfrt_anchor, MAXPATHLEN)
                    == MAXPATHLEN) {
                        error = EINVAL;
                        goto fail;
                }
                if (strnlen(io->pfrio_table.pfrt_name, PF_TABLE_NAME_SIZE)
                    == PF_TABLE_NAME_SIZE) {
                        error = EINVAL;
                        goto fail;
                }

                PF_RULES_WLOCK();
                error = pfr_clr_tables(&io->pfrio_table, &io->pfrio_ndel,
                    io->pfrio_flags | PFR_FLAG_USERIOCTL);
                PF_RULES_WUNLOCK();
                break;
        }

        case DIOCRADDTABLES: {
                struct pfioc_table *io = (struct pfioc_table *)addr;
                struct pfr_table *pfrts;
                size_t totlen;

                /* esize doubles as an ABI check on the element layout. */
                if (io->pfrio_esize != sizeof(struct pfr_table)) {
                        error = ENODEV;
                        goto fail;
                }

                /* Bound the request and guard the totlen multiplication. */
                if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount ||
                    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_table))) {
                        error = ENOMEM;
                        goto fail;
                }

                totlen = io->pfrio_size * sizeof(struct pfr_table);
                pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
                    M_PF, M_WAITOK);
                /* Copy the table list in before taking the rules lock. */
                error = copyin(io->pfrio_buffer, pfrts, totlen);
                if (error) {
                        free(pfrts, M_PF);
                        goto fail;
                }
                PF_RULES_WLOCK();
                error = pfr_add_tables(pfrts, io->pfrio_size,
                    &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL);
                PF_RULES_WUNLOCK();
                free(pfrts, M_PF);
                break;
        }

        case DIOCRDELTABLES: {
                struct pfioc_table *io = (struct pfioc_table *)addr;
                struct pfr_table *pfrts;
                size_t totlen;

                /* esize doubles as an ABI check on the element layout. */
                if (io->pfrio_esize != sizeof(struct pfr_table)) {
                        error = ENODEV;
                        goto fail;
                }

                /* Bound the request and guard the totlen multiplication. */
                if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount ||
                    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_table))) {
                        error = ENOMEM;
                        goto fail;
                }

                totlen = io->pfrio_size * sizeof(struct pfr_table);
                pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
                    M_PF, M_WAITOK);
                /* Copy the table list in before taking the rules lock. */
                error = copyin(io->pfrio_buffer, pfrts, totlen);
                if (error) {
                        free(pfrts, M_PF);
                        goto fail;
                }
                PF_RULES_WLOCK();
                error = pfr_del_tables(pfrts, io->pfrio_size,
                    &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL);
                PF_RULES_WUNLOCK();
                free(pfrts, M_PF);
                break;
        }

        case DIOCRGETTABLES: {
                struct pfioc_table *io = (struct pfioc_table *)addr;
                struct pfr_table *pfrts;
                size_t totlen;
                int n;

                /* esize doubles as an ABI check on the element layout. */
                if (io->pfrio_esize != sizeof(struct pfr_table)) {
                        error = ENODEV;
                        goto fail;
                }
                PF_RULES_RLOCK();
                n = pfr_table_count(&io->pfrio_table, io->pfrio_flags);
                if (n < 0) {
                        PF_RULES_RUNLOCK();
                        error = EINVAL;
                        goto fail;
                }
                /* Never hand back more tables than currently exist. */
                io->pfrio_size = min(io->pfrio_size, n);

                totlen = io->pfrio_size * sizeof(struct pfr_table);

                /* Allocated while holding the read lock, hence M_NOWAIT. */
                pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
                    M_PF, M_NOWAIT | M_ZERO);
                if (pfrts == NULL) {
                        error = ENOMEM;
                        PF_RULES_RUNLOCK();
                        goto fail;
                }
                error = pfr_get_tables(&io->pfrio_table, pfrts,
                    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
                PF_RULES_RUNLOCK();
                /* Copy out only after the lock has been dropped. */
                if (error == 0)
                        error = copyout(pfrts, io->pfrio_buffer, totlen);
                free(pfrts, M_PF);
                break;
        }

        case DIOCRGETTSTATS: {
                struct pfioc_table *io = (struct pfioc_table *)addr;
                struct pfr_tstats *pfrtstats;
                size_t totlen;
                int n;

                /* esize doubles as an ABI check on the element layout. */
                if (io->pfrio_esize != sizeof(struct pfr_tstats)) {
                        error = ENODEV;
                        goto fail;
                }
                /* Lock order: table-stats lock before the rules lock. */
                PF_TABLE_STATS_LOCK();
                PF_RULES_RLOCK();
                n = pfr_table_count(&io->pfrio_table, io->pfrio_flags);
                if (n < 0) {
                        PF_RULES_RUNLOCK();
                        PF_TABLE_STATS_UNLOCK();
                        error = EINVAL;
                        goto fail;
                }
                /* Never hand back more tables than currently exist. */
                io->pfrio_size = min(io->pfrio_size, n);

                totlen = io->pfrio_size * sizeof(struct pfr_tstats);
                /* Allocated while holding the locks, hence M_NOWAIT. */
                pfrtstats = mallocarray(io->pfrio_size,
                    sizeof(struct pfr_tstats), M_PF, M_NOWAIT | M_ZERO);
                if (pfrtstats == NULL) {
                        error = ENOMEM;
                        PF_RULES_RUNLOCK();
                        PF_TABLE_STATS_UNLOCK();
                        goto fail;
                }
                error = pfr_get_tstats(&io->pfrio_table, pfrtstats,
                    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
                PF_RULES_RUNLOCK();
                PF_TABLE_STATS_UNLOCK();
                /* Copy out only after the locks have been dropped. */
                if (error == 0)
                        error = copyout(pfrtstats, io->pfrio_buffer, totlen);
                free(pfrtstats, M_PF);
                break;
        }

        case DIOCRCLRTSTATS: {
                struct pfioc_table *io = (struct pfioc_table *)addr;
                struct pfr_table *pfrts;
                size_t totlen;

                /* esize doubles as an ABI check on the element layout. */
                if (io->pfrio_esize != sizeof(struct pfr_table)) {
                        error = ENODEV;
                        goto fail;
                }

                if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount ||
                    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_table))) {
                        /* We used to count tables and use the minimum required
                         * size, so we didn't fail on overly large requests.
                         * Keep doing so.  (error is presumably still 0 here,
                         * so this path reports success -- confirm.) */
                        io->pfrio_size = pf_ioctl_maxcount;
                        goto fail;
                }

                totlen = io->pfrio_size * sizeof(struct pfr_table);
                pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
                    M_PF, M_WAITOK);
                /* Copy the table list in before taking the locks. */
                error = copyin(io->pfrio_buffer, pfrts, totlen);
                if (error) {
                        free(pfrts, M_PF);
                        goto fail;
                }

                /* Lock order: table-stats lock before the rules lock. */
                PF_TABLE_STATS_LOCK();
                PF_RULES_RLOCK();
                error = pfr_clr_tstats(pfrts, io->pfrio_size,
                    &io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL);
                PF_RULES_RUNLOCK();
                PF_TABLE_STATS_UNLOCK();
                free(pfrts, M_PF);
                break;
        }

        case DIOCRSETTFLAGS: {
                struct pfioc_table *io = (struct pfioc_table *)addr;
                struct pfr_table *pfrts;
                size_t totlen;
                int n;

                /* esize doubles as an ABI check on the element layout. */
                if (io->pfrio_esize != sizeof(struct pfr_table)) {
                        error = ENODEV;
                        goto fail;
                }

                PF_RULES_RLOCK();
                n = pfr_table_count(&io->pfrio_table, io->pfrio_flags);
                if (n < 0) {
                        PF_RULES_RUNLOCK();
                        error = EINVAL;
                        goto fail;
                }

                /* Clamp the request to the current table count. */
                io->pfrio_size = min(io->pfrio_size, n);
                /*
                 * NOTE(review): the lock is dropped here and re-taken for
                 * write below, so the table set may change in between;
                 * presumably pfr_set_tflags() revalidates -- confirm.
                 */
                PF_RULES_RUNLOCK();

                totlen = io->pfrio_size * sizeof(struct pfr_table);
                pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
                    M_PF, M_WAITOK);
                error = copyin(io->pfrio_buffer, pfrts, totlen);
                if (error) {
                        free(pfrts, M_PF);
                        goto fail;
                }
                PF_RULES_WLOCK();
                error = pfr_set_tflags(pfrts, io->pfrio_size,
                    io->pfrio_setflag, io->pfrio_clrflag, &io->pfrio_nchange,
                    &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL);
                PF_RULES_WUNLOCK();
                free(pfrts, M_PF);
                break;
        }

        case DIOCRCLRADDRS: {
                struct pfioc_table *io = (struct pfioc_table *)addr;

                /* This op carries no element array, so esize must be 0. */
                if (io->pfrio_esize != 0) {
                        error = ENODEV;
                        goto fail;
                }
                PF_RULES_WLOCK();
                error = pfr_clr_addrs(&io->pfrio_table, &io->pfrio_ndel,
                    io->pfrio_flags | PFR_FLAG_USERIOCTL);
                PF_RULES_WUNLOCK();
                break;
        }

        case DIOCRADDADDRS: {
                struct pfioc_table *io = (struct pfioc_table *)addr;
                struct pfr_addr *pfras;
                size_t totlen;

                /* esize doubles as an ABI check on the element layout. */
                if (io->pfrio_esize != sizeof(struct pfr_addr)) {
                        error = ENODEV;
                        goto fail;
                }
                /* Bound the request and guard the totlen multiplication. */
                if (io->pfrio_size < 0 ||
                    io->pfrio_size > pf_ioctl_maxcount ||
                    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) {
                        error = EINVAL;
                        goto fail;
                }
                totlen = io->pfrio_size * sizeof(struct pfr_addr);
                pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
                    M_PF, M_WAITOK);
                /* Copy the address list in before taking the lock. */
                error = copyin(io->pfrio_buffer, pfras, totlen);
                if (error) {
                        free(pfras, M_PF);
                        goto fail;
                }
                PF_RULES_WLOCK();
                io->pfrio_nadd = 0;
                error = pfr_add_addrs(&io->pfrio_table, pfras,
                    io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags |
                    PFR_FLAG_USERIOCTL);
                PF_RULES_WUNLOCK();
                /* With PFR_FLAG_FEEDBACK, per-address results go back out. */
                if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
                        error = copyout(pfras, io->pfrio_buffer, totlen);
                free(pfras, M_PF);
                break;
        }

        case DIOCRDELADDRS: {
                struct pfioc_table *io = (struct pfioc_table *)addr;
                struct pfr_addr *pfras;
                size_t totlen;

                /* esize doubles as an ABI check on the element layout. */
                if (io->pfrio_esize != sizeof(struct pfr_addr)) {
                        error = ENODEV;
                        goto fail;
                }
                /* Bound the request and guard the totlen multiplication. */
                if (io->pfrio_size < 0 ||
                    io->pfrio_size > pf_ioctl_maxcount ||
                    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) {
                        error = EINVAL;
                        goto fail;
                }
                totlen = io->pfrio_size * sizeof(struct pfr_addr);
                pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
                    M_PF, M_WAITOK);
                /* Copy the address list in before taking the lock. */
                error = copyin(io->pfrio_buffer, pfras, totlen);
                if (error) {
                        free(pfras, M_PF);
                        goto fail;
                }
                PF_RULES_WLOCK();
                error = pfr_del_addrs(&io->pfrio_table, pfras,
                    io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags |
                    PFR_FLAG_USERIOCTL);
                PF_RULES_WUNLOCK();
                /* With PFR_FLAG_FEEDBACK, per-address results go back out. */
                if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
                        error = copyout(pfras, io->pfrio_buffer, totlen);
                free(pfras, M_PF);
                break;
        }

        case DIOCRSETADDRS: {
                struct pfioc_table *io = (struct pfioc_table *)addr;
                struct pfr_addr *pfras;
                size_t totlen, count;

                /* esize doubles as an ABI check on the element layout. */
                if (io->pfrio_esize != sizeof(struct pfr_addr)) {
                        error = ENODEV;
                        goto fail;
                }
                if (io->pfrio_size < 0 || io->pfrio_size2 < 0) {
                        error = EINVAL;
                        goto fail;
                }
                /*
                 * The buffer serves both as input (pfrio_size entries) and
                 * as feedback output (pfrio_size2 entries), so size it to
                 * the larger of the two.
                 */
                count = max(io->pfrio_size, io->pfrio_size2);
                if (count > pf_ioctl_maxcount ||
                    WOULD_OVERFLOW(count, sizeof(struct pfr_addr))) {
                        error = EINVAL;
                        goto fail;
                }
                totlen = count * sizeof(struct pfr_addr);
                pfras = mallocarray(count, sizeof(struct pfr_addr), M_PF,
                    M_WAITOK);
                /* Copy the address list in before taking the lock. */
                error = copyin(io->pfrio_buffer, pfras, totlen);
                if (error) {
                        free(pfras, M_PF);
                        goto fail;
                }
                PF_RULES_WLOCK();
                error = pfr_set_addrs(&io->pfrio_table, pfras,
                    io->pfrio_size, &io->pfrio_size2, &io->pfrio_nadd,
                    &io->pfrio_ndel, &io->pfrio_nchange, io->pfrio_flags |
                    PFR_FLAG_START | PFR_FLAG_DONE | PFR_FLAG_USERIOCTL, 0);
                PF_RULES_WUNLOCK();
                /* With PFR_FLAG_FEEDBACK, per-address results go back out. */
                if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
                        error = copyout(pfras, io->pfrio_buffer, totlen);
                free(pfras, M_PF);
                break;
        }

        case DIOCRGETADDRS: {
                struct pfioc_table *io = (struct pfioc_table *)addr;
                struct pfr_addr *pfras;
                size_t totlen;

                /* esize doubles as an ABI check on the element layout. */
                if (io->pfrio_esize != sizeof(struct pfr_addr)) {
                        error = ENODEV;
                        goto fail;
                }
                /* Bound the request and guard the totlen multiplication. */
                if (io->pfrio_size < 0 ||
                    io->pfrio_size > pf_ioctl_maxcount ||
                    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) {
                        error = EINVAL;
                        goto fail;
                }
                totlen = io->pfrio_size * sizeof(struct pfr_addr);
                /*
                 * M_ZERO: the copyout below always copies totlen bytes even
                 * if the callee filled fewer entries, so zeroing avoids
                 * leaking kernel memory to userland.
                 */
                pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
                    M_PF, M_WAITOK | M_ZERO);
                PF_RULES_RLOCK();
                error = pfr_get_addrs(&io->pfrio_table, pfras,
                    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
                PF_RULES_RUNLOCK();
                if (error == 0)
                        error = copyout(pfras, io->pfrio_buffer, totlen);
                free(pfras, M_PF);
                break;
        }

        case DIOCRGETASTATS: {
                struct pfioc_table *io = (struct pfioc_table *)addr;
                struct pfr_astats *pfrastats;
                size_t totlen;

                /* esize doubles as an ABI check on the element layout. */
                if (io->pfrio_esize != sizeof(struct pfr_astats)) {
                        error = ENODEV;
                        goto fail;
                }
                /* Bound the request and guard the totlen multiplication. */
                if (io->pfrio_size < 0 ||
                    io->pfrio_size > pf_ioctl_maxcount ||
                    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_astats))) {
                        error = EINVAL;
                        goto fail;
                }
                totlen = io->pfrio_size * sizeof(struct pfr_astats);
                /* M_ZERO: see DIOCRGETADDRS -- full-totlen copyout below. */
                pfrastats = mallocarray(io->pfrio_size,
                    sizeof(struct pfr_astats), M_PF, M_WAITOK | M_ZERO);
                PF_RULES_RLOCK();
                error = pfr_get_astats(&io->pfrio_table, pfrastats,
                    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
                PF_RULES_RUNLOCK();
                if (error == 0)
                        error = copyout(pfrastats, io->pfrio_buffer, totlen);
                free(pfrastats, M_PF);
                break;
        }

        case DIOCRCLRASTATS: {
                struct pfioc_table *io = (struct pfioc_table *)addr;
                struct pfr_addr *pfras;
                size_t totlen;

                /* esize doubles as an ABI check on the element layout. */
                if (io->pfrio_esize != sizeof(struct pfr_addr)) {
                        error = ENODEV;
                        goto fail;
                }
                /* Bound the request and guard the totlen multiplication. */
                if (io->pfrio_size < 0 ||
                    io->pfrio_size > pf_ioctl_maxcount ||
                    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) {
                        error = EINVAL;
                        goto fail;
                }
                totlen = io->pfrio_size * sizeof(struct pfr_addr);
                pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
                    M_PF, M_WAITOK);
                /* Copy the address list in before taking the lock. */
                error = copyin(io->pfrio_buffer, pfras, totlen);
                if (error) {
                        free(pfras, M_PF);
                        goto fail;
                }
                PF_RULES_WLOCK();
                error = pfr_clr_astats(&io->pfrio_table, pfras,
                    io->pfrio_size, &io->pfrio_nzero, io->pfrio_flags |
                    PFR_FLAG_USERIOCTL);
                PF_RULES_WUNLOCK();
                /* With PFR_FLAG_FEEDBACK, per-address results go back out. */
                if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
                        error = copyout(pfras, io->pfrio_buffer, totlen);
                free(pfras, M_PF);
                break;
        }

        case DIOCRTSTADDRS: {
                struct pfioc_table *io = (struct pfioc_table *)addr;
                struct pfr_addr *pfras;
                size_t totlen;

                /* esize doubles as an ABI check on the element layout. */
                if (io->pfrio_esize != sizeof(struct pfr_addr)) {
                        error = ENODEV;
                        goto fail;
                }
                /* Bound the request and guard the totlen multiplication. */
                if (io->pfrio_size < 0 ||
                    io->pfrio_size > pf_ioctl_maxcount ||
                    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) {
                        error = EINVAL;
                        goto fail;
                }
                totlen = io->pfrio_size * sizeof(struct pfr_addr);
                pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
                    M_PF, M_WAITOK);
                /* Copy the address list in before taking the lock. */
                error = copyin(io->pfrio_buffer, pfras, totlen);
                if (error) {
                        free(pfras, M_PF);
                        goto fail;
                }
                /* Test-only lookup: read lock suffices; nmatch reports hits. */
                PF_RULES_RLOCK();
                error = pfr_tst_addrs(&io->pfrio_table, pfras,
                    io->pfrio_size, &io->pfrio_nmatch, io->pfrio_flags |
                    PFR_FLAG_USERIOCTL);
                PF_RULES_RUNLOCK();
                /* Per-address match results are returned in the buffer. */
                if (error == 0)
                        error = copyout(pfras, io->pfrio_buffer, totlen);
                free(pfras, M_PF);
                break;
        }

        case DIOCRINADEFINE: {
                struct pfioc_table *io = (struct pfioc_table *)addr;
                struct pfr_addr *pfras;
                size_t totlen;

                /* esize doubles as an ABI check on the element layout. */
                if (io->pfrio_esize != sizeof(struct pfr_addr)) {
                        error = ENODEV;
                        goto fail;
                }
                /* Bound the request and guard the totlen multiplication. */
                if (io->pfrio_size < 0 ||
                    io->pfrio_size > pf_ioctl_maxcount ||
                    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) {
                        error = EINVAL;
                        goto fail;
                }
                totlen = io->pfrio_size * sizeof(struct pfr_addr);
                pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
                    M_PF, M_WAITOK);
                /* Copy the address list in before taking the lock. */
                error = copyin(io->pfrio_buffer, pfras, totlen);
                if (error) {
                        free(pfras, M_PF);
                        goto fail;
                }
                /*
                 * Stage table contents under pfrio_ticket -- presumably part
                 * of a DIOCXBEGIN/DIOCXCOMMIT transaction; verify against
                 * pfr_ina_define().
                 */
                PF_RULES_WLOCK();
                error = pfr_ina_define(&io->pfrio_table, pfras,
                    io->pfrio_size, &io->pfrio_nadd, &io->pfrio_naddr,
                    io->pfrio_ticket, io->pfrio_flags | PFR_FLAG_USERIOCTL);
                PF_RULES_WUNLOCK();
                free(pfras, M_PF);
                break;
        }

        case DIOCOSFPADD: {
                struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr;
                /* OS fingerprint add mutates shared state: write lock. */
                PF_RULES_WLOCK();
                error = pf_osfp_add(io);
                PF_RULES_WUNLOCK();
                break;
        }

        case DIOCOSFPGET: {
                struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr;
                /* Read-only fingerprint lookup: read lock suffices. */
                PF_RULES_RLOCK();
                error = pf_osfp_get(io);
                PF_RULES_RUNLOCK();
                break;
        }

        case DIOCXBEGIN: {
                struct pfioc_trans      *io = (struct pfioc_trans *)addr;
                struct pfioc_trans_e    *ioes, *ioe;
                size_t                   totlen;
                int                      i;

                /* esize doubles as an ABI check on the element layout. */
                if (io->esize != sizeof(*ioe)) {
                        error = ENODEV;
                        goto fail;
                }
                /* Bound the request and guard the totlen multiplication. */
                if (io->size < 0 ||
                    io->size > pf_ioctl_maxcount ||
                    WOULD_OVERFLOW(io->size, sizeof(struct pfioc_trans_e))) {
                        error = EINVAL;
                        goto fail;
                }
                totlen = sizeof(struct pfioc_trans_e) * io->size;
                ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e),
                    M_PF, M_WAITOK);
                /* Copy the transaction element list in before locking. */
                error = copyin(io->array, ioes, totlen);
                if (error) {
                        free(ioes, M_PF);
                        goto fail;
                }
                /* Open a transaction on each requested ruleset type. */
                PF_RULES_WLOCK();
                for (i = 0, ioe = ioes; i < io->size; i++, ioe++) {
                        /* Defensively NUL-terminate the anchor path. */
                        ioe->anchor[sizeof(ioe->anchor) - 1] = '\0';
                        switch (ioe->rs_num) {
                        case PF_RULESET_ETH:
                                if ((error = pf_begin_eth(&ioe->ticket, ioe->anchor))) {
                                        PF_RULES_WUNLOCK();
                                        free(ioes, M_PF);
                                        goto fail;
                                }
                                break;
#ifdef ALTQ
                        case PF_RULESET_ALTQ:
                                /* ALTQ transactions are global: no anchor. */
                                if (ioe->anchor[0]) {
                                        PF_RULES_WUNLOCK();
                                        free(ioes, M_PF);
                                        error = EINVAL;
                                        goto fail;
                                }
                                if ((error = pf_begin_altq(&ioe->ticket))) {
                                        PF_RULES_WUNLOCK();
                                        free(ioes, M_PF);
                                        goto fail;
                                }
                                break;
#endif /* ALTQ */
                        case PF_RULESET_TABLE:
                            {
                                struct pfr_table table;

                                bzero(&table, sizeof(table));
                                strlcpy(table.pfrt_anchor, ioe->anchor,
                                    sizeof(table.pfrt_anchor));
                                if ((error = pfr_ina_begin(&table,
                                    &ioe->ticket, NULL, 0))) {
                                        PF_RULES_WUNLOCK();
                                        free(ioes, M_PF);
                                        goto fail;
                                }
                                break;
                            }
                        default:
                                if ((error = pf_begin_rules(&ioe->ticket,
                                    ioe->rs_num, ioe->anchor))) {
                                        PF_RULES_WUNLOCK();
                                        free(ioes, M_PF);
                                        goto fail;
                                }
                                break;
                        }
                }
                PF_RULES_WUNLOCK();
                /* Hand the newly issued tickets back to userland. */
                error = copyout(ioes, io->array, totlen);
                free(ioes, M_PF);
                break;
        }

        case DIOCXROLLBACK: {
                struct pfioc_trans      *io = (struct pfioc_trans *)addr;
                struct pfioc_trans_e    *ioe, *ioes;
                size_t                   totlen;
                int                      i;

                /* esize doubles as an ABI check on the element layout. */
                if (io->esize != sizeof(*ioe)) {
                        error = ENODEV;
                        goto fail;
                }
                /* Bound the request and guard the totlen multiplication. */
                if (io->size < 0 ||
                    io->size > pf_ioctl_maxcount ||
                    WOULD_OVERFLOW(io->size, sizeof(struct pfioc_trans_e))) {
                        error = EINVAL;
                        goto fail;
                }
                totlen = sizeof(struct pfioc_trans_e) * io->size;
                ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e),
                    M_PF, M_WAITOK);
                /* Copy the transaction element list in before locking. */
                error = copyin(io->array, ioes, totlen);
                if (error) {
                        free(ioes, M_PF);
                        goto fail;
                }
                /* Roll back the transaction for each ruleset type/ticket. */
                PF_RULES_WLOCK();
                for (i = 0, ioe = ioes; i < io->size; i++, ioe++) {
                        /* Defensively NUL-terminate the anchor path. */
                        ioe->anchor[sizeof(ioe->anchor) - 1] = '\0';
                        switch (ioe->rs_num) {
                        case PF_RULESET_ETH:
                                if ((error = pf_rollback_eth(ioe->ticket,
                                    ioe->anchor))) {
                                        PF_RULES_WUNLOCK();
                                        free(ioes, M_PF);
                                        goto fail; /* really bad */
                                }
                                break;
#ifdef ALTQ
                        case PF_RULESET_ALTQ:
                                /* ALTQ transactions are global: no anchor. */
                                if (ioe->anchor[0]) {
                                        PF_RULES_WUNLOCK();
                                        free(ioes, M_PF);
                                        error = EINVAL;
                                        goto fail;
                                }
                                if ((error = pf_rollback_altq(ioe->ticket))) {
                                        PF_RULES_WUNLOCK();
                                        free(ioes, M_PF);
                                        goto fail; /* really bad */
                                }
                                break;
#endif /* ALTQ */
                        case PF_RULESET_TABLE:
                            {
                                struct pfr_table table;

                                bzero(&table, sizeof(table));
                                strlcpy(table.pfrt_anchor, ioe->anchor,
                                    sizeof(table.pfrt_anchor));
                                if ((error = pfr_ina_rollback(&table,
                                    ioe->ticket, NULL, 0))) {
                                        PF_RULES_WUNLOCK();
                                        free(ioes, M_PF);
                                        goto fail; /* really bad */
                                }
                                break;
                            }
                        default:
                                if ((error = pf_rollback_rules(ioe->ticket,
                                    ioe->rs_num, ioe->anchor))) {
                                        PF_RULES_WUNLOCK();
                                        free(ioes, M_PF);
                                        goto fail; /* really bad */
                                }
                                break;
                        }
                }
                PF_RULES_WUNLOCK();
                free(ioes, M_PF);
                break;
        }

        case DIOCXCOMMIT: {
                struct pfioc_trans      *io = (struct pfioc_trans *)addr;
                struct pfioc_trans_e    *ioe, *ioes;
                struct pf_kruleset      *rs;
                struct pf_keth_ruleset  *ers;
                size_t                   totlen;
                int                      i;

                if (io->esize != sizeof(*ioe)) {
                        error = ENODEV;
                        goto fail;
                }

                if (io->size < 0 ||
                    io->size > pf_ioctl_maxcount ||
                    WOULD_OVERFLOW(io->size, sizeof(struct pfioc_trans_e))) {
                        error = EINVAL;
                        goto fail;
                }

                totlen = sizeof(struct pfioc_trans_e) * io->size;
                ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e),
                    M_PF, M_WAITOK);
                error = copyin(io->array, ioes, totlen);
                if (error) {
                        free(ioes, M_PF);
                        goto fail;
                }
                PF_RULES_WLOCK();
                /* First makes sure everything will succeed. */
                for (i = 0, ioe = ioes; i < io->size; i++, ioe++) {
                        ioe->anchor[sizeof(ioe->anchor) - 1] = '\0';
                        switch (ioe->rs_num) {
                        case PF_RULESET_ETH:
                                ers = pf_find_keth_ruleset(ioe->anchor);
                                if (ers == NULL || ioe->ticket == 0 ||
                                    ioe->ticket != ers->inactive.ticket) {
                                        PF_RULES_WUNLOCK();
                                        free(ioes, M_PF);
                                        error = EINVAL;
                                        goto fail;
                                }
                                break;
#ifdef ALTQ
                        case PF_RULESET_ALTQ:
                                if (ioe->anchor[0]) {
                                        PF_RULES_WUNLOCK();
                                        free(ioes, M_PF);
                                        error = EINVAL;
                                        goto fail;
                                }
                                if (!V_altqs_inactive_open || ioe->ticket !=
                                    V_ticket_altqs_inactive) {
                                        PF_RULES_WUNLOCK();
                                        free(ioes, M_PF);
                                        error = EBUSY;
                                        goto fail;
                                }
                                break;
#endif /* ALTQ */
                        case PF_RULESET_TABLE:
                                rs = pf_find_kruleset(ioe->anchor);
                                if (rs == NULL || !rs->topen || ioe->ticket !=
                                    rs->tticket) {
                                        PF_RULES_WUNLOCK();
                                        free(ioes, M_PF);
                                        error = EBUSY;
                                        goto fail;
                                }
                                break;
                        default:
                                if (ioe->rs_num < 0 || ioe->rs_num >=
                                    PF_RULESET_MAX) {
                                        PF_RULES_WUNLOCK();
                                        free(ioes, M_PF);
                                        error = EINVAL;
                                        goto fail;
                                }
                                rs = pf_find_kruleset(ioe->anchor);
                                if (rs == NULL ||
                                    !rs->rules[ioe->rs_num].inactive.open ||
                                    rs->rules[ioe->rs_num].inactive.ticket !=
                                    ioe->ticket) {
                                        PF_RULES_WUNLOCK();
                                        free(ioes, M_PF);
                                        error = EBUSY;
                                        goto fail;
                                }
                                break;
                        }
                }
                /* Now do the commit - no errors should happen here. */
                for (i = 0, ioe = ioes; i < io->size; i++, ioe++) {
                        switch (ioe->rs_num) {
                        case PF_RULESET_ETH:
                                if ((error = pf_commit_eth(ioe->ticket, ioe->anchor))) {
                                        PF_RULES_WUNLOCK();
                                        free(ioes, M_PF);
                                        goto fail; /* really bad */
                                }
                                break;
#ifdef ALTQ
                        case PF_RULESET_ALTQ:
                                if ((error = pf_commit_altq(ioe->ticket))) {
                                        PF_RULES_WUNLOCK();
                                        free(ioes, M_PF);
                                        goto fail; /* really bad */
                                }
                                break;
#endif /* ALTQ */
                        case PF_RULESET_TABLE:
                            {
                                struct pfr_table table;

                                bzero(&table, sizeof(table));
                                (void)strlcpy(table.pfrt_anchor, ioe->anchor,
                                    sizeof(table.pfrt_anchor));
                                if ((error = pfr_ina_commit(&table,
                                    ioe->ticket, NULL, NULL, 0))) {
                                        PF_RULES_WUNLOCK();
                                        free(ioes, M_PF);
                                        goto fail; /* really bad */
                                }
                                break;
                            }
                        default:
                                if ((error = pf_commit_rules(ioe->ticket,
                                    ioe->rs_num, ioe->anchor))) {
                                        PF_RULES_WUNLOCK();
                                        free(ioes, M_PF);
                                        goto fail; /* really bad */
                                }
                                break;
                        }
                }
                PF_RULES_WUNLOCK();

		/* Only hook into Ethernet traffic if we've got rules for it. */
                if (! TAILQ_EMPTY(V_pf_keth->active.rules))
                        hook_pf_eth();
                else
                        dehook_pf_eth();

                free(ioes, M_PF);
                break;
        }

        case DIOCGETSRCNODES: {
                struct pfioc_src_nodes  *psn = (struct pfioc_src_nodes *)addr;
                struct pf_srchash       *sh;
                struct pf_ksrc_node     *n;
                struct pf_src_node      *p, *pstore;
                uint32_t                 i, nr = 0;

                for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask;
                                i++, sh++) {
                        PF_HASHROW_LOCK(sh);
                        LIST_FOREACH(n, &sh->nodes, entry)
                                nr++;
                        PF_HASHROW_UNLOCK(sh);
                }

                psn->psn_len = min(psn->psn_len,
                    sizeof(struct pf_src_node) * nr);

                if (psn->psn_len == 0) {
                        psn->psn_len = sizeof(struct pf_src_node) * nr;
                        goto fail;
                }

                nr = 0;

                p = pstore = malloc(psn->psn_len, M_PF, M_WAITOK | M_ZERO);
                for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask;
                    i++, sh++) {
                    PF_HASHROW_LOCK(sh);
                    LIST_FOREACH(n, &sh->nodes, entry) {

                        if ((nr + 1) * sizeof(*p) > (unsigned)psn->psn_len)
                                break;

                        pf_src_node_copy(n, p);

                        p++;
                        nr++;
                    }
                    PF_HASHROW_UNLOCK(sh);
                }
                error = copyout(pstore, psn->psn_src_nodes,
                    sizeof(struct pf_src_node) * nr);
                if (error) {
                        free(pstore, M_PF);
                        goto fail;
                }
                psn->psn_len = sizeof(struct pf_src_node) * nr;
                free(pstore, M_PF);
                break;
        }

        case DIOCCLRSRCNODES: {
                pf_kill_srcnodes(NULL);
                break;
        }

        case DIOCKILLSRCNODES:
                pf_kill_srcnodes((struct pfioc_src_node_kill *)addr);
                break;

#ifdef COMPAT_FREEBSD13
        case DIOCKEEPCOUNTERS_FREEBSD13:
#endif
        case DIOCKEEPCOUNTERS:
                error = pf_keepcounters((struct pfioc_nv *)addr);
                break;

        case DIOCGETSYNCOOKIES:
                error = pf_get_syncookies((struct pfioc_nv *)addr);
                break;

        case DIOCSETSYNCOOKIES:
                error = pf_set_syncookies((struct pfioc_nv *)addr);
                break;

        case DIOCSETHOSTID: {
                u_int32_t       *hostid = (u_int32_t *)addr;

                PF_RULES_WLOCK();
                if (*hostid == 0)
                        V_pf_status.hostid = arc4random();
                else
                        V_pf_status.hostid = *hostid;
                PF_RULES_WUNLOCK();
                break;
        }

        case DIOCOSFPFLUSH:
                PF_RULES_WLOCK();
                pf_osfp_flush();
                PF_RULES_WUNLOCK();
                break;

        case DIOCIGETIFACES: {
                struct pfioc_iface *io = (struct pfioc_iface *)addr;
                struct pfi_kif *ifstore;
                size_t bufsiz;

                if (io->pfiio_esize != sizeof(struct pfi_kif)) {
                        error = ENODEV;
                        goto fail;
                }

                if (io->pfiio_size < 0 ||
                    io->pfiio_size > pf_ioctl_maxcount ||
                    WOULD_OVERFLOW(io->pfiio_size, sizeof(struct pfi_kif))) {
                        error = EINVAL;
                        goto fail;
                }

                io->pfiio_name[sizeof(io->pfiio_name) - 1] = '\0';

                bufsiz = io->pfiio_size * sizeof(struct pfi_kif);
                ifstore = mallocarray(io->pfiio_size, sizeof(struct pfi_kif),
                    M_PF, M_WAITOK | M_ZERO);

                PF_RULES_RLOCK();
                pfi_get_ifaces(io->pfiio_name, ifstore, &io->pfiio_size);
                PF_RULES_RUNLOCK();
                error = copyout(ifstore, io->pfiio_buffer, bufsiz);
                free(ifstore, M_PF);
                break;
        }

        case DIOCSETIFFLAG: {
                struct pfioc_iface *io = (struct pfioc_iface *)addr;

                io->pfiio_name[sizeof(io->pfiio_name) - 1] = '\0';

                PF_RULES_WLOCK();
                error = pfi_set_flags(io->pfiio_name, io->pfiio_flags);
                PF_RULES_WUNLOCK();
                break;
        }

        case DIOCCLRIFFLAG: {
                struct pfioc_iface *io = (struct pfioc_iface *)addr;

                io->pfiio_name[sizeof(io->pfiio_name) - 1] = '\0';

                PF_RULES_WLOCK();
                error = pfi_clear_flags(io->pfiio_name, io->pfiio_flags);
                PF_RULES_WUNLOCK();
                break;
        }

        case DIOCSETREASS: {
                u_int32_t       *reass = (u_int32_t *)addr;

                V_pf_status.reass = *reass & (PF_REASS_ENABLED|PF_REASS_NODF);
                /* Removal of DF flag without reassembly enabled is not a
                 * valid combination. Disable reassembly in such case. */
                if (!(V_pf_status.reass & PF_REASS_ENABLED))
                        V_pf_status.reass = 0;
                break;
        }

        default:
                error = ENODEV;
                break;
        }
fail:
        CURVNET_RESTORE();

#undef ERROUT_IOCTL

        return (error);
}

/*
 * Export a kernel state (pf_kstate) into the pfsync wire representation
 * selected by msg_version, converting multi-byte fields to network byte
 * order.  Fields shared by all supported message versions are written
 * through the pfs_1301 view of the union; the switch below fills in the
 * layout-specific fields.  Callers in this file zero *sp before calling,
 * which the |= updates of sync_flags rely on.
 */
static void
pfsync_state_export(union pfsync_state_union *sp, struct pf_kstate *st, int msg_version)
{
	const char	*tagname;

	/* copy from state key */
	sp->pfs_1301.key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
	sp->pfs_1301.key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
	sp->pfs_1301.key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
	sp->pfs_1301.key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
	sp->pfs_1301.key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
	sp->pfs_1301.key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
	sp->pfs_1301.key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
	sp->pfs_1301.key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];

	/* copy from state */
	strlcpy(sp->pfs_1301.ifname, st->kif->pfik_name, sizeof(sp->pfs_1301.ifname));
	bcopy(&st->act.rt_addr, &sp->pfs_1301.rt_addr, sizeof(sp->pfs_1301.rt_addr));
	/* st->creation appears to be in milliseconds (note the / 1000). */
	sp->pfs_1301.creation = htonl(time_uptime - (st->creation / 1000));
	/* Compute the remaining lifetime in host order, then convert. */
	sp->pfs_1301.expire = pf_state_expires(st);
	if (sp->pfs_1301.expire <= time_uptime)
		sp->pfs_1301.expire = htonl(0);
	else
		sp->pfs_1301.expire = htonl(sp->pfs_1301.expire - time_uptime);

	/* Version-specific fields. */
	switch (msg_version) {
		case PFSYNC_MSG_VERSION_1301:
			sp->pfs_1301.state_flags = st->state_flags;
			sp->pfs_1301.direction = st->direction;
			sp->pfs_1301.log = st->act.log;
			sp->pfs_1301.timeout = st->timeout;
			sp->pfs_1301.proto = st->key[PF_SK_WIRE]->proto;
			sp->pfs_1301.af = st->key[PF_SK_WIRE]->af;
			/*
			 * XXX Why do we bother pfsyncing source node information if source
			 * nodes are not synced? Showing users that there is source tracking
			 * when there is none seems useless.
			 */
			if (st->sns[PF_SN_LIMIT] != NULL)
				sp->pfs_1301.sync_flags |= PFSYNC_FLAG_SRCNODE;
			if (st->sns[PF_SN_NAT] != NULL || st->sns[PF_SN_ROUTE])
				sp->pfs_1301.sync_flags |= PFSYNC_FLAG_NATSRCNODE;
			break;
		case PFSYNC_MSG_VERSION_1400:
			sp->pfs_1400.state_flags = htons(st->state_flags);
			sp->pfs_1400.direction = st->direction;
			sp->pfs_1400.log = st->act.log;
			sp->pfs_1400.timeout = st->timeout;
			sp->pfs_1400.proto = st->key[PF_SK_WIRE]->proto;
			sp->pfs_1400.af = st->key[PF_SK_WIRE]->af;
			sp->pfs_1400.qid = htons(st->act.qid);
			sp->pfs_1400.pqid = htons(st->act.pqid);
			sp->pfs_1400.dnpipe = htons(st->act.dnpipe);
			sp->pfs_1400.dnrpipe = htons(st->act.dnrpipe);
			sp->pfs_1400.rtableid = htonl(st->act.rtableid);
			sp->pfs_1400.min_ttl = st->act.min_ttl;
			sp->pfs_1400.set_tos = st->act.set_tos;
			sp->pfs_1400.max_mss = htons(st->act.max_mss);
			sp->pfs_1400.set_prio[0] = st->act.set_prio[0];
			sp->pfs_1400.set_prio[1] = st->act.set_prio[1];
			sp->pfs_1400.rt = st->act.rt;
			/* rt_ifname only meaningful with a route-to kif. */
			if (st->act.rt_kif)
				strlcpy(sp->pfs_1400.rt_ifname,
				    st->act.rt_kif->pfik_name,
				    sizeof(sp->pfs_1400.rt_ifname));
			/*
			 * XXX Why do we bother pfsyncing source node information if source
			 * nodes are not synced? Showing users that there is source tracking
			 * when there is none seems useless.
			 */
			if (st->sns[PF_SN_LIMIT] != NULL)
				sp->pfs_1400.sync_flags |= PFSYNC_FLAG_SRCNODE;
			if (st->sns[PF_SN_NAT] != NULL || st->sns[PF_SN_ROUTE])
				sp->pfs_1400.sync_flags |= PFSYNC_FLAG_NATSRCNODE;
			break;
		case PFSYNC_MSG_VERSION_1500:
			sp->pfs_1500.state_flags = htons(st->state_flags);
			sp->pfs_1500.direction = st->direction;
			sp->pfs_1500.log = st->act.log;
			sp->pfs_1500.timeout = st->timeout;
			/* 1500 carries wire and stack proto/af separately. */
			sp->pfs_1500.wire_proto = st->key[PF_SK_WIRE]->proto;
			sp->pfs_1500.wire_af = st->key[PF_SK_WIRE]->af;
			sp->pfs_1500.stack_proto = st->key[PF_SK_STACK]->proto;
			sp->pfs_1500.stack_af = st->key[PF_SK_STACK]->af;
			sp->pfs_1500.qid = htons(st->act.qid);
			sp->pfs_1500.pqid = htons(st->act.pqid);
			sp->pfs_1500.dnpipe = htons(st->act.dnpipe);
			sp->pfs_1500.dnrpipe = htons(st->act.dnrpipe);
			sp->pfs_1500.rtableid = htonl(st->act.rtableid);
			sp->pfs_1500.min_ttl = st->act.min_ttl;
			sp->pfs_1500.set_tos = st->act.set_tos;
			sp->pfs_1500.max_mss = htons(st->act.max_mss);
			sp->pfs_1500.set_prio[0] = st->act.set_prio[0];
			sp->pfs_1500.set_prio[1] = st->act.set_prio[1];
			sp->pfs_1500.rt = st->act.rt;
			sp->pfs_1500.rt_af = st->act.rt_af;
			if (st->act.rt_kif)
				strlcpy(sp->pfs_1500.rt_ifname,
				    st->act.rt_kif->pfik_name,
				    sizeof(sp->pfs_1500.rt_ifname));
			strlcpy(sp->pfs_1500.orig_ifname,
			    st->orig_kif->pfik_name,
			    sizeof(sp->pfs_1500.orig_ifname));
			if ((tagname = pf_tag2tagname(st->tag)) != NULL)
				strlcpy(sp->pfs_1500.tagname, tagname,
				    sizeof(sp->pfs_1500.tagname));
			break;
		default:
			panic("%s: Unsupported pfsync_msg_version %d",
			    __func__, msg_version);
	}

	/* Fields below are written via the pfs_1301 view for all versions. */
	sp->pfs_1301.id = st->id;
	sp->pfs_1301.creatorid = st->creatorid;
	pf_state_peer_hton(&st->src, &sp->pfs_1301.src);
	pf_state_peer_hton(&st->dst, &sp->pfs_1301.dst);

	/* Rule numbers; -1 when there is no associated rule. */
	if (st->rule == NULL)
		sp->pfs_1301.rule = htonl(-1);
	else
		sp->pfs_1301.rule = htonl(st->rule->nr);
	if (st->anchor == NULL)
		sp->pfs_1301.anchor = htonl(-1);
	else
		sp->pfs_1301.anchor = htonl(st->anchor->nr);
	if (st->nat_rule == NULL)
		sp->pfs_1301.nat_rule = htonl(-1);
	else
		sp->pfs_1301.nat_rule = htonl(st->nat_rule->nr);

	pf_state_counter_hton(st->packets[0], sp->pfs_1301.packets[0]);
	pf_state_counter_hton(st->packets[1], sp->pfs_1301.packets[1]);
	pf_state_counter_hton(st->bytes[0], sp->pfs_1301.bytes[0]);
	pf_state_counter_hton(st->bytes[1], sp->pfs_1301.bytes[1]);
}

/*
 * Zero the destination buffer and export 'st' in the 13.01 pfsync
 * message layout.
 */
void
pfsync_state_export_1301(struct pfsync_state_1301 *sp, struct pf_kstate *st)
{
	union pfsync_state_union *u;

	u = (union pfsync_state_union *)sp;
	bzero(u, sizeof(*sp));
	pfsync_state_export(u, st, PFSYNC_MSG_VERSION_1301);
}

/*
 * Zero the destination buffer and export 'st' in the 14.00 pfsync
 * message layout.
 */
void
pfsync_state_export_1400(struct pfsync_state_1400 *sp, struct pf_kstate *st)
{
	union pfsync_state_union *u;

	u = (union pfsync_state_union *)sp;
	bzero(u, sizeof(*sp));
	pfsync_state_export(u, st, PFSYNC_MSG_VERSION_1400);
}

/*
 * Zero the destination buffer and export 'st' in the 15.00 pfsync
 * message layout.
 */
void
pfsync_state_export_1500(struct pfsync_state_1500 *sp, struct pf_kstate *st)
{
	union pfsync_state_union *u;

	u = (union pfsync_state_union *)sp;
	bzero(u, sizeof(*sp));
	pfsync_state_export(u, st, PFSYNC_MSG_VERSION_1500);
}

/*
 * Export a kernel state into struct pf_state_export (PF_STATE_VERSION)
 * for userland consumption, converting multi-byte fields to network
 * byte order.  *sp is zeroed first, so every field not assigned below
 * reads as 0.
 */
void
pf_state_export(struct pf_state_export *sp, struct pf_kstate *st)
{
	bzero(sp, sizeof(*sp));

	sp->version = PF_STATE_VERSION;

	/* copy from state key */
	sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
	sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
	sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
	sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
	sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
	sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
	sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
	sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
	sp->proto = st->key[PF_SK_WIRE]->proto;
	sp->af = st->key[PF_SK_WIRE]->af;

	/* copy from state */
	strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
	strlcpy(sp->orig_ifname, st->orig_kif->pfik_name,
	    sizeof(sp->orig_ifname));
	memcpy(&sp->rt_addr, &st->act.rt_addr, sizeof(sp->rt_addr));
	/* st->creation appears to be in milliseconds (note the / 1000). */
	sp->creation = htonl(time_uptime - (st->creation / 1000));
	/* Compute the remaining lifetime in host order, then convert. */
	sp->expire = pf_state_expires(st);
	if (sp->expire <= time_uptime)
		sp->expire = htonl(0);
	else
		sp->expire = htonl(sp->expire - time_uptime);

	sp->direction = st->direction;
	sp->log = st->act.log;
	sp->timeout = st->timeout;
	/* 8 bits for the old libpfctl, 16 bits for the new libpfctl */
	sp->state_flags_compat = st->state_flags;
	sp->state_flags = htons(st->state_flags);
	if (st->sns[PF_SN_LIMIT] != NULL)
		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;
	if (st->sns[PF_SN_NAT] != NULL || st->sns[PF_SN_ROUTE] != NULL)
		sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE;
	sp->id = st->id;
	sp->creatorid = st->creatorid;
	pf_state_peer_hton(&st->src, &sp->src);
	pf_state_peer_hton(&st->dst, &sp->dst);

	/* Rule numbers; -1 when there is no associated rule. */
	if (st->rule == NULL)
		sp->rule = htonl(-1);
	else
		sp->rule = htonl(st->rule->nr);
	if (st->anchor == NULL)
		sp->anchor = htonl(-1);
	else
		sp->anchor = htonl(st->anchor->nr);
	if (st->nat_rule == NULL)
		sp->nat_rule = htonl(-1);
	else
		sp->nat_rule = htonl(st->nat_rule->nr);

	sp->packets[0] = st->packets[0];
	sp->packets[1] = st->packets[1];
	sp->bytes[0] = st->bytes[0];
	sp->bytes[1] = st->bytes[1];

	sp->qid = htons(st->act.qid);
	sp->pqid = htons(st->act.pqid);
	sp->dnpipe = htons(st->act.dnpipe);
	sp->dnrpipe = htons(st->act.dnrpipe);
	sp->rtableid = htonl(st->act.rtableid);
	sp->min_ttl = st->act.min_ttl;
	sp->set_tos = st->act.set_tos;
	sp->max_mss = htons(st->act.max_mss);
	sp->rt = st->act.rt;
	/* rt_ifname is only set when a route-to interface is attached. */
	if (st->act.rt_kif)
		strlcpy(sp->rt_ifname, st->act.rt_kif->pfik_name,
		    sizeof(sp->rt_ifname));
	sp->set_prio[0] = st->act.set_prio[0];
	sp->set_prio[1] = st->act.set_prio[1];

}

/*
 * Prepare a table address wrapper for export to userland: replace the
 * kernel table pointer with the address count of the active table, or
 * -1 when the table is not active.
 */
static void
pf_tbladdr_copyout(struct pf_addr_wrap *aw)
{
	struct pfr_ktable *kt;

	KASSERT(aw->type == PF_ADDR_TABLE, ("%s: type %u", __func__, aw->type));

	kt = aw->p.tbl;
	/* An inactive table defers to its root table, when one exists. */
	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
		kt = kt->pfrkt_root;

	/* Never hand the kernel pointer out; report a count instead. */
	aw->p.tbl = NULL;
	if (kt->pfrkt_flags & PFR_TFLAG_ACTIVE)
		aw->p.tblcnt = kt->pfrkt_cnt;
	else
		aw->p.tblcnt = -1;
}

/*
 * Attach a child nvlist named 'name' to nvl describing 'number'
 * counter(9) values: parallel "counters"/"names"/"ids" arrays holding
 * each counter's fetched value, human-readable name and index.
 *
 * Returns ENOMEM if the child nvlist cannot be allocated, 0 otherwise.
 * Failures of the individual append operations are sticky within the
 * nvlist (see nv(9)) and surface when the caller packs nvl.
 */
static int
pf_add_status_counters(nvlist_t *nvl, const char *name, counter_u64_t *counters,
    size_t number, char **names)
{
	nvlist_t	*nvc;

	nvc = nvlist_create(0);
	if (nvc == NULL)
		return (ENOMEM);

	/* 'number' is a size_t; use a matching index type to avoid a
	 * signed/unsigned comparison. */
	for (size_t i = 0; i < number; i++) {
		nvlist_append_number_array(nvc, "counters",
		    counter_u64_fetch(counters[i]));
		nvlist_append_string_array(nvc, "names", names[i]);
		nvlist_append_number_array(nvc, "ids", i);
	}
	nvlist_add_nvlist(nvl, name, nvc);
	nvlist_destroy(nvc);

	return (0);
}

/*
 * Gather the global pf status -- running flag, counters, interface
 * statistics and the ruleset checksum -- into an nvlist and copy the
 * packed result out to userland.
 *
 * Size negotiation: if nv->size is 0 only the required length is
 * reported back in nv->len (returns 0 without copying); if the caller's
 * buffer is smaller than the packed data, ENOSPC is returned.
 */
static int
pf_getstatus(struct pfioc_nv *nv)
{
	nvlist_t	*nvl = NULL, *nvc = NULL;
	void		*nvlpacked = NULL;
	int		 error;
	struct pf_status s;
	char *pf_reasons[PFRES_MAX+1] = PFRES_NAMES;
	char *pf_lcounter[KLCNT_MAX+1] = KLCNT_NAMES;
	char *pf_fcounter[FCNT_MAX+1] = FCNT_NAMES;
	time_t since;

	PF_RULES_RLOCK_TRACKER;

#define ERROUT(x)	ERROUT_FUNCTION(errout, x)

	PF_RULES_RLOCK();

	nvl = nvlist_create(0);
	if (nvl == NULL)
		ERROUT(ENOMEM);

	/* Convert the uptime-relative start time to wall-clock seconds. */
	since = time_second - (time_uptime - V_pf_status.since);

	nvlist_add_bool(nvl, "running", V_pf_status.running);
	nvlist_add_number(nvl, "since", since);
	nvlist_add_number(nvl, "debug", V_pf_status.debug);
	nvlist_add_number(nvl, "hostid", V_pf_status.hostid);
	nvlist_add_number(nvl, "states", V_pf_status.states);
	nvlist_add_number(nvl, "src_nodes", V_pf_status.src_nodes);
	nvlist_add_number(nvl, "reass", V_pf_status.reass);
	nvlist_add_bool(nvl, "syncookies_active",
	    V_pf_status.syncookies_active);
	nvlist_add_number(nvl, "halfopen_states", V_pf_status.states_halfopen);

	/* counters */
	error = pf_add_status_counters(nvl, "counters", V_pf_status.counters,
	    PFRES_MAX, pf_reasons);
	if (error != 0)
		ERROUT(error);

	/* lcounters */
	error = pf_add_status_counters(nvl, "lcounters", V_pf_status.lcounters,
	    KLCNT_MAX, pf_lcounter);
	if (error != 0)
		ERROUT(error);

	/*
	 * fcounters are pf_counter_u64, not counter_u64, so they cannot
	 * go through pf_add_status_counters(); build the child nvlist
	 * inline instead.
	 */
	nvc = nvlist_create(0);
	if (nvc == NULL)
		ERROUT(ENOMEM);

	for (int i = 0; i < FCNT_MAX; i++) {
		nvlist_append_number_array(nvc, "counters",
		    pf_counter_u64_fetch(&V_pf_status.fcounters[i]));
		nvlist_append_string_array(nvc, "names",
		    pf_fcounter[i]);
		nvlist_append_number_array(nvc, "ids",
		    i);
	}
	nvlist_add_nvlist(nvl, "fcounters", nvc);
	nvlist_destroy(nvc);
	nvc = NULL;

	/* scounters; labeled with the fcounter name table. */
	error = pf_add_status_counters(nvl, "scounters", V_pf_status.scounters,
	    SCNT_MAX, pf_fcounter);
	if (error != 0)
		ERROUT(error);

	nvlist_add_string(nvl, "ifname", V_pf_status.ifname);
	nvlist_add_binary(nvl, "chksum", V_pf_status.pf_chksum,
	    PF_MD5_DIGEST_LENGTH);

	pfi_update_status(V_pf_status.ifname, &s);

	/* pcounters / bcounters */
	for (int i = 0; i < 2; i++) {
		for (int j = 0; j < 2; j++) {
			for (int k = 0; k < 2; k++) {
				nvlist_append_number_array(nvl, "pcounters",
				    s.pcounters[i][j][k]);
			}
			nvlist_append_number_array(nvl, "bcounters",
			    s.bcounters[i][j]);
		}
	}

	nvlpacked = nvlist_pack(nvl, &nv->len);
	if (nvlpacked == NULL)
		ERROUT(ENOMEM);

	/* nv->size == 0 is a size probe: report needed length only. */
	if (nv->size == 0)
		ERROUT(0);
	else if (nv->size < nv->len)
		ERROUT(ENOSPC);

	/* Drop the rules lock before the (possibly faulting) copyout. */
	PF_RULES_RUNLOCK();
	error = copyout(nvlpacked, nv->data, nv->len);
	goto done;

#undef ERROUT
errout:
	PF_RULES_RUNLOCK();
done:
	free(nvlpacked, M_NVLIST);
	nvlist_destroy(nvc);
	nvlist_destroy(nvl);

	return (error);
}

/*
 * XXX - Check for version mismatch!!!
 */
/*
 * Forcibly expire and unlink every state.  PFSTATE_NOSYNC is set on
 * each state first so pfsync does not emit an individual delete
 * message per state.
 */
static void
pf_clear_all_states(void)
{
	struct epoch_tracker	 et;
	struct pf_kstate	*s;
	u_int i;

	NET_EPOCH_ENTER(et);
	for (i = 0; i <= V_pf_hashmask; i++) {
		struct pf_idhash *ih = &V_pf_idhash[i];
relock:
		PF_HASHROW_LOCK(ih);
		LIST_FOREACH(s, &ih->states, entry) {
			s->timeout = PFTM_PURGE;
			/* Don't send out individual delete messages. */
			s->state_flags |= PFSTATE_NOSYNC;
			/*
			 * pf_remove_state() releases the row lock (note the
			 * re-lock above), so the bucket is rescanned from
			 * the head after every removal.
			 */
			pf_remove_state(s);
			goto relock;
		}
		PF_HASHROW_UNLOCK(ih);
	}
	NET_EPOCH_EXIT(et);
}

/*
 * Flush every table in every ruleset; PFR_FLAG_ALLRSETS widens the
 * request beyond the main ruleset.  Returns the pfr_clr_tables() error
 * code.
 */
static int
pf_clear_tables(void)
{
	struct pfioc_table io;

	bzero(&io, sizeof(io));
	io.pfrio_flags = PFR_FLAG_ALLRSETS;

	return (pfr_clr_tables(&io.pfrio_table, &io.pfrio_ndel,
	    io.pfrio_flags));
}

/*
 * Remove source nodes matching *psnk, or all source nodes when psnk is
 * NULL.  Matching nodes are unlinked from their hash rows, collected on a
 * local list and tagged with expire = 1; a second pass over the state
 * table clears any state pointers referencing the tagged nodes before
 * they are freed.  If psnk != NULL, psnk->psnk_killed is set to the
 * number of nodes freed.
 */
static void
pf_kill_srcnodes(struct pfioc_src_node_kill *psnk)
{
	struct pf_ksrc_node_list	 kill;
	u_int				 killed;

	LIST_INIT(&kill);
	for (int i = 0; i <= V_pf_srchashmask; i++) {
		struct pf_srchash *sh = &V_pf_srchash[i];
		struct pf_ksrc_node *sn, *tmp;

		PF_HASHROW_LOCK(sh);
		LIST_FOREACH_SAFE(sn, &sh->nodes, entry, tmp)
			if (psnk == NULL ||
			    (pf_match_addr(psnk->psnk_src.neg,
			      &psnk->psnk_src.addr.v.a.addr,
			      &psnk->psnk_src.addr.v.a.mask,
			      &sn->addr, sn->af) &&
			    pf_match_addr(psnk->psnk_dst.neg,
			      &psnk->psnk_dst.addr.v.a.addr,
			      &psnk->psnk_dst.addr.v.a.mask,
			      &sn->raddr, sn->af))) {
				pf_unlink_src_node(sn);
				LIST_INSERT_HEAD(&kill, sn, entry);
				/* Marker for the state scan below. */
				sn->expire = 1;
			}
		PF_HASHROW_UNLOCK(sh);
	}

	/* Detach states from any source node about to be freed. */
	for (int i = 0; i <= V_pf_hashmask; i++) {
		struct pf_idhash *ih = &V_pf_idhash[i];
		struct pf_kstate *s;

		PF_HASHROW_LOCK(ih);
		LIST_FOREACH(s, &ih->states, entry) {
			for(pf_sn_types_t sn_type=0; sn_type<PF_SN_MAX;
			    sn_type++) {
				if (s->sns[sn_type] &&
				    s->sns[sn_type]->expire == 1) {
					s->sns[sn_type] = NULL;
				}
			}
		}
		PF_HASHROW_UNLOCK(ih);
	}

	killed = pf_free_src_nodes(&kill);

	if (psnk != NULL)
		psnk->psnk_killed = killed;
}

/*
 * DIOCKEEPCOUNTERS handler: unpack the user-supplied nvlist and set the
 * "keep_counters" status flag from its mandatory boolean member.
 * Returns 0 on success, ENOMEM/EBADMSG or a copyin() errno on failure.
 */
static int
pf_keepcounters(struct pfioc_nv *nv)
{
	nvlist_t	*nvl = NULL;
	void		*nvlpacked = NULL;
	int		 error = 0;

#define ERROUT(x)	ERROUT_FUNCTION(on_error, x)

	if (nv->len > pf_ioctl_maxcount)
		ERROUT(ENOMEM);

	nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
	error = copyin(nv->data, nvlpacked, nv->len);
	if (error)
		ERROUT(error);

	nvl = nvlist_unpack(nvlpacked, nv->len, 0);
	if (nvl == NULL)
		ERROUT(EBADMSG);

	if (! nvlist_exists_bool(nvl, "keep_counters"))
		ERROUT(EBADMSG);

	V_pf_status.keep_counters = nvlist_get_bool(nvl, "keep_counters");

	/* Undefine here like the other nv handlers do (see pf_getstate()). */
#undef ERROUT
on_error:
	nvlist_destroy(nvl);
	free(nvlpacked, M_NVLIST);
	return (error);
}

/*
 * Remove every state matching the kill criteria: optional interface name
 * and, when psk_kill_match is set, also the state's reverse-direction
 * counterpart.  Returns the number of states removed.  Must be called
 * within a net epoch section.
 */
unsigned int
pf_clear_states(const struct pf_kstate_kill *kill)
{
	struct pf_state_key_cmp	 match_key;
	struct pf_kstate	*s;
	struct pfi_kkif	*kif;
	int		 idx;
	unsigned int	 killed = 0, dir;

	NET_EPOCH_ASSERT();

	for (unsigned int i = 0; i <= V_pf_hashmask; i++) {
		struct pf_idhash *ih = &V_pf_idhash[i];

relock_DIOCCLRSTATES:
		PF_HASHROW_LOCK(ih);
		LIST_FOREACH(s, &ih->states, entry) {
			/* For floating states look at the original kif. */
			kif = s->kif == V_pfi_all ? s->orig_kif : s->kif;

			if (kill->psk_ifname[0] &&
			    strcmp(kill->psk_ifname,
			    kif->pfik_name))
				continue;

			if (kill->psk_kill_match) {
				/*
				 * Build a key for the opposite direction by
				 * swapping the address/port pairs of the
				 * state key on the other side of the state.
				 */
				bzero(&match_key, sizeof(match_key));

				if (s->direction == PF_OUT) {
					dir = PF_IN;
					idx = PF_SK_STACK;
				} else {
					dir = PF_OUT;
					idx = PF_SK_WIRE;
				}

				match_key.af = s->key[idx]->af;
				match_key.proto = s->key[idx]->proto;
				pf_addrcpy(&match_key.addr[0],
				    &s->key[idx]->addr[1], match_key.af);
				match_key.port[0] = s->key[idx]->port[1];
				pf_addrcpy(&match_key.addr[1],
				    &s->key[idx]->addr[0], match_key.af);
				match_key.port[1] = s->key[idx]->port[0];
			}

			/*
			 * Don't send out individual
			 * delete messages.
			 */
			s->state_flags |= PFSTATE_NOSYNC;
			pf_remove_state(s);
			killed++;

			if (kill->psk_kill_match)
				killed += pf_kill_matching_state(&match_key,
				    dir);

			/*
			 * pf_remove_state() drops the row lock; re-take it
			 * and rescan this bucket from the start.
			 */
			goto relock_DIOCCLRSTATES;
		}
		PF_HASHROW_UNLOCK(ih);
	}

	/* One bulk clear notification instead of per-state deletes. */
	if (V_pfsync_clear_states_ptr != NULL)
		V_pfsync_clear_states_ptr(V_pf_status.hostid, kill->psk_ifname);

	return (killed);
}

void
pf_killstates(struct pf_kstate_kill *kill, unsigned int *killed)
{
        struct pf_kstate        *s;

        NET_EPOCH_ASSERT();
        if (kill->psk_pfcmp.id) {
                if (kill->psk_pfcmp.creatorid == 0)
                        kill->psk_pfcmp.creatorid = V_pf_status.hostid;
                if ((s = pf_find_state_byid(kill->psk_pfcmp.id,
                    kill->psk_pfcmp.creatorid))) {
                        pf_remove_state(s);
                        *killed = 1;
                }
                return;
        }

        for (unsigned int i = 0; i <= V_pf_hashmask; i++)
                *killed += pf_killstates_row(kill, &V_pf_idhash[i]);
}

/*
 * DIOCKILLSTATESNV handler: decode the kill request from the user nvlist,
 * perform the kill, and return an nvlist containing the "killed" count.
 * A zero nv->size is a size probe: nv->len is updated and 0 is returned
 * without copying data out.
 */
static int
pf_killstates_nv(struct pfioc_nv *nv)
{
	struct pf_kstate_kill	 kill;
	struct epoch_tracker	 et;
	nvlist_t		*nvl = NULL;
	void			*nvlpacked = NULL;
	int			 error = 0;
	unsigned int		 killed = 0;

#define ERROUT(x)	ERROUT_FUNCTION(on_error, x)

	if (nv->len > pf_ioctl_maxcount)
		ERROUT(ENOMEM);

	nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
	error = copyin(nv->data, nvlpacked, nv->len);
	if (error)
		ERROUT(error);

	nvl = nvlist_unpack(nvlpacked, nv->len, 0);
	if (nvl == NULL)
		ERROUT(EBADMSG);

	error = pf_nvstate_kill_to_kstate_kill(nvl, &kill);
	if (error)
		ERROUT(error);

	NET_EPOCH_ENTER(et);
	pf_killstates(&kill, &killed);
	NET_EPOCH_EXIT(et);

	/* Reuse the buffers for the reply nvlist. */
	free(nvlpacked, M_NVLIST);
	nvlpacked = NULL;
	nvlist_destroy(nvl);
	nvl = nvlist_create(0);
	if (nvl == NULL)
		ERROUT(ENOMEM);

	nvlist_add_number(nvl, "killed", killed);

	nvlpacked = nvlist_pack(nvl, &nv->len);
	if (nvlpacked == NULL)
		ERROUT(ENOMEM);

	if (nv->size == 0)
		ERROUT(0);
	else if (nv->size < nv->len)
		ERROUT(ENOSPC);

	error = copyout(nvlpacked, nv->data, nv->len);

	/* Undefine here for consistency with pf_clearstates_nv(). */
#undef ERROUT
on_error:
	nvlist_destroy(nvl);
	free(nvlpacked, M_NVLIST);
	return (error);
}

/*
 * DIOCCLRSTATESNV handler: decode the kill request from the user nvlist,
 * flush matching states via pf_clear_states(), and return an nvlist with
 * the "killed" count.  A zero nv->size is a size probe: nv->len is
 * updated and 0 is returned without copying data out.
 */
static int
pf_clearstates_nv(struct pfioc_nv *nv)
{
	struct pf_kstate_kill	 kill;
	struct epoch_tracker	 et;
	nvlist_t		*nvl = NULL;
	void			*nvlpacked = NULL;
	int			 error = 0;
	unsigned int		 killed;

#define ERROUT(x)	ERROUT_FUNCTION(on_error, x)

	if (nv->len > pf_ioctl_maxcount)
		ERROUT(ENOMEM);

	nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
	error = copyin(nv->data, nvlpacked, nv->len);
	if (error)
		ERROUT(error);

	nvl = nvlist_unpack(nvlpacked, nv->len, 0);
	if (nvl == NULL)
		ERROUT(EBADMSG);

	error = pf_nvstate_kill_to_kstate_kill(nvl, &kill);
	if (error)
		ERROUT(error);

	NET_EPOCH_ENTER(et);
	killed = pf_clear_states(&kill);
	NET_EPOCH_EXIT(et);

	/* Reuse the buffers for the reply nvlist. */
	free(nvlpacked, M_NVLIST);
	nvlpacked = NULL;
	nvlist_destroy(nvl);
	nvl = nvlist_create(0);
	if (nvl == NULL)
		ERROUT(ENOMEM);

	nvlist_add_number(nvl, "killed", killed);

	nvlpacked = nvlist_pack(nvl, &nv->len);
	if (nvlpacked == NULL)
		ERROUT(ENOMEM);

	if (nv->size == 0)
		ERROUT(0);
	else if (nv->size < nv->len)
		ERROUT(ENOSPC);

	error = copyout(nvlpacked, nv->data, nv->len);

#undef ERROUT
on_error:
	nvlist_destroy(nvl);
	free(nvlpacked, M_NVLIST);
	return (error);
}

/*
 * DIOCGETSTATENV handler: look up a single state by (id, creatorid) from
 * the request nvlist and return it serialized under the "state" key.
 * pf_find_state_byid() returns the state locked; it stays locked until
 * the errout path so the state cannot be freed while being serialized.
 * A zero nv->size is a size probe: nv->len is updated and 0 returned.
 */
static int
pf_getstate(struct pfioc_nv *nv)
{
	nvlist_t		*nvl = NULL, *nvls;
	void			*nvlpacked = NULL;
	struct pf_kstate	*s = NULL;
	int			 error = 0;
	uint64_t		 id, creatorid;

#define ERROUT(x)	ERROUT_FUNCTION(errout, x)

	if (nv->len > pf_ioctl_maxcount)
		ERROUT(ENOMEM);

	nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
	error = copyin(nv->data, nvlpacked, nv->len);
	if (error)
		ERROUT(error);

	nvl = nvlist_unpack(nvlpacked, nv->len, 0);
	if (nvl == NULL)
		ERROUT(EBADMSG);

	PFNV_CHK(pf_nvuint64(nvl, "id", &id));
	PFNV_CHK(pf_nvuint64(nvl, "creatorid", &creatorid));

	s = pf_find_state_byid(id, creatorid);
	if (s == NULL)
		ERROUT(ENOENT);

	/* Reuse the buffers for the reply nvlist. */
	free(nvlpacked, M_NVLIST);
	nvlpacked = NULL;
	nvlist_destroy(nvl);
	nvl = nvlist_create(0);
	if (nvl == NULL)
		ERROUT(ENOMEM);

	nvls = pf_state_to_nvstate(s);
	if (nvls == NULL)
		ERROUT(ENOMEM);

	nvlist_add_nvlist(nvl, "state", nvls);
	nvlist_destroy(nvls);

	nvlpacked = nvlist_pack(nvl, &nv->len);
	if (nvlpacked == NULL)
		ERROUT(ENOMEM);

	if (nv->size == 0)
		ERROUT(0);
	else if (nv->size < nv->len)
		ERROUT(ENOSPC);

	error = copyout(nvlpacked, nv->data, nv->len);

#undef ERROUT
errout:
	/* Drop the state lock taken by pf_find_state_byid(). */
	if (s != NULL)
		PF_STATE_UNLOCK(s);
	free(nvlpacked, M_NVLIST);
	nvlist_destroy(nvl);
	return (error);
}

/*
 * XXX - Check for version mismatch!!!
 */

/*
 * Duplicate pfctl -Fa operation to get rid of as much as we can.
 */
/*
 * Tear down as much pf configuration as possible, mirroring pfctl -Fa:
 * unlink all anchored and main rulesets (via empty begin/commit
 * transactions), flush tables, ALTQ queues, states and source nodes.
 * Caller holds the rules write lock.  Returns 0 or the first error hit.
 */
static int
shutdown_pf(void)
{
	int error = 0;
	u_int32_t t[5];
	char nn = '\0';
	struct pf_kanchor *anchor, *tmp_anchor;
	struct pf_keth_anchor *eth_anchor, *tmp_eth_anchor;
	int rs_num;

	do {
		/* Unlink rules of all user defined anchors */
		RB_FOREACH_SAFE(anchor, pf_kanchor_global, &V_pf_anchors,
		    tmp_anchor) {
			/*
			 * Begin an (empty) transaction for every ruleset
			 * type, then commit them all, which replaces the
			 * active rules with nothing.
			 */
			for (rs_num = 0; rs_num < PF_RULESET_MAX; ++rs_num) {
				if ((error = pf_begin_rules(&t[rs_num], rs_num,
				    anchor->path)) != 0) {
					DPFPRINTF(PF_DEBUG_MISC, "%s: "
					    "anchor.path=%s rs_num=%d",
					    __func__, anchor->path, rs_num);
					goto error;	/* XXX: rollback? */
				}
			}
			for (rs_num = 0; rs_num < PF_RULESET_MAX; ++rs_num) {
				error = pf_commit_rules(t[rs_num], rs_num,
				    anchor->path);
				MPASS(error == 0);
			}
		}

		/* Unlink rules of all user defined ether anchors */
		RB_FOREACH_SAFE(eth_anchor, pf_keth_anchor_global,
		    &V_pf_keth_anchors, tmp_eth_anchor) {
			if ((error = pf_begin_eth(&t[0], eth_anchor->path))
			    != 0) {
				DPFPRINTF(PF_DEBUG_MISC, "%s: eth "
				    "anchor.path=%s", __func__,
				    eth_anchor->path);
				goto error;
			}
			error = pf_commit_eth(t[0], eth_anchor->path);
			MPASS(error == 0);
		}

		/*
		 * Empty transactions for the main ruleset ("" path):
		 * begin all five ruleset types, then commit them.
		 */
		if ((error = pf_begin_rules(&t[0], PF_RULESET_SCRUB, &nn))
		    != 0) {
			DPFPRINTF(PF_DEBUG_MISC, "%s: SCRUB", __func__);
			break;
		}
		if ((error = pf_begin_rules(&t[1], PF_RULESET_FILTER, &nn))
		    != 0) {
			DPFPRINTF(PF_DEBUG_MISC, "%s: FILTER", __func__);
			break;		/* XXX: rollback? */
		}
		if ((error = pf_begin_rules(&t[2], PF_RULESET_NAT, &nn))
		    != 0) {
			DPFPRINTF(PF_DEBUG_MISC, "%s: NAT", __func__);
			break;		/* XXX: rollback? */
		}
		if ((error = pf_begin_rules(&t[3], PF_RULESET_BINAT, &nn))
		    != 0) {
			DPFPRINTF(PF_DEBUG_MISC, "%s: BINAT", __func__);
			break;		/* XXX: rollback? */
		}
		if ((error = pf_begin_rules(&t[4], PF_RULESET_RDR, &nn))
		    != 0) {
			DPFPRINTF(PF_DEBUG_MISC, "%s: RDR", __func__);
			break;		/* XXX: rollback? */
		}

		error = pf_commit_rules(t[0], PF_RULESET_SCRUB, &nn);
		MPASS(error == 0);
		error = pf_commit_rules(t[1], PF_RULESET_FILTER, &nn);
		MPASS(error == 0);
		error = pf_commit_rules(t[2], PF_RULESET_NAT, &nn);
		MPASS(error == 0);
		error = pf_commit_rules(t[3], PF_RULESET_BINAT, &nn);
		MPASS(error == 0);
		error = pf_commit_rules(t[4], PF_RULESET_RDR, &nn);
		MPASS(error == 0);

		if ((error = pf_clear_tables()) != 0)
			break;

		/* Same empty-transaction trick for the main eth ruleset. */
		if ((error = pf_begin_eth(&t[0], &nn)) != 0) {
			DPFPRINTF(PF_DEBUG_MISC, "%s: eth", __func__);
			break;
		}
		error = pf_commit_eth(t[0], &nn);
		MPASS(error == 0);

#ifdef ALTQ
		if ((error = pf_begin_altq(&t[0])) != 0) {
			DPFPRINTF(PF_DEBUG_MISC, "%s: ALTQ", __func__);
			break;
		}
		pf_commit_altq(t[0]);
#endif

		pf_clear_all_states();

		pf_kill_srcnodes(NULL);

		/* Release the main ruleset's rule trees. */
		for (int i = 0; i < PF_RULESET_MAX; i++) {
			pf_rule_tree_free(pf_main_ruleset.rules[i].active.tree);
			pf_rule_tree_free(pf_main_ruleset.rules[i].inactive.tree);
		}

		/* status does not use malloced mem so no need to cleanup */
		/* fingerprints and interfaces have their own cleanup code */
	} while(0);

error:
	return (error);
}

/*
 * Translate a pf verdict into a pfil(9) return value.  PF_PASS maps to
 * PFIL_PASS, or PFIL_CONSUMED when pf took ownership of the mbuf (it was
 * set to NULL).  Any other verdict frees the mbuf, if still present, and
 * reports PFIL_DROPPED.
 */
static pfil_return_t
pf_check_return(int chk, struct mbuf **m)
{
	if (chk == PF_PASS)
		return (*m == NULL ? PFIL_CONSUMED : PFIL_PASS);

	if (*m != NULL) {
		m_freem(*m);
		*m = NULL;
	}
	return (PFIL_DROPPED);
}

/* pfil hook: run the inbound Ethernet ruleset and map the verdict. */
static pfil_return_t
pf_eth_check_in(struct mbuf **m, struct ifnet *ifp, int flags,
    void *ruleset __unused, struct inpcb *inp)
{
	CURVNET_ASSERT_SET();

	return (pf_check_return(pf_test_eth(PF_IN, flags, ifp, m, inp), m));
}

/* pfil hook: run the outbound Ethernet ruleset and map the verdict. */
static pfil_return_t
pf_eth_check_out(struct mbuf **m, struct ifnet *ifp, int flags,
    void *ruleset __unused, struct inpcb *inp)
{
	CURVNET_ASSERT_SET();

	return (pf_check_return(pf_test_eth(PF_OUT, flags, ifp, m, inp), m));
}

#ifdef INET
/* pfil hook: filter an inbound IPv4 packet. */
static pfil_return_t
pf_check_in(struct mbuf **m, struct ifnet *ifp, int flags,
    void *ruleset __unused, struct inpcb *inp)
{
	CURVNET_ASSERT_SET();

	return (pf_check_return(
	    pf_test(AF_INET, PF_IN, flags, ifp, m, inp, NULL), m));
}

/* pfil hook: filter an outbound IPv4 packet. */
static pfil_return_t
pf_check_out(struct mbuf **m, struct ifnet *ifp, int flags,
    void *ruleset __unused,  struct inpcb *inp)
{
	CURVNET_ASSERT_SET();

	return (pf_check_return(
	    pf_test(AF_INET, PF_OUT, flags, ifp, m, inp, NULL), m));
}
#endif

#ifdef INET6
/* pfil hook: filter an inbound IPv6 packet. */
static pfil_return_t
pf_check6_in(struct mbuf **m, struct ifnet *ifp, int flags,
    void *ruleset __unused,  struct inpcb *inp)
{
	int chk;

	CURVNET_ASSERT_SET();

	/*
	 * Loopback IPv6 traffic carries the real interface in order to
	 * support scoped addresses.  To support stateful filtering we have
	 * to substitute lo0 here, matching the IPv4 behavior.
	 */
	chk = pf_test(AF_INET6, PF_IN, flags,
	    ((*m)->m_flags & M_LOOP) ? V_loif : ifp, m, inp, NULL);

	return (pf_check_return(chk, m));
}

/* pfil hook: filter an outbound IPv6 packet. */
static pfil_return_t
pf_check6_out(struct mbuf **m, struct ifnet *ifp, int flags,
    void *ruleset __unused,  struct inpcb *inp)
{
	CURVNET_ASSERT_SET();

	return (pf_check_return(
	    pf_test(AF_INET6, PF_OUT, flags, ifp, m, inp, NULL), m));
}
#endif /* INET6 */

/* Per-VNET pfil hook handles, one in/out pair per layer pf attaches to. */
VNET_DEFINE_STATIC(pfil_hook_t, pf_eth_in_hook);
VNET_DEFINE_STATIC(pfil_hook_t, pf_eth_out_hook);
#define V_pf_eth_in_hook	VNET(pf_eth_in_hook)
#define V_pf_eth_out_hook	VNET(pf_eth_out_hook)

#ifdef INET
VNET_DEFINE_STATIC(pfil_hook_t, pf_ip4_in_hook);
VNET_DEFINE_STATIC(pfil_hook_t, pf_ip4_out_hook);
#define V_pf_ip4_in_hook	VNET(pf_ip4_in_hook)
#define V_pf_ip4_out_hook	VNET(pf_ip4_out_hook)
#endif
#ifdef INET6
VNET_DEFINE_STATIC(pfil_hook_t, pf_ip6_in_hook);
VNET_DEFINE_STATIC(pfil_hook_t, pf_ip6_out_hook);
#define V_pf_ip6_in_hook	VNET(pf_ip6_in_hook)
#define V_pf_ip6_out_hook	VNET(pf_ip6_out_hook)
#endif

/*
 * Register and link pf's Ethernet-layer pfil hooks (in and out) on the
 * link pfil head.  Idempotent: returns immediately if already hooked.
 * Note pha/pla are reused; the per-hook fields are reassigned before
 * each pfil_add_hook()/pfil_link() call.
 */
static void
hook_pf_eth(void)
{
	struct pfil_hook_args pha = {
		.pa_version = PFIL_VERSION,
		.pa_modname = "pf",
		.pa_type = PFIL_TYPE_ETHERNET,
	};
	struct pfil_link_args pla = {
		.pa_version = PFIL_VERSION,
	};
	int ret __diagused;

	if (atomic_load_bool(&V_pf_pfil_eth_hooked))
		return;

	pha.pa_mbuf_chk = pf_eth_check_in;
	pha.pa_flags = PFIL_IN;
	pha.pa_rulname = "eth-in";
	V_pf_eth_in_hook = pfil_add_hook(&pha);
	pla.pa_flags = PFIL_IN | PFIL_HEADPTR | PFIL_HOOKPTR;
	pla.pa_head = V_link_pfil_head;
	pla.pa_hook = V_pf_eth_in_hook;
	ret = pfil_link(&pla);
	MPASS(ret == 0);
	pha.pa_mbuf_chk = pf_eth_check_out;
	pha.pa_flags = PFIL_OUT;
	pha.pa_rulname = "eth-out";
	V_pf_eth_out_hook = pfil_add_hook(&pha);
	pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR;
	pla.pa_head = V_link_pfil_head;
	pla.pa_hook = V_pf_eth_out_hook;
	ret = pfil_link(&pla);
	MPASS(ret == 0);

	atomic_store_bool(&V_pf_pfil_eth_hooked, true);
}

/*
 * Register and link pf's IPv4/IPv6 pfil hooks.  Idempotent: returns
 * immediately if already hooked.  When V_pf_filter_local is set, the
 * out-hooks are additionally linked on the "local" pfil heads so that
 * locally generated traffic is filtered as well.  pha/pla are reused;
 * the per-hook fields are reassigned before each add/link call.
 */
static void
hook_pf(void)
{
	struct pfil_hook_args pha = {
		.pa_version = PFIL_VERSION,
		.pa_modname = "pf",
	};
	struct pfil_link_args pla = {
		.pa_version = PFIL_VERSION,
	};
	int ret __diagused;

	if (atomic_load_bool(&V_pf_pfil_hooked))
		return;

#ifdef INET
	pha.pa_type = PFIL_TYPE_IP4;
	pha.pa_mbuf_chk = pf_check_in;
	pha.pa_flags = PFIL_IN;
	pha.pa_rulname = "default-in";
	V_pf_ip4_in_hook = pfil_add_hook(&pha);
	pla.pa_flags = PFIL_IN | PFIL_HEADPTR | PFIL_HOOKPTR;
	pla.pa_head = V_inet_pfil_head;
	pla.pa_hook = V_pf_ip4_in_hook;
	ret = pfil_link(&pla);
	MPASS(ret == 0);
	pha.pa_mbuf_chk = pf_check_out;
	pha.pa_flags = PFIL_OUT;
	pha.pa_rulname = "default-out";
	V_pf_ip4_out_hook = pfil_add_hook(&pha);
	pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR;
	pla.pa_head = V_inet_pfil_head;
	pla.pa_hook = V_pf_ip4_out_hook;
	ret = pfil_link(&pla);
	MPASS(ret == 0);
	if (V_pf_filter_local) {
		pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR;
		pla.pa_head = V_inet_local_pfil_head;
		pla.pa_hook = V_pf_ip4_out_hook;
		ret = pfil_link(&pla);
		MPASS(ret == 0);
	}
#endif
#ifdef INET6
	pha.pa_type = PFIL_TYPE_IP6;
	pha.pa_mbuf_chk = pf_check6_in;
	pha.pa_flags = PFIL_IN;
	pha.pa_rulname = "default-in6";
	V_pf_ip6_in_hook = pfil_add_hook(&pha);
	pla.pa_flags = PFIL_IN | PFIL_HEADPTR | PFIL_HOOKPTR;
	pla.pa_head = V_inet6_pfil_head;
	pla.pa_hook = V_pf_ip6_in_hook;
	ret = pfil_link(&pla);
	MPASS(ret == 0);
	pha.pa_mbuf_chk = pf_check6_out;
	pha.pa_rulname = "default-out6";
	pha.pa_flags = PFIL_OUT;
	V_pf_ip6_out_hook = pfil_add_hook(&pha);
	pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR;
	pla.pa_head = V_inet6_pfil_head;
	pla.pa_hook = V_pf_ip6_out_hook;
	ret = pfil_link(&pla);
	MPASS(ret == 0);
	if (V_pf_filter_local) {
		pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR;
		pla.pa_head = V_inet6_local_pfil_head;
		pla.pa_hook = V_pf_ip6_out_hook;
		ret = pfil_link(&pla);
		MPASS(ret == 0);
	}
#endif

	atomic_store_bool(&V_pf_pfil_hooked, true);
}

/*
 * Unregister pf's Ethernet-layer pfil hooks.  Idempotent: a no-op when
 * not currently hooked.
 */
static void
dehook_pf_eth(void)
{
	if (atomic_load_bool(&V_pf_pfil_eth_hooked)) {
		pfil_remove_hook(V_pf_eth_in_hook);
		pfil_remove_hook(V_pf_eth_out_hook);
		atomic_store_bool(&V_pf_pfil_eth_hooked, false);
	}
}

/*
 * Unregister pf's IPv4/IPv6 pfil hooks.  Idempotent: a no-op when not
 * currently hooked.
 */
static void
dehook_pf(void)
{
	if (!atomic_load_bool(&V_pf_pfil_hooked))
		return;

#ifdef INET
	pfil_remove_hook(V_pf_ip4_in_hook);
	pfil_remove_hook(V_pf_ip4_out_hook);
#endif
#ifdef INET6
	pfil_remove_hook(V_pf_ip6_in_hook);
	pfil_remove_hook(V_pf_ip6_out_hook);
#endif
	atomic_store_bool(&V_pf_pfil_hooked, false);
}

/*
 * Per-VNET initialization: create the tag UMA zone, initialize the
 * rules/tags/ioctl locks and the tagsets, attach pf to this VNET and
 * mark it active.  The locks created here are torn down in
 * pf_unload_vnet().
 */
static void
pf_load_vnet(void)
{
	V_pf_tag_z = uma_zcreate("pf tags", sizeof(struct pf_tagname),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);

	rm_init_flags(&V_pf_rules_lock, "pf rulesets", RM_RECURSE);
	rm_init_flags(&V_pf_tags_lock, "pf tags and queues", RM_RECURSE);
	sx_init(&V_pf_ioctl_lock, "pf ioctl");

	pf_init_tagset(&V_pf_tags, &pf_rule_tag_hashsize,
	    PF_RULE_TAG_HASH_SIZE_DEFAULT);
#ifdef ALTQ
	pf_init_tagset(&V_pf_qids, &pf_queue_tag_hashsize,
	    PF_QUEUE_TAG_HASH_SIZE_DEFAULT);
#endif

	V_pf_keth = &V_pf_main_keth_anchor.ruleset;

	pfattach_vnet();
	V_pf_vnet_active = 1;
}

/*
 * One-time (global, non-VNET) module initialization: the end-thread
 * lock, mbuf tag support, the /dev/pf device node, the purge kproc and
 * the interface glue.  Returns 0 or an errno.
 */
static int
pf_load(void)
{
	int error;

	sx_init(&pf_end_lock, "pf end thread");

	pf_mtag_initialize();

	pf_dev = make_dev(&pf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, PF_NAME);
	if (pf_dev == NULL)
		return (ENOMEM);

	pf_end_threads = 0;
	/*
	 * NOTE(review): on kproc_create() failure pf_dev is not destroyed
	 * here; pf_unload() checks pf_dev != NULL and destroys it then —
	 * confirm the SYSUNINIT path still runs when MOD_LOAD fails.
	 */
	error = kproc_create(pf_purge_thread, NULL, &pf_purge_proc, 0, 0, "pf purge");
	if (error != 0)
		return (error);

	pfi_initialize();

	return (0);
}

static void
pf_unload_vnet(void)
{
        int ret __diagused;

        V_pf_vnet_active = 0;
        V_pf_status.running = 0;
        dehook_pf();
        dehook_pf_eth();

        PF_RULES_WLOCK();
        pf_syncookies_cleanup();
        shutdown_pf();
        PF_RULES_WUNLOCK();

        ret = swi_remove(V_pf_swi_cookie);
        MPASS(ret == 0);
        ret = intr_event_destroy(V_pf_swi_ie);
        MPASS(ret == 0);

        pf_unload_vnet_purge();

        pf_normalize_cleanup();
        PF_RULES_WLOCK();
        pfi_cleanup_vnet();
        PF_RULES_WUNLOCK();
        pfr_cleanup();
        pf_osfp_flush();
        pf_cleanup();
        if (IS_DEFAULT_VNET(curvnet))
                pf_mtag_cleanup();

        pf_cleanup_tagset(&V_pf_tags);
#ifdef ALTQ
        pf_cleanup_tagset(&V_pf_qids);
#endif
        uma_zdestroy(V_pf_tag_z);

#ifdef PF_WANT_32_TO_64_COUNTER
        PF_RULES_WLOCK();
        LIST_REMOVE(V_pf_kifmarker, pfik_allkiflist);

        MPASS(LIST_EMPTY(&V_pf_allkiflist));
        MPASS(V_pf_allkifcount == 0);

        LIST_REMOVE(&V_pf_default_rule, allrulelist);
        V_pf_allrulecount--;
        LIST_REMOVE(V_pf_rulemarker, allrulelist);

        MPASS(LIST_EMPTY(&V_pf_allrulelist));
        MPASS(V_pf_allrulecount == 0);

        PF_RULES_WUNLOCK();

        free(V_pf_kifmarker, PFI_MTYPE);
        free(V_pf_rulemarker, M_PFRULE);
#endif

        /* Free counters last as we updated them during shutdown. */
        pf_counter_u64_deinit(&V_pf_default_rule.evaluations);
        for (int i = 0; i < 2; i++) {
                pf_counter_u64_deinit(&V_pf_default_rule.packets[i]);
                pf_counter_u64_deinit(&V_pf_default_rule.bytes[i]);
        }
        counter_u64_free(V_pf_default_rule.states_cur);
        counter_u64_free(V_pf_default_rule.states_tot);
        for (pf_sn_types_t sn_type=0; sn_type<PF_SN_MAX; sn_type++)
                counter_u64_free(V_pf_default_rule.src_nodes[sn_type]);
        uma_zfree_pcpu(pf_timestamp_pcpu_zone, V_pf_default_rule.timestamp);

        for (int i = 0; i < PFRES_MAX; i++)
                counter_u64_free(V_pf_status.counters[i]);
        for (int i = 0; i < KLCNT_MAX; i++)
                counter_u64_free(V_pf_status.lcounters[i]);
        for (int i = 0; i < FCNT_MAX; i++)
                pf_counter_u64_deinit(&V_pf_status.fcounters[i]);
        for (int i = 0; i < SCNT_MAX; i++)
                counter_u64_free(V_pf_status.scounters[i]);
        for (int i = 0; i < NCNT_MAX; i++)
                counter_u64_free(V_pf_status.ncounters[i]);

        rm_destroy(&V_pf_rules_lock);
        sx_destroy(&V_pf_ioctl_lock);
}

/*
 * Global module teardown, run from SYSUNINIT after all per-VNET
 * uninitializers.  Signals the purge thread to exit (pf_end_threads is
 * bumped to 2 by the thread itself) and waits, then unregisters netlink,
 * destroys /dev/pf and cleans up the interface glue.
 */
static void
pf_unload(void *dummy __unused)
{

	sx_xlock(&pf_end_lock);
	pf_end_threads = 1;
	while (pf_end_threads < 2) {
		wakeup_one(pf_purge_thread);
		sx_sleep(pf_purge_proc, &pf_end_lock, 0, "pftmo", 0);
	}
	sx_xunlock(&pf_end_lock);

	pf_nl_unregister();

	if (pf_dev != NULL)
		destroy_dev(pf_dev);

	pfi_cleanup();

	sx_destroy(&pf_end_lock);
}

/* VNET constructor: bring pf up in each new network stack instance. */
static void
vnet_pf_init(void *unused __unused)
{

	pf_load_vnet();
}
VNET_SYSINIT(vnet_pf_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD,
    vnet_pf_init, NULL);

/* VNET destructor: tear pf down when a network stack instance dies. */
static void
vnet_pf_uninit(const void *unused __unused)
{

	pf_unload_vnet();
}
/* Global unload runs at SI_ORDER_SECOND, i.e. after the VNET uninits. */
SYSUNINIT(pf_unload, SI_SUB_PROTO_FIREWALL, SI_ORDER_SECOND, pf_unload, NULL);
VNET_SYSUNINIT(vnet_pf_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD,
    vnet_pf_uninit, NULL);

/*
 * Module event handler.  MOD_LOAD performs global initialization and
 * registers the netlink interface; MOD_UNLOAD is deliberately a no-op
 * here because teardown happens in SYSUNINIT(pf_unload) so that it runs
 * after the vnet_pf_uninit()s.  Unknown events yield EINVAL.
 */
static int
pf_modevent(module_t mod, int type, void *data)
{
	int error = 0;

	if (type == MOD_LOAD) {
		error = pf_load();
		pf_nl_register();
	} else if (type != MOD_UNLOAD) {
		error = EINVAL;
	}

	return (error);
}

/* Module glue: register pf with the kernel at firewall SYSINIT order. */
static moduledata_t pf_mod = {
	"pf",
	pf_modevent,
	0
};

DECLARE_MODULE(pf, pf_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_SECOND);
MODULE_DEPEND(pf, netlink, 1, 1, 1);
MODULE_DEPEND(pf, crypto, 1, 1, 1);
MODULE_VERSION(pf, PF_MODVER);