#include <sys/param.h>
#include <sys/endian.h>
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <util.h>
#include "makefs.h"
#include "zfs.h"
/*
 * One key/value pair queued for a ZAP object.  Entries are created by
 * zap_add() and released by zap_write().
 */
typedef struct zfs_zap_entry {
/* Key name; zap_add() stores its own copy made with estrdup(). */
char *name;
/* Hash of the name, computed by zap_hash() with the owning ZAP's salt. */
uint64_t hash;
/*
 * Pointer to the value buffer, viewable at each supported integer width.
 * It points either at val64 below or at a separately allocated buffer.
 */
union {
uint8_t *valp;
uint16_t *val16p;
uint32_t *val32p;
uint64_t *val64p;
};
/* Inline storage used when the value is a single 64-bit integer. */
uint64_t val64;
/* Size in bytes of one value integer; zap_add() asserts 1, 2, 4 or 8. */
size_t intsz;
/* Number of integers making up the value. */
size_t intcnt;
/* Linkage on the owning ZAP's entry list. */
STAILQ_ENTRY(zfs_zap_entry) next;
} zfs_zap_entry_t;
/*
 * In-memory state for a ZAP object under construction.  Created by
 * zap_alloc(), populated by zap_add() and consumed by zap_write().
 */
struct zfs_zap {
/* Entries in insertion order (zap_add() appends at the tail). */
STAILQ_HEAD(, zfs_zap_entry) kvps;
/* Salt fed to zap_hash() and recorded in the on-disk header. */
uint64_t hashsalt;
/* Number of entries on kvps. */
unsigned long kvpcnt;
/* Running sum of zap_entry_chunks() over all entries. */
unsigned long chunks;
/*
 * True while every entry added so far passes the micro ZAP checks made in
 * zap_add(); selects zap_micro_write() over zap_fat_write().
 */
bool micro;
/* Dnode whose block size and max block id are filled in at write time. */
dnode_phys_t *dnode;
/* Object set handed to the space allocator and dnode cursor. */
zfs_objset_t *os;
};
/*
 * Return the number of fat ZAP leaf chunks needed to store an entry: one
 * chunk for the entry itself, plus enough array chunks to hold the
 * NUL-terminated name and the value bytes.
 */
static uint16_t
zap_entry_chunks(zfs_zap_entry_t *ent)
{
	size_t namebytes, valbytes;

	namebytes = strlen(ent->name) + 1;
	valbytes = ent->intsz * ent->intcnt;

	return (1 + howmany(namebytes, ZAP_LEAF_ARRAY_BYTES) +
	    howmany(valbytes, ZAP_LEAF_ARRAY_BYTES));
}
/*
 * Compute the ZAP hash of a NUL-terminated name: a table-driven CRC-64
 * seeded with the (non-zero) salt, of which only the upper ZAP_HASHBITS
 * bits are kept.
 */
static uint64_t
zap_hash(uint64_t salt, const char *name)
{
	static uint64_t crc64_table[256];
	const uint64_t crc64_poly = 0xC96C5795D7870F42UL;
	const uint8_t *p;
	uint64_t h;

	assert(salt != 0);

	/* Build the lookup table on first use; slot 128 is never zero after. */
	if (crc64_table[128] == 0) {
		for (int idx = 0; idx < 256; idx++) {
			uint64_t v = idx;

			for (int bit = 0; bit < 8; bit++) {
				if ((v & 1) != 0)
					v = (v >> 1) ^ crc64_poly;
				else
					v >>= 1;
			}
			crc64_table[idx] = v;
		}
	}
	assert(crc64_table[128] == crc64_poly);

	h = salt;
	for (p = (const uint8_t *)name; *p != '\0'; p++)
		h = (h >> 8) ^ crc64_table[(h ^ *p) & 0xFF];

	/* Clear the low (64 - ZAP_HASHBITS) bits, keeping the high ones. */
	return (h & ~((1ULL << (64 - ZAP_HASHBITS)) - 1));
}
/*
 * Allocate an empty ZAP bound to the given object set and dnode.  The ZAP
 * starts out as a micro ZAP candidate with a salt assembled from two
 * random() results.  The returned object is released by zap_write().
 */
zfs_zap_t *
zap_alloc(zfs_objset_t *os, dnode_phys_t *dnode)
{
	zfs_zap_t *z;

	z = ecalloc(1, sizeof(*z));
	z->os = os;
	z->dnode = dnode;
	z->micro = true;
	z->chunks = 0;
	z->kvpcnt = 0;
	z->hashsalt = ((uint64_t)random() << 32) | random();
	STAILQ_INIT(&z->kvps);

	return (z);
}
/*
 * Append a key/value pair to a ZAP.
 *
 * name   - NUL-terminated key; copied.
 * intsz  - width of each value integer in bytes (1, 2, 4 or 8).
 * intcnt - number of integers in the value.
 * val    - intcnt * intsz bytes of value data; copied.
 *
 * The ZAP stops being a micro ZAP candidate once an entry is not a single
 * 64-bit integer, its name (with terminator) exceeds MZAP_NAME_LEN, or the
 * entry count exceeds MZAP_ENT_MAX.
 */
void
zap_add(zfs_zap_t *zap, const char *name, size_t intsz, size_t intcnt,
    const uint8_t *val)
{
	zfs_zap_entry_t *kvp;
	const size_t namesz = strlen(name) + 1;
	const size_t valsz = intcnt * intsz;
	bool single64;

	assert(intsz == 1 || intsz == 2 || intsz == 4 || intsz == 8);
	assert(namesz <= ZAP_MAXNAMELEN);
	assert(intcnt <= ZAP_MAXVALUELEN && valsz <= ZAP_MAXVALUELEN);

	single64 = (intsz == sizeof(uint64_t) && intcnt == 1);

	kvp = ecalloc(1, sizeof(*kvp));
	kvp->name = estrdup(name);
	kvp->hash = zap_hash(zap->hashsalt, kvp->name);
	kvp->intsz = intsz;
	kvp->intcnt = intcnt;

	/* A lone 64-bit value lives inside the entry; no extra allocation. */
	if (single64)
		kvp->val64p = &kvp->val64;
	else
		kvp->valp = ecalloc(intcnt, intsz);
	memcpy(kvp->valp, val, valsz);

	STAILQ_INSERT_TAIL(&zap->kvps, kvp, next);
	zap->chunks += zap_entry_chunks(kvp);
	zap->kvpcnt++;

	if (!zap->micro)
		return;
	if (!single64 || namesz > MZAP_NAME_LEN ||
	    zap->kvpcnt > MZAP_ENT_MAX)
		zap->micro = false;
}
/*
 * Add an entry whose value is a single 64-bit integer.
 */
void
zap_add_uint64(zfs_zap_t *zap, const char *name, uint64_t val)
{
	const uint8_t *bytes = (const uint8_t *)&val;

	zap_add(zap, name, sizeof(val), 1, bytes);
}
/*
 * Add a single 64-bit integer keyed by its own value, formatted as
 * lower-case hexadecimal without a prefix.
 */
void
zap_add_uint64_self(zfs_zap_t *zap, uint64_t val)
{
	char key[32];

	(void)snprintf(key, sizeof(key), "%jx", (uintmax_t)val);
	zap_add_uint64(zap, key, val);
}
/*
 * Add an entry whose value is a string, stored as an array of one-byte
 * integers that includes the terminating NUL.
 */
void
zap_add_string(zfs_zap_t *zap, const char *name, const char *val)
{
	size_t len;

	len = strlen(val) + 1;
	zap_add(zap, name, sizeof(uint8_t), len, (const uint8_t *)val);
}
/*
 * Report whether an entry with exactly the given name has already been
 * added to the ZAP.  This is a linear scan of the entry list.
 */
bool
zap_entry_exists(zfs_zap_t *zap, const char *name)
{
	zfs_zap_entry_t *cur;

	for (cur = STAILQ_FIRST(&zap->kvps); cur != NULL;
	    cur = STAILQ_NEXT(cur, next)) {
		if (strcmp(cur->name, name) == 0)
			return (true);
	}

	return (false);
}
/*
 * Serialize a micro ZAP into zfs->filebuf and write it out as the single
 * data block of the ZAP's dnode.
 */
static void
zap_micro_write(zfs_opt_t *zfs, zfs_zap_t *zap)
{
	zfs_zap_entry_t *kvp;
	mzap_phys_t *hdr;
	mzap_ent_phys_t *slot;
	dnode_phys_t *dn;
	off_t nbytes, off;
	uint16_t nextcd;

	_Static_assert(MZAP_ENT_MAX <= UINT16_MAX,
	    "micro ZAP collision differentiator must fit in 16 bits");

	/*
	 * The "- 1" presumably accounts for an entry slot already included in
	 * sizeof(mzap_phys_t) -- confirm against the structure definition.
	 */
	nbytes = sizeof(*hdr) + (zap->kvpcnt - 1) * sizeof(*slot);
	assert(nbytes <= (off_t)MZAP_MAX_BLKSZ);

	memset(zfs->filebuf, 0, sizeof(zfs->filebuf));
	hdr = (mzap_phys_t *)&zfs->filebuf[0];
	hdr->mz_salt = zap->hashsalt;
	hdr->mz_block_type = ZBT_MICRO;
	hdr->mz_normflags = 0;

	/* Entries are emitted in list order, each with the next counter value. */
	slot = &hdr->mz_chunk[0];
	nextcd = 0;
	STAILQ_FOREACH(kvp, &zap->kvps, next) {
		slot->mze_cd = nextcd;
		nextcd++;
		memcpy(&slot->mze_value, kvp->valp, kvp->intsz * kvp->intcnt);
		(void)strlcpy(slot->mze_name, kvp->name,
		    sizeof(slot->mze_name));
		slot++;
	}

	/* The allocator receives &nbytes and may adjust it. */
	off = objset_space_alloc(zfs, zap->os, &nbytes);

	dn = zap->dnode;
	dn->dn_datablkszsec = nbytes >> MINBLOCKSHIFT;
	dn->dn_maxblkid = 0;

	vdev_pwrite_dnode_data(zfs, dn, zfs->filebuf, nbytes, off);
}
/*
 * Copy sz bytes from val into consecutive leaf array chunks starting at
 * chunk index li.  Each chunk carries up to ZAP_LEAF_ARRAY_BYTES bytes and
 * links to the following chunk; the final chunk's link is set to 0xffff.
 * Every chunk used must currently be free, and sz must be non-zero.
 */
static void
zap_fat_write_array_chunk(zap_leaf_t *l, uint16_t li, size_t sz,
    const uint8_t *val)
{
	uint16_t remaining;

	assert(sz <= ZAP_MAXVALUELEN);
	assert(sz > 0);

	remaining = sz;
	while (remaining > 0) {
		struct zap_leaf_array *la;
		uint16_t len;

		len = MIN(remaining, ZAP_LEAF_ARRAY_BYTES);

		la = &ZAP_LEAF_CHUNK(l, li).l_array;
		assert(la->la_type == ZAP_CHUNK_FREE);
		la->la_type = ZAP_CHUNK_ARRAY;
		memcpy(la->la_array, val, len);

		remaining -= len;
		val += len;
		li++;

		/* Link to the next chunk, or terminate the chain. */
		la->la_next = (remaining > 0) ? li : 0xffff;
	}
}
/*
 * Find the shortest hash prefix length such that, when entries are
 * distributed over 2^prefixlen leaves by the leading bits of their hash,
 * no leaf needs more chunks than it has.  Returns 0 when a single leaf is
 * enough.  Asserts if no prefix shorter than the leaf block shift works.
 */
static unsigned int
zap_fat_write_prefixlen(zfs_zap_t *zap, zap_leaf_t *l)
{
	zfs_zap_entry_t *ent;
	unsigned int prefixlen;

	/* Everything fits in one leaf block. */
	if (zap->chunks <= ZAP_LEAF_NUMCHUNKS(l))
		return (0);

	for (prefixlen = 1; prefixlen < (unsigned int)l->l_bs; prefixlen++) {
		uint32_t *used;
		bool fits;

		/* Per-leaf tally of chunks consumed at this prefix length. */
		used = ecalloc(1u << prefixlen, sizeof(*used));
		fits = true;
		STAILQ_FOREACH(ent, &zap->kvps, next) {
			uint64_t leafidx;
			uint16_t need;

			leafidx = ZAP_HASH_IDX(ent->hash, prefixlen);
			need = zap_entry_chunks(ent);
			if (ZAP_LEAF_NUMCHUNKS(l) - used[leafidx] < need) {
				/* This leaf would overflow; try a longer prefix. */
				fits = false;
				break;
			}
			used[leafidx] += need;
		}
		free(used);

		if (fits)
			break;
	}

	/* Running out of prefix lengths is not handled. */
	assert(prefixlen < (unsigned int)l->l_bs);

	return (prefixlen);
}
/*
 * Initialize an empty fat ZAP leaf block: fill in the header, mark every
 * hash bucket empty (0xffff) and chain all chunks together as free.
 */
static void
zap_fat_write_leaf_init(zap_leaf_t *l, uint64_t prefix, int prefixlen)
{
	zap_leaf_phys_t *lp = l->l_phys;

	lp->l_hdr.lh_block_type = ZBT_LEAF;
	lp->l_hdr.lh_magic = ZAP_LEAF_MAGIC;
	lp->l_hdr.lh_prefix = prefix;
	lp->l_hdr.lh_prefix_len = prefixlen;
	lp->l_hdr.lh_nfree = ZAP_LEAF_NUMCHUNKS(l);

	/* 0xffff is the end-of-chain marker, so it cannot be a chunk count. */
	assert(lp->l_hdr.lh_nfree < 0xffff);

	/* Setting every byte to 0xff makes each hash bucket read 0xffff. */
	memset(lp->l_hash, 0xff,
	    ZAP_LEAF_HASH_NUMENTRIES(l) * sizeof(*lp->l_hash));

	/* Each free chunk points at its successor; the last ends the chain. */
	for (uint16_t idx = 0; idx < ZAP_LEAF_NUMCHUNKS(l); idx++) {
		struct zap_leaf_free *lf = &ZAP_LEAF_CHUNK(l, idx).l_free;

		lf->lf_type = ZAP_CHUNK_FREE;
		lf->lf_next = (idx + 1 == ZAP_LEAF_NUMCHUNKS(l)) ?
		    0xffff : idx + 1;
	}
}
/*
 * Serialize a fat ZAP and write it out through the ZAP's dnode.
 *
 * The object consists of one header block, built in zfs->filebuf, followed
 * by 2^prefixlen leaf blocks.  The second half of the header block holds
 * the embedded pointer table that maps the leading bits of a hash to a
 * leaf block number.  All blocks are 1 << MAXBLOCKSHIFT bytes.
 *
 * Entry values of width 2, 4 and 8 are byte-swapped to big-endian in place,
 * so the in-memory entries must not be reused as host-order data afterwards.
 */
static void
zap_fat_write(zfs_opt_t *zfs, zfs_zap_t *zap)
{
	struct dnode_cursor *c;
	zap_leaf_t l;
	zap_phys_t *zaphdr;
	struct zap_table_phys *zt;
	zfs_zap_entry_t *ent;
	dnode_phys_t *dnode;
	uint8_t *leafblks;
	uint64_t lblkcnt, *ptrhasht;
	off_t loc, blksz;
	size_t blkshift;
	unsigned int prefixlen;
	int ptrcnt;

	blkshift = MAXBLOCKSHIFT;
	blksz = (off_t)1 << blkshift;

	/* The pointer table occupies half of the header block. */
	ptrcnt = (blksz / 2) / sizeof(uint64_t);

	memset(zfs->filebuf, 0, sizeof(zfs->filebuf));
	zaphdr = (zap_phys_t *)&zfs->filebuf[0];
	zaphdr->zap_block_type = ZBT_HEADER;
	zaphdr->zap_magic = ZAP_MAGIC;
	zaphdr->zap_num_entries = zap->kvpcnt;
	zaphdr->zap_salt = zap->hashsalt;

	l.l_bs = blkshift;
	l.l_phys = NULL;

	zt = &zaphdr->zap_ptrtbl;
	zt->zt_blk = 0;
	zt->zt_numblks = 0;
	zt->zt_shift = flsll(ptrcnt) - 1;
	zt->zt_nextblk = 0;
	zt->zt_blks_copied = 0;

	/*
	 * Work out how many leaf blocks are needed, then initialize them and
	 * record the count in the header.
	 */
	prefixlen = zap_fat_write_prefixlen(zap, &l);

	/*
	 * Each pointer table slot is selected by the top zt_shift hash bits
	 * and must name exactly one leaf, so the leaf prefix cannot be longer
	 * than zt_shift.  Check explicitly: otherwise the unsigned subtraction
	 * "zt_shift - prefixlen" below wraps and the shift is undefined.
	 */
	assert(prefixlen <= zt->zt_shift);

	lblkcnt = (uint64_t)1 << prefixlen;
	leafblks = ecalloc(lblkcnt, blksz);
	for (unsigned int li = 0; li < lblkcnt; li++) {
		l.l_phys = (zap_leaf_phys_t *)(leafblks + li * blksz);
		zap_fat_write_leaf_init(&l, li, prefixlen);
	}
	zaphdr->zap_num_leafs = lblkcnt;
	zaphdr->zap_freeblk = lblkcnt + 1;

	/*
	 * For each entry, pick its leaf from the leading hash bits, take a run
	 * of chunks from that leaf and fill them in.
	 */
	ptrhasht = (uint64_t *)(&zfs->filebuf[0] + blksz / 2);
	STAILQ_FOREACH(ent, &zap->kvps, next) {
		struct zap_leaf_entry *le;
		uint16_t *lptr;
		uint64_t hi, li;
		uint16_t namelen, nchunks, nnamechunks, nvalchunks;

		hi = ZAP_HASH_IDX(ent->hash, zt->zt_shift);
		li = ZAP_HASH_IDX(ent->hash, prefixlen);
		/* Leaf block numbers are 1-based; block 0 is the header. */
		assert(ptrhasht[hi] == 0 || ptrhasht[hi] == li + 1);
		ptrhasht[hi] = li + 1;
		l.l_phys = (zap_leaf_phys_t *)(leafblks + li * blksz);

		namelen = strlen(ent->name) + 1;

		/*
		 * Chunks needed for this entry.  The value is packed as a byte
		 * stream by zap_fat_write_array_chunk(), so count it in bytes.
		 * This must agree with zap_entry_chunks(), which was used to
		 * size the leaves.
		 */
		nnamechunks = howmany(namelen, ZAP_LEAF_ARRAY_BYTES);
		nvalchunks = howmany(ent->intsz * ent->intcnt,
		    ZAP_LEAF_ARRAY_BYTES);
		nchunks = 1 + nnamechunks + nvalchunks;
		assert(nchunks == zap_entry_chunks(ent));

		/*
		 * Take a contiguous run of chunks from the head of the free
		 * list and append the entry to its hash chain, bumping the
		 * le_cd field of every entry already on that chain.
		 */
		assert(l.l_phys->l_hdr.lh_nfree >= nchunks);
		l.l_phys->l_hdr.lh_nfree -= nchunks;
		l.l_phys->l_hdr.lh_nentries++;
		lptr = ZAP_LEAF_HASH_ENTPTR(&l, ent->hash);
		while (*lptr != 0xffff) {
			assert(*lptr < ZAP_LEAF_NUMCHUNKS(&l));
			le = ZAP_LEAF_ENTRY(&l, *lptr);
			assert(le->le_type == ZAP_CHUNK_ENTRY);
			le->le_cd++;
			lptr = &le->le_next;
		}
		*lptr = l.l_phys->l_hdr.lh_freelist;
		l.l_phys->l_hdr.lh_freelist += nchunks;
		assert(l.l_phys->l_hdr.lh_freelist <=
		    ZAP_LEAF_NUMCHUNKS(&l));
		if (l.l_phys->l_hdr.lh_freelist ==
		    ZAP_LEAF_NUMCHUNKS(&l))
			l.l_phys->l_hdr.lh_freelist = 0xffff;

		/* Convert multi-byte integers to big-endian, in place. */
		switch (ent->intsz) {
		case 1:
			break;
		case 2:
			for (uint16_t *v = ent->val16p;
			    v - ent->val16p < (ptrdiff_t)ent->intcnt;
			    v++)
				*v = htobe16(*v);
			break;
		case 4:
			for (uint32_t *v = ent->val32p;
			    v - ent->val32p < (ptrdiff_t)ent->intcnt;
			    v++)
				*v = htobe32(*v);
			break;
		case 8:
			for (uint64_t *v = ent->val64p;
			    v - ent->val64p < (ptrdiff_t)ent->intcnt;
			    v++)
				*v = htobe64(*v);
			break;
		default:
			assert(0);
		}

		/*
		 * Fill in the entry chunk, then the name and value array
		 * chunks that immediately follow it.
		 */
		le = ZAP_LEAF_ENTRY(&l, *lptr);
		assert(le->le_type == ZAP_CHUNK_FREE);
		le->le_type = ZAP_CHUNK_ENTRY;
		le->le_next = 0xffff;
		le->le_name_chunk = *lptr + 1;
		le->le_name_numints = namelen;
		le->le_value_chunk = *lptr + 1 + nnamechunks;
		le->le_value_intlen = ent->intsz;
		le->le_value_numints = ent->intcnt;
		le->le_hash = ent->hash;
		zap_fat_write_array_chunk(&l, *lptr + 1, namelen,
		    (uint8_t *)ent->name);
		zap_fat_write_array_chunk(&l, *lptr + 1 + nnamechunks,
		    ent->intcnt * ent->intsz, ent->valp);
	}

	/*
	 * Point every pointer table slot that no entry claimed at the leaf
	 * covering its hash prefix.
	 */
	for (int i = 0; i < ptrcnt; i++)
		if (ptrhasht[i] == 0)
			ptrhasht[i] = (i >> (zt->zt_shift - prefixlen)) + 1;

	/*
	 * Write the header block followed by each leaf block.
	 */
	dnode = zap->dnode;
	dnode->dn_datablkszsec = blksz >> MINBLOCKSHIFT;
	dnode->dn_maxblkid = lblkcnt + 1;

	c = dnode_cursor_init(zfs, zap->os, zap->dnode,
	    (lblkcnt + 1) * blksz, blksz);

	loc = objset_space_alloc(zfs, zap->os, &blksz);
	vdev_pwrite_dnode_indir(zfs, dnode, 0, 1, zfs->filebuf, blksz, loc,
	    dnode_cursor_next(zfs, c, 0));

	for (uint64_t i = 0; i < lblkcnt; i++) {
		loc = objset_space_alloc(zfs, zap->os, &blksz);
		vdev_pwrite_dnode_indir(zfs, dnode, 0, 1, leafblks + i * blksz,
		    blksz, loc, dnode_cursor_next(zfs, c, (i + 1) * blksz));
	}

	dnode_cursor_finish(zfs, c);

	free(leafblks);
}
/*
 * Write a ZAP to disk in micro or fat form, as selected by zap->micro,
 * then free every entry and the ZAP itself.  The zap pointer is invalid
 * on return.  A fat ZAP must contain at least one entry.
 */
void
zap_write(zfs_opt_t *zfs, zfs_zap_t *zap)
{
	zfs_zap_entry_t *kvp;

	if (!zap->micro) {
		assert(!STAILQ_EMPTY(&zap->kvps));
		assert(zap->kvpcnt > 0);
		zap_fat_write(zfs, zap);
	} else {
		zap_micro_write(zfs, zap);
	}

	while (!STAILQ_EMPTY(&zap->kvps)) {
		kvp = STAILQ_FIRST(&zap->kvps);
		STAILQ_REMOVE_HEAD(&zap->kvps, next);
		free(kvp->name);
		/* Values stored inline in the entry were never allocated. */
		if (kvp->val64p != &kvp->val64)
			free(kvp->valp);
		free(kvp);
	}
	free(zap);
}