#include "config.h"
#include "udb.h"
#include <string.h>
#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <assert.h>
#include "lookup3.h"
#include "util.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/mman.h>
#ifndef MAP_FAILED
#define MAP_FAILED ((void*)-1)
#endif
#ifndef MS_SYNC
#define MS_SYNC 0
#endif
static void move_xl_segment(void* base, udb_base* udb, udb_void xl,
udb_void n, uint64_t sz, uint64_t startseg);
static int udb_alloc_compact(void* base, udb_alloc* alloc);
/* Map a data offset back to the start of its chunk.  XL chunks carry a
 * udb_xl_chunk_d header and are whole-chunk aligned; normal chunks carry
 * a udb_chunk_d header. */
static udb_void
chunk_from_dataptr(udb_void data)
{
	udb_void xl = data - sizeof(udb_xl_chunk_d);
	return ((xl & (UDB_ALLOC_CHUNK_SIZE-1)) == 0)
		? xl : (data - sizeof(udb_chunk_d));
}
/* Exported wrapper so other modules can map data offsets to chunks. */
udb_void
chunk_from_dataptr_ext(udb_void data)
{
	return chunk_from_dataptr(data);
}
#ifndef NDEBUG
/* Read the exp byte stored in the last byte of a chunk (debug checks).
 * Must shift in 64 bits: (1<<exp) is an int shift and is undefined for
 * exp >= 31 on 32-bit-int platforms, while exponents go up to 63
 * (chunk_set_last already uses the 64-bit form). */
static uint8_t
chunk_get_last(void* base, udb_void chunk, int exp)
{
	return *((uint8_t*)UDB_REL(base, chunk+((uint64_t)1<<exp)-1));
}
#endif
/* Store the exp byte into the final byte of the chunk of size 2**exp. */
static void
chunk_set_last(void* base, udb_void chunk, int exp, uint8_t value)
{
	uint8_t* lastbyte;
	assert(exp >= 0 && exp <= 63);
	lastbyte = (uint8_t*)UDB_REL(base, chunk+((uint64_t)1<<exp)-1);
	*lastbyte = value;
}
/** Create a udb from an already-open file descriptor.  Validates the
 * file header (magic, version, sizes, clean-close markers), mmaps the
 * whole file and, when the allocator was left dirty, compacts and syncs.
 * Takes ownership of fd: it is closed on every failure path.
 * @param fname: file name, copied; used in log messages.
 * @param fd: open descriptor of the udb file.
 * @param walkfunc: callback that enumerates relptrs inside a chunk.
 * @param arg: user argument for walkfunc.
 * @return new udb base, or NULL on failure (fd closed).
 */
udb_base*
udb_base_create_fd(const char* fname, int fd, udb_walk_relptr_func walkfunc,
	void* arg)
{
	uint64_t m, fsz;
	udb_glob_d g;
	ssize_t r;
	udb_base* udb = (udb_base*)xalloc_zero(sizeof(*udb));
	if(!udb) {
		log_msg(LOG_ERR, "out of memory");
		close(fd);
		return NULL;
	}
	udb->fname = strdup(fname);
	if(!udb->fname) {
		log_msg(LOG_ERR, "out of memory");
		free(udb);
		close(fd);
		return NULL;
	}
	udb->walkfunc = walkfunc;
	udb->walkarg = arg;
	udb->fd = fd;
	/* in-RAM pointer hashtable; grows later when it fills up */
	udb->ram_size = 1024;
	udb->ram_mask = (int)udb->ram_size - 1;
	udb->ram_hash = (udb_ptr**)xalloc_array_zero(sizeof(udb_ptr*),
		udb->ram_size);
	if(!udb->ram_hash) {
		free(udb->fname);
		free(udb);
		log_msg(LOG_ERR, "out of memory");
		close(fd);
		return NULL;
	}
	/* check the magic number at the start of the file */
	if((r=read(fd, &m, sizeof(m))) == -1) {
		log_msg(LOG_ERR, "%s: %s", fname, strerror(errno));
		goto fail;
	} else if(r != (ssize_t)sizeof(m)) {
		log_msg(LOG_ERR, "%s: file too short", fname);
		goto fail;
	}
	if(m != UDB_MAGIC) {
		log_msg(LOG_ERR, "%s: wrong type of file", fname);
		goto fail;
	}
	/* read and sanity-check the global header that follows the magic */
	if((r=read(fd, &g, sizeof(g))) == -1) {
		log_msg(LOG_ERR, "%s: %s\n", fname, strerror(errno));
		goto fail;
	} else if(r != (ssize_t)sizeof(g)) {
		log_msg(LOG_ERR, "%s: file too short", fname);
		goto fail;
	}
	if(g.version != 0) {
		log_msg(LOG_ERR, "%s: unknown file version %d", fname,
			(int)g.version);
		goto fail;
	}
	if(g.hsize < UDB_HEADER_SIZE) {
		log_msg(LOG_ERR, "%s: header size too small %d", fname,
			(int)g.hsize);
		goto fail;
	}
	if(g.hsize > UDB_HEADER_SIZE) {
		log_msg(LOG_WARNING, "%s: header size too large %d", fname,
			(int)g.hsize);
		goto fail;
	}
	/* refuse files that were not closed cleanly; the caller is
	 * expected to recreate them */
	if(g.clean_close != 1) {
		log_msg(LOG_WARNING, "%s: not cleanly closed %d", fname,
			(int)g.clean_close);
		goto fail;
	}
	if(g.dirty_alloc != 0) {
		log_msg(LOG_WARNING, "%s: not cleanly closed (alloc:%d)", fname,
			(int)g.dirty_alloc);
		goto fail;
	}
	/* determine the real file size by seeking to the end */
	fsz = (uint64_t)lseek(fd, (off_t)0, SEEK_END);
	(void)lseek(fd, (off_t)0, SEEK_SET);
	if(g.fsize != fsz) {
		log_msg(LOG_WARNING, "%s: file size %llu but mmap header "
			"has size %llu", fname, (unsigned long long)fsz,
			(unsigned long long)g.fsize);
		goto fail;
	}
	if(g.fsize < UDB_HEADER_SIZE || g.fsize < g.hsize) {
		log_msg(LOG_ERR, "%s: file too short", fname);
		goto fail;
	}
	/* sanity cap to reject corrupt headers */
	if(g.fsize > (uint64_t)400*1024*1024*1024*1024) {
		log_msg(LOG_WARNING, "%s: file size too large %llu",
			fname, (unsigned long long)g.fsize);
		goto fail;
	}
	/* map the whole file shared and writable */
	udb->base_size = (size_t)g.fsize;
#ifdef HAVE_MMAP
	udb->base = mmap(NULL, (size_t)udb->base_size,
		(int)PROT_READ|PROT_WRITE, (int)MAP_SHARED,
		(int)udb->fd, (off_t)0);
#else
	udb->base = MAP_FAILED; errno = ENOSYS;
#endif
	if(udb->base == MAP_FAILED) {
		udb->base = NULL;
		log_msg(LOG_ERR, "mmap(size %u) error: %s",
			(unsigned)udb->base_size, strerror(errno));
	/* all earlier header-check gotos land here, after the log call */
	fail:
		close(fd);
		free(udb->fname);
		free(udb->ram_hash);
		free(udb);
		return NULL;
	}
	/* pointers into the mapped region: global header after the magic */
	udb->glob_data = (udb_glob_d*)((char*)udb->base+sizeof(uint64_t));
	r = 0;
	/* remember whether the allocator needs cleanup after creation */
	if(udb->glob_data->dirty_alloc != udb_dirty_clean)
		r = 1;
	udb->alloc = udb_alloc_create(udb, (udb_alloc_d*)(
		(char*)udb->glob_data+sizeof(*udb->glob_data)));
	if(!udb->alloc) {
		log_msg(LOG_ERR, "out of memory");
		udb_base_free(udb);
		return NULL;
	}
	if(r) {
		/* and compact now, or resume compacting */
		udb_alloc_compact(udb, udb->alloc);
		udb_base_sync(udb, 1);
	}
	/* mark in-use until cleanly closed again */
	udb->glob_data->clean_close = 0;
	return udb;
}
/* Open an existing udb file read-write and build the udb from it. */
udb_base* udb_base_create_read(const char* fname, udb_walk_relptr_func walkfunc,
	void* arg)
{
	int fd = open(fname, O_RDWR);
	if(fd != -1)
		return udb_base_create_fd(fname, fd, walkfunc, arg);
	log_msg(LOG_ERR, "%s: %s", fname, strerror(errno));
	return NULL;
}
/* Initialize a fresh global header: all zero, file and header sized to
 * the fixed header size. */
static void udb_glob_init_new(udb_glob_d* g)
{
	memset(g, 0, sizeof(*g));
	g->fsize = UDB_HEADER_SIZE;
	g->hsize = UDB_HEADER_SIZE;
}
/* Write len bytes of data to fd.  On any failure a message is logged,
 * fd is closed and 0 is returned; returns 1 on success. */
static int
write_fdata(const char* fname, int fd, void* data, size_t len)
{
	ssize_t w = write(fd, data, len);
	if(w == (ssize_t)len)
		return 1;
	if(w == -1)
		log_msg(LOG_ERR, "%s: %s", fname, strerror(errno));
	else
		log_msg(LOG_ERR, "%s: short write (disk full?)", fname);
	close(fd);
	return 0;
}
/** Create a new udb file: write the magic, a fresh global header, a
 * fresh allocator header and the end marker (size and exp words of the
 * empty heap), truncate to the header size, then open it through
 * udb_base_create_fd.  write_fdata closes fd itself on write failure. */
udb_base* udb_base_create_new(const char* fname, udb_walk_relptr_func walkfunc,
	void* arg)
{
	uint64_t m;
	udb_glob_d g;
	udb_alloc_d a;
	uint64_t endsize = UDB_HEADER_SIZE;
	uint64_t endexp = 0;
	int fd = open(fname, O_CREAT|O_RDWR, 0600);
	if(fd == -1) {
		log_msg(LOG_ERR, "%s: %s", fname, strerror(errno));
		return NULL;
	}
	m = UDB_MAGIC;
	udb_glob_init_new(&g);
	udb_alloc_init_new(&a);
	/* mark cleanly closed so create_fd accepts the fresh file */
	g.clean_close = 1;
	/* write new data to file (closes fd on error) */
	if(!write_fdata(fname, fd, &m, sizeof(m)))
		return NULL;
	if(!write_fdata(fname, fd, &g, sizeof(g)))
		return NULL;
	if(!write_fdata(fname, fd, &a, sizeof(a)))
		return NULL;
	if(!write_fdata(fname, fd, &endsize, sizeof(endsize)))
		return NULL;
	if(!write_fdata(fname, fd, &endexp, sizeof(endexp)))
		return NULL;
	/* rewind and cut the file to exactly the header size */
	if(lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) {
		log_msg(LOG_ERR, "%s: lseek %s", fname, strerror(errno));
		close(fd);
		return NULL;
	}
	if(ftruncate(fd, (off_t)g.fsize) < 0) {
		log_msg(LOG_ERR, "%s: ftruncate(%d): %s", fname,
			(int)g.fsize, strerror(errno));
		close(fd);
		return NULL;
	}
	/* real header is now read back in */
	return udb_base_create_fd(fname, fd, walkfunc, arg);
}
/** Truncate the file to nsize bytes.  dirty_alloc is raised to
 * udb_dirty_fsize around the operation so a crash mid-truncate is
 * detected and repaired (fsck_fsize) on the next open. */
static void
udb_base_shrink(udb_base* udb, uint64_t nsize)
{
	udb->glob_data->dirty_alloc = udb_dirty_fsize;
	udb->glob_data->fsize = nsize;
	/* flush the dirty marker and new size before changing the length */
#ifdef HAVE_MMAP
	msync(udb->base, udb->base_size, MS_ASYNC);
#endif
	if(ftruncate(udb->fd, (off_t)nsize) != 0) {
		log_msg(LOG_ERR, "%s: ftruncate(%u) %s", udb->fname,
			(unsigned)nsize, strerror(errno));
	}
	udb->glob_data->dirty_alloc = udb_dirty_clean;
}
/** Close the udb: shrink the file to its used size, mark it cleanly
 * closed, close the fd and unmap the base.  Safe to call on a partially
 * constructed udb (fd open but file never mapped). */
void udb_base_close(udb_base* udb)
{
	if(!udb)
		return;
	if(udb->fd != -1 && udb->base && udb->alloc) {
		uint64_t nsize = udb->alloc->disk->nextgrow;
		if(nsize < udb->base_size)
			udb_base_shrink(udb, nsize);
	}
	if(udb->fd != -1) {
		/* glob_data points into the mapping; only write the
		 * clean-close marker when the file is actually mapped,
		 * otherwise this would dereference NULL */
		if(udb->base && udb->glob_data)
			udb->glob_data->clean_close = 1;
		close(udb->fd);
		udb->fd = -1;
	}
	if(udb->base) {
#ifdef HAVE_MMAP
		if(munmap(udb->base, udb->base_size) == -1) {
			log_msg(LOG_ERR, "munmap: %s", strerror(errno));
		}
#endif
		udb->base = NULL;
	}
}
/* Close the udb file and release every in-memory structure. */
void udb_base_free(udb_base* udb)
{
	if(udb == NULL)
		return;
	udb_base_close(udb);
	udb_alloc_delete(udb->alloc);
	free(udb->fname);
	free(udb->ram_hash);
	free(udb);
}
/* Release the udb structures but deliberately keep the mmap alive (the
 * mapped region is still in use elsewhere, e.g. by a forked process). */
void udb_base_free_keep_mmap(udb_base* udb)
{
	if(udb == NULL)
		return;
	if(udb->fd != -1) {
		close(udb->fd);
		udb->fd = -1;
	}
	udb->base = NULL; /* forget the mapping, do not munmap */
	udb_alloc_delete(udb->alloc);
	free(udb->fname);
	free(udb->ram_hash);
	free(udb);
}
/** Flush the mapped region to disk.  wait!=0 blocks until written
 * (MS_SYNC), otherwise the writeback is scheduled (MS_ASYNC). */
void udb_base_sync(udb_base* udb, int wait)
{
	/* guard base too: msync(NULL, ...) on an unmapped udb would fail
	 * (or crash on platforms that dereference) */
	if(!udb || !udb->base)
		return;
#ifdef HAVE_MMAP
	if(msync(udb->base, udb->base_size, wait?MS_SYNC:MS_ASYNC) != 0) {
		log_msg(LOG_ERR, "msync(%s) error %s",
			udb->fname, strerror(errno));
	}
#else
	(void)wait;
#endif
}
/* Hash a 64-bit file offset for the in-RAM pointer hashtable. */
static uint32_t
chunk_hash_ptr(udb_void p)
{
	/* view the offset as an array of 32-bit words for hashword */
	uint32_t buf[sizeof(p)/sizeof(uint32_t)];
	memcpy(buf, &p, sizeof(buf));
	return hashword(buf, sizeof(p)/sizeof(uint32_t), 0x8763);
}
/* Return 1 when ptr is linked on the hash bucket for offset 'to'. */
int udb_ptr_is_on_bucket(udb_base* udb, udb_ptr* ptr, udb_void to)
{
	udb_ptr* walk;
	uint32_t bucket = chunk_hash_ptr(to) & udb->ram_mask;
	assert((size_t)bucket < udb->ram_size);
	for(walk = udb->ram_hash[bucket]; walk; walk = walk->next) {
		if(walk == ptr)
			return 1;
	}
	return 0;
}
/** Double the in-RAM pointer hashtable: install newhash (preallocated,
 * zeroed, twice the size) and rehash every entry from the old table. */
static void
grow_ram_hash(udb_base* udb, udb_ptr** newhash)
{
	size_t i;
	size_t osize= udb->ram_size;
	udb_ptr* p, *np;
	udb_ptr** oldhash = udb->ram_hash;
	udb->ram_size *= 2;
	udb->ram_mask <<= 1;
	udb->ram_mask |= 1;
	udb->ram_hash = newhash;
	/* relink every pointer into its new bucket */
	for(i=0; i<osize; i++) {
		p = oldhash[i];
		while(p) {
			uint32_t h;
			np = p->next;
			h = chunk_hash_ptr(p->data)&udb->ram_mask;
			p->prev=NULL;
			p->next=newhash[h];
			if(p->next) p->next->prev = p;
			/* bugfix: store p as the new bucket head; without
			 * this the entry is lost from the table */
			newhash[h] = p;
			/* go to next element of oldhash */
			p = np;
		}
	}
	free(oldhash);
}
/** Link an in-RAM udb_ptr into the pointer hashtable so it can be
 * fixed up when its target chunk moves.  Grows the table when full. */
void udb_base_link_ptr(udb_base* udb, udb_ptr* ptr)
{
	uint32_t i;
#ifdef UDB_CHECK
	assert(udb_valid_dataptr(udb, ptr->data));
#endif
	udb->ram_num++;
	if(udb->ram_num == udb->ram_size && udb->ram_size<(size_t)0x7fffffff) {
		/* grow the table; on allocation failure we simply keep
		 * linking into the (now overfull) smaller table */
		udb_ptr** newram = (udb_ptr**)xalloc_array_zero(
			sizeof(udb_ptr*), udb->ram_size*2);
		if(newram) {
			grow_ram_hash(udb, newram);
		}
	}
	i = chunk_hash_ptr(ptr->data) & udb->ram_mask;
	assert((size_t)i < udb->ram_size);
	/* push onto the front of the bucket chain */
	ptr->prev = NULL;
	ptr->next = udb->ram_hash[i];
	udb->ram_hash[i] = ptr;
	if(ptr->next)
		ptr->next->prev = ptr;
}
/** Remove an in-RAM udb_ptr from the pointer hashtable. */
void udb_base_unlink_ptr(udb_base* udb, udb_ptr* ptr)
{
	assert(ptr->data);
#ifdef UDB_CHECK
	assert(udb_valid_dataptr(udb, ptr->data));
	assert(udb_ptr_is_on_bucket(udb, ptr, ptr->data));
#endif
	udb->ram_num--;
	if(ptr->next)
		ptr->next->prev = ptr->prev;
	if(ptr->prev)
		ptr->prev->next = ptr->next;
	else {
		/* ptr was the head of its bucket chain */
		uint32_t i = chunk_hash_ptr(ptr->data) & udb->ram_mask;
		assert((size_t)i < udb->ram_size);
		udb->ram_hash[i] = ptr->next;
	}
}
/* Redirect every in-RAM pointer that references offset old to newd,
 * relinking it onto the bucket for its new value. */
static void
udb_base_ram_ptr_edit(udb_base* udb, udb_void old, udb_void newd)
{
	uint32_t bucket = chunk_hash_ptr(old) & udb->ram_mask;
	udb_ptr* p = udb->ram_hash[bucket];
	while(p) {
		/* capture next first: relinking p rewires its chain */
		udb_ptr* nxt = p->next;
		if(p->data == old) {
			udb_base_unlink_ptr(udb, p);
			p->data = newd;
			udb_base_link_ptr(udb, p);
		}
		p = nxt;
	}
}
/** Return the relptr holding the application's global root object. */
udb_rel_ptr* udb_base_get_userdata(udb_base* udb)
{
	return &udb->glob_data->user_global;
}
/** Set the application's global root object; udb_rel_ptr_set links the
 * relptr onto the target chunk's ptrlist so it survives chunk moves. */
void udb_base_set_userdata(udb_base* udb, udb_void user)
{
#ifdef UDB_CHECK
	if(user) { assert(udb_valid_dataptr(udb, user)); }
#endif
	udb_rel_ptr_set(udb->base, &udb->glob_data->user_global, user);
}
/** Store the application-defined flags byte in the global header. */
void udb_base_set_userflags(udb_base* udb, uint8_t v)
{
	udb->glob_data->userflags = v;
}
/** Read back the application-defined flags byte. */
uint8_t udb_base_get_userflags(udb_base* udb)
{
	return udb->glob_data->userflags;
}
/** Remap the file to nsize bytes.  Uses mremap when available, else
 * munmap+mmap (preferring the old address so absolute pointers stay
 * valid).  On success, base, glob_data and alloc->disk are re-derived
 * from the (possibly moved) mapping.  Returns the new base or 0.
 * NOTE(review): in the munmap fallback a failed mmap leaves udb->base
 * NULL — the old mapping is already gone at that point. */
static void*
udb_base_remap(udb_base* udb, udb_alloc* alloc, uint64_t nsize)
{
#ifdef HAVE_MMAP
	void* nb;
#ifdef MREMAP_MAYMOVE
	nb = mremap(udb->base, udb->base_size, nsize, MREMAP_MAYMOVE);
	if(nb == MAP_FAILED) {
		log_msg(LOG_ERR, "mremap(%s, size %u) error %s",
			udb->fname, (unsigned)nsize, strerror(errno));
		return 0;
	}
#else
	/* no mremap: drop the old mapping and map again */
	if(munmap(udb->base, udb->base_size) != 0) {
		log_msg(LOG_ERR, "munmap(%s) error %s",
			udb->fname, strerror(errno));
	}
	/* try to map at the old address first */
	nb = mmap(udb->base, (size_t)nsize, (int)PROT_READ|PROT_WRITE,
		(int)MAP_SHARED, (int)udb->fd, (off_t)0);
	if(nb == MAP_FAILED && errno == ENOMEM) {
		/* retry anywhere in the address space */
		nb = mmap(NULL, (size_t)nsize, (int)PROT_READ|PROT_WRITE,
			(int)MAP_SHARED, (int)udb->fd, (off_t)0);
	}
	if(nb == MAP_FAILED) {
		log_msg(LOG_ERR, "mmap(%s, size %u) error %s",
			udb->fname, (unsigned)nsize, strerror(errno));
		udb->base = NULL;
		return 0;
	}
#endif
	if(nb != udb->base) {
		/* mapping moved: re-derive pointers into it */
		udb->base = nb;
		udb->glob_data = (udb_glob_d*)((char*)nb+sizeof(uint64_t));
		alloc->disk = (udb_alloc_d*)((char*)udb->glob_data
			+sizeof(*udb->glob_data));
	}
	udb->base_size = nsize;
	return nb;
#else
	(void)udb; (void)alloc; (void)nsize;
	return NULL;
#endif
}
/** Re-establish this process's mapping at the current file size, as
 * recorded in the (still accessible) global header. */
void
udb_base_remap_process(udb_base* udb)
{
	udb_base_remap(udb, udb->alloc, udb->glob_data->fsize);
}
/** Grow the file to nsize bytes by writing a single zero byte at the
 * new end (the hole fills with zeroes), then remap.  dirty_alloc guards
 * the size change for crash recovery.  Returns new base or 0. */
static void*
udb_base_grow_and_remap(udb_base* udb, uint64_t nsize)
{
	uint8_t z = 0;
	ssize_t w;
	assert(nsize > 0);
	udb->glob_data->dirty_alloc = udb_dirty_fsize;
	/* note: the if-statement body below is shared by both the pwrite
	 * and the lseek+write variants */
#ifdef HAVE_PWRITE
	if((w=pwrite(udb->fd, &z, sizeof(z), (off_t)(nsize-1))) == -1) {
#else
	if(lseek(udb->fd, (off_t)(nsize-1), SEEK_SET) == -1) {
		log_msg(LOG_ERR, "fseek %s: %s", udb->fname, strerror(errno));
		return 0;
	}
	if((w=write(udb->fd, &z, sizeof(z))) == -1) {
#endif
		log_msg(LOG_ERR, "grow(%s, size %u) error %s",
			udb->fname, (unsigned)nsize, strerror(errno));
		return 0;
	} else if(w != (ssize_t)sizeof(z)) {
		log_msg(LOG_ERR, "grow(%s, size %u) failed (disk full?)",
			udb->fname, (unsigned)nsize);
		return 0;
	}
	udb->glob_data->fsize = nsize;
	udb->glob_data->dirty_alloc = udb_dirty_clean;
	return udb_base_remap(udb, udb->alloc, nsize);
}
/* Smallest exponent e with 2**e >= a, i.e. ceil(log2(a)); a must be
 * non-zero. */
int udb_exp_size(uint64_t a)
{
	uint64_t rem;
	int e = 0;
	assert(a != 0);
	rem = a - 1;
	/* skip whole bytes first, then the remaining bits */
	for(; (rem & ~(uint64_t)0xff); rem >>= 8)
		e += 8;
	for(; rem; rem >>= 1)
		e++;
	assert( e>=0 && e<=63);
	assert( ((uint64_t)1<<e) >= a);
	assert( e==0 || (((uint64_t)1<<e)>>1) < a);
	return e;
}
/* Largest exponent e such that 2**e divides o (the count of trailing
 * zero bits); o must be non-zero. */
int udb_exp_offset(uint64_t o)
{
	uint64_t v = o;
	int e = 0;
	assert(o != 0);
	/* strip zero bytes first, then single zero bits */
	for(; (v & (uint64_t)0xff) == 0; v >>= 8)
		e += 8;
	for(; (v & (uint64_t)0x1) == 0; v >>= 1)
		e++;
	assert( o % ((uint64_t)1<<e) == 0);
	assert( o % ((uint64_t)1<<(e+1)) != 0);
	return e;
}
/** Initialize a fresh allocator header: empty free lists, allocation
 * starts directly after the file header. */
void udb_alloc_init_new(udb_alloc_d* a)
{
	assert(UDB_HEADER_SIZE % UDB_ALLOC_CHUNK_MINSIZE == 0);
	memset(a, 0, sizeof(*a));
	a->nextgrow = UDB_HEADER_SIZE;
}
/** Repair a file interrupted during a size change: take the real size
 * from lseek, store it in the header, remap and sync.  Returns 1 on
 * success, 0 on failure. */
static int
fsck_fsize(udb_base* udb, udb_alloc* alloc)
{
	off_t realsize;
	log_msg(LOG_WARNING, "udb-fsck %s: file size wrong", udb->fname);
	realsize = lseek(udb->fd, (off_t)0, SEEK_END);
	if(realsize == (off_t)-1) {
		log_msg(LOG_ERR, "lseek(%s): %s", udb->fname, strerror(errno));
		return 0;
	}
	udb->glob_data->fsize = (uint64_t)realsize;
	if(!udb_base_remap(udb, alloc, (uint64_t)realsize))
		return 0;
	udb->glob_data->dirty_alloc = udb_dirty_clean;
	log_msg(LOG_WARNING, "udb-fsck %s: file size fixed (sync)", udb->fname);
	udb_base_sync(udb, 1);
	return 1;
}
/** During salvage: re-create the free chunk of size 2**exp at c, push
 * it onto the regenerated free list and account it.  Returns the offset
 * just past the chunk, or 0 when exp is out of range. */
static udb_void
regen_free(void* base, udb_void c, int exp, udb_alloc_d* regen)
{
	udb_free_chunk_d* cp = UDB_FREE_CHUNK(c);
	uint64_t esz = (uint64_t)1<<exp;
	if(exp < UDB_ALLOC_CHUNK_MINEXP || exp > UDB_ALLOC_CHUNKS_MAX) {
		return 0;
	}
	cp->type = udb_chunk_type_free;
	cp->flags = 0;
	chunk_set_last(base, c, exp, (uint8_t)exp);
	/* push onto the front of the regenerated free list */
	cp->prev = 0;
	cp->next = regen->free[exp-UDB_ALLOC_CHUNK_MINEXP];
	if(cp->next)
		UDB_FREE_CHUNK(cp->next)->prev = c;
	/* bugfix: install c as the new list head, otherwise the chunk is
	 * never reachable from the free list */
	regen->free[exp-UDB_ALLOC_CHUNK_MINEXP] = c;
	regen->stat_free += esz;
	return c + esz;
}
/* During salvage: account an XL chunk and step past it.  Returns the
 * offset after the chunk, or 0 when the chunk is malformed. */
static udb_void
regen_xl(void* base, udb_void c, udb_alloc_d* regen)
{
	udb_xl_chunk_d* cp = UDB_XL_CHUNK(c);
	uint64_t xlsz = cp->size;
	/* both the XL size and its offset must be whole-chunk aligned */
	if( (xlsz&(UDB_ALLOC_CHUNK_SIZE-1)) != 0
		|| (c&(UDB_ALLOC_CHUNK_SIZE-1)) != 0) {
		return 0;
	}
	regen->stat_alloc += xlsz;
	return c + xlsz;
}
/* During salvage: account a data chunk of size 2**exp, restore its
 * trailing exp byte and step past it.  Returns 0 on a bad exponent. */
static udb_void
regen_data(void* base, udb_void c, int exp, udb_alloc_d* regen)
{
	uint64_t esz;
	if(exp < UDB_ALLOC_CHUNK_MINEXP || exp > UDB_ALLOC_CHUNKS_MAX) {
		return 0;
	}
	esz = (uint64_t)1<<exp;
	chunk_set_last(base, c, exp, (uint8_t)exp);
	regen->stat_alloc += esz;
	return c + esz;
}
/* Walk callback for salvage: redirect a relptr that still points at the
 * old rollback location and relink it onto its target's ptrlist. */
static void
regen_relptr_func(void* base, udb_rel_ptr* rp, void* arg)
{
	udb_void* oldnew = (udb_void*)arg;
	if(rp->data == 0)
		return;
	if(rp->data == oldnew[0])
		rp->data = oldnew[1];
	udb_rel_ptr_link(base, rp, rp->data);
}
/** Invoke the application's walk function over one chunk's payload so
 * every relptr inside it gets relinked (and redirected from rb_old to
 * rb_new when it pointed at the moved chunk). */
static void
regen_its_ptrs(void* base, udb_base* udb, udb_chunk_d* atp,
	void* data, uint64_t dsz, udb_void rb_old, udb_void rb_new)
{
	udb_void arg[2];
	arg[0] = rb_old; arg[1] = rb_new;
	/* walk the client-defined relptrs inside this chunk's data */
	(*udb->walkfunc)(base, udb->walkarg, atp->type, data, dsz,
		&regen_relptr_func, arg);
}
/** During salvage: rebuild every chunk's ptrlist.  Pass 1 zeroes all
 * ptrlists; pass 2 walks the relptrs inside every allocated chunk and
 * relinks them, redirecting references from rb_old to rb_new. */
static void
regen_ptrlist(void* base, udb_base* udb, udb_alloc* alloc,
	udb_void rb_old, udb_void rb_new)
{
	udb_void at = alloc->udb->glob_data->hsize;
	/* pass 1: clear the ptrlist of every allocated chunk */
	while(at < alloc->disk->nextgrow) {
		int exp = (int)UDB_CHUNK(at)->exp;
		udb_chunk_type tp = (udb_chunk_type)UDB_CHUNK(at)->type;
		if(exp == UDB_EXP_XL) {
			UDB_XL_CHUNK(at)->ptrlist = 0;
			at += UDB_XL_CHUNK(at)->size;
		} else if(tp == udb_chunk_type_free) {
			at += (uint64_t)1<<exp;
		} else { /* data chunk */
			UDB_CHUNK(at)->ptrlist = 0;
			at += (uint64_t)1<<exp;
		}
	}
	/* pass 2: walk the relptrs inside every data chunk and relink */
	at = alloc->udb->glob_data->hsize;
	while(at < alloc->disk->nextgrow) {
		udb_chunk_d* atp = UDB_CHUNK(at);
		int exp = (int)atp->exp;
		udb_chunk_type tp = (udb_chunk_type)atp->type;
		/* XL chunks store their byte size; others are 2**exp */
		uint64_t sz = ((exp == UDB_EXP_XL)?UDB_XL_CHUNK(at)->size:
			(uint64_t)1<<exp);
		if(exp == UDB_EXP_XL) {
			assert(at != rb_old);
			/* payload excludes the XL header and trailer words */
			regen_its_ptrs(base, udb, atp,
				((char*)atp)+sizeof(udb_xl_chunk_d),
				sz-sizeof(udb_xl_chunk_d) - sizeof(uint64_t)*2,
				rb_old, rb_new);
			at += sz;
		} else if(tp == udb_chunk_type_free) {
			at += sz;
		} else { /* data chunk */
			assert(at != rb_old);
			/* payload excludes header and trailing exp byte */
			regen_its_ptrs(base, udb, atp,
				((char*)atp)+sizeof(udb_chunk_d),
				sz-sizeof(udb_chunk_d)-1, rb_old, rb_new);
			at += sz;
		}
	}
}
/* Mark every whole-chunk segment in [s, s+m) as a free chunk of maximum
 * exponent, walking from the back down to s. */
static void
rb_mark_free_segs(void* base, udb_void s, uint64_t m)
{
	udb_void seg = s + m - UDB_ALLOC_CHUNK_SIZE;
	assert(s >= UDB_ALLOC_CHUNK_SIZE);
	for(; seg >= s; seg -= UDB_ALLOC_CHUNK_SIZE) {
		UDB_CHUNK(seg)->exp = UDB_ALLOC_CHUNKS_MAX;
		UDB_CHUNK(seg)->type = udb_chunk_type_free;
	}
}
/** Roll forward an interrupted XL move during salvage: copy from rb_old
 * down to rb_new (restarting at segment rb_seg for the overlapping
 * case) and mark the vacated space free.  Returns 0 when the recorded
 * rollback data is inconsistent. */
static int
fsck_rb_xl(void* base, udb_base* udb, udb_void rb_old, udb_void rb_new,
	uint64_t rb_size, uint64_t rb_seg)
{
	if(rb_old <= rb_new)
		return 0;
	if( (rb_size&(UDB_ALLOC_CHUNK_SIZE-1)) != 0)
		return 0;
	if( (rb_old&(UDB_ALLOC_CHUNK_SIZE-1)) != 0)
		return 0;
	if( (rb_new&(UDB_ALLOC_CHUNK_SIZE-1)) != 0)
		return 0;
	if(rb_new + rb_size <= rb_old) {
		/* regions do not overlap: plain copy, then free old spot */
		memcpy(UDB_CHUNK(rb_new), UDB_CHUNK(rb_old), rb_size);
		rb_mark_free_segs(base, rb_old, rb_size);
	} else {
		/* overlap: redo the segment-wise move, free the tail */
		move_xl_segment(base, udb, rb_old, rb_new, rb_size, rb_seg);
		rb_mark_free_segs(base, rb_new+rb_size, (rb_old+rb_size)-
			(rb_new+rb_size));
	}
	return 1;
}
/* Roll forward an interrupted small-chunk move: copy the chunk from
 * rb_old to rb_new and report rb_old through make_free so the scan will
 * mark it free.  Returns 0 when the rollback values are inconsistent. */
static int
fsck_rb(void* base, udb_void rb_old, udb_void rb_new, uint64_t rb_size,
	udb_void* make_free)
{
	/* the size must be a power of two, both offsets aligned to it */
	if( (rb_size&(rb_size-1)) != 0
		|| (rb_old&(rb_size-1)) != 0
		|| (rb_new&(rb_size-1)) != 0)
		return 0;
	memcpy(UDB_CHUNK(rb_new), UDB_CHUNK(rb_old), rb_size);
	*make_free = rb_old;
	return 1;
}
/** Salvage a file whose allocator was not cleanly closed: finish (or
 * undo) an interrupted chunk move when 'moved' is set, then rebuild the
 * allocator header and free lists by scanning every chunk, relink all
 * relptrs and sync.  Returns 1 on success, 0 when unsalvageable.
 * Fixes encoding corruption: the '&regen' arguments below were garbled
 * into a registered-sign character in the previous revision. */
static int
fsck_file(udb_base* udb, udb_alloc* alloc, int moved)
{
	void* base = udb->base;
	udb_alloc_d regen;
	udb_void at = udb->glob_data->hsize;
	udb_void rb_old = udb->glob_data->rb_old;
	udb_void rb_new = udb->glob_data->rb_new;
	udb_void rb_seg = udb->glob_data->rb_seg;
	udb_void make_free = 0;
	uint64_t rb_size = udb->glob_data->rb_size;
	log_msg(LOG_WARNING, "udb-fsck %s: salvaging", udb->fname);
	/* roll forward the interrupted move recorded in the rb_* fields */
	if(moved && rb_old && rb_new && rb_size) {
		if(rb_old+rb_size <= alloc->disk->nextgrow
			&& rb_new+rb_size <= alloc->disk->nextgrow) {
			if(rb_size > 1024*1024) {
				if(!fsck_rb_xl(base, udb, rb_old, rb_new,
					rb_size, rb_seg))
					return 0;
			} else {
				if(!fsck_rb(base, rb_old, rb_new, rb_size,
					&make_free))
					return 0;
			}
		}
	}
	/* scan all chunks and regenerate the allocator administration */
	memset(&regen, 0, sizeof(regen));
	regen.nextgrow = alloc->disk->nextgrow;
	while(at < regen.nextgrow) {
		int exp = (int)UDB_CHUNK(at)->exp;
		udb_chunk_type tp = (udb_chunk_type)UDB_CHUNK(at)->type;
		if(tp == udb_chunk_type_free || at == make_free) {
			at = regen_free(base, at, exp, &regen);
			if(!at) return 0;
		} else if(exp == UDB_EXP_XL) {
			at = regen_xl(base, at, &regen);
			if(!at) return 0;
		} else if(exp >= UDB_ALLOC_CHUNK_MINEXP
			&& exp <= UDB_ALLOC_CHUNKS_MAX) {
			at = regen_data(base, at, exp, &regen);
			if(!at) return 0;
		} else {
			/* unknown chunk type: truncate the heap here */
			regen.nextgrow = at;
			break;
		}
	}
	/* commit the regenerated state and relink every relptr */
	*alloc->disk = regen;
	regen_ptrlist(base, udb, alloc, rb_old, rb_new);
	log_msg(LOG_WARNING, "udb-fsck %s: salvaged successfully (sync)",
		udb->fname);
	udb->glob_data->rb_old = 0;
	udb->glob_data->rb_new = 0;
	udb->glob_data->rb_size = 0;
	udb->glob_data->dirty_alloc = udb_dirty_clean;
	udb_base_sync(udb, 1);
	return 1;
}
/** Create the allocator for an opened udb.  If the file was left dirty,
 * attempt the appropriate repair (file-size fix or full salvage); NULL
 * is returned when the damage cannot be repaired. */
udb_alloc* udb_alloc_create(udb_base* udb, udb_alloc_d* disk)
{
	udb_alloc* alloc = (udb_alloc*)xalloc_zero(sizeof(*alloc));
	if(!alloc)
		return NULL;
	alloc->udb = udb;
	alloc->disk = disk;
	/* see if committed but uncompleted actions need to be redone */
	if(udb->glob_data->dirty_alloc != udb_dirty_clean) {
		if(udb->glob_data->dirty_alloc == udb_dirty_fsize) {
			if(fsck_fsize(udb, alloc))
				return alloc;
		} else if(udb->glob_data->dirty_alloc == udb_dirty_fl) {
			if(fsck_file(udb, alloc, 0))
				return alloc;
		} else if(udb->glob_data->dirty_alloc == udb_dirty_compact) {
			if(fsck_file(udb, alloc, 1))
				return alloc;
		}
		log_msg(LOG_ERR, "error: file allocation dirty (%d)",
			(int)udb->glob_data->dirty_alloc);
		free(alloc);
		return NULL;
	}
	return alloc;
}
/* Free the in-memory allocator struct; the on-disk data is untouched. */
void udb_alloc_delete(udb_alloc* alloc)
{
	/* free(NULL) is a no-op, so no guard is needed */
	free(alloc);
}
/** Unlink a free chunk from its doubly-linked free list (list head is
 * in alloc->disk->free, indexed by exponent). */
static void
udb_alloc_unlink_fl(void* base, udb_alloc* alloc, udb_void chunk, int exp)
{
	udb_free_chunk_d* fp = UDB_FREE_CHUNK(chunk);
	assert(chunk);
	assert(fp->exp == (uint8_t)exp);
	assert(fp->type == udb_chunk_type_free);
	assert(chunk_get_last(base, chunk, exp) == (uint8_t)exp);
	assert(alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP]);
	if(fp->prev)
		UDB_FREE_CHUNK(fp->prev)->next = fp->next;
	else alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP] = fp->next;
	if(fp->next)
		UDB_FREE_CHUNK(fp->next)->prev = fp->prev;
}
/* Pop and return the head chunk of the free list for exponent exp.
 * The list must be non-empty. */
static udb_void
udb_alloc_pop_fl(void* base, udb_alloc* alloc, int exp)
{
	udb_void head = alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP];
	udb_free_chunk_d* hp = UDB_FREE_CHUNK(head);
	assert(head);
	assert(hp->exp == (uint8_t)exp);
	assert(hp->type == udb_chunk_type_free);
	assert(chunk_get_last(base, head, exp) == (uint8_t)exp);
	alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP] = hp->next;
	if(hp->next) {
		UDB_FREE_CHUNK(hp->next)->prev = 0;
	}
	return head;
}
/* Push chunk f onto the free list for exponent exp, (re)initializing
 * its header fields and the trailing exp byte. */
static void
udb_alloc_push_fl(void* base, udb_alloc* alloc, udb_void f, int exp)
{
	udb_free_chunk_d* fp = UDB_FREE_CHUNK(f);
	udb_void oldhead = alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP];
	assert(f);
	fp->exp = (uint8_t)exp;
	fp->type = udb_chunk_type_free;
	fp->flags = 0;
	fp->prev = 0;
	fp->next = oldhead;
	if(oldhead)
		UDB_FREE_CHUNK(oldhead)->prev = f;
	chunk_set_last(base, f, exp, (uint8_t)exp);
	alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP] = f;
}
/** Push an already-initialized free chunk onto its free list.  The
 * header fields and trailing exp byte are asserted, not rewritten. */
static void
udb_alloc_push_fl_noinit(void* base, udb_alloc* alloc, udb_void f, int exp)
{
	udb_free_chunk_d* fp = UDB_FREE_CHUNK(f);
	assert(f);
	assert(fp->exp == (uint8_t)exp);
	assert(fp->type == udb_chunk_type_free);
	assert(chunk_get_last(base, f, exp) == (uint8_t)exp);
	fp->prev = 0;
	fp->next = alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP];
	if(fp->next)
		UDB_FREE_CHUNK(fp->next)->prev = f;
	alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP] = f;
}
/** Align nextgrow up to an esz boundary, carving the gap into free
 * chunks whose size is the alignment of the current offset (so each
 * step increases the alignment of nextgrow). */
static void
grow_align(void* base, udb_alloc* alloc, uint64_t esz)
{
	while( (alloc->disk->nextgrow & (esz-1)) != 0) {
		/* largest power of two that divides the current offset */
		int fexp = udb_exp_offset(alloc->disk->nextgrow);
		uint64_t fsz = (uint64_t)1<<fexp;
		udb_void f = alloc->disk->nextgrow;
		udb_void fn = alloc->disk->nextgrow+fsz;
		assert(fn <= alloc->udb->base_size);
		alloc->disk->stat_free += fsz;
		udb_alloc_push_fl(base, alloc, f, fexp);
		alloc->disk->nextgrow = fn;
	}
}
/** Claim a new data chunk of size 2**exp at the end of the heap; the
 * file space must already exist.  Returns the data offset (just past
 * the chunk header). */
static udb_void
grow_chunks(void* base, udb_alloc* alloc, size_t sz, int exp)
{
	uint64_t esz = (uint64_t)1<<exp;
	udb_void ret;
	/* free-list edits below are guarded by the dirty flag */
	alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
	grow_align(base, alloc, esz);
	ret = alloc->disk->nextgrow;
	/* initialize the new chunk header and trailing exp byte */
	UDB_CHUNK(ret)->exp = (uint8_t)exp;
	UDB_CHUNK(ret)->flags = 0;
	UDB_CHUNK(ret)->ptrlist = 0;
	UDB_CHUNK(ret)->type = udb_chunk_type_data;
	chunk_set_last(base, ret, exp, (uint8_t)exp);
	alloc->disk->stat_alloc += esz;
	alloc->disk->stat_data += sz;
	alloc->disk->nextgrow += esz;
	assert(alloc->disk->nextgrow <= alloc->udb->base_size);
	alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
	return ret + sizeof(udb_chunk_d);
}
/* File size needed to append one chunk of size 2**exp: the next aligned
 * boundary plus the chunk (an extra chunk of slack is included when
 * nextgrow is unaligned, to cover the alignment filler). */
static uint64_t
grow_end_calc(udb_alloc* alloc, int exp)
{
	uint64_t csz = (uint64_t)1<<exp;
	uint64_t ng = alloc->disk->nextgrow;
	if( (ng & (csz-1)) == 0)
		return ng + csz; /* already aligned */
	return (ng & ~(csz-1)) + 2*csz;
}
/* Round a grow target up so the file grows by a useful amount: double
 * small files (<= 1M), otherwise grow by an eighth, rounded up to a
 * whole number of megabytes. */
static uint64_t
grow_extra_check(udb_alloc* alloc, uint64_t ge)
{
	const uint64_t mb = 1024*1024;
	uint64_t bsz = alloc->udb->base_size;
	if(bsz <= mb) {
		return (ge < bsz*2) ? bsz*2 : ge;
	} else {
		uint64_t gnow = ge - bsz;
		uint64_t want = ((bsz / 8) & ~(mb-1)) + mb;
		if(gnow < want)
			return bsz + want;
	}
	return ge;
}
/* Return 1 when enough of the file is unused to make shrinking it
 * worthwhile: a third free for small files, else at least 1M free and
 * a quarter of the file (or the file is under 4M). */
static int
enough_free(udb_alloc* alloc)
{
	if(alloc->udb->base_size <= 2*1024*1024) {
		return ((size_t)alloc->disk->nextgrow)*3
			<= alloc->udb->base_size;
	} else {
		uint64_t space = alloc->udb->base_size
			- alloc->disk->nextgrow;
		return space >= 1024*1024 && (space*4 >= alloc->udb->base_size
			|| alloc->udb->base_size < 4*1024*1024);
	}
}
/** Allocate by growing the heap: when the current file already has room
 * the chunk is claimed directly, otherwise the file is grown (with
 * extra headroom) and remapped first.  Returns data offset or 0. */
static udb_void
udb_alloc_grow_space(void* base, udb_alloc* alloc, size_t sz, int exp)
{
	/* commit the grow action */
	uint64_t grow_end = grow_end_calc(alloc, exp);
	assert(alloc->udb->base_size >= alloc->disk->nextgrow);
	if(grow_end <= alloc->udb->base_size) {
		/* we can do this with the available space */
		return grow_chunks(base, alloc, sz, exp);
	}
	/* we have to grow the file, re-mmap */
	grow_end = grow_extra_check(alloc, grow_end);
	if(!(base=udb_base_grow_and_remap(alloc->udb, grow_end))) {
		return 0; /* mmap or write failed (disk or mem full) */
	}
	/* base may be a new pointer */
	assert(grow_end <= alloc->udb->base_size);
	assert(alloc->udb->glob_data->fsize == alloc->udb->base_size);
	return grow_chunks(base, alloc, sz, exp);
}
/** Claim a new XL chunk of xlsz bytes at the end of the heap.  An XL
 * chunk records its size in the header and repeats size and an exp
 * marker in its last two words so it can be found walking backwards. */
static udb_void
grow_xl(void* base, udb_alloc* alloc, uint64_t xlsz, uint64_t sz)
{
	udb_void ret;
	udb_xl_chunk_d* p;
	alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
	/* XL chunks must start on a whole-chunk boundary */
	grow_align(base, alloc, UDB_ALLOC_CHUNK_SIZE);
	ret = alloc->disk->nextgrow;
	p = UDB_XL_CHUNK(ret);
	p->exp = UDB_EXP_XL;
	p->size = xlsz;
	p->flags = 0;
	p->ptrlist = 0;
	p->type = udb_chunk_type_data;
	/* trailing size word and exp byte for backward walking */
	*((uint64_t*)(UDB_REL(base, ret+xlsz-sizeof(uint64_t)*2))) = xlsz;
	*((uint8_t*)(UDB_REL(base, ret+xlsz-1))) = UDB_EXP_XL;
	alloc->disk->stat_data += sz;
	alloc->disk->stat_alloc += xlsz;
	alloc->disk->nextgrow += xlsz;
	alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
	return ret + sizeof(udb_xl_chunk_d);
}
/** Allocate an XL (larger-than-one-chunk) item of sz payload bytes.
 * The needed size (payload + XL header + two trailer words) is rounded
 * up to whole chunks.  Returns the data offset or 0 on failure. */
static udb_void
udb_alloc_xl_space(void* base, udb_alloc* alloc, size_t sz)
{
	uint64_t asz = sz + sizeof(udb_xl_chunk_d) + sizeof(uint64_t)*2;
	uint64_t need=(asz+UDB_ALLOC_CHUNK_SIZE-1)&(~(UDB_ALLOC_CHUNK_SIZE-1));
	uint64_t grow_end = grow_end_calc(alloc, UDB_ALLOC_CHUNKS_MAX) + need;
	assert(need >= asz);
	if(grow_end <= alloc->udb->base_size) {
		/* enough space exists already */
		return grow_xl(base, alloc, need, sz);
	}
	/* grow the file and remap first */
	grow_end = grow_extra_check(alloc, grow_end);
	if(!(base=udb_base_grow_and_remap(alloc->udb, grow_end))) {
		return 0; /* mmap or write failed (disk or mem full) */
	}
	/* base may be a new pointer */
	assert(grow_end <= alloc->udb->base_size);
	assert(alloc->udb->glob_data->fsize == alloc->udb->base_size);
	return grow_xl(base, alloc, need, sz);
}
/* Split the free chunk big (size 2**e2) down to size 2**exp, pushing
 * each cut-off half onto the free list for its exponent.  Returns big,
 * now usable as a chunk of the requested exponent. */
static udb_void
udb_alloc_subdivide(void* base, udb_alloc* alloc, udb_void big, int e2,
	int exp)
{
	int e;
	uint64_t half = (uint64_t)1<<e2;
	assert(big && e2 > exp);
	for(e = e2-1; e >= exp; e--) {
		half >>= 1;
		udb_alloc_push_fl(base, alloc, big+half, e);
	}
	return big;
}
/* Chunk exponent needed for a payload of sz bytes (payload plus chunk
 * header plus trailing exp byte); UDB_EXP_XL for oversized requests. */
static int
udb_alloc_exp_needed(size_t sz)
{
	uint64_t total = sz + sizeof(udb_chunk_d) + 1;
	int exp;
	if(total > UDB_ALLOC_CHUNK_SIZE)
		return UDB_EXP_XL;
	if(total <= UDB_ALLOC_CHUNK_MINSIZE)
		return UDB_ALLOC_CHUNK_MINEXP;
	exp = udb_exp_size(total);
	assert(exp <= UDB_ALLOC_CHUNKS_MAX);
	return exp;
}
/** Allocate sz payload bytes in the udb file.  Tries the exact-size
 * free list, then splits the smallest larger free chunk, and finally
 * grows the file.  Returns the data offset or 0 on failure.
 * Oversized requests are delegated to the XL allocator. */
udb_void udb_alloc_space(udb_alloc* alloc, size_t sz)
{
	void* base = alloc->udb->base;
	uint64_t esz;
	int e2, exp = udb_alloc_exp_needed(sz);
	if(exp == UDB_EXP_XL)
		return udb_alloc_xl_space(base, alloc, sz);
	/* chunk size as a 64-bit value; the previous (1<<exp) was an int
	 * shift that overflows (UB) for exp >= 31 */
	esz = (uint64_t)1<<exp;
	/* take from the free list if a chunk of this size is available */
	if(alloc->disk->free[exp-UDB_ALLOC_CHUNK_MINEXP]) {
		udb_void ret;
		alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
		ret = udb_alloc_pop_fl(base, alloc, exp);
		/* the popped chunk already has exp and last byte set */
		UDB_CHUNK(ret)->flags = 0;
		UDB_CHUNK(ret)->ptrlist = 0;
		UDB_CHUNK(ret)->type = udb_chunk_type_data;
		alloc->disk->stat_data += sz;
		alloc->disk->stat_alloc += esz;
		assert(alloc->disk->stat_free >= esz);
		alloc->disk->stat_free -= esz;
		alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
		return ret + sizeof(udb_chunk_d);
	}
	/* otherwise split the smallest larger free chunk */
	for(e2 = exp+1; e2 <= UDB_ALLOC_CHUNKS_MAX; e2++)
		if(alloc->disk->free[e2-UDB_ALLOC_CHUNK_MINEXP]) {
			udb_void big, ret;
			alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
			big = udb_alloc_pop_fl(base, alloc, e2);
			ret = udb_alloc_subdivide(base, alloc, big, e2, exp);
			UDB_CHUNK(ret)->exp = (uint8_t)exp;
			UDB_CHUNK(ret)->flags = 0;
			UDB_CHUNK(ret)->ptrlist = 0;
			UDB_CHUNK(ret)->type = udb_chunk_type_data;
			chunk_set_last(base, ret, exp, (uint8_t)exp);
			alloc->disk->stat_data += sz;
			alloc->disk->stat_alloc += esz;
			assert(alloc->disk->stat_free >= esz);
			alloc->disk->stat_free -= esz;
			alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
			return ret + sizeof(udb_chunk_d);
		}
	/* nothing suitable is free: grow the file */
	return udb_alloc_grow_space(base, alloc, sz, exp);
}
/* Find the smallest free-list exponent >= exp with a chunk available.
 * Returns that exponent, or 0 when nothing suitable is free. */
static int
have_free_for(udb_alloc* alloc, int exp)
{
	int e;
	for(e = exp; e <= UDB_ALLOC_CHUNKS_MAX; e++) {
		if(alloc->disk->free[e-UDB_ALLOC_CHUNK_MINEXP])
			return e;
	}
	return 0;
}
/** Walk callback during a chunk move: rp is a relptr stored inside the
 * moved data, so its own position changed; update the neighbours on its
 * target's ptrlist (or the list head) to its new system position.
 * arg is udb_void[2]: {old data offset, new data offset}. */
static void
chunk_fix_ptr_each(void* base, udb_rel_ptr* rp, void* arg)
{
	udb_void* data = (udb_void*)arg;
	udb_void r;
	if(!rp->data)
		return;
	r = UDB_SYSTOREL(base, rp);
	if(rp->next)
		UDB_REL_PTR(rp->next)->prev = r;
	if(rp->prev)
		UDB_REL_PTR(rp->prev)->next = r;
	else {
		/* rp is the head of its target's ptrlist; if the target is
		 * the moved chunk, use the new location.
		 * NOTE(review): data[1] is used directly as a chunk offset
		 * while the else-branch converts with chunk_from_dataptr —
		 * confirm data[1] is indeed the chunk, not the data ptr */
		if(rp->data == data[0])
			UDB_CHUNK(data[1])->ptrlist = r;
		else UDB_CHUNK(chunk_from_dataptr(rp->data))->ptrlist = r;
	}
}
/** After moving a chunk's content to data: walk the relptrs inside the
 * moved payload to fix their list links, then redirect everything that
 * pointed at the old location — the chunk's ptrlist targets and any
 * in-RAM pointers — from olddata to data. */
static void
chunk_fix_ptrs(void* base, udb_base* udb, udb_chunk_d* cp, udb_void data,
	uint64_t dsz, udb_void olddata)
{
	udb_void d[2];
	d[0] = olddata;
	d[1] = data;
	(*udb->walkfunc)(base, udb->walkarg, cp->type, UDB_REL(base, data),
		dsz, &chunk_fix_ptr_each, d);
	udb_rel_ptr_edit(base, cp->ptrlist, data);
	udb_base_ram_ptr_edit(udb, olddata, data);
}
/** Move the chunk at f (size 2**exp == esz) into a free chunk popped
 * from the free list at exponent e2 (subdividing when e2 != exp), then
 * fix all pointers.  The rb_* fields record rollback information so a
 * crash mid-move can be repaired by fsck_file. */
static void
move_chunk(void* base, udb_alloc* alloc, udb_void f, int exp, uint64_t esz,
	int e2)
{
	udb_void res = udb_alloc_pop_fl(base, alloc, e2);
	udb_chunk_d* rp;
	udb_chunk_d* fp;
	if(exp != e2) {
		/* split the larger free chunk down to the wanted size */
		res = udb_alloc_subdivide(base, alloc, res, e2, exp);
	}
	/* record rollback info before touching the data */
	assert(res != f);
	alloc->udb->glob_data->rb_old = f;
	alloc->udb->glob_data->rb_new = res;
	alloc->udb->glob_data->rb_size = esz;
	rp = UDB_CHUNK(res);
	fp = UDB_CHUNK(f);
	memcpy(rp, fp, esz);
	chunk_fix_ptrs(base, alloc->udb, rp, res+sizeof(udb_chunk_d),
		esz-sizeof(udb_chunk_d)-1, f+sizeof(udb_chunk_d));
}
/* Unlink the whole-chunk free segments covering [s, s+m) from the free
 * list (the space is about to be reused), walking back towards s. */
static void
free_xl_space(void* base, udb_alloc* alloc, udb_void s, uint64_t m)
{
	udb_void seg = s + m - UDB_ALLOC_CHUNK_SIZE;
	assert(s >= UDB_ALLOC_CHUNK_SIZE);
	for(; seg >= s; seg -= UDB_ALLOC_CHUNK_SIZE) {
		assert(UDB_CHUNK(seg)->exp == UDB_ALLOC_CHUNKS_MAX);
		assert(UDB_CHUNK(seg)->type == udb_chunk_type_free);
		udb_alloc_unlink_fl(base, alloc, seg, UDB_ALLOC_CHUNKS_MAX);
	}
}
/** Move an XL chunk of sz bytes from xl down to n (n < xl), recording
 * rollback info in the global header.  When the regions overlap the
 * copy proceeds in whole-chunk segments, committing progress in rb_seg
 * so a crash can resume from the right segment. */
static void
move_xl_segment(void* base, udb_base* udb, udb_void xl, udb_void n,
	uint64_t sz, uint64_t startseg)
{
	udb_xl_chunk_d* xlp = UDB_XL_CHUNK(xl);
	udb_xl_chunk_d* np = UDB_XL_CHUNK(n);
	uint64_t amount = xl - n;
	assert(n < xl); /* move to compact */
	udb->glob_data->rb_old = xl;
	udb->glob_data->rb_new = n;
	udb->glob_data->rb_size = sz;
	if(sz <= amount) {
		/* no overlap: one plain copy */
		memcpy(np, xlp, sz);
	} else {
		uint64_t seg, maxseg = amount/UDB_ALLOC_CHUNK_SIZE;
		for(seg = startseg; seg<maxseg; seg++) {
			udb->glob_data->rb_seg = seg;
			/* bugfix: the offsets are byte counts, so compute
			 * them on char*; pointer arithmetic on the
			 * udb_xl_chunk_d* values scaled the offset by the
			 * struct size and copied far out of bounds */
			memcpy((char*)np + seg*UDB_ALLOC_CHUNK_SIZE,
				(char*)xlp + seg*UDB_ALLOC_CHUNK_SIZE,
				UDB_ALLOC_CHUNK_SIZE);
		}
	}
}
/** Slide every XL chunk in [xl_start, xl_start+xl_sz) down by amount
 * bytes (all values whole-chunk aligned), fixing pointers after each
 * chunk move, then account the reclaimed space and clear rollback. */
static void
move_xl_list(void* base, udb_alloc* alloc, udb_void xl_start, uint64_t xl_sz,
	uint64_t amount)
{
	udb_void xl = xl_start;
	assert( (xl_start&(UDB_ALLOC_CHUNK_SIZE-1)) == 0 );
	assert( (amount&(UDB_ALLOC_CHUNK_SIZE-1)) == 0 );
	assert( (xl_sz&(UDB_ALLOC_CHUNK_SIZE-1)) == 0 );
	while(xl < xl_start+xl_sz) {
		udb_xl_chunk_d* xlp = UDB_XL_CHUNK(xl);
		udb_void n = xl-amount;
		uint64_t sz = xlp->size;
		assert(xlp->exp == UDB_EXP_XL);
		move_xl_segment(base, alloc->udb, xl, n, sz, 0);
		chunk_fix_ptrs(base, alloc->udb, UDB_CHUNK(n),
			n+sizeof(udb_xl_chunk_d),
			sz-sizeof(udb_xl_chunk_d)-sizeof(uint64_t)*2,
			xl+sizeof(udb_xl_chunk_d));
		/* bugfix: advance to the next XL chunk; without this the
		 * loop re-moves the same chunk forever */
		xl += sz;
	}
	alloc->disk->stat_free -= amount;
	alloc->disk->nextgrow -= amount;
	alloc->udb->glob_data->rb_old = 0;
	alloc->udb->glob_data->rb_new = 0;
	alloc->udb->glob_data->rb_size = 0;
}
/** Check whether the free chunk at f (size 2**exp == esz) can merge
 * with its buddy (f XOR esz).  Returns the buddy's offset when the
 * buddy lies inside the heap and is a free chunk of the same exponent,
 * else 0. */
static udb_void
coagulate_possible(void* base, udb_alloc* alloc, udb_void f, int exp,
	uint64_t esz)
{
	udb_void other = f^esz;
	if(exp == UDB_ALLOC_CHUNKS_MAX)
		return 0; /* no larger chunk sizes exist */
	if(other >= alloc->udb->base_size)
		return 0;
	if(other >= alloc->disk->nextgrow)
		return 0;
	if(other < alloc->udb->glob_data->hsize)
		return 0; /* would be inside the file header */
	if(f > other) {
		/* buddy lies before f: its exp is in its last byte */
		assert(f > 1);
		if(*((uint8_t*)UDB_REL(base, f-1)) == (uint8_t)exp) {
			assert(UDB_FREE_CHUNK(other)->exp == (uint8_t)exp);
			if(UDB_CHUNK(other)->type == udb_chunk_type_free)
				return other;
		}
	} else {
		/* buddy lies after f: its exp is in its header */
		if(UDB_CHUNK(other)->exp == (uint8_t)exp) {
			assert(chunk_get_last(base, other, exp)==(uint8_t)exp);
			if(UDB_CHUNK(other)->type == udb_chunk_type_free)
				return other;
		}
	}
	return 0;
}
/* Merge the free chunk at last with its buddies for as long as merging
 * is possible, then push the merged result onto the free list.
 * Returns the (possibly lower) start of the merged chunk. */
static udb_void
coagulate_and_push(void* base, udb_alloc* alloc, udb_void last, int exp,
	uint64_t esz)
{
	udb_void buddy;
	while( (buddy=coagulate_possible(base, alloc, last, exp, esz)) ) {
		udb_alloc_unlink_fl(base, alloc, buddy, exp);
		if(buddy < last)
			last = buddy;
		/* merged chunk is one exponent larger */
		exp++;
		esz <<= 1;
	}
	udb_alloc_push_fl(base, alloc, last, exp);
	return last;
}
/*
 * Compact the allocation arena: walk backwards from disk->nextgrow,
 * truncating trailing free chunks and moving used chunks into free
 * space lower in the file, then shrink the file when enough free
 * space has been reclaimed.  Returns 0 on remap failure, 1 otherwise.
 * NOTE(review): the forward declaration near the top of the file says
 * 'static int udb_alloc_compact'; this definition omits 'static'.
 * Linkage stays internal per C redeclaration rules, but the mismatch
 * is confusing — confirm and make the two agree.
 */
int
udb_alloc_compact(void* base, udb_alloc* alloc)
{
udb_void last;
int exp, e2;
uint64_t esz;
uint64_t at = alloc->disk->nextgrow; /* scan downward from the top */
udb_void xl_start = 0; /* start of the XL-chunk run found at the top */
uint64_t xl_sz = 0; /* total size of that XL run */
if(alloc->udb->inhibit_compact)
return 1;
alloc->udb->useful_compact = 0;
while(at > alloc->udb->glob_data->hsize) {
/* the byte just below 'at' is the exp marker of the chunk ending there */
exp = (int)*((uint8_t*)UDB_REL(base, at-1));
if(exp == UDB_EXP_XL) {
/* XL chunk: its size is stored in the trailer */
uint64_t xlsz = *((uint64_t*)UDB_REL(base,
at-sizeof(uint64_t)*2));
udb_void xl = at-xlsz;
#ifndef NDEBUG
udb_xl_chunk_d* xlp = UDB_XL_CHUNK(xl);
assert(xlp->exp == UDB_EXP_XL);
assert(xlp->type != udb_chunk_type_free);
#endif
/* if a gap separates this XL chunk from the run above it,
 * free the gap and slide the run down over it */
if(xl_start != 0 && xl+xlsz != xl_start) {
uint64_t m = xl_start - (xl+xlsz);
assert(xl_start > xl+xlsz);
alloc->udb->glob_data->dirty_alloc = udb_dirty_compact;
free_xl_space(base, alloc, xl+xlsz, m);
move_xl_list(base, alloc, xl_start, xl_sz, m);
alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
}
/* extend the known XL run downwards and keep scanning */
xl_start = xl;
xl_sz += xlsz;
at = xl;
continue;
} else if(exp < UDB_ALLOC_CHUNK_MINEXP
|| exp > UDB_ALLOC_CHUNKS_MAX)
break; /* not a valid chunk marker; stop compacting */
esz = (uint64_t)1<<exp;
last = at - esz;
assert(UDB_CHUNK(last)->exp == (uint8_t)exp);
if(UDB_CHUNK(last)->type == udb_chunk_type_free) {
if(!xl_start) {
/* free chunk at the end of the file: unlink it from
 * its freelist and lower nextgrow over it */
alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
udb_alloc_unlink_fl(base, alloc, last, exp);
alloc->disk->stat_free -= esz;
alloc->disk->nextgrow = last;
alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
}
at = last;
} else if( (e2=have_free_for(alloc, exp)) ) {
/* used chunk with a free slot further down: relocate it */
alloc->udb->glob_data->dirty_alloc = udb_dirty_compact;
move_chunk(base, alloc, last, exp, esz, e2);
if(xl_start) {
/* below an XL run: vacated space becomes a free
 * chunk, merged with buddies and pushed on a list */
last = coagulate_and_push(base, alloc,
last, exp, esz);
} else {
/* at the end of the file: simply truncate it */
alloc->disk->stat_free -= esz;
alloc->disk->nextgrow = last;
}
/* clear the rb_* fields recorded for the move
 * (crash-recovery rollback state, presumably — set by
 * move_chunk; see rb_* use elsewhere) */
alloc->udb->glob_data->rb_old = 0;
alloc->udb->glob_data->rb_new = 0;
alloc->udb->glob_data->rb_size = 0;
alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
at = last;
} else {
break; /* no free space to move this chunk into */
}
}
if(xl_start) {
/* swallow maximum-size free chunks directly below the XL run,
 * then slide the run down over the freed space */
udb_void at = xl_start;
uint64_t m = 0;
while(*((uint8_t*)UDB_REL(base, at-1))==UDB_ALLOC_CHUNKS_MAX){
udb_void chunk = at - UDB_ALLOC_CHUNK_SIZE;
if(UDB_CHUNK(chunk)->type != udb_chunk_type_free)
break;
assert(UDB_CHUNK(chunk)->exp==UDB_ALLOC_CHUNKS_MAX);
m += UDB_ALLOC_CHUNK_SIZE;
at = chunk;
}
if(m != 0) {
assert(at+m == xl_start);
alloc->udb->glob_data->dirty_alloc = udb_dirty_compact;
free_xl_space(base, alloc, at, m);
move_xl_list(base, alloc, xl_start, xl_sz, m);
alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
}
}
/* if enough space is now free at the end, shrink the file itself */
if(enough_free(alloc)) {
uint64_t nsize = alloc->disk->nextgrow;
udb_base_shrink(alloc->udb, nsize);
if(!udb_base_remap(alloc->udb, alloc, nsize))
return 0;
}
return 1;
}
/* Run a compaction pass on the udb, but only when one was flagged as
 * useful.  Returns 1 when nothing needed doing or on success. */
int
udb_compact(udb_base* udb)
{
	if(udb == NULL || !udb->useful_compact)
		return 1;
	DEBUG(DEBUG_DBACCESS, 1, (LOG_INFO, "Compacting database..."));
	return udb_alloc_compact(udb->base, udb->alloc);
}
/* Enable (inhibit=0) or disable (inhibit=1) automatic compaction. */
void udb_compact_inhibited(udb_base* udb, int inhibit)
{
	if(udb != NULL)
		udb->inhibit_compact = inhibit;
}
#ifdef UDB_CHECK
/* Walk callback for debug checks: asserts that a relptr inside data
 * that is about to be freed has already been zeroed. */
void udb_check_rptr_zero(void* base, udb_rel_ptr* p, void* arg)
{
(void)base;
(void)arg;
assert(p->data == 0);
}
#endif
/*
 * Free an XL chunk (size is a multiple of UDB_ALLOC_CHUNK_SIZE) at
 * offset f.  fp is its header; sz is the data size for the stats.
 * The chunk is cut into UDB_ALLOC_CHUNK_SIZE pieces that are each
 * pushed onto the maximum-exponent freelist.
 */
static void
udb_free_xl(void* base, udb_alloc* alloc, udb_void f, udb_xl_chunk_d* fp,
size_t sz)
{
uint64_t xlsz = fp->size;
uint64_t c;
/* the trailer must mirror the header: size field and XL marker */
assert(*((uint64_t*)(UDB_REL(base, f+xlsz-sizeof(uint64_t)*2)))==xlsz);
assert(*((uint8_t*)(UDB_REL(base, f+xlsz-1))) == UDB_EXP_XL);
/* XL chunks are chunk-size aligned and chunk-size multiples */
assert( (xlsz & (UDB_ALLOC_CHUNK_SIZE-1)) == 0 );
assert( (f & (UDB_ALLOC_CHUNK_SIZE-1)) == 0 );
#ifdef UDB_CHECK
/* verify no relptrs still point into the data being freed */
(*alloc->udb->walkfunc)(base, alloc->udb->walkarg, fp->type,
UDB_REL(base, f+sizeof(udb_xl_chunk_d)), xlsz,
&udb_check_rptr_zero, NULL);
#endif
/* mark dirty while the freelists are being rewritten */
alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
alloc->disk->stat_data -= sz;
alloc->disk->stat_alloc -= xlsz;
alloc->disk->stat_free += xlsz;
/* push the pieces starting from the last one, walking backwards */
c = f + xlsz - UDB_ALLOC_CHUNK_SIZE;
/* f >= chunk size, so c -= below cannot wrap the unsigned value
 * past zero before the c >= f test fails */
assert(f >= UDB_ALLOC_CHUNK_SIZE);
while(c >= f) {
udb_alloc_push_fl(base, alloc, c, UDB_ALLOC_CHUNKS_MAX);
c -= UDB_ALLOC_CHUNK_SIZE;
}
alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
}
/*
 * Free an allocation.  r is the dataptr (0 is a no-op), sz the data
 * size used for statistics.  The chunk is merged with free buddies
 * and pushed on a freelist; afterwards a compaction pass runs unless
 * compaction is inhibited.  Returns 0 on compaction (remap) failure,
 * 1 otherwise.
 */
int udb_alloc_free(udb_alloc* alloc, udb_void r, size_t sz)
{
void* base;
udb_void f;
udb_chunk_d* fp;
uint64_t esz;
int exp;
udb_void other;
int coagulated = 0;
if(!r)
return 1; /* freeing NULL succeeds trivially */
base = alloc->udb->base;
f = chunk_from_dataptr(r);
fp = UDB_CHUNK(f);
assert(fp->type != udb_chunk_type_free);
assert(fp->ptrlist == 0);
fp->ptrlist = 0; /* defensive: also clears it in NDEBUG builds */
if(fp->exp == UDB_EXP_XL) {
/* XL chunks have their own free routine */
udb_free_xl(base, alloc, f, (udb_xl_chunk_d*)fp, sz);
if(alloc->udb->inhibit_compact) {
alloc->udb->useful_compact = 1;
return 1;
}
return udb_alloc_compact(base, alloc);
}
exp = (int)fp->exp;
esz = (uint64_t)1<<exp;
assert(sz < esz);
assert(chunk_get_last(base, f, exp) == (uint8_t)exp);
#ifdef UDB_CHECK
/* verify no relptrs still point into the data being freed */
(*alloc->udb->walkfunc)(base, alloc->udb->walkarg, fp->type,
UDB_REL(base, r), esz, &udb_check_rptr_zero, NULL);
#endif
/* mark dirty while the freelists are being modified */
alloc->udb->glob_data->dirty_alloc = udb_dirty_fl;
alloc->disk->stat_data -= sz;
alloc->disk->stat_free += esz;
alloc->disk->stat_alloc -= esz;
/* buddy system: merge with free buddies for as long as possible */
while( (other=coagulate_possible(base, alloc, f, exp, esz)) ) {
coagulated = 1;
udb_alloc_unlink_fl(base, alloc, other, exp);
if(other < f)
f = other;
exp++;
esz <<= 1;
}
if(coagulated) {
/* the chunk start may have moved; push_fl (re)writes the header */
udb_alloc_push_fl(base, alloc, f, exp);
} else {
/* header still in place; flip it to free and push without
 * re-initialising */
fp->type = udb_chunk_type_free;
fp->flags = 0;
udb_alloc_push_fl_noinit(base, alloc, f, exp);
}
alloc->udb->glob_data->dirty_alloc = udb_dirty_clean;
if(alloc->udb->inhibit_compact) {
/* remember that a compaction pass would be useful later */
alloc->udb->useful_compact = 1;
return 1;
}
return udb_alloc_compact(base, alloc);
}
/* Allocate sz bytes and copy the data at d into the new space.
 * Returns the dataptr, or 0 on allocation failure. */
udb_void udb_alloc_init(udb_alloc* alloc, void* d, size_t sz)
{
	udb_void r = udb_alloc_space(alloc, sz);
	if(r != 0)
		memcpy(UDB_REL(alloc->udb->base, r), d, sz);
	return r;
}
/*
 * Reallocate the allocation at dataptr r (osz bytes in use) to hold
 * sz bytes.  Returns the (possibly unchanged) dataptr, or 0 on
 * failure.  r==0 acts like a plain alloc; sz==0 acts like free and
 * returns 0.
 */
udb_void udb_alloc_realloc(udb_alloc* alloc, udb_void r, size_t osz, size_t sz)
{
void* base = alloc->udb->base;
udb_void c, n, newd;
udb_chunk_d* cp, *np;
uint64_t avail;
uint8_t cp_type;
if(r == 0)
return udb_alloc_space(alloc, sz);
if(sz == 0) {
if(!udb_alloc_free(alloc, r, osz))
log_msg(LOG_ERR, "udb_alloc_realloc: free failed");
return 0;
}
c = chunk_from_dataptr(r);
cp = UDB_CHUNK(c);
cp_type = cp->type; /* saved now: cp is invalidated by the alloc below */
/* how much payload fits in the current chunk? */
if(cp->exp == UDB_EXP_XL) {
avail = UDB_XL_CHUNK(c)->size - sizeof(udb_xl_chunk_d)
- sizeof(uint64_t)*2;
} else {
avail = ((uint64_t)1<<cp->exp) - sizeof(udb_chunk_d) - 1;
}
if(sz <= avail)
return r; /* it fits in place; nothing to do */
newd = udb_alloc_space(alloc, sz);
if(!newd) return 0;
/* the allocation may have grown and remapped the file: refetch
 * base and drop cp, whose memory may have moved */
base = alloc->udb->base;
cp = NULL;
n = chunk_from_dataptr(newd);
np = UDB_CHUNK(n);
np->type = cp_type;
memcpy(UDB_REL(base, newd), UDB_REL(base, r), osz);
/* redirect relptrs that pointed into the old data to the new copy */
chunk_fix_ptrs(base, alloc->udb, np, newd, osz, r);
if(!udb_alloc_free(alloc, r, osz))
log_msg(LOG_ERR, "udb_alloc_realloc: free failed");
return newd;
}
/*
 * Preallocate file space for 'num' allocations of 'sz' bytes each,
 * growing (and remapping) the file once instead of per allocation.
 * Returns 1 on success, 0 when the file could not be grown.
 */
int udb_alloc_grow(udb_alloc* alloc, size_t sz, size_t num)
{
	const uint64_t mb = 1024*1024;
	int exp = udb_alloc_exp_needed(sz);
	uint64_t esz;
	uint64_t want;
	if(exp == UDB_EXP_XL)
		/* XL chunks are megabyte multiples: round sz up to the
		 * next mb boundary; the added mb also covers the XL
		 * header and trailer.  BUGFIX: the previous expression
		 * (sz&(mb-1))+mb kept only the remainder of sz modulo
		 * 1MB, so chunks larger than 1MB were undersized here. */
		esz = (sz&~(mb-1))+mb;
	else	esz = (uint64_t)1<<exp;
	/* grow the file for the first chunk plus num-1 further ones */
	want = grow_end_calc(alloc, exp) + esz*(num-1);
	assert(want >= alloc->udb->base_size);
	if(!udb_base_grow_and_remap(alloc->udb, want)) {
		log_msg(LOG_ERR, "failed to grow the specified amount");
		return 0;
	}
	return 1;
}
/* Change the chunk type of the allocation at dataptr r to tp. */
void udb_alloc_set_type(udb_alloc* alloc, udb_void r, udb_chunk_type tp)
{
	void* base = alloc->udb->base;
	udb_chunk_d* fp = UDB_CHUNK(chunk_from_dataptr(r));
	/* only allocated chunks may be retyped, and never to 'free' */
	assert(fp->type != udb_chunk_type_free);
	assert(tp != udb_chunk_type_free);
	fp->type = tp;
}
/* Check that [to, to+destsize) lies inside the usable mapped region:
 * below both base_size and nextgrow, and not inside the file header
 * (the two relptrs just before hsize are permitted). */
int udb_valid_offset(udb_base* udb, udb_void to, size_t destsize)
{
	udb_void end = to + destsize;
	return end <= udb->base_size
		&& end <= udb->alloc->disk->nextgrow
		&& to >= (udb->glob_data->hsize - 2*sizeof(udb_rel_ptr));
}
/*
 * Validate that 'to' is a plausible dataptr: its chunk header lies in
 * the usable region and the chunk's trailing exp/size markers agree
 * with the header.  Returns 1 when valid, 0 when not.
 */
int udb_valid_dataptr(udb_base* udb, udb_void to)
{
	void* base = udb->base;
	udb_void ch;
	int exp;
	uint64_t esz;
	/* the data itself must be addressable */
	if(!udb_valid_offset(udb, to, sizeof(uint64_t)))
		return 0;
	ch = chunk_from_dataptr(to);
	if(!udb_valid_offset(udb, ch, sizeof(udb_chunk_d)))
		return 0;
	exp = UDB_CHUNK(ch)->exp;
	if(exp == UDB_EXP_XL) {
		/* XL chunk: size field and trailer must be consistent */
		uint64_t xlsz;
		if(!udb_valid_offset(udb, ch, sizeof(udb_xl_chunk_d)))
			return 0;
		xlsz = UDB_XL_CHUNK(ch)->size;
		if(!udb_valid_offset(udb, ch+xlsz-1, 1))
			return 0;
		if(*((uint8_t*)UDB_REL(base, ch+xlsz-1)) != UDB_EXP_XL)
			return 0;
		if(*((uint64_t*)UDB_REL(base, ch+xlsz-sizeof(uint64_t)*2))
			!= xlsz)
			return 0;
		return 1;
	}
	if(exp < UDB_ALLOC_CHUNK_MINEXP || exp > UDB_ALLOC_CHUNKS_MAX)
		return 0;
	/* BUGFIX: shift a 64bit 1 — the previous plain 'int' 1<<exp is
	 * undefined behavior for exp >= 31 and truncates larger chunk
	 * sizes; the rest of the file consistently uses (uint64_t)1. */
	esz = (uint64_t)1<<exp;
	if(!udb_valid_offset(udb, ch+esz-1, 1))
		return 0;
	if(*((uint8_t*)UDB_REL(base, ch+esz-1)) != exp)
		return 0;
	return 1;
}
/* Check that rptr is a valid relptr that is registered on the ptrlist
 * of the chunk containing dataptr 'to'.  Returns 1 if so, else 0. */
int udb_valid_rptr(udb_base* udb, udb_void rptr, udb_void to)
{
	void* base = udb->base;
	udb_void p;
	if(!udb_valid_offset(udb, rptr, sizeof(udb_rel_ptr)))
		return 0;
	if(!udb_valid_dataptr(udb, to))
		return 0;
	/* walk the chunk's pointer list looking for rptr */
	for(p = UDB_CHUNK(chunk_from_dataptr(to))->ptrlist; p;
		p = UDB_REL_PTR(p)->next) {
		if(!udb_valid_offset(udb, p, sizeof(udb_rel_ptr)))
			return 0;
		if(p == rptr)
			return 1;
	}
	return 0;
}
/* Initialise a relptr to the unlinked NULL state (all fields zero). */
void udb_rel_ptr_init(udb_rel_ptr* ptr)
{
	memset(ptr, 0, sizeof(udb_rel_ptr));
}
/* Remove a relptr from the ptrlist of the chunk it points into.
 * A NULL relptr (data==0) is on no list and is left untouched. */
void udb_rel_ptr_unlink(void* base, udb_rel_ptr* ptr)
{
	if(ptr->data == 0)
		return;
	if(ptr->prev == 0) {
		/* head of the list: the chunk itself points at us */
		UDB_CHUNK(chunk_from_dataptr(ptr->data))->ptrlist =
			ptr->next;
	} else {
		UDB_REL_PTR(ptr->prev)->next = ptr->next;
	}
	if(ptr->next != 0)
		UDB_REL_PTR(ptr->next)->prev = ptr->prev;
}
/* Prepend a relptr to the ptrlist of the chunk that contains 'to'
 * and make the relptr point there. */
void udb_rel_ptr_link(void* base, udb_rel_ptr* ptr, udb_void to)
{
	udb_chunk_d* chunk = UDB_CHUNK(chunk_from_dataptr(to));
	udb_void self = UDB_SYSTOREL(base, ptr);
	ptr->prev = 0;
	ptr->next = chunk->ptrlist;
	if(ptr->next)
		UDB_REL_PTR(ptr->next)->prev = self;
	chunk->ptrlist = self;
	ptr->data = to;
}
/* Point a relptr at a new destination, maintaining its membership in
 * the destination chunk's ptrlist. */
void udb_rel_ptr_set(void* base, udb_rel_ptr* ptr, udb_void to)
{
	/* valid destinations are NULL or beyond the file header */
	assert(to == 0 || to > 64);
	udb_rel_ptr_unlink(base, ptr);
	if(to == 0)
		ptr->data = 0;
	else	udb_rel_ptr_link(base, ptr, to);
}
/* Make every relptr on the given ptrlist point to 'to'. */
void udb_rel_ptr_edit(void* base, udb_void list, udb_void to)
{
	udb_void p;
	for(p = list; p; p = UDB_REL_PTR(p)->next)
		UDB_REL_PTR(p)->data = to;
}
#ifdef UDB_CHECK
/* Debug check: verify the in-ram udb_ptr hashtable is consistent —
 * correct back-links, correct bucket for each entry's hash, and each
 * entry's base pointing at this udb's base. */
static void
udb_check_ptrs_valid(udb_base* udb)
{
	size_t i;
	for(i=0; i<udb->ram_size; i++) {
		udb_ptr* prev = NULL;
		udb_ptr* p;
		for(p=udb->ram_hash[i]; p; p=p->next) {
			assert(p->prev == prev);
			assert((size_t)(chunk_hash_ptr(p->data)&udb->ram_mask)
				== i);
			assert(p->base == &udb->base);
			prev = p;
		}
	}
}
#endif
/* Initialise a udb_ptr as a NULL pointer into this udb. */
void udb_ptr_init(udb_ptr* ptr, udb_base* udb)
{
#ifdef UDB_CHECK
	udb_check_ptrs_valid(udb); /* expensive debug-only validation */
#endif
	memset(ptr, 0, sizeof(udb_ptr));
	ptr->base = &udb->base;
}
/* Assign a new value to a udb_ptr, keeping the ram hashtable linkage
 * up to date (unlink the old value, link the new one). */
void udb_ptr_set(udb_ptr* ptr, udb_base* udb, udb_void newval)
{
	/* valid values are NULL or beyond the file header */
	assert(newval == 0 || newval > 64);
	if(ptr->data != 0)
		udb_base_unlink_ptr(udb, ptr);
	ptr->data = newval;
	if(newval != 0)
		udb_base_link_ptr(udb, ptr);
}
/* Allocate sz bytes of the given chunk type and point ptr at the new
 * space.  Returns 0 on allocation failure, 1 on success. */
int udb_ptr_alloc_space(udb_ptr* ptr, udb_base* udb, udb_chunk_type type,
	size_t sz)
{
	udb_void r = udb_alloc_space(udb->alloc, sz);
	if(r == 0)
		return 0;
	udb_alloc_set_type(udb->alloc, r, type);
	udb_ptr_init(ptr, udb);
	udb_ptr_set(ptr, udb, r);
	return 1;
}
/* Free the space a udb_ptr points at (if any), clearing the ptr first
 * so it does not dangle while the allocator runs. */
void udb_ptr_free_space(udb_ptr* ptr, udb_base* udb, size_t sz)
{
	udb_void d = ptr->data;
	if(d == 0)
		return;
	udb_ptr_set(ptr, udb, 0);
	udb_alloc_free(udb->alloc, d, sz);
}
/* Return the chunk type of the data a udb_ptr points at;
 * udb_chunk_type_internal for a NULL or absent pointer. */
udb_chunk_type udb_ptr_get_type(udb_ptr* ptr)
{
	udb_void f;
	if(ptr == NULL || ptr->data == 0)
		return udb_chunk_type_internal;
	f = chunk_from_dataptr(ptr->data);
	return ((udb_chunk_d*)UDB_REL(*ptr->base, f))->type;
}