#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)hash_page.c 10.55 (Sleepycat) 1/3/99";
#endif
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <errno.h>
#include <string.h>
#endif
#include "db_int.h"
#include "db_page.h"
#include "hash.h"
static int __ham_lock_bucket __P((DBC *, db_lockmode_t));
#ifdef DEBUG_SLOW
static void __account_page(DB *, db_pgno_t, int);
#endif
/*
 * __ham_item --
 *	Make the cursor's current position valid: fetch the pages the cursor
 *	refers to and, if the position has run off the end of a page, step
 *	forward to the next page in the chain.
 *
 *	dbc  - cursor; the HASH_CURSOR is taken from dbc->internal.
 *	mode - lock mode handed to __ham_get_cpage.
 *
 *	Returns EINVAL if the cursor is flagged H_DELETED, DB_NOTFOUND (with
 *	H_NOMORE set) when the end of the page chain is reached, 0 with
 *	H_NOMORE set when H_DUPONLY is set and the off-page duplicate chain is
 *	exhausted, 0 with H_OK set on success, or the error returned by a page
 *	get/put.
 */
int
__ham_item(dbc, mode)
	DBC *dbc;
	db_lockmode_t mode;
{
	DB *dbp;
	HASH_CURSOR *hcp;
	db_pgno_t next_pgno;
	int ret;

	dbp = dbc->dbp;
	hcp = (HASH_CURSOR *)dbc->internal;

	if (F_ISSET(hcp, H_DELETED))
		return (EINVAL);
	F_CLR(hcp, H_OK | H_NOMORE);

	/* Make sure the cursor's page(s) are in hand. */
	if ((ret = __ham_get_cpage(dbc, mode)) != 0)
		return (ret);

	/*
	 * If a caller is searching for seek_size bytes of room and no page
	 * has been recorded yet, remember this page if it has enough space.
	 */
	if (hcp->seek_size && hcp->seek_found_page == PGNO_INVALID
	    && hcp->seek_size < P_FREESPACE(hcp->pagep))
		hcp->seek_found_page = hcp->pgno;

	if (F_ISSET(hcp, H_ISDUP) && hcp->dpgno == PGNO_INVALID)
		/*
		 * Duplicates with no duplicate page number: read the
		 * db_indx_t stored at dup_off inside the data item into
		 * dup_len.
		 */
		memcpy(&hcp->dup_len,
		    HKEYDATA_DATA(H_PAIRDATA(hcp->pagep, hcp->bndx)) +
		    hcp->dup_off, sizeof(db_indx_t));
	else if (F_ISSET(hcp, H_ISDUP)) {
		/* Duplicates on a separate page chain. */
		if (hcp->dpagep == NULL && (ret = __ham_get_page(dbp,
		    hcp->dpgno, &hcp->dpagep)) != 0)
			return (ret);

		if (hcp->dndx >= NUM_ENT(hcp->dpagep)) {
			if (NEXT_PGNO(hcp->dpagep) == PGNO_INVALID) {
				if (F_ISSET(hcp, H_DUPONLY)) {
					F_CLR(hcp, H_OK);
					F_SET(hcp, H_NOMORE);
					return (0);
				}
				/* Leave the duplicate set; go to next pair. */
				if ((ret = __ham_put_page(dbp,
				    hcp->dpagep, 0)) != 0)
					return (ret);
				F_CLR(hcp, H_ISDUP);
				hcp->dpagep = NULL;
				hcp->dpgno = PGNO_INVALID;
				hcp->dndx = NDX_INVALID;
				hcp->bndx++;
			} else if ((ret = __ham_next_cpage(dbc,
			    NEXT_PGNO(hcp->dpagep), 0, H_ISDUP)) != 0)
				return (ret);
		}
	}

	if (hcp->bndx >= (db_indx_t)H_NUMPAIRS(hcp->pagep)) {
		/* Ran off the end of this page. */
		if (NEXT_PGNO(hcp->pagep) == PGNO_INVALID) {
			F_SET(hcp, H_NOMORE);
			if (hcp->dpagep != NULL) {
				if ((ret =
				    __ham_put_page(dbp, hcp->dpagep, 0)) != 0)
					return (ret);
				/*
				 * The page has been handed back; forget the
				 * pointer so that a later __ham_item_done or
				 * __ham_item_reset does not release it again.
				 */
				hcp->dpagep = NULL;
			}
			hcp->dpgno = PGNO_INVALID;
			return (DB_NOTFOUND);
		}
		next_pgno = NEXT_PGNO(hcp->pagep);
		hcp->bndx = 0;
		if ((ret = __ham_next_cpage(dbc, next_pgno, 0, 0)) != 0)
			return (ret);
	}

	F_SET(hcp, H_OK);
	return (0);
}
/*
 * __ham_item_reset --
 *	Release any pages the cursor holds and return the cursor to its
 *	initial state via __ham_item_init.
 *
 *	Both the main page and the duplicate page are always released, even
 *	if releasing the first one fails; __ham_item_init clears both page
 *	pointers, so skipping the second release would leave that page
 *	pinned with no reference to it.
 *
 *	Returns 0 on success, otherwise the first error reported by
 *	__ham_put_page.
 */
int
__ham_item_reset(dbc)
	DBC *dbc;
{
	HASH_CURSOR *hcp;
	DB *dbp;
	int ret, t_ret;

	ret = 0;
	dbp = dbc->dbp;
	hcp = (HASH_CURSOR *)dbc->internal;

	if (hcp->pagep != NULL)
		ret = __ham_put_page(dbp, hcp->pagep, 0);
	if (hcp->dpagep != NULL &&
	    (t_ret = __ham_put_page(dbp, hcp->dpagep, 0)) != 0 && ret == 0)
		ret = t_ret;

	__ham_item_init(hcp);
	return (ret);
}
/*
 * __ham_item_init --
 *	Put a hash cursor back into its "not positioned" state.
 *
 *	If the cursor holds a lock and its DBC has no transaction, the lock
 *	is released first (the lock_put result is ignored).  Every position,
 *	duplicate, seek and flag field is then reset.  Fields not assigned
 *	here (for example hdr, split_buf and stats) are left untouched.
 */
void
__ham_item_init(hcp)
	HASH_CURSOR *hcp;
{
	/* Drop a held lock when running without a transaction. */
	if (hcp->lock != 0 && hcp->dbc->txn == NULL)
		(void)lock_put(hcp->dbc->dbp->dbenv->lk_info, hcp->lock);
	hcp->lock = 0;

	/* Bucket position. */
	hcp->bucket = BUCKET_INVALID;
	hcp->lbucket = BUCKET_INVALID;

	/* Main page position. */
	hcp->pagep = NULL;
	hcp->pgno = PGNO_INVALID;
	hcp->bndx = NDX_INVALID;

	/* Duplicate page position and duplicate bookkeeping. */
	hcp->dpagep = NULL;
	hcp->dpgno = PGNO_INVALID;
	hcp->dndx = NDX_INVALID;
	hcp->dup_off = 0;
	hcp->dup_len = 0;
	hcp->dup_tlen = 0;

	/* Space-search state. */
	hcp->seek_size = 0;
	hcp->seek_found_page = PGNO_INVALID;

	hcp->flags = 0;
}
/*
 * __ham_item_done --
 *	Release the pages held by the cursor while keeping its page numbers
 *	and indices, so the position can be picked up again later.
 *
 *	dirty - non-zero if the operation modified a page.  The dirty flag is
 *	        applied to the duplicate page when one is held, otherwise to
 *	        the main page.
 *
 *	Returns the error from releasing the main page if there was one,
 *	otherwise the result of releasing the duplicate page (0 on success).
 */
int
__ham_item_done(dbc, dirty)
	DBC *dbc;
	int dirty;
{
	DB *db;
	HASH_CURSOR *cursor;
	int main_ret, dup_ret;

	db = dbc->dbp;
	cursor = (HASH_CURSOR *)dbc->internal;
	main_ret = 0;
	dup_ret = 0;

	if (cursor->pagep != NULL) {
		/* Only dirty the main page when no duplicate page is held. */
		main_ret = __ham_put_page(db, cursor->pagep,
		    dirty && cursor->dpagep == NULL);
		cursor->pagep = NULL;
	}

	if (cursor->dpagep != NULL) {
		dup_ret = __ham_put_page(db, cursor->dpagep, dirty);
		cursor->dpagep = NULL;
	}

	if (main_ret != 0)
		return (main_ret);
	return (dup_ret);
}
/*
 * __ham_item_last --
 *	Position the cursor on the last item: reset the cursor, select the
 *	bucket hdr->max_bucket, and step backwards with __ham_item_prev.
 *
 *	Returns the error from __ham_item_reset if it fails, otherwise the
 *	result of __ham_item_prev.
 */
int
__ham_item_last(dbc, mode)
	DBC *dbc;
	db_lockmode_t mode;
{
	HASH_CURSOR *cursor;
	int ret;

	cursor = (HASH_CURSOR *)dbc->internal;

	ret = __ham_item_reset(dbc);
	if (ret != 0)
		return (ret);

	cursor->bucket = cursor->hdr->max_bucket;
	F_SET(cursor, H_OK);

	return (__ham_item_prev(dbc, mode));
}
/*
 * __ham_item_first --
 *	Position the cursor on the first item: reset the cursor, select
 *	bucket 0, and step forwards with __ham_item_next.
 *
 *	Returns the error from __ham_item_reset if it fails, otherwise the
 *	result of __ham_item_next.
 */
int
__ham_item_first(dbc, mode)
	DBC *dbc;
	db_lockmode_t mode;
{
	HASH_CURSOR *cursor;
	int ret;

	cursor = (HASH_CURSOR *)dbc->internal;

	ret = __ham_item_reset(dbc);
	if (ret != 0)
		return (ret);

	cursor->bucket = 0;
	F_SET(cursor, H_OK);

	return (__ham_item_next(dbc, mode));
}
int
__ham_item_prev(dbc, mode)
DBC *dbc;
db_lockmode_t mode;
{
DB *dbp;
HASH_CURSOR *hcp;
db_pgno_t next_pgno;
int ret;
dbp = dbc->dbp;
hcp = (HASH_CURSOR *)dbc->internal;
F_CLR(hcp, H_OK | H_NOMORE | H_DELETED);
if (F_ISSET(hcp, H_ISDUP)) {
if (hcp->dpgno == PGNO_INVALID) {
if (hcp->dup_off != 0)
if ((ret = __ham_get_cpage(dbc, mode)) != 0)
return (ret);
else {
HASH_CURSOR *h;
h = hcp;
memcpy(&h->dup_len, HKEYDATA_DATA(
H_PAIRDATA(h->pagep, h->bndx))
+ h->dup_off - sizeof(db_indx_t),
sizeof(db_indx_t));
hcp->dup_off -=
DUP_SIZE(hcp->dup_len);
hcp->dndx--;
return (__ham_item(dbc, mode));
}
} else if (hcp->dndx > 0) {
hcp->dndx--;
return (__ham_item(dbc, mode));
} else if ((ret = __ham_get_cpage(dbc, mode)) != 0)
return (ret);
else if (PREV_PGNO(hcp->dpagep) == PGNO_INVALID) {
if (F_ISSET(hcp, H_DUPONLY)) {
F_CLR(hcp, H_OK);
F_SET(hcp, H_NOMORE);
return (0);
} else {
F_CLR(hcp, H_ISDUP);
hcp->dpgno = PGNO_INVALID;
if (hcp->dpagep != NULL)
(void)__ham_put_page(dbp,
hcp->dpagep, 0);
hcp->dpagep = NULL;
}
} else if ((ret = __ham_next_cpage(dbc,
PREV_PGNO(hcp->dpagep), 0, H_ISDUP)) != 0)
return (ret);
else {
hcp->dndx = NUM_ENT(hcp->pagep) - 1;
return (__ham_item(dbc, mode));
}
}
if (F_ISSET(hcp, H_DUPONLY)) {
F_CLR(hcp, H_OK);
F_SET(hcp, H_NOMORE);
return (0);
}
if (hcp->bndx == 0) {
if ((ret = __ham_get_cpage(dbc, mode)) != 0)
return (ret);
hcp->pgno = PREV_PGNO(hcp->pagep);
if (hcp->pgno == PGNO_INVALID) {
F_SET(hcp, H_NOMORE);
return (DB_NOTFOUND);
} else if ((ret =
__ham_next_cpage(dbc, hcp->pgno, 0, 0)) != 0)
return (ret);
else
hcp->bndx = H_NUMPAIRS(hcp->pagep);
}
if (hcp->bndx == NDX_INVALID) {
if (hcp->pagep == NULL)
next_pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
else
goto got_page;
do {
if ((ret = __ham_next_cpage(dbc, next_pgno, 0, 0)) != 0)
return (ret);
got_page: next_pgno = NEXT_PGNO(hcp->pagep);
hcp->bndx = H_NUMPAIRS(hcp->pagep);
} while (next_pgno != PGNO_INVALID);
if (hcp->bndx == 0) {
F_SET(hcp, H_NOMORE);
return (DB_NOTFOUND);
}
}
hcp->bndx--;
return (__ham_item(dbc, mode));
}
/*
 * __ham_item_next --
 *	Advance the hash cursor by one position, then call __ham_item to
 *	load whatever is at the new position.
 *
 *	dbc  - cursor; the HASH_CURSOR is taken from dbc->internal.
 *	mode - lock mode passed through to __ham_item.
 *
 *	When H_DUPONLY is set and the step would leave the current duplicate
 *	set, H_OK is cleared, H_NOMORE is set and 0 is returned without
 *	calling __ham_item.  Otherwise returns the result of __ham_item.
 */
int
__ham_item_next(dbc, mode)
DBC *dbc;
db_lockmode_t mode;
{
HASH_CURSOR *hcp;
hcp = (HASH_CURSOR *)dbc->internal;
/*
 * Case 1: the cursor is flagged H_DELETED.  The indices are not advanced
 * unless the cursor sits exactly at the end of an on-page duplicate set
 * (dup_tlen == dup_off), in which case it moves on to the next pair.
 */
if (F_ISSET(hcp, H_DELETED)) {
if (hcp->bndx != NDX_INVALID &&
F_ISSET(hcp, H_ISDUP) &&
hcp->dpgno == PGNO_INVALID &&
hcp->dup_tlen == hcp->dup_off) {
if (F_ISSET(hcp, H_DUPONLY)) {
F_CLR(hcp, H_OK);
F_SET(hcp, H_NOMORE);
return (0);
} else {
F_CLR(hcp, H_ISDUP);
hcp->dpgno = PGNO_INVALID;
hcp->bndx++;
}
} else if (!F_ISSET(hcp, H_ISDUP) &&
F_ISSET(hcp, H_DUPONLY)) {
F_CLR(hcp, H_OK);
F_SET(hcp, H_NOMORE);
return (0);
}
F_CLR(hcp, H_DELETED);
/* Case 2: cursor has no index yet; start at pair 0 of the page. */
} else if (hcp->bndx == NDX_INVALID) {
hcp->bndx = 0;
hcp->dpgno = PGNO_INVALID;
F_CLR(hcp, H_ISDUP);
/* Case 3: duplicates on a separate page; just bump the duplicate index. */
} else if (F_ISSET(hcp, H_ISDUP) && hcp->dpgno != PGNO_INVALID)
hcp->dndx++;
/*
 * Case 4: duplicates with no duplicate page; advance dup_off by
 * DUP_SIZE(dup_len) and leave the set once dup_off reaches dup_tlen.
 */
else if (F_ISSET(hcp, H_ISDUP)) {
if (hcp->dup_off + DUP_SIZE(hcp->dup_len) >=
hcp->dup_tlen && F_ISSET(hcp, H_DUPONLY)) {
F_CLR(hcp, H_OK);
F_SET(hcp, H_NOMORE);
return (0);
}
hcp->dndx++;
hcp->dup_off += DUP_SIZE(hcp->dup_len);
if (hcp->dup_off >= hcp->dup_tlen) {
F_CLR(hcp, H_ISDUP);
hcp->dpgno = PGNO_INVALID;
hcp->bndx++;
}
/* Case 5: not in a duplicate set but restricted to duplicates. */
} else if (F_ISSET(hcp, H_DUPONLY)) {
F_CLR(hcp, H_OK);
F_SET(hcp, H_NOMORE);
return (0);
/* Case 6: plain step to the next pair. */
} else
hcp->bndx++;
return (__ham_item(dbc, mode));
}
/*
 * __ham_putitem --
 *	Append one item to page p as entry number NUM_ENT(p).
 *
 *	p    - page to modify; HOFFSET(p) is lowered by the space consumed
 *	       and the new offset is recorded in p->inp[].
 *	dbt  - item to store (data/size).
 *	type - when H_OFFPAGE, dbt->data is copied verbatim; otherwise the
 *	       item is written with PUT_HKEYDATA, tagged with this type, and
 *	       occupies HKEYDATA_SIZE(dbt->size) bytes.
 *
 *	No free-space check is made here.
 */
void
__ham_putitem(p, dbt, type)
	PAGE *p;
	const DBT *dbt;
	int type;
{
	u_int16_t slot, where;

	slot = NUM_ENT(p);

	if (type != H_OFFPAGE) {
		/* Regular item: leave room for the HKEYDATA header. */
		where = HOFFSET(p) - HKEYDATA_SIZE(dbt->size);
		p->inp[slot] = where;
		HOFFSET(p) = where;
		PUT_HKEYDATA(P_ENTRY(p, slot), dbt->data, dbt->size, type);
	} else {
		/* Off-page reference: stored exactly as supplied. */
		where = HOFFSET(p) - dbt->size;
		p->inp[slot] = where;
		HOFFSET(p) = where;
		memcpy(P_ENTRY(p, slot), dbt->data, dbt->size);
	}

	/* One more entry on the page. */
	NUM_ENT(p) += 1;
}
/*
 * __ham_reputpair --
 *	Insert a key/data pair at pair index ndx of page p, shifting the
 *	existing item bytes and index entries to make room.
 *
 *	p     - page modified in place.
 *	psize - page size; used as the upper boundary of the item area when
 *	        ndx is 0.
 *	ndx   - pair index at which the new pair is placed.
 *	key, data - bytes copied verbatim (data/size) onto the page.
 *
 *	No free-space check is made here.
 *
 *	NOTE(review): the index loop starts at NUM_ENT(p) - 1 and only stops
 *	when it reaches H_KEYINDEX(ndx).  With an unsigned db_indx_t this
 *	looks unsafe for an empty page or when ndx is past the last existing
 *	pair -- presumably callers use __ham_putitem for those cases; confirm.
 */
void
__ham_reputpair(p, psize, ndx, key, data)
PAGE *p;
u_int32_t psize, ndx;
const DBT *key, *data;
{
db_indx_t i, movebytes, newbytes;
u_int8_t *from;
/*
 * Bytes that must slide down: everything from HOFFSET(p) up to the
 * boundary just above where the new pair will sit.
 */
movebytes =
(ndx == 0 ? psize : p->inp[H_DATAINDEX(ndx - 1)]) - HOFFSET(p);
newbytes = key->size + data->size;
from = (u_int8_t *)p + HOFFSET(p);
/* Source and destination may overlap, hence memmove. */
memmove(from - newbytes, from, movebytes);
/*
 * Move each index entry up two slots, adjusting its offset.  The exit
 * test is inside the loop body rather than in the for-condition.
 */
for (i = NUM_ENT(p) - 1; ; i-- ) {
p->inp[i + 2] = p->inp[i] - newbytes;
if (i == H_KEYINDEX(ndx))
break;
}
/* Record the offsets of the new key and data, then copy them in. */
p->inp[H_KEYINDEX(ndx)] =
(ndx == 0 ? psize : p->inp[H_DATAINDEX(ndx - 1)]) - key->size;
p->inp[H_DATAINDEX(ndx)] = p->inp[H_KEYINDEX(ndx)] - data->size;
memcpy(P_ENTRY(p, H_KEYINDEX(ndx)), key->data, key->size);
memcpy(P_ENTRY(p, H_DATAINDEX(ndx)), data->data, data->size);
/* Page bookkeeping: less free space, two more entries. */
HOFFSET(p) -= newbytes;
NUM_ENT(p) += 2;
}
/*
 * __ham_del_pair --
 *	Delete the key/data pair the cursor is positioned on (hcp->bndx on
 *	hcp->pagep), optionally reclaiming the page if it becomes empty.
 *
 *	dbc          - cursor; the HASH_CURSOR is taken from dbc->internal.
 *	reclaim_page - non-zero to unlink/free a page left with no entries.
 *
 *	Steps: free any off-page key, off-page data or off-page duplicate
 *	chain; log the delete when DB_LOGGING(dbc); remove the pair with
 *	__ham_dpair; then handle one of three page cases (empty first page
 *	with a successor, empty non-first page, everything else).
 *
 *	Returns 0 on success or the first error encountered.
 *
 *	NOTE(review): several early returns after a failed log call leave
 *	pages obtained in this function (n_pagep, nn_pagep, p_pagep) without
 *	a matching __ham_put_page -- confirm whether that is intended.
 */
int
__ham_del_pair(dbc, reclaim_page)
DBC *dbc;
int reclaim_page;
{
DB *dbp;
HASH_CURSOR *hcp;
DBT data_dbt, key_dbt;
DB_ENV *dbenv;
DB_LSN new_lsn, *n_lsn, tmp_lsn;
PAGE *p;
db_indx_t ndx;
db_pgno_t chg_pgno, pgno;
int ret, tret;
dbp = dbc->dbp;
hcp = (HASH_CURSOR *)dbc->internal;
dbenv = dbp->dbenv;
ndx = hcp->bndx;
/* Fetch the cursor's page if it is not currently held. */
if (hcp->pagep == NULL &&
(ret = __ham_get_page(dbp, hcp->pgno, &hcp->pagep)) != 0)
return (ret);
p = hcp->pagep;
ret = 0;
/* An off-page key: free the pages it refers to. */
if (HPAGE_PTYPE(H_PAIRKEY(p, ndx)) == H_OFFPAGE) {
memcpy(&pgno, HOFFPAGE_PGNO(P_ENTRY(p, H_KEYINDEX(ndx))),
sizeof(db_pgno_t));
ret = __db_doff(dbc, pgno, __ham_del_page);
}
/*
 * Same for the data item; for either duplicate form the cursor's
 * H_ISDUP flag is cleared as well.
 */
if (ret == 0)
switch (HPAGE_PTYPE(H_PAIRDATA(p, ndx))) {
case H_OFFPAGE:
memcpy(&pgno,
HOFFPAGE_PGNO(P_ENTRY(p, H_DATAINDEX(ndx))),
sizeof(db_pgno_t));
ret = __db_doff(dbc, pgno, __ham_del_page);
break;
case H_OFFDUP:
memcpy(&pgno,
HOFFDUP_PGNO(P_ENTRY(p, H_DATAINDEX(ndx))),
sizeof(db_pgno_t));
ret = __db_ddup(dbc, pgno, __ham_del_page);
F_CLR(hcp, H_ISDUP);
break;
case H_DUPLICATE:
F_CLR(hcp, H_ISDUP);
break;
}
if (ret)
return (ret);
/* Log the on-page key and data items before removing them. */
if (DB_LOGGING(dbc)) {
key_dbt.data = P_ENTRY(p, H_KEYINDEX(ndx));
key_dbt.size =
LEN_HITEM(p, hcp->hdr->pagesize, H_KEYINDEX(ndx));
data_dbt.data = P_ENTRY(p, H_DATAINDEX(ndx));
data_dbt.size =
LEN_HITEM(p, hcp->hdr->pagesize, H_DATAINDEX(ndx));
if ((ret = __ham_insdel_log(dbenv->lg_info,
dbc->txn, &new_lsn, 0, DELPAIR,
dbp->log_fileid, PGNO(p), (u_int32_t)ndx,
&LSN(p), &key_dbt, &data_dbt)) != 0)
return (ret);
LSN(p) = new_lsn;
}
/* Physically remove the pair from the page. */
__ham_dpair(dbp, p, ndx);
/* The element count is only maintained when DB_AM_LOCKING is not set. */
if (!F_ISSET(dbp, DB_AM_LOCKING))
--hcp->hdr->nelem;
/*
 * Page case A: the page is now empty, has no predecessor, and has a
 * successor.  Copy the successor's contents into this page (keeping this
 * page's number and LSN) and free the successor.
 */
if (reclaim_page && NUM_ENT(p) == 0 && PREV_PGNO(p) == PGNO_INVALID &&
NEXT_PGNO(p) != PGNO_INVALID) {
PAGE *n_pagep, *nn_pagep;
db_pgno_t tmp_pgno;
if ((ret =
__ham_get_page(dbp, NEXT_PGNO(p), &n_pagep)) != 0)
return (ret);
/* The page after the successor needs its back-pointer fixed. */
if (NEXT_PGNO(n_pagep) != PGNO_INVALID) {
if ((ret =
__ham_get_page(dbp, NEXT_PGNO(n_pagep),
&nn_pagep)) != 0) {
(void) __ham_put_page(dbp, n_pagep, 0);
return (ret);
}
}
if (DB_LOGGING(dbc)) {
key_dbt.data = n_pagep;
key_dbt.size = hcp->hdr->pagesize;
if ((ret = __ham_copypage_log(dbenv->lg_info,
dbc->txn, &new_lsn, 0, dbp->log_fileid, PGNO(p),
&LSN(p), PGNO(n_pagep), &LSN(n_pagep),
NEXT_PGNO(n_pagep),
NEXT_PGNO(n_pagep) == PGNO_INVALID ? NULL :
&LSN(nn_pagep), &key_dbt)) != 0)
return (ret);
LSN(p) = new_lsn;
LSN(n_pagep) = new_lsn;
if (NEXT_PGNO(n_pagep) != PGNO_INVALID)
LSN(nn_pagep) = new_lsn;
}
if (NEXT_PGNO(n_pagep) != PGNO_INVALID) {
PREV_PGNO(nn_pagep) = PGNO(p);
(void)__ham_put_page(dbp, nn_pagep, 1);
}
/* Overwrite p with the successor, preserving p's identity. */
tmp_pgno = PGNO(p);
tmp_lsn = LSN(p);
memcpy(p, n_pagep, hcp->hdr->pagesize);
PGNO(p) = tmp_pgno;
LSN(p) = tmp_lsn;
PREV_PGNO(p) = PGNO_INVALID;
/* Cursor now refers to the first pair of the refilled page. */
hcp->bndx = 0;
hcp->pgno = PGNO(p);
F_SET(hcp, H_DELETED);
chg_pgno = PGNO(p);
if ((ret = __ham_dirty_page(dbp, p)) != 0 ||
(ret = __ham_del_page(dbc, n_pagep)) != 0)
return (ret);
/*
 * Page case B: the page is now empty and has a predecessor.  Unlink it
 * from the chain and free it.
 */
} else if (reclaim_page &&
NUM_ENT(p) == 0 && PREV_PGNO(p) != PGNO_INVALID) {
PAGE *n_pagep, *p_pagep;
if ((ret =
__ham_get_page(dbp, PREV_PGNO(p), &p_pagep)) != 0)
return (ret);
if (NEXT_PGNO(p) != PGNO_INVALID) {
if ((ret = __ham_get_page(dbp,
NEXT_PGNO(p), &n_pagep)) != 0) {
(void)__ham_put_page(dbp, p_pagep, 0);
return (ret);
}
n_lsn = &LSN(n_pagep);
} else {
n_pagep = NULL;
n_lsn = NULL;
}
/* Splice the neighbours together around p. */
NEXT_PGNO(p_pagep) = NEXT_PGNO(p);
if (n_pagep != NULL)
PREV_PGNO(n_pagep) = PGNO(p_pagep);
if (DB_LOGGING(dbc)) {
if ((ret = __ham_newpage_log(dbenv->lg_info,
dbc->txn, &new_lsn, 0, DELOVFL,
dbp->log_fileid, PREV_PGNO(p), &LSN(p_pagep),
PGNO(p), &LSN(p), NEXT_PGNO(p), n_lsn)) != 0)
return (ret);
LSN(p_pagep) = new_lsn;
if (n_pagep)
LSN(n_pagep) = new_lsn;
LSN(p) = new_lsn;
}
/*
 * Move the cursor to the following page and drop its pointer to p,
 * since p is about to be handed to __ham_del_page.
 */
hcp->pgno = NEXT_PGNO(p);
hcp->bndx = 0;
hcp->pagep = NULL;
chg_pgno = PGNO(p);
ret = __ham_del_page(dbc, p);
/* Always release both neighbours; keep the first error seen. */
if ((tret = __ham_put_page(dbp, p_pagep, 1)) != 0 &&
ret == 0)
ret = tret;
if (n_pagep != NULL &&
(tret = __ham_put_page(dbp, n_pagep, 1)) != 0 &&
ret == 0)
ret = tret;
if (ret != 0)
return (ret);
/* Page case C: no page reclamation; mark the cursor deleted. */
} else {
F_SET(hcp, H_DELETED);
chg_pgno = hcp->pgno;
ret = __ham_dirty_page(dbp, p);
}
/* Report the change on chg_pgno via __ham_c_update. */
__ham_c_update(hcp, chg_pgno, 0, 0, 0);
/* The pair is gone, so the cursor has no duplicate page any more. */
hcp->dpgno = PGNO_INVALID;
F_CLR(hcp, H_OK);
return (ret);
}
/*
 * __ham_replpair --
 *	Replace all or part of the data item of the pair the cursor is on
 *	(hcp->bndx on hcp->pagep) with the contents of dbt.
 *
 *	dbc      - cursor; the HASH_CURSOR is taken from dbc->internal.
 *	dbt      - replacement: dbt->size bytes of dbt->data replace
 *	           dbt->dlen bytes starting at offset dbt->doff.
 *	make_dup - passed through to the replace log record.
 *
 *	If the item is stored off-page, or the size change does not fit in
 *	the page's free space, the pair is deleted and re-added (building
 *	the merged data in a temporary buffer for a partial replace).
 *	Otherwise the replacement is logged and done in place.
 *
 *	Returns 0 on success or the first error encountered.
 */
int
__ham_replpair(dbc, dbt, make_dup)
	DBC *dbc;
	DBT *dbt;
	u_int32_t make_dup;
{
	DB *dbp;
	HASH_CURSOR *hcp;
	DBT old_dbt, tdata, tmp;
	DB_LSN new_lsn;
	int32_t change;
	u_int32_t len;
	int is_big, ret, type;
	u_int8_t *beg, *dest, *end, *hk, *src;

	dbp = dbc->dbp;
	hcp = (HASH_CURSOR *)dbc->internal;

	/* Net growth (or shrinkage, if negative) of the data item. */
	change = dbt->size - dbt->dlen;

	hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
	is_big = HPAGE_PTYPE(hk) == H_OFFPAGE;

	/* Current length of the data item. */
	if (is_big)
		memcpy(&len, HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
	else
		len = LEN_HKEYDATA(hcp->pagep,
		    dbp->pgsize, H_DATAINDEX(hcp->bndx));

	/* Replacing past the current end extends the item. */
	if (dbt->doff + dbt->dlen > len)
		change += dbt->doff + dbt->dlen - len;

	if (change > (int32_t)P_FREESPACE(hcp->pagep) || is_big) {
		/*
		 * Cannot be done in place: delete the pair and add it
		 * again.  Either way a private copy of the key is needed.
		 */
		tmp.flags = 0;
		F_SET(&tmp, DB_DBT_MALLOC | DB_DBT_INTERNAL);
		if ((ret =
		    __db_ret(dbp, hcp->pagep, H_KEYINDEX(hcp->bndx),
		    &tmp, &dbc->rkey.data, &dbc->rkey.size)) != 0)
			return (ret);

		if (dbt->doff == 0 && dbt->dlen == len) {
			/* Whole-item overwrite: plain delete + add. */
			ret = __ham_del_pair(dbc, 0);
			if (ret == 0)
				ret = __ham_add_el(dbc, &tmp, dbt, H_KEYDATA);
		} else {
			/* Partial replace: build the merged data item. */
			type = HPAGE_PTYPE(hk) != H_OFFPAGE ?
			    HPAGE_PTYPE(hk) : H_KEYDATA;
			tdata.flags = 0;
			F_SET(&tdata, DB_DBT_MALLOC | DB_DBT_INTERNAL);

			if ((ret = __db_ret(dbp, hcp->pagep,
			    H_DATAINDEX(hcp->bndx), &tdata, &dbc->rdata.data,
			    &dbc->rdata.size)) != 0)
				goto err;

			if ((ret = __ham_del_pair(dbc, 0)) != 0) {
				__os_free(tdata.data, tdata.size);
				goto err;
			}

			if (change > 0) {
				/*
				 * Grow the buffer.  On failure release both
				 * temporary buffers instead of returning
				 * directly, which used to leak them.  This
				 * assumes __os_realloc, like realloc, leaves
				 * the original buffer valid on failure --
				 * TODO confirm.
				 */
				if ((ret = __os_realloc(&tdata.data,
				    tdata.size + change)) != 0) {
					__os_free(tdata.data, tdata.size);
					goto err;
				}
				memset((u_int8_t *)tdata.data + tdata.size,
				    0, change);
			}
			end = (u_int8_t *)tdata.data + tdata.size;

			/* Shift the tail that follows the replaced region. */
			src = (u_int8_t *)tdata.data + dbt->doff + dbt->dlen;
			if (src < end && tdata.size > dbt->doff + dbt->dlen) {
				len = tdata.size - dbt->doff - dbt->dlen;
				dest = src + change;
				memmove(dest, src, len);
			}
			memcpy((u_int8_t *)tdata.data + dbt->doff,
			    dbt->data, dbt->size);
			tdata.size += change;

			ret = __ham_add_el(dbc, &tmp, &tdata, type);
			__os_free(tdata.data, tdata.size);
		}
err:		__os_free(tmp.data, tmp.size);
		return (ret);
	}

	/* In-place replacement: log the old bytes, then rewrite the page. */
	beg = HKEYDATA_DATA(H_PAIRDATA(hcp->pagep, hcp->bndx));
	beg += dbt->doff;

	if (DB_LOGGING(dbc)) {
		old_dbt.data = beg;
		old_dbt.size = dbt->dlen;
		if ((ret = __ham_replace_log(dbp->dbenv->lg_info,
		    dbc->txn, &new_lsn, 0, dbp->log_fileid, PGNO(hcp->pagep),
		    (u_int32_t)H_DATAINDEX(hcp->bndx), &LSN(hcp->pagep),
		    (u_int32_t)dbt->doff, &old_dbt, dbt, make_dup)) != 0)
			return (ret);

		LSN(hcp->pagep) = new_lsn;
	}

	__ham_onpage_replace(hcp->pagep, dbp->pgsize,
	    (u_int32_t)H_DATAINDEX(hcp->bndx), (int32_t)dbt->doff, change, dbt);

	return (0);
}
/*
 * __ham_onpage_replace --
 *	Rewrite part of item ndx on pagep in place, growing or shrinking the
 *	item by `change' bytes.
 *
 *	pagep  - page modified in place.
 *	pgsize - page size, used by LEN_HKEYDATA.
 *	ndx    - index (in pagep->inp[]) of the item being changed.
 *	off    - byte offset within the item's data at which dbt is written;
 *	         a negative value means dbt is copied to the very start of
 *	         the entry (P_ENTRY) rather than to its HKEYDATA data area.
 *	change - number of bytes by which the item grows (positive) or
 *	         shrinks (negative).
 *	dbt    - replacement bytes (data/size).
 *
 *	No free-space check is made here.
 */
void
__ham_onpage_replace(pagep, pgsize, ndx, off, change, dbt)
PAGE *pagep;
size_t pgsize;
u_int32_t ndx;
int32_t off;
int32_t change;
DBT *dbt;
{
db_indx_t i;
int32_t len;
u_int8_t *src, *dest;
int zero_me;
if (change != 0) {
zero_me = 0;
/* src is the lowest occupied byte of the item area. */
src = (u_int8_t *)(pagep) + HOFFSET(pagep);
/*
 * len is how many bytes starting at src must be shifted: up to the
 * start of the entry (off < 0), up to the end of the item when off is
 * at or beyond its current length (the gap is then zero-filled), or up
 * to the replacement offset otherwise.
 */
if (off < 0)
len = pagep->inp[ndx] - HOFFSET(pagep);
else if ((u_int32_t)off >= LEN_HKEYDATA(pagep, pgsize, ndx)) {
len = HKEYDATA_DATA(P_ENTRY(pagep, ndx)) +
LEN_HKEYDATA(pagep, pgsize, ndx) - src;
zero_me = 1;
} else
len = (HKEYDATA_DATA(P_ENTRY(pagep, ndx)) + off) - src;
/* Regions may overlap, hence memmove. */
dest = src - change;
memmove(dest, src, len);
if (zero_me)
memset(dest + len, 0, change);
/* Item ndx and every later entry moved by `change' bytes. */
for (i = ndx; i < NUM_ENT(pagep); i++)
pagep->inp[i] -= change;
HOFFSET(pagep) -= change;
}
/* Finally copy the replacement bytes into position. */
if (off >= 0)
memcpy(HKEYDATA_DATA(P_ENTRY(pagep, ndx)) + off,
dbt->data, dbt->size);
else
memcpy(P_ENTRY(pagep, ndx), dbt->data, dbt->size);
}
/*
 * __ham_split_page --
 *	Split bucket obucket: redistribute every pair on its page chain
 *	between obucket and the new bucket nbucket according to
 *	__ham_call_hash.
 *
 *	dbc     - cursor; the HASH_CURSOR is taken from dbc->internal.
 *	obucket - bucket being split.
 *	nbucket - bucket receiving the pairs that no longer hash to obucket.
 *
 *	The first page of the old bucket is copied into hcp->split_buf and
 *	then re-initialized; pairs are read from the copy (and from each
 *	later page of the old chain) and appended to whichever of the two
 *	destination chains they belong to, adding overflow pages as needed.
 *	Old chain pages other than the bucket page are freed.
 *
 *	Returns 0 on success or the first error encountered.
 *
 *	NOTE(review): the err path does not free big_buf, so a failure after
 *	__db_ret has allocated it appears to leak that buffer -- confirm.
 */
int
__ham_split_page(dbc, obucket, nbucket)
DBC *dbc;
u_int32_t obucket, nbucket;
{
DB *dbp;
HASH_CURSOR *hcp;
DBT key, page_dbt;
DB_ENV *dbenv;
DB_LSN new_lsn;
PAGE **pp, *old_pagep, *temp_pagep, *new_pagep;
db_indx_t n;
db_pgno_t bucket_pgno, next_pgno;
u_int32_t big_len, len;
int ret, tret;
void *big_buf;
dbp = dbc->dbp;
hcp = (HASH_CURSOR *)dbc->internal;
dbenv = dbp->dbenv;
temp_pagep = old_pagep = new_pagep = NULL;
/* Get the old bucket's first page and create the new bucket's page. */
bucket_pgno = BUCKET_TO_PAGE(hcp, obucket);
if ((ret = __ham_get_page(dbp, bucket_pgno, &old_pagep)) != 0)
return (ret);
if ((ret = __ham_new_page(dbp, BUCKET_TO_PAGE(hcp, nbucket), P_HASH,
&new_pagep)) != 0)
goto err;
/* Work from a private copy so the real page can be rebuilt from scratch. */
temp_pagep = hcp->split_buf;
memcpy(temp_pagep, old_pagep, hcp->hdr->pagesize);
/* Log the pre-split image of the old page. */
if (DB_LOGGING(dbc)) {
page_dbt.size = hcp->hdr->pagesize;
page_dbt.data = old_pagep;
if ((ret = __ham_splitdata_log(dbenv->lg_info,
dbc->txn, &new_lsn, 0, dbp->log_fileid, SPLITOLD,
PGNO(old_pagep), &page_dbt, &LSN(old_pagep))) != 0)
goto err;
}
/* Empty the old bucket page; P_INIT resets its header fields. */
P_INIT(old_pagep, hcp->hdr->pagesize, PGNO(old_pagep), PGNO_INVALID,
PGNO_INVALID, 0, P_HASH);
if (DB_LOGGING(dbc))
LSN(old_pagep) = new_lsn;
big_len = 0;
big_buf = NULL;
key.flags = 0;
/* Walk every page of the old chain, starting with the private copy. */
while (temp_pagep != NULL) {
for (n = 0; n < (db_indx_t)H_NUMPAIRS(temp_pagep); n++) {
/* Retrieve the key so it can be re-hashed. */
if ((ret =
__db_ret(dbp, temp_pagep, H_KEYINDEX(n),
&key, &big_buf, &big_len)) != 0)
goto err;
/* pp selects the destination chain's current tail page. */
if (__ham_call_hash(hcp, key.data, key.size)
== obucket)
pp = &old_pagep;
else
pp = &new_pagep;
/* Space needed: both items plus their two index slots. */
len = LEN_HITEM(temp_pagep, hcp->hdr->pagesize,
H_DATAINDEX(n)) +
LEN_HITEM(temp_pagep, hcp->hdr->pagesize,
H_KEYINDEX(n)) +
2 * sizeof(db_indx_t);
/*
 * Destination page is full: log its contents, then chain on an
 * overflow page (which releases the full page and updates *pp).
 */
if (P_FREESPACE(*pp) < len) {
if (DB_LOGGING(dbc)) {
page_dbt.size = hcp->hdr->pagesize;
page_dbt.data = *pp;
if ((ret = __ham_splitdata_log(
dbenv->lg_info, dbc->txn,
&new_lsn, 0, dbp->log_fileid,
SPLITNEW, PGNO(*pp), &page_dbt,
&LSN(*pp))) != 0)
goto err;
LSN(*pp) = new_lsn;
}
if ((ret =
__ham_add_ovflpage(dbc, *pp, 1, pp)) != 0)
goto err;
}
/* Copy the key item, then the data item, to the destination. */
__ham_copy_item(dbp->pgsize,
temp_pagep, H_KEYINDEX(n), *pp);
__ham_copy_item(dbp->pgsize,
temp_pagep, H_DATAINDEX(n), *pp);
}
next_pgno = NEXT_PGNO(temp_pagep);
/* Source pages other than the bucket page itself are freed once drained. */
if (PGNO(temp_pagep) != bucket_pgno && (ret =
__ham_del_page(dbc, temp_pagep)) != 0)
goto err;
if (next_pgno == PGNO_INVALID)
temp_pagep = NULL;
else if ((ret =
__ham_get_page(dbp, next_pgno, &temp_pagep)) != 0)
goto err;
/* Log the pre-split image of the next source page. */
if (temp_pagep != NULL && DB_LOGGING(dbc)) {
page_dbt.size = hcp->hdr->pagesize;
page_dbt.data = temp_pagep;
if ((ret = __ham_splitdata_log(dbenv->lg_info,
dbc->txn, &new_lsn, 0, dbp->log_fileid,
SPLITOLD, PGNO(temp_pagep),
&page_dbt, &LSN(temp_pagep))) != 0)
goto err;
LSN(temp_pagep) = new_lsn;
}
}
if (big_buf != NULL)
__os_free(big_buf, big_len);
/*
 * The loop above only exits with temp_pagep == NULL, so this check is
 * not expected to fire.
 */
if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno &&
(ret = __ham_del_page(dbc, temp_pagep)) != 0)
goto err;
/* Log the final images of both destination tail pages. */
if (DB_LOGGING(dbc)) {
page_dbt.size = hcp->hdr->pagesize;
page_dbt.data = old_pagep;
if ((ret = __ham_splitdata_log(dbenv->lg_info,
dbc->txn, &new_lsn, 0, dbp->log_fileid,
SPLITNEW, PGNO(old_pagep),
&page_dbt, &LSN(old_pagep))) != 0)
goto err;
LSN(old_pagep) = new_lsn;
page_dbt.data = new_pagep;
if ((ret = __ham_splitdata_log(dbenv->lg_info,
dbc->txn, &new_lsn, 0, dbp->log_fileid,
SPLITNEW, PGNO(new_pagep), &page_dbt, &LSN(new_pagep))) != 0)
goto err;
LSN(new_pagep) = new_lsn;
}
/* Release both tail pages as dirty; keep the first error. */
ret = __ham_put_page(dbp, old_pagep, 1);
if ((tret = __ham_put_page(dbp, new_pagep, 1)) != 0 &&
ret == 0)
ret = tret;
/* The `if (0)' block is entered only via `goto err'. */
if (0) {
err: if (old_pagep != NULL)
(void)__ham_put_page(dbp, old_pagep, 1);
if (new_pagep != NULL)
(void)__ham_put_page(dbp, new_pagep, 1);
if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno)
(void)__ham_put_page(dbp, temp_pagep, 1);
}
return (ret);
}
/*
 * __ham_add_el --
 *	Add a key/data pair to the bucket chain the cursor is on.
 *
 *	dbc  - cursor; the HASH_CURSOR is taken from dbc->internal.
 *	key  - key to store.
 *	val  - data to store.
 *	type - item type used for the data item when it is stored on-page.
 *
 *	Items for which ISBIG() is true are written off-page with __db_poff
 *	and represented on the page by an HOFFPAGE structure.  The pair is
 *	placed on the first page in the chain with enough free space; if
 *	none has room, an overflow page is added.  H_EXPAND is set on the
 *	cursor when an overflow page was added or the page now holds more
 *	than hdr->ffactor pairs (ffactor != 0).
 *
 *	Returns 0 on success or the first error encountered.
 */
int
__ham_add_el(dbc, key, val, type)
DBC *dbc;
const DBT *key, *val;
int type;
{
DB *dbp;
HASH_CURSOR *hcp;
const DBT *pkey, *pdata;
DBT key_dbt, data_dbt;
DB_LSN new_lsn;
HOFFPAGE doff, koff;
db_pgno_t next_pgno;
u_int32_t data_size, key_size, pairsize, rectype;
int do_expand, is_keybig, is_databig, ret;
int key_type, data_type;
dbp = dbc->dbp;
hcp = (HASH_CURSOR *)dbc->internal;
do_expand = 0;
/*
 * If no page is held, start from the page remembered in seek_found_page
 * when there is one, otherwise from the cursor's current page number.
 */
if (hcp->pagep == NULL && (ret = __ham_get_page(dbp,
hcp->seek_found_page != PGNO_INVALID ? hcp->seek_found_page :
hcp->pgno, &hcp->pagep)) != 0)
return (ret);
/* On-page footprint of each item; big items take HOFFPAGE_PSIZE. */
key_size = HKEYDATA_PSIZE(key->size);
data_size = HKEYDATA_PSIZE(val->size);
is_keybig = ISBIG(hcp, key->size);
is_databig = ISBIG(hcp, val->size);
if (is_keybig)
key_size = HOFFPAGE_PSIZE;
if (is_databig)
data_size = HOFFPAGE_PSIZE;
pairsize = key_size + data_size;
/*
 * Walk forward along the chain until a page with room is found, an
 * empty page is reached, or the chain ends.
 */
while (H_NUMPAIRS(hcp->pagep) && NEXT_PGNO(hcp->pagep) !=
PGNO_INVALID) {
if (P_FREESPACE(hcp->pagep) >= pairsize)
break;
next_pgno = NEXT_PGNO(hcp->pagep);
if ((ret =
__ham_next_cpage(dbc, next_pgno, 0, 0)) != 0)
return (ret);
}
/* Still no room: extend the chain with an overflow page. */
if (P_FREESPACE(hcp->pagep) < pairsize) {
do_expand = 1;
if ((ret = __ham_add_ovflpage(dbc,
hcp->pagep, 1, &hcp->pagep)) != 0)
return (ret);
hcp->pgno = PGNO(hcp->pagep);
}
/* The new pair will be the last one on the page. */
hcp->bndx = H_NUMPAIRS(hcp->pagep);
F_CLR(hcp, H_DELETED);
/* Big key: write it off-page and store an HOFFPAGE reference instead. */
if (is_keybig) {
koff.type = H_OFFPAGE;
UMRW(koff.unused[0]);
UMRW(koff.unused[1]);
UMRW(koff.unused[2]);
if ((ret = __db_poff(dbc,
key, &koff.pgno, __ham_overflow_page)) != 0)
return (ret);
koff.tlen = key->size;
key_dbt.data = &koff;
key_dbt.size = sizeof(koff);
pkey = &key_dbt;
key_type = H_OFFPAGE;
} else {
pkey = key;
key_type = H_KEYDATA;
}
/* Same treatment for big data; small data keeps the caller's type. */
if (is_databig) {
doff.type = H_OFFPAGE;
UMRW(doff.unused[0]);
UMRW(doff.unused[1]);
UMRW(doff.unused[2]);
if ((ret = __db_poff(dbc,
val, &doff.pgno, __ham_overflow_page)) != 0)
return (ret);
doff.tlen = val->size;
data_dbt.data = &doff;
data_dbt.size = sizeof(doff);
pdata = &data_dbt;
data_type = H_OFFPAGE;
} else {
pdata = val;
data_type = type;
}
/* Log the insert (flagging big key/data) before changing the page. */
if (DB_LOGGING(dbc)) {
rectype = PUTPAIR;
if (is_databig)
rectype |= PAIR_DATAMASK;
if (is_keybig)
rectype |= PAIR_KEYMASK;
if ((ret = __ham_insdel_log(dbp->dbenv->lg_info,
dbc->txn, &new_lsn, 0, rectype,
dbp->log_fileid, PGNO(hcp->pagep),
(u_int32_t)H_NUMPAIRS(hcp->pagep),
&LSN(hcp->pagep), pkey, pdata)) != 0)
return (ret);
LSN(hcp->pagep) = new_lsn;
}
/* Key first, then data, so they occupy consecutive index slots. */
__ham_putitem(hcp->pagep, pkey, key_type);
__ham_putitem(hcp->pagep, pdata, data_type);
/* Remember which page the pair landed on. */
hcp->pgno = PGNO(hcp->pagep);
/* The element count is only maintained when DB_AM_LOCKING is not set. */
if (!F_ISSET(dbp, DB_AM_LOCKING))
hcp->hdr->nelem++;
if (do_expand || (hcp->hdr->ffactor != 0 &&
(u_int32_t)H_NUMPAIRS(hcp->pagep) > hcp->hdr->ffactor))
F_SET(hcp, H_EXPAND);
return (0);
}
/*
 * __ham_copy_item --
 *	Append a copy of entry src_ndx of src_page to dest_page.
 *
 *	pgsize    - page size, used by LEN_HITEM to measure the entry.
 *	src_page  - page holding the entry to copy.
 *	src_ndx   - index of that entry in src_page->inp[].
 *	dest_page - page that receives the entry as its new last entry.
 *
 *	No free-space check is made here.
 */
void
__ham_copy_item(pgsize, src_page, src_ndx, dest_page)
	size_t pgsize;
	PAGE *src_page;
	u_int32_t src_ndx;
	PAGE *dest_page;
{
	u_int32_t nbytes;
	void *from, *to;

	/* Measure and locate the source entry. */
	nbytes = LEN_HITEM(src_page, pgsize, src_ndx);
	from = P_ENTRY(src_page, src_ndx);

	/* Carve out room at the low end of the destination's item area. */
	HOFFSET(dest_page) -= nbytes;
	dest_page->inp[NUM_ENT(dest_page)] = HOFFSET(dest_page);
	to = P_ENTRY(dest_page, NUM_ENT(dest_page));
	NUM_ENT(dest_page)++;

	memcpy(to, from, nbytes);
}
/*
 * __ham_add_ovflpage --
 *	Allocate a new P_HASH overflow page and link it after pagep.
 *
 *	dbc     - cursor; the HASH_CURSOR is taken from dbc->internal.
 *	pagep   - page to which the new page is chained.
 *	release - non-zero to release pagep (marked dirty) before returning.
 *	pp      - receives the new page.
 *
 *	Returns the error from page allocation or logging if either fails
 *	(in which case *pp is not written); otherwise returns the result of
 *	releasing pagep when `release' is set, or 0.
 */
int
__ham_add_ovflpage(dbc, pagep, release, pp)
	DBC *dbc;
	PAGE *pagep;
	int release;
	PAGE **pp;
{
	DB *dbp;
	HASH_CURSOR *hcp;
	DB_LSN lsn;
	PAGE *ovfl;
	int ret;

	dbp = dbc->dbp;
	hcp = (HASH_CURSOR *)dbc->internal;

	ret = __ham_overflow_page(dbc, P_HASH, &ovfl);
	if (ret != 0)
		return (ret);

	if (DB_LOGGING(dbc)) {
		ret = __ham_newpage_log(dbp->dbenv->lg_info,
		    dbc->txn, &lsn, 0, PUTOVFL,
		    dbp->log_fileid, PGNO(pagep), &LSN(pagep),
		    PGNO(ovfl), &LSN(ovfl), PGNO_INVALID, NULL);
		if (ret != 0)
			return (ret);

		/* Stamp both pages with the LSN of the log record. */
		LSN(ovfl) = lsn;
		LSN(pagep) = lsn;
	}

	/* Link the two pages together. */
	PREV_PGNO(ovfl) = PGNO(pagep);
	NEXT_PGNO(pagep) = PGNO(ovfl);

	if (release)
		ret = __ham_put_page(dbp, pagep, 1);

	hcp->stats.hash_overflows++;
	*pp = ovfl;
	return (ret);
}
/*
 * __ham_new_page --
 *	Fetch page `addr' from the memory pool with DB_MPOOL_CREATE and
 *	initialize it as an empty page of the given type.
 *
 *	dbp  - database handle (supplies mpf and pgsize).
 *	addr - page number to fetch.
 *	type - page type stored in the page header by P_INIT.
 *	pp   - receives the page on success.
 *
 *	Returns 0 on success or the error from memp_fget.
 */
int
__ham_new_page(dbp, addr, type, pp)
	DB *dbp;
	u_int32_t addr, type;
	PAGE **pp;
{
	PAGE *fresh;
	int ret;

	ret = memp_fget(dbp->mpf, &addr, DB_MPOOL_CREATE, &fresh);
	if (ret != 0)
		return (ret);

	/* Start from a clean header: no neighbours, level 0. */
	P_INIT(fresh, dbp->pgsize, addr, PGNO_INVALID, PGNO_INVALID, 0, type);

	*pp = fresh;
	return (0);
}
/*
 * __ham_del_page --
 *	Put pagep on the free list headed by hdr->last_freed and release it.
 *
 *	dbc   - cursor; the HASH_CURSOR is taken from dbc->internal.
 *	pagep - page to free.
 *
 *	Returns 0 on success.  If DIRTY_META sets ret, the page is released
 *	(not dirty) and that error is returned.  If the log call fails, the
 *	error is returned and pagep is NOT released here.
 *
 *	NOTE(review): the two failure paths treat pagep differently (released
 *	vs. still held), so callers cannot tell from the return value whether
 *	they still own the page -- confirm intended ownership.
 */
int
__ham_del_page(dbc, pagep)
DBC *dbc;
PAGE *pagep;
{
DB *dbp;
HASH_CURSOR *hcp;
DB_LSN new_lsn;
int ret;
dbp = dbc->dbp;
hcp = (HASH_CURSOR *)dbc->internal;
ret = 0;
/* DIRTY_META reports failure through ret; presumably it locks/dirties the meta page. */
DIRTY_META(dbp, hcp, ret);
if (ret != 0) {
if (ret != EAGAIN)
__db_err(dbp->dbenv,
"free_ovflpage: unable to lock meta data page %s\n",
strerror(ret));
/* Release the page before returning the error. */
(void)__ham_put_page(dbp, pagep, 0);
return (ret);
}
/* Log the free before modifying either the page or the header. */
if (DB_LOGGING(dbc)) {
if ((ret = __ham_newpgno_log(dbp->dbenv->lg_info,
dbc->txn, &new_lsn, 0, DELPGNO,
dbp->log_fileid, PGNO(pagep), hcp->hdr->last_freed,
(u_int32_t)TYPE(pagep), NEXT_PGNO(pagep), P_INVALID,
&LSN(pagep), &hcp->hdr->lsn)) != 0)
return (ret);
hcp->hdr->lsn = new_lsn;
LSN(pagep) = new_lsn;
}
#ifdef DIAGNOSTIC
/* Diagnostic builds overwrite the page with 0xdb, keeping pgno and lsn. */
{
db_pgno_t __pgno;
DB_LSN __lsn;
__pgno = pagep->pgno;
__lsn = pagep->lsn;
memset(pagep, 0xdb, dbp->pgsize);
pagep->pgno = __pgno;
pagep->lsn = __lsn;
}
#endif
/* Push the page onto the head of the free list. */
TYPE(pagep) = P_INVALID;
NEXT_PGNO(pagep) = hcp->hdr->last_freed;
hcp->hdr->last_freed = PGNO(pagep);
return (__ham_put_page(dbp, pagep, 1));
}
/*
 * __ham_put_page --
 *	Return a page to the memory pool via memp_fput.
 *
 *	dbp      - database handle (supplies mpf).
 *	pagep    - page being released.
 *	is_dirty - non-zero to pass DB_MPOOL_DIRTY, zero to pass no flags.
 *
 *	Returns the result of memp_fput.
 */
int
__ham_put_page(dbp, pagep, is_dirty)
	DB *dbp;
	PAGE *pagep;
	int32_t is_dirty;
{
#ifdef DEBUG_SLOW
	__account_page(dbp, ((BKT *)((char *)pagep - sizeof(BKT)))->pgno, -1);
#endif
	if (is_dirty)
		return (memp_fput(dbp->mpf, pagep, DB_MPOOL_DIRTY));
	return (memp_fput(dbp->mpf, pagep, 0));
}
/*
 * __ham_dirty_page --
 *	Set DB_MPOOL_DIRTY on a page through memp_fset, without releasing
 *	the page.
 *
 *	Returns the result of memp_fset.
 */
int
__ham_dirty_page(dbp, pagep)
	DB *dbp;
	PAGE *pagep;
{
	int ret;

	ret = memp_fset(dbp->mpf, pagep, DB_MPOOL_DIRTY);
	return (ret);
}
/*
 * __ham_get_page --
 *	Fetch page `addr' from the memory pool with DB_MPOOL_CREATE.
 *
 *	dbp   - database handle (supplies mpf).
 *	addr  - page number to fetch.
 *	pagep - receives the page pointer.
 *
 *	Returns the result of memp_fget.
 */
int
__ham_get_page(dbp, addr, pagep)
	DB *dbp;
	db_pgno_t addr;
	PAGE **pagep;
{
	int status;

	status = memp_fget(dbp->mpf, &addr, DB_MPOOL_CREATE, pagep);

#ifdef DEBUG_SLOW
	/* Debug accounting of pages currently held. */
	if (*pagep != NULL)
		__account_page(dbp, addr, 1);
#endif

	return (status);
}
/*
 * __ham_overflow_page --
 *	Allocate a page of the given type, reusing the head of the free list
 *	(hdr->last_freed) when there is one and otherwise taking a new page
 *	number at the current overflow point.
 *
 *	dbc  - cursor; the HASH_CURSOR is taken from dbc->internal.
 *	type - page type for the allocated page.
 *	pp   - receives the page on success.
 *
 *	Returns 0 on success, ENOMEM when the computed page number exceeds
 *	MAX_PAGES(hcp), or the error from DIRTY_META, a page get, or the log
 *	call.
 *
 *	NOTE(review): when the log call fails after a free-list page was
 *	fetched, that page is not released; and in the new-page case
 *	spares[splitnum] stays incremented -- confirm whether intended.
 */
int
__ham_overflow_page(dbc, type, pp)
DBC *dbc;
u_int32_t type;
PAGE **pp;
{
DB *dbp;
HASH_CURSOR *hcp;
DB_LSN *lsnp, new_lsn;
PAGE *p;
db_pgno_t new_addr, next_free, newalloc_flag;
u_int32_t offset, splitnum;
int ret;
ret = 0;
dbp = dbc->dbp;
hcp = (HASH_CURSOR *)dbc->internal;
/* DIRTY_META reports failure through ret. */
DIRTY_META(dbp, hcp, ret);
if (ret != 0)
return (ret);
/* Step 1: decide which page number to use. */
new_addr = hcp->hdr->last_freed;
if (new_addr != PGNO_INVALID) {
/* Reuse the head of the free list. */
if ((ret = __ham_get_page(dbp, new_addr, &p)) != 0)
return (ret);
next_free = NEXT_PGNO(p);
lsnp = &LSN(p);
newalloc_flag = 0;
} else {
/* Free list empty: take the next page at the overflow point. */
splitnum = hcp->hdr->ovfl_point;
hcp->hdr->spares[splitnum]++;
offset = hcp->hdr->spares[splitnum] -
(splitnum ? hcp->hdr->spares[splitnum - 1] : 0);
new_addr = PGNO_OF(hcp, hcp->hdr->ovfl_point, offset);
if (new_addr > MAX_PAGES(hcp)) {
__db_err(dbp->dbenv, "hash: out of file pages");
/* Undo the reservation made above. */
hcp->hdr->spares[splitnum]--;
return (ENOMEM);
}
next_free = PGNO_INVALID;
p = NULL;
lsnp = NULL;
newalloc_flag = 1;
}
/* Step 2: log the allocation before touching the page or free list. */
if (DB_LOGGING(dbc)) {
if ((ret = __ham_newpgno_log(dbp->dbenv->lg_info,
dbc->txn, &new_lsn, 0, ALLOCPGNO,
dbp->log_fileid, new_addr, next_free,
0, newalloc_flag, type, lsnp, &hcp->hdr->lsn)) != 0)
return (ret);
hcp->hdr->lsn = new_lsn;
if (lsnp != NULL)
*lsnp = new_lsn;
}
/* Step 3: complete the allocation. */
if (p != NULL) {
/* Pop the page off the free list and re-initialize it. */
hcp->hdr->last_freed = next_free;
P_INIT(p, hcp->hdr->pagesize, PGNO(p), PGNO_INVALID,
PGNO_INVALID, 0, (u_int8_t)type);
} else {
if ((ret = __ham_new_page(dbp, new_addr, type, &p)) != 0)
return (ret);
}
/* Both initializers reset the header, so the LSN is stamped afterwards. */
if (DB_LOGGING(dbc))
LSN(p) = new_lsn;
*pp = p;
return (0);
}
#ifdef DEBUG
/*
 * __bucket_to_page --
 *	Debug-build function computing the page number of bucket n:
 *	n + 1, plus hdr->spares[__db_log2(n + 1) - 1] when n is not 0.
 */
db_pgno_t
__bucket_to_page(hcp, n)
	HASH_CURSOR *hcp;
	db_pgno_t n;
{
	int page;

	page = n + 1;
	if (n == 0)
		return (page);

	/* Add the spares entry for the level below bucket n's. */
	page += hcp->hdr->spares[__db_log2(n + 1) - 1];
	return (page);
}
#endif
/*
 * __ham_init_ovflpages --
 *	Create the remaining overflow pages for the current overflow point
 *	and push them onto the free list headed by hdr->last_freed.
 *
 *	dbc - cursor; the HASH_CURSOR is taken from dbc->internal.
 *
 *	numpages = ovfl_point + 1 - curpages pages are created, where
 *	curpages is the difference between spares[ovfl_point] and
 *	spares[ovfl_point - 1].  Errors are not reported: the log call's
 *	result is discarded and page creation simply stops at the first
 *	failure.
 *
 *	NOTE(review): spares[ovfl_point - 1] is read unconditionally, so this
 *	presumably must not be called with ovfl_point == 0 -- confirm.
 *	NOTE(review): if curpages exceeds ovfl_point + 1 the unsigned
 *	subtraction for numpages would wrap -- confirm callers prevent it.
 */
void
__ham_init_ovflpages(dbc)
DBC *dbc;
{
DB *dbp;
HASH_CURSOR *hcp;
DB_LSN new_lsn;
PAGE *p;
db_pgno_t last_pgno, new_pgno;
u_int32_t i, curpages, numpages;
dbp = dbc->dbp;
hcp = (HASH_CURSOR *)dbc->internal;
/* Pages already counted at this overflow point, and how many to add. */
curpages = hcp->hdr->spares[hcp->hdr->ovfl_point] -
hcp->hdr->spares[hcp->hdr->ovfl_point - 1];
numpages = hcp->hdr->ovfl_point + 1 - curpages;
last_pgno = hcp->hdr->last_freed;
new_pgno = PGNO_OF(hcp, hcp->hdr->ovfl_point, curpages + 1);
/* Log the batch; without logging, pages get a zeroed LSN. */
if (DB_LOGGING(dbc)) {
(void)__ham_ovfl_log(dbp->dbenv->lg_info,
dbc->txn, &new_lsn, 0, dbp->log_fileid, new_pgno,
numpages, last_pgno, hcp->hdr->ovfl_point, &hcp->hdr->lsn);
hcp->hdr->lsn = new_lsn;
} else
ZERO_LSN(new_lsn);
hcp->hdr->spares[hcp->hdr->ovfl_point] += numpages;
/*
 * Create pages from the highest offset down, each pointing at the
 * previously created one, so the lowest-numbered page ends up at the
 * head of the free list.
 */
for (i = numpages; i > 0; i--) {
if (__ham_new_page(dbp,
PGNO_OF(hcp, hcp->hdr->ovfl_point, curpages + i),
P_INVALID, &p) != 0)
break;
LSN(p) = new_lsn;
NEXT_PGNO(p) = last_pgno;
last_pgno = PGNO(p);
(void)__ham_put_page(dbp, p, 1);
}
hcp->hdr->last_freed = last_pgno;
}
/*
 * __ham_get_cpage --
 *	Make sure the cursor holds the lock and page(s) for its current
 *	position.
 *
 *	dbc  - cursor; the HASH_CURSOR is taken from dbc->internal.
 *	mode - lock mode used if a bucket lock has to be acquired.
 *
 *	With DB_AM_LOCKING set, a lock held on a different bucket is dropped
 *	(it is only passed to lock_put when there is no transaction and the
 *	lock is not flagged H_ORIGINAL) and the current bucket is locked.
 *	Then the main page is fetched if not held -- defaulting to the
 *	bucket's first page, index 0, when the cursor has no page number --
 *	and the duplicate page is fetched if one is recorded but not held.
 *
 *	Returns 0 on success or the first lock/page error.
 */
int
__ham_get_cpage(dbc, mode)
	DBC *dbc;
	db_lockmode_t mode;
{
	DB *dbp;
	HASH_CURSOR *hcp;
	int ret;

	dbp = dbc->dbp;
	hcp = (HASH_CURSOR *)dbc->internal;

	if (F_ISSET(dbp, DB_AM_LOCKING)) {
		/* Holding a lock for some other bucket: let go of it. */
		if (hcp->lock != 0 && hcp->lbucket != hcp->bucket) {
			if (dbc->txn == NULL && !F_ISSET(hcp, H_ORIGINAL)) {
				ret = lock_put(dbp->dbenv->lk_info, hcp->lock);
				if (ret != 0)
					return (ret);
			}
			F_CLR(hcp, H_ORIGINAL);
			hcp->lock = 0;
		}

		if (hcp->lock == 0) {
			ret = __ham_lock_bucket(dbc, mode);
			if (ret != 0)
				return (ret);
		}
		hcp->lbucket = hcp->bucket;
	}

	/* Main page. */
	if (hcp->pagep == NULL) {
		if (hcp->pgno == PGNO_INVALID) {
			hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
			hcp->bndx = 0;
		}

		ret = __ham_get_page(dbp, hcp->pgno, &hcp->pagep);
		if (ret != 0)
			return (ret);
	}

	/* Duplicate page, if the cursor refers to one. */
	if (hcp->dpgno != PGNO_INVALID && hcp->dpagep == NULL) {
		ret = __ham_get_page(dbp, hcp->dpgno, &hcp->dpagep);
		if (ret != 0)
			return (ret);
	}

	return (0);
}
/*
 * __ham_next_cpage --
 *	Swap the cursor's current page (main or duplicate) for page `pgno'.
 *
 *	dbc   - cursor; the HASH_CURSOR is taken from dbc->internal.
 *	pgno  - page to move to.
 *	dirty - dirty flag used when releasing the page being left.
 *	flags - H_ISDUP selects the duplicate page slot (dpagep/dpgno/dndx);
 *	        otherwise the main slot (pagep/pgno/bndx) is used.
 *
 *	On success the selected slot refers to the new page with its index
 *	reset to 0.  Returns 0 or the error from releasing/fetching a page.
 */
int
__ham_next_cpage(dbc, pgno, dirty, flags)
	DBC *dbc;
	db_pgno_t pgno;
	int dirty;
	u_int32_t flags;
{
	DB *dbp;
	HASH_CURSOR *hcp;
	PAGE *held, *p;
	int is_dup, ret;

	dbp = dbc->dbp;
	hcp = (HASH_CURSOR *)dbc->internal;
	is_dup = LF_ISSET(H_ISDUP) ? 1 : 0;

	/* Release whatever page currently occupies the selected slot. */
	held = is_dup ? hcp->dpagep : hcp->pagep;
	if (held != NULL) {
		ret = __ham_put_page(dbp, held, dirty);
		if (ret != 0)
			return (ret);
	}

	ret = __ham_get_page(dbp, pgno, &p);
	if (ret != 0)
		return (ret);

	if (is_dup) {
		hcp->dpagep = p;
		hcp->dpgno = pgno;
		hcp->dndx = 0;
	} else {
		hcp->pagep = p;
		hcp->pgno = pgno;
		hcp->bndx = 0;
	}
	return (0);
}
/*
 * __ham_lock_bucket --
 *	Acquire a lock on the cursor's current bucket, storing it in
 *	hcp->lock.
 *
 *	dbc  - cursor; dbc->lock.pgno is set to the bucket number before the
 *	       lock request is made with dbc->lock_dbt.
 *	mode - requested lock mode.
 *
 *	Uses lock_tget when the cursor has a transaction, lock_get otherwise.
 *	A negative result from the lock call is reported as EAGAIN; any other
 *	result is returned unchanged.
 */
static int
__ham_lock_bucket(dbc, mode)
	DBC *dbc;
	db_lockmode_t mode;
{
	HASH_CURSOR *hcp;
	int ret;

	hcp = (HASH_CURSOR *)dbc->internal;
	dbc->lock.pgno = (db_pgno_t)(hcp->bucket);

	if (dbc->txn != NULL)
		ret = lock_tget(dbc->dbp->dbenv->lk_info, dbc->txn, 0,
		    &dbc->lock_dbt, mode, &hcp->lock);
	else
		ret = lock_get(dbc->dbp->dbenv->lk_info, dbc->locker, 0,
		    &dbc->lock_dbt, mode, &hcp->lock);

	if (ret < 0)
		return (EAGAIN);
	return (ret);
}
/*
 * __ham_dpair --
 *	Remove pair pndx from page p, closing the gap in both the item bytes
 *	and the index array.
 *
 *	dbp  - database handle (supplies pgsize for H_PAIRSIZE).
 *	p    - page modified in place.
 *	pndx - index of the pair to remove.
 *
 *	No logging is done here and the page is not marked dirty.
 */
void
__ham_dpair(dbp, p, pndx)
DB *dbp;
PAGE *p;
u_int32_t pndx;
{
db_indx_t delta, n;
u_int8_t *dest, *src;
/* delta is H_PAIRSIZE for this pair: the amount by which offsets shift. */
delta = H_PAIRSIZE(p, dbp->pgsize, pndx);
/*
 * Unless the last pair is being removed, slide everything stored below
 * it (from HOFFSET(p) up to the pair's data item) up by delta bytes.
 * The regions may overlap, hence memmove.
 */
if ((db_indx_t)pndx != H_NUMPAIRS(p) - 1) {
src = (u_int8_t *)p + HOFFSET(p);
dest = src + delta;
memmove(dest, src, p->inp[H_DATAINDEX(pndx)] - HOFFSET(p));
}
/* Shift each later pair's index entries down one pair, adjusting offsets. */
for (n = (db_indx_t)pndx; n < (db_indx_t)(H_NUMPAIRS(p) - 1); n++) {
p->inp[H_KEYINDEX(n)] = p->inp[H_KEYINDEX(n+1)] + delta;
p->inp[H_DATAINDEX(n)] = p->inp[H_DATAINDEX(n+1)] + delta;
}
/* Page bookkeeping: delta more free bytes, two fewer entries. */
HOFFSET(p) = HOFFSET(p) + delta;
NUM_ENT(p) = NUM_ENT(p) - 2;
}