#include "WIndex.h"
#include <File.h>
#include <fs_attr.h>
#include <Message.h>
#include <Node.h>
#include <ctype.h>
#include <stdlib.h>
#include <stdio.h>
#define IVERSION 1
static int32 kCRCTable = 0;
int32 cmp_i_entries(const WIndexEntry *e1, const WIndexEntry *e2);
void gen_crc_table();
unsigned long update_crc(unsigned long crc_accum, const char *data_blk_ptr,
int data_blk_size);
FileEntry::FileEntry(void)
{
}
FileEntry::FileEntry(const char *entryString)
:
BString(entryString)
{
}
status_t
WIndex::SetTo(const char *dataPath, const char *indexPath)
{
BFile* dataFile;
BFile indexFile;
dataFile = new BFile();
if (dataFile->SetTo(dataPath, B_READ_ONLY) != B_OK) {
delete dataFile;
return B_ERROR;
} else {
bool buildIndex = true;
SetTo(dataFile);
time_t mtime;
time_t modified;
dataFile->GetModificationTime(&mtime);
if (indexFile.SetTo(indexPath, B_READ_ONLY) == B_OK) {
attr_info info;
if ((indexFile.GetAttrInfo("WINDEX:version", &info) == B_OK)) {
uint32 version = 0;
indexFile.ReadAttr("WINDEX:version", B_UINT32_TYPE, 0,
&version, 4);
if (IVERSION == version) {
if ((indexFile.GetAttrInfo("WINDEX:modified", &info)
== B_OK)) {
indexFile.ReadAttr("WINDEX:modified", B_UINT32_TYPE, 0,
&modified, 4);
if (mtime == modified) {
if (UnflattenIndex(&indexFile) == B_OK)
buildIndex = false;
}
}
}
}
indexFile.Unset();
}
if (buildIndex) {
InitIndex();
BuildIndex();
if (indexFile.SetTo(indexPath,
B_WRITE_ONLY | B_CREATE_FILE | B_ERASE_FILE) == B_OK) {
FlattenIndex(&indexFile);
indexFile.WriteAttr("WINDEX:modified", B_UINT32_TYPE, 0,
&mtime, 4);
uint32 version = IVERSION;
indexFile.WriteAttr("WINDEX:version", B_UINT32_TYPE, 0,
&version, 4);
}
}
}
return B_OK;
}
FileEntry::~FileEntry(void)
{
}
WIndex::WIndex(int32 count)
{
fEntryList = NULL;
fDataFile = NULL;
fEntriesPerBlock = count;
fEntrySize = sizeof(WIndexEntry);
if (!atomic_or(&kCRCTable, 1))
gen_crc_table();
}
WIndex::WIndex(BPositionIO *dataFile, int32 count)
{
fEntryList = NULL;
fDataFile = dataFile;
fEntriesPerBlock = count;
fEntrySize = sizeof(WIndexEntry);
if (!atomic_or(&kCRCTable, 1))
gen_crc_table();
}
WIndex::~WIndex(void)
{
if (fEntryList)
free(fEntryList);
delete fDataFile;
}
status_t
WIndex::UnflattenIndex(BPositionIO *io)
{
if (fEntryList)
free(fEntryList);
WIndexHead head;
io->Seek(0, SEEK_SET);
io->Read(&head, sizeof(head));
io->Seek(head.offset, SEEK_SET);
fEntrySize = head.entrySize;
fEntries = head.entries;
fMaxEntries = fEntriesPerBlock;
fBlockSize = fEntriesPerBlock * fEntrySize;
fBlocks = fEntries / fEntriesPerBlock + 1;;
fIsSorted = true;
int32 size = (head.entries + 1) * head.entrySize;
if (!(fEntryList = (uint8 *)malloc(size)))
return B_ERROR;
if (fEntries)
io->Read(fEntryList, size);
return B_OK;
}
status_t
WIndex::FlattenIndex(BPositionIO *io)
{
if (fEntries && !fIsSorted)
SortItems();
WIndexHead head;
head.entries = fEntries;
head.entrySize = fEntrySize;
head.offset = sizeof(WIndexHead);
io->Seek(0, SEEK_SET);
io->Write(&head, sizeof(head));
if (fEntries)
io->Write(fEntryList, head.entries * head.entrySize);
return B_OK;
}
int32
WIndex::Lookup(int32 key)
{
if (!fEntries)
return -1;
if (!fIsSorted)
SortItems();
int32 M, Lb, Ub;
Lb = 0;
Ub = fEntries - 1;
while (true) {
M = (Lb + Ub) / 2;
if (key < ((WIndexEntry *)(fEntryList + (M * fEntrySize)))->key)
Ub = M - 1;
else if (key > ((WIndexEntry *)(fEntryList + (M * fEntrySize)))->key)
Lb = M + 1;
else
return M;
if (Lb > Ub)
return -1;
}
}
status_t
WIndex::AddItem(WIndexEntry *entry)
{
if (_BlockCheck() == B_ERROR)
return B_ERROR;
memcpy(((WIndexEntry *)(fEntryList + (fEntries * fEntrySize))), entry,
fEntrySize);
fEntries++;
fIsSorted = false;
return B_OK;
}
void
WIndex::SortItems(void)
{
qsort(fEntryList, fEntries, fEntrySize,
(int(*)(const void *, const void *))cmp_i_entries);
fIsSorted = true;
}
status_t
WIndex::_BlockCheck(void)
{
if (fEntries < fMaxEntries)
return B_OK;
fBlocks = fEntries / fEntriesPerBlock + 1;
uint8* tmpEntryList = (uint8 *)realloc(fEntryList, fBlockSize * fBlocks);
if (!tmpEntryList) {
free(fEntryList);
fEntryList = NULL;
return B_ERROR;
} else {
fEntryList = tmpEntryList;
}
return B_OK;
}
status_t
WIndex::InitIndex(void)
{
if (fEntryList)
free(fEntryList);
fIsSorted = 0;
fEntries = 0;
fMaxEntries = fEntriesPerBlock;
fBlockSize = fEntriesPerBlock * fEntrySize;
fBlocks = 1;
fEntryList = (uint8 *)malloc(fBlockSize);
if (!fEntryList)
return B_ERROR;
return B_OK;
}
int32
WIndex::GetKey(const char *s)
{
int32 key = 0;
key = update_crc(0, s, strlen(s));
if (key < 0)
key = ~key;
return key;
}
int32
cmp_i_entries(const WIndexEntry *e1, const WIndexEntry *e2)
{
return e1->key - e2->key;
}
status_t
WIndex::SetTo(BPositionIO *dataFile)
{
fDataFile = dataFile;
return B_OK;
}
void
WIndex::Unset(void)
{
fDataFile = NULL;
}
int32
WIndex::FindFirst(const char *word)
{
if (!fEntries)
return -1;
int32 index;
char nword[256];
int32 key;
NormalizeWord(word, nword);
key = GetKey(nword);
if ((index = Lookup(key)) < 0)
return -1;
while ((ItemAt(index - 1))->key == key)
index--;
return index;
}
FileEntry*
WIndex::GetEntry(int32 index)
{
if ((index >= fEntries)||(index < 0))
return NULL;
WIndexEntry *ientry;
FileEntry *dentry;
char *buffer;
dentry = new FileEntry();
ientry = ItemAt(index);
int32 size;
fDataFile->Seek(ientry->offset, SEEK_SET);
buffer = dentry->LockBuffer(256);
fDataFile->Read(buffer, 256);
size = _GetEntrySize(ientry, buffer);
dentry->UnlockBuffer(size);
return dentry;
}
size_t
WIndex::_GetEntrySize(WIndexEntry *entry, const char *entryData)
{
(void)entry;
return strcspn(entryData, "\n\r");
}
FileEntry*
WIndex::GetEntry(const char *word)
{
return GetEntry(FindFirst(word));
}
char*
WIndex::NormalizeWord(const char *word, char *dest)
{
const char *src;
char *dst;
src = word;
dst = dest;
while (*src) {
if (*src != '.')
*dst++ = *src;
src++;
}
*dst = 0;
for (dst = dest; *dst; dst++)
*dst = tolower(*dst);
return dest;
}
#define POLYNOMIAL 0x04c11db7L
static unsigned long crc_table[256];
void
gen_crc_table()
{
int i, j;
unsigned long crc_accum;
for (i = 0; i < 256; i++) {
crc_accum = ((unsigned long) i << 24);
for (j = 0; j < 8; j++) {
if (crc_accum & 0x80000000L)
crc_accum = (crc_accum << 1) ^ POLYNOMIAL;
else
crc_accum = (crc_accum << 1);
}
crc_table[i] = crc_accum;
}
return;
}
unsigned long
update_crc(unsigned long crc_accum, const char *data_blk_ptr, int data_blk_size)
{
int i, j;
for (j = 0; j < data_blk_size; j++) {
i = ((int) (crc_accum >> 24) ^ *data_blk_ptr++) & 0xff;
crc_accum = (crc_accum << 8) ^ crc_table[i];
}
return crc_accum;
}