root/src/add-ons/kernel/file_systems/bfs/BPlusTree.h
/*
 * Copyright 2001-2015, Axel Dörfler, axeld@pinc-software.de.
 * This file may be used under the terms of the MIT License.
 */
#ifndef B_PLUS_TREE_H
#define B_PLUS_TREE_H


#include "bfs.h"

#include "Utility.h"

#if !_BOOT_MODE
#include "Journal.h"
class Inode;
#else
#define Inode BFS::Stream

namespace BFS {

class Stream;
class Transaction;
class TransactionListener {};
#endif // _BOOT_MODE

//      #pragma mark - on-disk structures

struct bplustree_node;

#define BPLUSTREE_NULL                  -1LL
#define BPLUSTREE_FREE                  -2LL

struct bplustree_header {
        uint32          magic;
        uint32          node_size;
        uint32          max_number_of_levels;
        uint32          data_type;
        int64           root_node_pointer;
        int64           free_node_pointer;
        int64           maximum_size;

        uint32 Magic() const { return BFS_ENDIAN_TO_HOST_INT32(magic); }
        uint32 NodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(node_size); }
        uint32 DataType() const { return BFS_ENDIAN_TO_HOST_INT32(data_type); }
        off_t RootNode() const
                { return BFS_ENDIAN_TO_HOST_INT64(root_node_pointer); }
        off_t FreeNode() const
                { return BFS_ENDIAN_TO_HOST_INT64(free_node_pointer); }
        off_t MaximumSize() const { return BFS_ENDIAN_TO_HOST_INT64(maximum_size); }
        uint32 MaxNumberOfLevels() const
                { return BFS_ENDIAN_TO_HOST_INT32(max_number_of_levels); }

        inline bool CheckNode(const bplustree_node* node) const;
        inline bool IsValidLink(off_t link) const;
        bool IsValid() const;
} _PACKED;

#define BPLUSTREE_MAGIC                         0x69f6c2e8
#define BPLUSTREE_NODE_SIZE             1024
#define BPLUSTREE_MAX_KEY_LENGTH        256
#define BPLUSTREE_MIN_KEY_LENGTH        1

enum bplustree_types {
        BPLUSTREE_STRING_TYPE   = 0,
        BPLUSTREE_INT32_TYPE    = 1,
        BPLUSTREE_UINT32_TYPE   = 2,
        BPLUSTREE_INT64_TYPE    = 3,
        BPLUSTREE_UINT64_TYPE   = 4,
        BPLUSTREE_FLOAT_TYPE    = 5,
        BPLUSTREE_DOUBLE_TYPE   = 6
};


struct duplicate_array;


template <typename T>
struct  __attribute__((packed)) Unaligned {
        T value;

        Unaligned<T>& operator=(const T& newValue)
        {
                value = newValue; return *this;
        }
        operator T() const { return value; }
};


struct bplustree_node {
                        int64                           left_link;
                        int64                           right_link;
                        int64                           overflow_link;
                        uint16                          all_key_count;
                        uint16                          all_key_length;

                        off_t                           LeftLink() const
                                                                        { return BFS_ENDIAN_TO_HOST_INT64(
                                                                                left_link); }
                        off_t                           RightLink() const
                                                                        { return BFS_ENDIAN_TO_HOST_INT64(
                                                                                right_link); }
                        off_t                           OverflowLink() const
                                                                        { return BFS_ENDIAN_TO_HOST_INT64(
                                                                                overflow_link); }
                        uint16                          NumKeys() const
                                                                        { return BFS_ENDIAN_TO_HOST_INT16(
                                                                                all_key_count); }
                        uint16                          AllKeyLength() const
                                                                        { return BFS_ENDIAN_TO_HOST_INT16(
                                                                                all_key_length); }

        inline  Unaligned<uint16>*      KeyLengths() const;
        inline  Unaligned<off_t>*   Values() const;
        inline  uint8*                          Keys() const;
        inline  int32                           Used() const;
                        uint8*                          KeyAt(int32 index, uint16* keyLength) const;

        inline  bool                            IsLeaf() const;

                        void                            Initialize();
                        uint8                           CountDuplicates(off_t offset,
                                                                        bool isFragment) const;
                        off_t                           DuplicateAt(off_t offset, bool isFragment,
                                                                        int8 index) const;
                        uint32                          FragmentsUsed(uint32 nodeSize) const;
        inline  duplicate_array*        FragmentAt(int8 index) const;
        inline  duplicate_array*        DuplicateArray() const;

        static inline uint8                     LinkType(off_t link);
        static inline off_t                     MakeLink(uint8 type, off_t link,
                                                                        uint32 fragmentIndex = 0);
        static inline bool                      IsDuplicate(off_t link);
        static inline off_t                     FragmentOffset(off_t link);
        static inline uint32            FragmentIndex(off_t link);
        static inline uint32            MaxFragments(uint32 nodeSize);

                        status_t                        CheckIntegrity(uint32 nodeSize) const;
} _PACKED;

//#define BPLUSTREE_NODE 0
#define BPLUSTREE_DUPLICATE_NODE 2
#define BPLUSTREE_DUPLICATE_FRAGMENT 3

#define NUM_FRAGMENT_VALUES 7
#define NUM_DUPLICATE_VALUES 125

//**************************************

enum bplustree_traversing {
        BPLUSTREE_FORWARD = 1,
        BPLUSTREE_BACKWARD = -1,

        BPLUSTREE_BEGIN = 0,
        BPLUSTREE_END = 1
};


//      #pragma mark - in-memory structures


class BPlusTree;
struct TreeCheck;
class TreeIterator;


#if !_BOOT_MODE
// needed for searching (utilizing a stack)
struct node_and_key {
        off_t   nodeOffset;
        uint16  keyIndex;
};
#endif // !_BOOT_MODE


class CachedNode {
public:
        CachedNode(BPlusTree* tree)
                :
                fTree(tree),
                fNode(NULL),
                fOffset(0),
                fBlockNumber(0),
                fBlock(NULL),
                fWritable(false)
        {
        }

        CachedNode(BPlusTree* tree, off_t offset, bool check = true)
                :
                fTree(tree),
                fNode(NULL),
                fOffset(0),
                fBlockNumber(0),
                fBlock(NULL),
                fWritable(false)
        {
                SetTo(offset, check);
        }

        ~CachedNode()
        {
                Unset();
#if _BOOT_MODE
                free(fBlock);
#endif
        }

                        const bplustree_node* SetTo(off_t offset, bool check = true);
                        status_t                        SetTo(off_t offset,
                                                                        const bplustree_node** _node,
                                                                        bool check = true);

                        const bplustree_header* SetToHeader();
                        void                            Unset();

#if !_BOOT_MODE
                        bplustree_node*         SetToWritable(Transaction& transaction,
                                                                        off_t offset, bool check = true);
                        bplustree_node*         MakeWritable(Transaction& transaction);
                        bplustree_header*       SetToWritableHeader(Transaction& transaction);

                        void                            UnsetUnchanged(Transaction& transaction);

                        status_t                        Free(Transaction& transaction, off_t offset);
                        status_t                        Allocate(Transaction& transaction,
                                                                        bplustree_node** _node, off_t* _offset);
#endif // !_BOOT_MODE

                        bool                            IsWritable() const { return fWritable; }
                        bplustree_node*         Node() const { return fNode; }

protected:
                        status_t                        InternalSetTo(Transaction* transaction,
                                                                        off_t offset);

                        BPlusTree*                      fTree;
                        bplustree_node*         fNode;
                        off_t                           fOffset;
                        off_t                           fBlockNumber;
                        uint8*                          fBlock;
                        bool                            fWritable;
};


class BPlusTree : public TransactionListener {
public:
#if !_BOOT_MODE
                                                                BPlusTree(Transaction& transaction,
                                                                        Inode* stream,
                                                                        int32 nodeSize = BPLUSTREE_NODE_SIZE);
#endif
                                                                BPlusTree(Inode* stream);
                                                                BPlusTree();
                                                                ~BPlusTree();

#if !_BOOT_MODE
                        status_t                        SetTo(Transaction& transaction, Inode* stream,
                                                                        int32 nodeSize = BPLUSTREE_NODE_SIZE);
#endif
                        status_t                        SetTo(Inode* stream);
                        status_t                        SetStream(Inode* stream);

                        status_t                        InitCheck();

                        size_t                          NodeSize() const { return fNodeSize; }
                        Inode*                          Stream() const { return fStream; }

#if !_BOOT_MODE
                        status_t                        Validate(bool repair, bool& _errorsFound);
                        status_t                        MakeEmpty();

                        status_t                        Remove(Transaction& transaction,
                                                                        const uint8* key, uint16 keyLength,
                                                                        off_t value);
                        status_t                        Insert(Transaction& transaction,
                                                                        const uint8* key, uint16 keyLength,
                                                                        off_t value);

                        status_t                        Remove(Transaction& transaction, const char* key,
                                                                        off_t value);
                        status_t                        Insert(Transaction& transaction, const char* key,
                                                                        off_t value);
                        status_t                        Insert(Transaction& transaction, int32 key,
                                                                        off_t value);
                        status_t                        Insert(Transaction& transaction, uint32 key,
                                                                        off_t value);
                        status_t                        Insert(Transaction& transaction, int64 key,
                                                                        off_t value);
                        status_t                        Insert(Transaction& transaction, uint64 key,
                                                                        off_t value);
                        status_t                        Insert(Transaction& transaction, float key,
                                                                        off_t value);
                        status_t                        Insert(Transaction& transaction, double key,
                                                                        off_t value);

                        status_t                        Replace(Transaction& transaction,
                                                                        const uint8* key, uint16 keyLength,
                                                                        off_t value);
#endif // !_BOOT_MODE

                        status_t                        Find(const uint8* key, uint16 keyLength,
                                                                        off_t* value);

#if !_BOOT_MODE
        static  int32                           TypeCodeToKeyType(type_code code);
        static  int32                           ModeToKeyType(mode_t mode);

protected:
        virtual void                            TransactionDone(bool success);
        virtual void                            RemovedFromTransaction();
#endif // !_BOOT_MODE

private:
                                                                BPlusTree(const BPlusTree& other);
                                                                BPlusTree& operator=(const BPlusTree& other);
                                                                        // no implementation

                        int32                           _CompareKeys(const void* key1, int keylength1,
                                                                        const void* key2, int keylength2);
                        status_t                        _FindKey(const bplustree_node* node,
                                                                        const uint8* key, uint16 keyLength,
                                                                        uint16* index = NULL, off_t* next = NULL);
#if !_BOOT_MODE
                        status_t                        _SeekDown(Stack<node_and_key>& stack,
                                                                        const uint8* key, uint16 keyLength);

                        status_t                        _FindFreeDuplicateFragment(
                                                                        Transaction& transaction,
                                                                        const bplustree_node* node,
                                                                        CachedNode& cached, off_t* _offset,
                                                                        bplustree_node** _fragment, uint32* _index);
                        status_t                        _InsertDuplicate(Transaction& transaction,
                                                                        CachedNode& cached,
                                                                        const bplustree_node* node, uint16 index,
                                                                        off_t value);
                        void                            _InsertKey(bplustree_node* node, uint16 index,
                                                                        uint8* key, uint16 keyLength, off_t value);
                        status_t                        _SplitNode(bplustree_node* node,
                                                                        off_t nodeOffset, bplustree_node* other,
                                                                        off_t otherOffset, uint16* _keyIndex,
                                                                        uint8* key, uint16* _keyLength,
                                                                        off_t* _value);

                        status_t                        _RemoveDuplicate(Transaction& transaction,
                                                                        const bplustree_node* node,
                                                                        CachedNode& cached, uint16 keyIndex,
                                                                        off_t value);
                        void                            _RemoveKey(bplustree_node* node, uint16 index);

                        void                            _UpdateIterators(off_t offset, off_t nextOffset,
                                                                        uint16 keyIndex, uint16 splitAt,
                                                                        int8 change);
                        void                            _AddIterator(TreeIterator* iterator);
                        void                            _RemoveIterator(TreeIterator* iterator);

                        status_t                        _ValidateChildren(TreeCheck& check,
                                                                        uint32 level, off_t offset,
                                                                        const uint8* largestKey, uint16 keyLength,
                                                                        const bplustree_node* parent);
                        status_t                        _ValidateChild(TreeCheck& check,
                                                                        CachedNode& cached, uint32 level,
                                                                        off_t offset, off_t lastOffset,
                                                                        off_t nextOffset, const uint8* key,
                                                                        uint16 keyLength);
#endif // !_BOOT_MODE

private:
                        friend class TreeIterator;
                        friend class CachedNode;
                        friend struct TreeCheck;

                        Inode*                          fStream;
                        bplustree_header        fHeader;
                        int32                           fNodeSize;
                        bool                            fAllowDuplicates;
                        bool                            fInTransaction;
                        status_t                        fStatus;

#if !_BOOT_MODE
                        mutex                           fIteratorLock;
                        SinglyLinkedList<TreeIterator> fIterators;
#endif
};


//      #pragma mark - helper classes/functions


class TreeIterator : public SinglyLinkedListLinkImpl<TreeIterator> {
public:
                                                                TreeIterator(BPlusTree* tree);
                                                                ~TreeIterator();

                        status_t                        Goto(int8 to);
                        status_t                        Traverse(int8 direction, void* key,
                                                                        uint16* keyLength, uint16 maxLength,
                                                                        off_t* value, uint16* duplicate = NULL);
                        status_t                        Find(const uint8* key, uint16 keyLength);

                        status_t                        Rewind();
                        status_t                        GetNextEntry(void* key, uint16* keyLength,
                                                                        uint16 maxLength, off_t* value,
                                                                        uint16* duplicate = NULL);
                        status_t                        GetPreviousEntry(void* key, uint16* keyLength,
                                                                        uint16 maxLength, off_t* value,
                                                                        uint16* duplicate = NULL);
                        void                            SkipDuplicates();

                        BPlusTree*                      Tree() const { return fTree; }

#ifdef DEBUG
                        void                            Dump();
#endif

private:
                        friend class BPlusTree;

                        // called by BPlusTree
                        void                            Update(off_t offset, off_t nextOffset,
                                                                        uint16 keyIndex, uint16 splitAt,
                                                                        int8 change);
                        void                            Stop();

private:
                        BPlusTree*                      fTree;
                        off_t                           fCurrentNodeOffset;
                                                                        // traverse position
                        int32                           fCurrentKey;
                        off_t                           fDuplicateNode;
                        uint16                          fDuplicate;
                        uint16                          fNumDuplicates;
                        bool                            fIsFragment;
};


//      #pragma mark - BPlusTree's inline functions
//      (most of them may not be needed)


#if !_BOOT_MODE
inline status_t
BPlusTree::Remove(Transaction& transaction, const char* key, off_t value)
{
        if (fHeader.DataType() != BPLUSTREE_STRING_TYPE)
                return B_BAD_TYPE;
        return Remove(transaction, (uint8*)key, strlen(key), value);
}


inline status_t
BPlusTree::Insert(Transaction& transaction, const char* key, off_t value)
{
        if (fHeader.DataType() != BPLUSTREE_STRING_TYPE)
                return B_BAD_TYPE;
        return Insert(transaction, (uint8*)key, strlen(key), value);
}


inline status_t
BPlusTree::Insert(Transaction& transaction, int32 key, off_t value)
{
        if (fHeader.DataType() != BPLUSTREE_INT32_TYPE)
                return B_BAD_TYPE;
        return Insert(transaction, (uint8*)&key, sizeof(key), value);
}


inline status_t
BPlusTree::Insert(Transaction& transaction, uint32 key, off_t value)
{
        if (fHeader.DataType() != BPLUSTREE_UINT32_TYPE)
                return B_BAD_TYPE;
        return Insert(transaction, (uint8*)&key, sizeof(key), value);
}


inline status_t
BPlusTree::Insert(Transaction& transaction, int64 key, off_t value)
{
        if (fHeader.DataType() != BPLUSTREE_INT64_TYPE)
                return B_BAD_TYPE;
        return Insert(transaction, (uint8*)&key, sizeof(key), value);
}


inline status_t
BPlusTree::Insert(Transaction& transaction, uint64 key, off_t value)
{
        if (fHeader.DataType() != BPLUSTREE_UINT64_TYPE)
                return B_BAD_TYPE;
        return Insert(transaction, (uint8*)&key, sizeof(key), value);
}


inline status_t
BPlusTree::Insert(Transaction& transaction, float key, off_t value)
{
        if (fHeader.DataType() != BPLUSTREE_FLOAT_TYPE)
                return B_BAD_TYPE;
        return Insert(transaction, (uint8*)&key, sizeof(key), value);
}


inline status_t
BPlusTree::Insert(Transaction& transaction, double key, off_t value)
{
        if (fHeader.DataType() != BPLUSTREE_DOUBLE_TYPE)
                return B_BAD_TYPE;
        return Insert(transaction, (uint8*)&key, sizeof(key), value);
}
#endif // !_BOOT_MODE


//      #pragma mark - TreeIterator inline functions


inline status_t
TreeIterator::Rewind()
{
        return Goto(BPLUSTREE_BEGIN);
}


inline status_t
TreeIterator::GetNextEntry(void* key, uint16* keyLength, uint16 maxLength,
        off_t* value, uint16* duplicate)
{
        return Traverse(BPLUSTREE_FORWARD, key, keyLength, maxLength, value,
                duplicate);
}


inline status_t
TreeIterator::GetPreviousEntry(void* key, uint16* keyLength, uint16 maxLength,
        off_t* value, uint16* duplicate)
{
        return Traverse(BPLUSTREE_BACKWARD, key, keyLength, maxLength, value,
                duplicate);
}


//      #pragma mark - bplustree_header inline functions


inline bool
bplustree_header::CheckNode(const bplustree_node* node) const
{
        // sanity checks (links, all_key_count)
        return IsValidLink(node->LeftLink())
                && IsValidLink(node->RightLink())
                && IsValidLink(node->OverflowLink())
                && (int8*)node->Values() + node->NumKeys() * sizeof(off_t)
                                <= (int8*)node + NodeSize();
}


inline bool
bplustree_header::IsValidLink(off_t link) const
{
        return link == BPLUSTREE_NULL
                || (link > 0 && link <= MaximumSize() - NodeSize());
}


//      #pragma mark - bplustree_node inline functions


inline Unaligned<uint16>*
bplustree_node::KeyLengths() const
{
        return (Unaligned<uint16>*)(((char*)this) + key_align(sizeof(bplustree_node)
                + AllKeyLength()));
}


inline Unaligned<off_t>*
bplustree_node::Values() const
{
        return (Unaligned<off_t>*)(
                (char*)KeyLengths() + NumKeys() * sizeof(uint16));
}


inline uint8*
bplustree_node::Keys() const
{
        return (uint8*)this + sizeof(bplustree_node);
}


inline int32
bplustree_node::Used() const
{
        return key_align(sizeof(bplustree_node) + AllKeyLength()) + NumKeys()
                * (sizeof(uint16) + sizeof(off_t));
}


inline bool
bplustree_node::IsLeaf() const
{
        return OverflowLink() == BPLUSTREE_NULL;
}


inline duplicate_array*
bplustree_node::FragmentAt(int8 index) const
{
        return (duplicate_array*)((off_t*)this + index * (NUM_FRAGMENT_VALUES + 1));
}


inline duplicate_array*
bplustree_node::DuplicateArray() const
{
        return (duplicate_array*)&overflow_link;
}


inline uint8
bplustree_node::LinkType(off_t link)
{
        return *(uint64*)&link >> 62;
}


inline off_t
bplustree_node::MakeLink(uint8 type, off_t link, uint32 fragmentIndex)
{
        return ((off_t)type << 62) | (link & 0x3ffffffffffffc00LL)
                | (fragmentIndex & 0x3ff);
}


inline bool
bplustree_node::IsDuplicate(off_t link)
{
        return (LinkType(link)
                & (BPLUSTREE_DUPLICATE_NODE | BPLUSTREE_DUPLICATE_FRAGMENT)) > 0;
}


inline off_t
bplustree_node::FragmentOffset(off_t link)
{
        return link & 0x3ffffffffffffc00LL;
}


inline uint32
bplustree_node::FragmentIndex(off_t link)
{
        return (uint32)(link & 0x3ff);
}


inline uint32
bplustree_node::MaxFragments(uint32 nodeSize)
{
        return nodeSize / ((NUM_FRAGMENT_VALUES + 1) * sizeof(off_t));
}


#if _BOOT_MODE
} // namespace BFS

#undef Inode
#endif

#endif  // B_PLUS_TREE_H