root/src/add-ons/kernel/file_systems/bfs/CheckVisitor.cpp
/*
 * Copyright 2002-2020, Axel Dörfler, axeld@pinc-software.de.
 * Copyright 2012, Andreas Henriksson, sausageboy@gmail.com
 * This file may be used under the terms of the MIT License.
 */


//! File system error checking


#include "CheckVisitor.h"

#include "BlockAllocator.h"
#include "BPlusTree.h"
#include "Inode.h"
#include "Volume.h"


struct check_index {
        check_index()
                :
                inode(NULL)
        {
        }

        char                            name[B_FILE_NAME_LENGTH];
        block_run                       run;
        Inode*                          inode;
};


CheckVisitor::CheckVisitor(Volume* volume)
        :
        FileSystemVisitor(volume),
        fCheckBitmap(NULL)
{
}


CheckVisitor::~CheckVisitor()
{
        free(fCheckBitmap);
}


status_t
CheckVisitor::StartBitmapPass()
{
        if (!_ControlValid())
                return B_BAD_VALUE;

        // Lock the volume's journal and block allocator
        GetVolume()->GetJournal(0)->Lock(NULL, true);
        recursive_lock_lock(&GetVolume()->Allocator().Lock());

        size_t size = _BitmapSize();
        fCheckBitmap = (uint32*)malloc(size);
        if (fCheckBitmap == NULL) {
                recursive_lock_unlock(&GetVolume()->Allocator().Lock());
                GetVolume()->GetJournal(0)->Unlock(NULL, true);
                return B_NO_MEMORY;
        }

        memset(&Control().stats, 0, sizeof(check_control::stats));

        // initialize bitmap
        memset(fCheckBitmap, 0, size);
        for (off_t block = GetVolume()->ToBlock(GetVolume()->Log())
                + GetVolume()->Log().Length(); block-- > 0;) {
                _SetCheckBitmapAt(block);
        }

        Control().pass = BFS_CHECK_PASS_BITMAP;
        Control().stats.block_size = GetVolume()->BlockSize();

        // TODO: check reserved area in bitmap!

        Start(VISIT_REGULAR | VISIT_INDICES | VISIT_REMOVED
                | VISIT_ATTRIBUTE_DIRECTORIES);

        return B_OK;
}


status_t
CheckVisitor::WriteBackCheckBitmap()
{
        if (GetVolume()->IsReadOnly())
                return B_OK;

        // calculate the number of used blocks in the check bitmap
        size_t size = _BitmapSize();
        off_t usedBlocks = 0LL;

        // TODO: update the allocation groups used blocks info
        for (uint32 i = size >> 2; i-- > 0;) {
                uint32 compare = 1;
                // Count the number of bits set
                for (int16 j = 0; j < 32; j++, compare <<= 1) {
                        if ((compare & fCheckBitmap[i]) != 0)
                                usedBlocks++;
                }
        }

        Control().stats.freed = GetVolume()->UsedBlocks() - usedBlocks
                + Control().stats.missing;
        if (Control().stats.freed < 0)
                Control().stats.freed = 0;

        // Should we fix errors? Were there any errors we can fix?
        if ((Control().flags & BFS_FIX_BITMAP_ERRORS) != 0
                && (Control().stats.freed != 0 || Control().stats.missing != 0)) {
                // If so, write the check bitmap back over the original one,
                // and use transactions here to play safe - we even use several
                // transactions, so that we don't blow the maximum log size
                // on large disks, since we don't need to make this atomic.
#if 0
                // prints the blocks that differ
                off_t block = 0;
                for (int32 i = 0; i < fNumGroups; i++) {
                        AllocationBlock cached(fVolume);
                        for (uint32 j = 0; j < fGroups[i].NumBlocks(); j++) {
                                cached.SetTo(fGroups[i], j);
                                for (uint32 k = 0; k < cached.NumBlockBits(); k++) {
                                        if (cached.IsUsed(k) != _CheckBitmapIsUsedAt(block)) {
                                                dprintf("differ block %lld (should be %d)\n", block,
                                                        _CheckBitmapIsUsedAt(block));
                                        }
                                        block++;
                                }
                        }
                }
#endif

                GetVolume()->SuperBlock().used_blocks
                        = HOST_ENDIAN_TO_BFS_INT64(usedBlocks);

                size_t blockSize = GetVolume()->BlockSize();
                off_t numBitmapBlocks = GetVolume()->NumBitmapBlocks();

                for (uint32 i = 0; i < numBitmapBlocks; i += 512) {
                        Transaction transaction(GetVolume(), 1 + i);

                        uint32 blocksToWrite = 512;
                        if (blocksToWrite + i > numBitmapBlocks)
                                blocksToWrite = numBitmapBlocks - i;

                        status_t status = transaction.WriteBlocks(1 + i,
                                (uint8*)fCheckBitmap + i * blockSize, blocksToWrite);
                        if (status < B_OK) {
                                FATAL(("error writing bitmap: %s\n", strerror(status)));
                                return status;
                        }
                        transaction.Done();
                }
        }

        return B_OK;
}


status_t
CheckVisitor::StartIndexPass()
{
        // if we don't have indices to rebuild, this pass is done
        if (indices.IsEmpty())
                return B_ENTRY_NOT_FOUND;

        Control().pass = BFS_CHECK_PASS_INDEX;

        status_t status = _PrepareIndices();
        if (status != B_OK) {
                Control().status = status;
                return status;
        }

        Start(VISIT_REGULAR);
        return Next();
}


status_t
CheckVisitor::StopChecking()
{
        if (GetVolume()->IsReadOnly()) {
                // We can't fix errors on this volume
                Control().flags = 0;
        }

        if (Control().status != B_ENTRY_NOT_FOUND)
                FATAL(("CheckVisitor didn't run through\n"));

        _FreeIndices();

        recursive_lock_unlock(&GetVolume()->Allocator().Lock());
        GetVolume()->GetJournal(0)->Unlock(NULL, true);
        return B_OK;
}


status_t
CheckVisitor::VisitDirectoryEntry(Inode* inode, Inode* parent,
        const char* treeName)
{
        Control().inode = inode->ID();
        Control().mode = inode->Mode();

        if (Pass() != BFS_CHECK_PASS_BITMAP)
                return B_OK;

        // check if the inode's name is the same as in the b+tree
        if (inode->IsRegularNode()) {
                RecursiveLocker locker(inode->SmallDataLock());
                NodeGetter node(GetVolume());
                status_t status = node.SetTo(inode);
                if (status != B_OK) {
                        Control().errors |= BFS_COULD_NOT_OPEN;
                        Control().status = status;
                        return B_OK;
                }

                const char* localName = inode->Name(node.Node());
                if (localName == NULL || strcmp(localName, treeName)) {
                        Control().errors |= BFS_NAMES_DONT_MATCH;
                        FATAL(("Names differ: tree \"%s\", inode \"%s\"\n", treeName,
                                localName));

                        if ((Control().flags & BFS_FIX_NAME_MISMATCHES) != 0) {
                                // Rename the inode
                                Transaction transaction(GetVolume(), inode->BlockNumber());

                                // Note, this may need extra blocks, but the inode will
                                // only be checked afterwards, so that it won't be lost
                                status_t status = inode->SetName(transaction, treeName);
                                if (status == B_OK)
                                        status = inode->WriteBack(transaction);
                                if (status == B_OK)
                                        status = transaction.Done();
                                if (status != B_OK) {
                                        Control().status = status;
                                        return B_OK;
                                }
                        }
                }
        }

        // Check for the correct mode of the node (if the mode of the
        // file don't fit to its parent, there is a serious problem)
        if (((parent->Mode() & S_ATTR_DIR) != 0
                        && !inode->IsAttribute())
                || ((parent->Mode() & S_INDEX_DIR) != 0
                        && !inode->IsIndex())
                || (is_directory(parent->Mode())
                        && !inode->IsRegularNode())) {
                FATAL(("inode at %" B_PRIdOFF " is of wrong type: %o (parent "
                        "%o at %" B_PRIdOFF ")!\n", inode->BlockNumber(),
                        inode->Mode(), parent->Mode(), parent->BlockNumber()));

                // if we are allowed to fix errors, we should remove the file
                if ((Control().flags & BFS_REMOVE_WRONG_TYPES) != 0
                        && (Control().flags & BFS_FIX_BITMAP_ERRORS) != 0) {
                        Control().status = _RemoveInvalidNode(parent, NULL, inode,
                                treeName);
                } else
                        Control().status = B_ERROR;

                Control().errors |= BFS_WRONG_TYPE;
        }
        return B_OK;
}


status_t
CheckVisitor::VisitInode(Inode* inode, const char* treeName)
{
        Control().inode = inode->ID();
        Control().mode = inode->Mode();
                // (we might have set these in VisitDirectoryEntry already)

        // set name
        if (treeName == NULL) {
                if (inode->GetName(Control().name) < B_OK) {
                        if (inode->IsContainer())
                                strcpy(Control().name, "(dir has no name)");
                        else
                                strcpy(Control().name, "(node has no name)");
                }
        } else
                strcpy(Control().name, treeName);

        status_t status = B_OK;

        switch (Pass()) {
                case BFS_CHECK_PASS_BITMAP:
                {
                        status = _CheckInodeBlocks(inode, NULL);
                        if (status != B_OK)
                                return status;

                        // Check the B+tree as well
                        if (inode->IsContainer()) {
                                bool repairErrors = (Control().flags & BFS_FIX_BPLUSTREES) != 0;
                                bool errorsFound = false;

                                status = inode->Tree()->Validate(repairErrors, errorsFound);

                                if (errorsFound) {
                                        Control().errors |= BFS_INVALID_BPLUSTREE;
                                        if (inode->IsIndex() && treeName != NULL && repairErrors) {
                                                // We completely rebuild corrupt indices
                                                check_index* index = new(std::nothrow) check_index;
                                                if (index == NULL)
                                                        return B_NO_MEMORY;

                                                strlcpy(index->name, treeName, sizeof(index->name));
                                                index->run = inode->BlockRun();
                                                Indices().Push(index);
                                        }
                                }
                        }

                        break;
                }

                case BFS_CHECK_PASS_INDEX:
                        status = _AddInodeToIndex(inode);
                        break;
        }

        Control().status = status;
        return B_OK;
}


status_t
CheckVisitor::OpenInodeFailed(status_t reason, ino_t id, Inode* parent,
                char* treeName, TreeIterator* iterator)
{
        FATAL(("Could not open inode at %" B_PRIdOFF ": %s\n", id,
                strerror(reason)));

        if (treeName != NULL)
                strlcpy(Control().name, treeName, B_FILE_NAME_LENGTH);
        else
                strcpy(Control().name, "(node has no name)");

        Control().inode = id;
        Control().errors = BFS_COULD_NOT_OPEN;

        // TODO: check other error codes; B_IO_ERROR might be a temporary
        // issue, so it should be guarded by a force mode
        if (reason == B_BAD_VALUE || reason == B_BAD_DATA || reason == B_IO_ERROR) {
                // Remove inode from the tree if we can
                if (parent != NULL && iterator != NULL
                        && (Control().flags & BFS_REMOVE_INVALID) != 0) {
                        Control().status = _RemoveInvalidNode(parent, iterator->Tree(),
                                NULL, treeName);
                } else
                        Control().status = B_ERROR;
        } else {
                Control().status = B_OK;
        }

        return B_OK;
}


status_t
CheckVisitor::OpenBPlusTreeFailed(Inode* inode)
{
        FATAL(("Could not open b+tree from inode at %" B_PRIdOFF "\n",
                inode->ID()));
        return B_OK;
}


status_t
CheckVisitor::TreeIterationFailed(status_t reason, Inode* parent)
{
        // Iterating over the B+tree failed - we let the checkfs run
        // fail completely, as we would delete all files we cannot
        // access.
        // TODO: maybe have a force parameter that actually does that.
        // TODO: we also need to be able to repair broken B+trees!
        return reason;
}


status_t
CheckVisitor::_RemoveInvalidNode(Inode* parent, BPlusTree* tree,
        Inode* inode, const char* name)
{
        // It's safe to start a transaction, because Inode::Remove()
        // won't touch the block bitmap (which we hold the lock for)
        // if we set the INODE_DONT_FREE_SPACE flag - since we fix
        // the bitmap anyway.
        Transaction transaction(GetVolume(), parent->BlockNumber());
        status_t status;

        if (inode != NULL) {
                inode->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_DONT_FREE_SPACE);

                status = parent->Remove(transaction, name, NULL, false, true);
        } else {
                parent->WriteLockInTransaction(transaction);

                // does the file even exist?
                off_t id;
                status = tree->Find((uint8*)name, (uint16)strlen(name), &id);
                if (status == B_OK)
                        status = tree->Remove(transaction, name, id);
        }

        if (status == B_OK) {
                entry_cache_remove(GetVolume()->ID(), parent->ID(), name);
                transaction.Done();
        }

        return status;
}


bool
CheckVisitor::_ControlValid()
{
        if (Control().magic != BFS_IOCTL_CHECK_MAGIC) {
                FATAL(("invalid check_control!\n"));
                return false;
        }

        return true;
}


bool
CheckVisitor::_CheckBitmapIsUsedAt(off_t block) const
{
        size_t size = _BitmapSize();
        uint32 index = block / 32;      // 32bit resolution
        if (index > size / 4)
                return false;

        return BFS_ENDIAN_TO_HOST_INT32(fCheckBitmap[index])
                & (1UL << (block & 0x1f));
}


void
CheckVisitor::_SetCheckBitmapAt(off_t block)
{
        size_t size = _BitmapSize();
        uint32 index = block / 32;      // 32bit resolution
        if (index > size / 4)
                return;

        fCheckBitmap[index] |= HOST_ENDIAN_TO_BFS_INT32(1UL << (block & 0x1f));
}


size_t
CheckVisitor::_BitmapSize() const
{
        return GetVolume()->BlockSize() * GetVolume()->NumBitmapBlocks();
}


status_t
CheckVisitor::_CheckInodeBlocks(Inode* inode, const char* name)
{
        status_t status = _CheckAllocated(inode->BlockRun(), "inode");
        if (status != B_OK)
                return status;

        if (inode->IsSymLink() && (inode->Flags() & INODE_LONG_SYMLINK) == 0) {
                // symlinks may not have a valid data stream
                if (strlen(inode->Node().short_symlink) >= SHORT_SYMLINK_NAME_LENGTH)
                        return B_BAD_DATA;

                return B_OK;
        }

        data_stream* data = &inode->Node().data;

        // check the direct range

        if (data->max_direct_range) {
                for (int32 i = 0; i < NUM_DIRECT_BLOCKS; i++) {
                        if (data->direct[i].IsZero())
                                break;

                        status = _CheckAllocated(data->direct[i], "direct");
                        if (status < B_OK)
                                return status;

                        Control().stats.direct_block_runs++;
                        Control().stats.blocks_in_direct
                                += data->direct[i].Length();
                }
        }

        CachedBlock cached(GetVolume());

        // check the indirect range

        if (data->max_indirect_range) {
                status = _CheckAllocated(data->indirect, "indirect");
                if (status != B_OK)
                        return status;

                off_t block = GetVolume()->ToBlock(data->indirect);

                for (int32 i = 0; i < data->indirect.Length(); i++) {
                        status = cached.SetTo(block + i);
                        if (status != B_OK)
                                RETURN_ERROR(status);

                        block_run* runs = (block_run*)cached.Block();
                        int32 runsPerBlock = GetVolume()->BlockSize() / sizeof(block_run);
                        int32 index = 0;
                        for (; index < runsPerBlock; index++) {
                                if (runs[index].IsZero())
                                        break;

                                status = _CheckAllocated(runs[index], "indirect->run");
                                if (status < B_OK)
                                        return status;

                                Control().stats.indirect_block_runs++;
                                Control().stats.blocks_in_indirect
                                        += runs[index].Length();
                        }
                        Control().stats.indirect_array_blocks++;

                        if (index < runsPerBlock)
                                break;
                }
        }

        // check the double indirect range

        if (data->max_double_indirect_range) {
                status = _CheckAllocated(data->double_indirect, "double indirect");
                if (status != B_OK)
                        return status;

                int32 runsPerBlock = runs_per_block(GetVolume()->BlockSize());
                int32 runsPerArray = runsPerBlock * data->double_indirect.Length();

                CachedBlock cachedDirect(GetVolume());

                for (int32 indirectIndex = 0; indirectIndex < runsPerArray;
                                indirectIndex++) {
                        // get the indirect array block
                        status = cached.SetTo(GetVolume()->ToBlock(data->double_indirect)
                                        + indirectIndex / runsPerBlock);
                        if (status != B_OK)
                                return status;

                        block_run* array = (block_run*)cached.Block();
                        block_run indirect = array[indirectIndex % runsPerBlock];
                        // are we finished yet?
                        if (indirect.IsZero())
                                return B_OK;

                        status = _CheckAllocated(indirect, "double indirect->runs");
                        if (status != B_OK)
                                return status;

                        int32 maxIndex
                                = ((uint32)indirect.Length() << GetVolume()->BlockShift())
                                        / sizeof(block_run);

                        for (int32 index = 0; index < maxIndex; ) {
                                status = cachedDirect.SetTo(GetVolume()->ToBlock(indirect)
                                                + index / runsPerBlock);
                                if (status != B_OK)
                                        return status;

                                block_run* runs = (block_run*)cachedDirect.Block();

                                do {
                                        // are we finished yet?
                                        if (runs[index % runsPerBlock].IsZero())
                                                return B_OK;

                                        status = _CheckAllocated(runs[index % runsPerBlock],
                                                "double indirect->runs->run");
                                        if (status != B_OK)
                                                return status;

                                        Control().stats.double_indirect_block_runs++;
                                        Control().stats.blocks_in_double_indirect
                                                += runs[index % runsPerBlock].Length();
                                } while ((++index % runsPerBlock) != 0);
                        }

                        Control().stats.double_indirect_array_blocks++;
                }
        }

        return B_OK;
}


status_t
CheckVisitor::_CheckAllocated(block_run run, const char* type)
{
        BlockAllocator& allocator = GetVolume()->Allocator();

        // make sure the block run is valid
        if (!allocator.IsValidBlockRun(run, type)) {
                Control().errors |= BFS_INVALID_BLOCK_RUN;
                return B_OK;
        }

        status_t status;

        off_t start = GetVolume()->ToBlock(run);
        off_t end = start + run.Length();

        // check if the run is allocated in the block bitmap on disk
        off_t block = start;

        while (block < end) {
                off_t firstMissing;
                status = allocator.CheckBlocks(block, end - block, true, &firstMissing);
                if (status == B_OK)
                        break;
                else if (status != B_BAD_DATA)
                        return status;

                off_t afterLastMissing;
                status = allocator.CheckBlocks(firstMissing, end - firstMissing, false,
                        &afterLastMissing);
                if (status == B_OK)
                        afterLastMissing = end;
                else if (status != B_BAD_DATA)
                        return status;

                PRINT(("%s: block_run(%" B_PRId32 ", %" B_PRIu16 ", %" B_PRIu16 ")"
                        ": blocks %" B_PRIdOFF " - %" B_PRIdOFF " are not allocated!\n",
                        type, run.AllocationGroup(), run.Start(),
                        run.Length(), firstMissing, afterLastMissing - 1));

                Control().stats.missing += afterLastMissing - firstMissing;

                block = afterLastMissing;
        }

        // set bits in check bitmap, while checking if they're already set
        off_t firstSet = -1;

        for (block = start; block < end; block++) {
                if (_CheckBitmapIsUsedAt(block)) {
                        if (firstSet == -1) {
                                firstSet = block;
                                Control().errors |= BFS_BLOCKS_ALREADY_SET;
                        }
                        Control().stats.already_set++;
                } else {
                        if (firstSet != -1) {
                                FATAL(("%s: block_run(%d, %u, %u): blocks %" B_PRIdOFF
                                        " - %" B_PRIdOFF " are already set!\n", type,
                                        (int)run.AllocationGroup(), run.Start(), run.Length(),
                                        firstSet, block - 1));
                                firstSet = -1;
                        }
                        _SetCheckBitmapAt(block);
                }
        }

        return B_OK;
}


status_t
CheckVisitor::_PrepareIndices()
{
        int32 count = 0;

        for (int32 i = 0; i < Indices().CountItems(); i++) {
                check_index* index = Indices().Array()[i];
                Vnode vnode(GetVolume(), index->run);
                Inode* inode;
                status_t status = vnode.Get(&inode);
                if (status != B_OK) {
                        FATAL(("check: Could not open index at %" B_PRIdOFF "\n",
                                GetVolume()->ToBlock(index->run)));
                        return status;
                }

                BPlusTree* tree = inode->Tree();
                if (tree == NULL) {
                        // TODO: We can't yet repair those
                        continue;
                }

                status = tree->MakeEmpty();
                if (status != B_OK)
                        return status;

                index->inode = inode;
                vnode.Keep();
                count++;
        }

        return count == 0 ? B_ENTRY_NOT_FOUND : B_OK;
}


void
CheckVisitor::_FreeIndices()
{
        for (int32 i = 0; i < Indices().CountItems(); i++) {
                check_index* index = Indices().Array()[i];
                if (index->inode != NULL) {
                        put_vnode(GetVolume()->FSVolume(),
                                GetVolume()->ToVnode(index->inode->BlockRun()));
                }
                delete index;
        }
        Indices().MakeEmpty();
}


status_t
CheckVisitor::_AddInodeToIndex(Inode* inode)
{
        Transaction transaction(GetVolume(), inode->BlockNumber());

        for (int32 i = 0; i < Indices().CountItems(); i++) {
                check_index* index = Indices().Array()[i];
                if (index->inode == NULL)
                        continue;

                index->inode->WriteLockInTransaction(transaction);

                BPlusTree* tree = index->inode->Tree();
                if (tree == NULL)
                        return B_ERROR;

                status_t status = B_OK;

                if (!strcmp(index->name, "name")) {
                        if (inode->InNameIndex()) {
                                char name[B_FILE_NAME_LENGTH];
                                if (inode->GetName(name, B_FILE_NAME_LENGTH) != B_OK)
                                        return B_ERROR;

                                status = tree->Insert(transaction, name, inode->ID());
                        }
                } else if (!strcmp(index->name, "last_modified")) {
                        if (inode->InLastModifiedIndex()) {
                                status = tree->Insert(transaction, inode->OldLastModified(),
                                        inode->ID());
                        }
                } else if (!strcmp(index->name, "size")) {
                        if (inode->InSizeIndex())
                                status = tree->Insert(transaction, inode->Size(), inode->ID());
                } else {
                        uint8 key[MAX_INDEX_KEY_LENGTH];
                        size_t keyLength = sizeof(key);
                        if (inode->ReadAttribute(index->name, B_ANY_TYPE, 0, key,
                                        &keyLength) == B_OK) {
                                status = tree->Insert(transaction, key, keyLength, inode->ID());
                        }
                }

                if (status != B_OK)
                        return status;
        }

        return transaction.Done();
}