root/src/add-ons/kernel/file_systems/ext2/Journal.cpp
/*
 * Copyright 2010, Haiku Inc. All rights reserved.
 * Copyright 2001-2010, Axel Dörfler, axeld@pinc-software.de.
 * This file may be used under the terms of the MIT License.
 *
 * Authors:
 *              Janito V. Ferreira Filho
 */


#include "Journal.h"

#include <new>
#include <string.h>
#include <unistd.h>

#include <fs_cache.h>

#include "CachedBlock.h"
#include "CRCTable.h"
#include "HashRevokeManager.h"


//#define TRACE_EXT2
#ifdef TRACE_EXT2
#       define TRACE(x...) dprintf("\33[34mext2:\33[0m " x)
#else
#       define TRACE(x...) ;
#endif
#define ERROR(x...) dprintf("\33[34mext2:\33[0m " x)
#define WARN(x...) dprintf("\33[34mext2:\33[0m " x)


/*!	Bookkeeping record for one transaction that has been written to the log.

	Each entry remembers the journal it belongs to, a log position and the
	commit ID it was created with. Entries are linked into a doubly linked
	list through DoublyLinkedListLinkImpl.
*/
class LogEntry : public DoublyLinkedListLinkImpl<LogEntry> {
public:
                                                        // The third argument is the commit ID (it is stored in
                                                        // fCommitID by the constructor), not a length.
                                                        LogEntry(Journal* journal, uint32 logStart,
                                                                uint32 commitID);
                                                        ~LogEntry();

                        // Log position passed at construction.
                        uint32                  Start() const { return fStart; }
                        // Commit ID passed at construction.
                        uint32                  CommitID() const { return fCommitID; }

                        // Journal this entry belongs to.
                        Journal*                GetJournal() { return fJournal; }

private:
                        Journal*                fJournal;
                        uint32                  fStart;
                        uint32                  fCommitID;
};


/*!	Stores the owning \a journal, the log position \a logStart and the
	\a commitID verbatim; no other work is done.
*/
LogEntry::LogEntry(Journal* journal, uint32 logStart, uint32 commitID)
        :
        fJournal(journal),
        fStart(logStart),
        fCommitID(commitID)
{
}


// Nothing to release: the entry only holds a non-owning journal pointer and
// two integers.
LogEntry::~LogEntry()
{
}


/*!	Turns this header into a descriptor block header carrying \a sequence.
	All three fields are stored in big endian byte order.
*/
void
JournalHeader::MakeDescriptor(uint32 sequence)
{
        magic = B_HOST_TO_BENDIAN_INT32(JOURNAL_MAGIC);
        block_type = B_HOST_TO_BENDIAN_INT32(JOURNAL_DESCRIPTOR_BLOCK);

        // The parameter shadows the member, hence the explicit this->.
        this->sequence = B_HOST_TO_BENDIAN_INT32(sequence);
}


/*!	Turns this header into a commit block header carrying \a sequence.
	All three fields are stored in big endian byte order.
*/
void
JournalHeader::MakeCommit(uint32 sequence)
{
        magic = B_HOST_TO_BENDIAN_INT32(JOURNAL_MAGIC);
        block_type = B_HOST_TO_BENDIAN_INT32(JOURNAL_COMMIT_BLOCK);

        // The parameter shadows the member, hence the explicit this->.
        this->sequence = B_HOST_TO_BENDIAN_INT32(sequence);
}


/*!	Creates a journal for the file system \a fsVolume whose log lives on
	\a jVolume.

	Allocates and initializes a HashRevokeManager and then loads the journal
	superblock. The outcome is stored in fInitStatus and must be queried via
	InitCheck(); the constructor itself cannot fail.

	Every member that the default constructor initializes is initialized here
	as well (same order), so that fOwner, fIsStarted and the checksum/64 bit
	feature flags never hold indeterminate values. In particular
	_CheckFeatures() is only run for version 2 superblocks, so the feature
	flags need a defined default.
*/
Journal::Journal(Volume* fsVolume, Volume* jVolume)
        :
        fJournalVolume(jVolume),
        fJournalBlockCache(jVolume->BlockCache()),
        fFilesystemVolume(fsVolume),
        fFilesystemBlockCache(fsVolume->BlockCache()),
        fOwner(NULL),
        fRevokeManager(NULL),
        fInitStatus(B_OK),
        fBlockSize(sizeof(JournalSuperBlock)),
        fFirstCommitID(0),
        fFirstCacheCommitID(0),
        fFirstLogBlock(1),
        fLogSize(0),
        fVersion(0),
        fIsStarted(false),
        fLogStart(0),
        fLogEnd(0),
        fFreeBlocks(0),
        fMaxTransactionSize(0),
        fCurrentCommitID(0),
        fHasSubTransaction(false),
        fSeparateSubTransactions(false),
        fUnwrittenTransactions(0),
        fTransactionID(0),
        fChecksumEnabled(false),
        fChecksumV3Enabled(false),
        fFeature64bits(false)
{
        recursive_lock_init(&fLock, "ext2 journal");
        mutex_init(&fLogEntriesLock, "ext2 journal log entries");

        HashRevokeManager* revokeManager = new(std::nothrow) HashRevokeManager(
                fsVolume->Has64bitFeature());
        TRACE("Journal::Journal(): Allocated a hash revoke manager at %p\n",
                revokeManager);

        if (revokeManager == NULL)
                fInitStatus = B_NO_MEMORY;
        else {
                fInitStatus = revokeManager->Init();

                if (fInitStatus == B_OK) {
                        // Only a successfully initialized manager is kept; the
                        // destructor deletes it.
                        fRevokeManager = revokeManager;
                        fInitStatus = _LoadSuperBlock();
                } else
                        delete revokeManager;
        }
}


/*!	Creates a journal that is not bound to any volume yet.

	All pointers are NULL, all counters zero, fBlockSize defaults to the size
	of the journal superblock structure and fFirstLogBlock to 1. No revoke
	manager is allocated and no superblock is read; only the two locks are
	initialized.
*/
Journal::Journal()
        :
        fJournalVolume(NULL),
        fJournalBlockCache(NULL),
        fFilesystemVolume(NULL),
        fFilesystemBlockCache(NULL),
        fOwner(NULL),
        fRevokeManager(NULL),
        fInitStatus(B_OK),
        fBlockSize(sizeof(JournalSuperBlock)),
        fFirstCommitID(0),
        fFirstCacheCommitID(0),
        fFirstLogBlock(1),
        fLogSize(0),
        fVersion(0),
        fIsStarted(false),
        fLogStart(0),
        fLogEnd(0),
        fFreeBlocks(0),
        fMaxTransactionSize(0),
        fCurrentCommitID(0),
        fHasSubTransaction(false),
        fSeparateSubTransactions(false),
        fUnwrittenTransactions(0),
        fTransactionID(0),
        fChecksumEnabled(false),
        fChecksumV3Enabled(false),
        fFeature64bits(false)
{
        recursive_lock_init(&fLock, "ext2 journal");
        mutex_init(&fLogEntriesLock, "ext2 journal log entries");
}


/*!	Deletes the revoke manager (if any) and destroys both locks.
	The log itself is not flushed here; see Uninit() for that.
*/
Journal::~Journal()
{
        TRACE("Journal destructor.\n");

        TRACE("Journal::~Journal(): Attempting to delete revoke manager at %p\n",
                fRevokeManager);
        // fRevokeManager is NULL when construction failed early; deleting NULL
        // is a no-op.
        delete fRevokeManager;

        recursive_lock_destroy(&fLock);
        mutex_destroy(&fLogEntriesLock);
}


/*!	Returns the status recorded during construction: B_OK, B_NO_MEMORY if the
	revoke manager could not be allocated, or whatever the revoke manager's
	Init() or _LoadSuperBlock() reported.
*/
status_t
Journal::InitCheck()
{
        return fInitStatus;
}


/*!	Shuts the journal down.

	Does nothing if the log was never started. Otherwise the log and the
	blocks are flushed and, if that worked, the superblock is rewritten with a
	log start of 0, which marks the journal as clean. The journal is
	considered stopped afterwards regardless of the outcome.

	\return B_OK, or the error of the flush or the superblock write.
*/
status_t
Journal::Uninit()
{
        status_t result = B_OK;

        if (fIsStarted) {
                result = FlushLogAndBlocks();
                if (result == B_OK) {
                        // A log start of 0 tells Recover() there is nothing to replay.
                        fLogStart = 0;
                        result = _SaveSuperBlock();
                }

                fIsStarted = false;
        }

        return result;
}


/*!	Puts the journal into the started state with an empty log.

	Log start and end are both placed on the first log block, the current
	commit ID is reset to the first commit ID and the superblock is written
	back.

	\return the result of _SaveSuperBlock().
*/
/*virtual*/ status_t
Journal::StartLog()
{
        // Start and end coincide: nothing has been logged yet.
        fLogEnd = fLogStart = fFirstLogBlock;
        fFreeBlocks = 0;

        fCurrentCommitID = fFirstCommitID;
        fIsStarted = true;

        const status_t saveStatus = _SaveSuperBlock();
        return saveStatus;
}


/*!	Resets the first commit ID to 1. Nothing is written to disk here.
	\return always B_OK.
*/
status_t
Journal::RestartLog()
{
        fFirstCommitID = 1;

        return B_OK;
}


/*!	Acquires the journal's recursive lock on behalf of \a owner and makes sure
	a block cache transaction is running.

	If the lock is already held by this thread and sub transactions are not
	kept separate, the current transaction is simply reused. Otherwise
	\a owner becomes the current owner (with the previous owner as its
	parent) and either a sub transaction or a brand new cache transaction is
	started, depending on whether there are unwritten transactions.

	\param owner the transaction requesting the lock; may be NULL, in which
		case no cache transaction is started.
	\param separateSubTransactions if \c true, nested locks get their own
		sub transaction from now on (the flag is sticky; Unlock() clears it).
	\return B_OK on success, the error from recursive_lock_lock(), or the
		negative transaction ID returned by the block cache.
*/
/*virtual*/ status_t
Journal::Lock(Transaction* owner, bool separateSubTransactions)
{
        TRACE("Journal::Lock()\n");
        status_t status = recursive_lock_lock(&fLock);
        if (status != B_OK)
                return status;

        TRACE("Journal::Lock(): Aquired lock\n");

        if (!fSeparateSubTransactions && recursive_lock_get_recursion(&fLock) > 1) {
                // reuse current transaction
                TRACE("Journal::Lock(): Reusing current transaction\n");
                return B_OK;
        }

        if (separateSubTransactions)
                fSeparateSubTransactions = true;

        // Chain the owners so that Unlock() can restore the previous one.
        if (owner != NULL)
                owner->SetParent(fOwner);

        fOwner = owner;

        if (fOwner != NULL) {
                if (fUnwrittenTransactions > 0) {
                        // start a sub transaction
                        TRACE("Journal::Lock(): Starting sub transaction\n");
                        cache_start_sub_transaction(fFilesystemBlockCache, fTransactionID);
                        fHasSubTransaction = true;
                } else {
                        TRACE("Journal::Lock(): Starting new transaction\n");
                        fTransactionID = cache_start_transaction(fFilesystemBlockCache);
                }

                // NOTE(review): on this failure path fOwner and
                // fSeparateSubTransactions have already been updated and are not
                // rolled back - confirm callers do not rely on the old values.
                if (fTransactionID < B_OK) {
                        recursive_lock_unlock(&fLock);
                        return fTransactionID;
                }

                cache_add_transaction_listener(fFilesystemBlockCache, fTransactionID,
                        TRANSACTION_IDLE, _TransactionIdle, this);
        }

        return B_OK;
}


/*!	Releases one level of the journal lock taken by Lock().

	When this is the outermost unlock, or sub transactions are kept separate,
	the transaction is finished via _TransactionDone(), the listeners of
	\a owner are notified and the owner's parent becomes the current owner
	again. For a nested unlock that shares the outer transaction, the
	listeners of \a owner are handed over to the current owner instead.

	\param owner the transaction passed to the matching Lock(); may be NULL.
	\param success whether the transaction's changes should be kept.
	\return B_OK, or the error returned by _TransactionDone().
*/
/*virtual*/ status_t
Journal::Unlock(Transaction* owner, bool success)
{
        TRACE("Journal::Unlock(): Lock recursion: %" B_PRId32 "\n",
                recursive_lock_get_recursion(&fLock));
        if (fSeparateSubTransactions
                || recursive_lock_get_recursion(&fLock) == 1) {
                // we only end the transaction if we unlock it
                if (owner != NULL) {
                        TRACE("Journal::Unlock(): Calling _TransactionDone\n");
                        status_t status = _TransactionDone(success);
                        // NOTE(review): this early return leaves fLock held -
                        // confirm that callers expect that.
                        if (status != B_OK)
                                return status;

                        TRACE("Journal::Unlock(): Returned from _TransactionDone\n");
                        // Listeners may lock the journal again; force them into their
                        // own sub transaction while they run, then restore the flag.
                        bool separateSubTransactions = fSeparateSubTransactions;
                        fSeparateSubTransactions = true;
                        TRACE("Journal::Unlock(): Notifying listeners for: %p\n", owner);
                        owner->NotifyListeners(success);
                        TRACE("Journal::Unlock(): Done notifying listeners\n");
                        fSeparateSubTransactions = separateSubTransactions;

                        fOwner = owner->Parent();
                } else
                        fOwner = NULL;

                if (fSeparateSubTransactions
                        && recursive_lock_get_recursion(&fLock) == 1)
                        fSeparateSubTransactions = false;
        } else if (owner != NULL) {
                // Lock() accepts a NULL owner, so a nested unlock may legitimately
                // arrive without one; there are no listeners to move in that case.
                owner->MoveListenersTo(fOwner);
        }

        TRACE("Journal::Unlock(): Unlocking the lock\n");

        recursive_lock_unlock(&fLock);
        return B_OK;
}


/*!	Translates a block index within the log into a block number on the
	journal volume. In this class the mapping is the identity: \a physical is
	set to \a logical.
	\return always B_OK.
*/
status_t
Journal::MapBlock(off_t logical, fsblock_t& physical)
{
        TRACE("Journal::MapBlock()\n");
        physical = logical;

        return B_OK;
}


/*!	Returns the number of log blocks that are currently unused, derived from
	the log's start, end and size.
*/
inline uint32
Journal::FreeLogBlocks() const
{
        TRACE("Journal::FreeLogBlocks(): start: %" B_PRIu32 ", end: %" B_PRIu32
                ", size: %" B_PRIu32 "\n", fLogStart, fLogEnd, fLogSize);

        // The used region wraps around the end of the log: the free blocks are
        // the gap between end and start.
        if (fLogStart > fLogEnd)
                return fLogStart - fLogEnd;

        // Used region is contiguous (or empty).
        return fLogSize - fLogEnd + fLogStart - 1;
}


/*!	Convenience wrapper that forwards to _FlushLog(true, true).
	NOTE(review): judging by the name, the two flags presumably request
	flushing the log as well as the cached blocks - confirm against _FlushLog().
*/
status_t
Journal::FlushLogAndBlocks()
{
        return _FlushLog(true, true);
}


/*!	Returns the ID of the block cache transaction last started (or detached)
	by this journal, as stored in fTransactionID.
*/
int32
Journal::TransactionID() const
{
        return fTransactionID;
}


/*!	Writes as many blocks of the current transaction to the log as fit behind
	a single descriptor block, then writes the descriptor block itself.

	The data blocks are written first, starting at the block after
	\a logBlock; the descriptor block is written last, at the position
	\a logBlock had on entry.

	\param descriptorBlock buffer of fBlockSize bytes whose header has already
		been prepared; the tags are filled in here.
	\param detached passed through to cache_next_block_in_transaction().
	\param _escapedData in/out: scratch buffer for blocks that start with the
		journal magic. Allocated on first use and reused afterwards.
	\param logBlock in: log position of the descriptor block. Out: the
		position following the last block written.
	\param blockNumber in: the first file system block to log. Out: the next
		block of the transaction, if there is one.
	\param cookie iteration cookie for cache_next_block_in_transaction().
	\param escapedDataDeleter receives ownership of the scratch buffer.
	\param blockCount incremented for every block written, including the
		descriptor block.
	\param finished set to \c true when the block cache reported no further
		block; \c false if the descriptor simply ran out of tags or an error
		caused an early return.
	\return B_OK on success, B_ERROR if a transaction block could not be
		fetched, B_NO_MEMORY, B_IO_ERROR, or the error from MapBlock().
*/
status_t
Journal::_WritePartialTransactionToLog(JournalHeader* descriptorBlock,
        bool detached, uint8** _escapedData, uint32 &logBlock, off_t& blockNumber,
        long& cookie, ArrayDeleter<uint8>& escapedDataDeleter, uint32& blockCount,
        bool& finished)
{
        TRACE("Journal::_WritePartialTransactionToLog()\n");

        uint32 descriptorBlockPos = logBlock;
        uint8* escapedData = *_escapedData;

        JournalBlockTag* tag = (JournalBlockTag*)descriptorBlock->data;
        JournalBlockTag* lastTag = (JournalBlockTag*)((uint8*)descriptorBlock
                + fBlockSize - sizeof(JournalHeader));

        finished = false;
        status_t status = B_OK;

        while (tag < lastTag && status == B_OK) {
                tag->SetBlockNumber(blockNumber);
                tag->SetFlags(0);

                CachedBlock data(fFilesystemVolume);
                const JournalHeader* blockData = (JournalHeader*)data.SetTo(
                        blockNumber);
                if (blockData == NULL) {
                        panic("Got a NULL pointer while iterating through transaction "
                                "blocks.\n");
                        return B_ERROR;
                }

                void* finalData;

                if (blockData->CheckMagic()) {
                        // The journaled block starts with the magic value
                        // We must remove it to prevent confusion
                        TRACE("Journal::_WritePartialTransactionToLog(): Block starts with "
                                "magic number. Escaping it\n");
                        tag->SetEscapedFlag();

                        if (escapedData == NULL) {
                                TRACE("Journal::_WritePartialTransactionToLog(): Allocating "
                                        "space for escaped block (%" B_PRIu32 ")\n", fBlockSize);
                                escapedData = new(std::nothrow) uint8[fBlockSize];
                                if (escapedData == NULL) {
                                        TRACE("Journal::_WritePartialTransactionToLof(): Failed to "
                                                "allocate buffer for escaped data block\n");
                                        return B_NO_MEMORY;
                                }
                                escapedDataDeleter.SetTo(escapedData);
                                *_escapedData = escapedData;

                                // The copy below never touches the first four bytes, so
                                // clearing them once is enough even when the buffer is
                                // reused for later blocks.
                                ((int32*)escapedData)[0] = 0; // Remove magic
                        }

                        // Copy everything after the 4 byte magic unchanged. The source
                        // must be addressed by byte offset: blockData->data lies behind
                        // the complete JournalHeader, so copying from there would shift
                        // the contents and read past the end of the block.
                        memcpy(escapedData + 4, (const uint8*)blockData + 4,
                                fBlockSize - 4);
                        finalData = escapedData;
                } else
                        finalData = (void*)blockData;

                // TODO: use iovecs?

                logBlock = _WrapAroundLog(logBlock + 1);

                fsblock_t physicalBlock;
                status = MapBlock(logBlock, physicalBlock);
                if (status != B_OK)
                        return status;

                off_t logOffset = physicalBlock * fBlockSize;

                TRACE("Journal::_WritePartialTransactionToLog(): Writing from memory: "
                        "%p, to disk: %" B_PRIdOFF "\n", finalData, logOffset);
                size_t written = write_pos(fJournalVolume->Device(), logOffset,
                        finalData, fBlockSize);
                if (written != fBlockSize) {
                        TRACE("Failed to write journal block.\n");
                        return B_IO_ERROR;
                }

                TRACE("Journal::_WritePartialTransactionToLog(): Wrote a journal block "
                        "at: %" B_PRIu32 "\n", logBlock);

                blockCount++;
                tag++;

                status = cache_next_block_in_transaction(fFilesystemBlockCache,
                        fTransactionID, detached, &cookie, &blockNumber, NULL, NULL);
        }

        // A non-B_OK status here can only come from the block cache iteration
        // (all other failures returned early) and means there are no more
        // blocks in the transaction.
        finished = status != B_OK;

        // Write descriptor block
        // The loop ran at least once, so stepping back yields the last tag that
        // was filled in.
        --tag;
        tag->SetLastTagFlag();

        fsblock_t physicalBlock;
        status = MapBlock(descriptorBlockPos, physicalBlock);
        if (status != B_OK)
                return status;

        off_t descriptorBlockOffset = physicalBlock * fBlockSize;

        TRACE("Journal::_WritePartialTransactionToLog(): Writing to: %" B_PRIdOFF
                "\n", descriptorBlockOffset);
        size_t written = write_pos(fJournalVolume->Device(),
                descriptorBlockOffset, descriptorBlock, fBlockSize);
        if (written != fBlockSize) {
                TRACE("Failed to write journal descriptor block.\n");
                return B_IO_ERROR;
        }

        blockCount++;
        logBlock = _WrapAroundLog(logBlock + 1);

        return B_OK;
}


/*!	Writes the current block cache transaction to the on-disk log.

	Steps, in order:
	- make sure the transaction fits (falling back to writing only the main
	  transaction and detaching the sub transaction if necessary),
	- make sure there is enough free space in the log, syncing the block
	  cache if there is not,
	- write the data and descriptor blocks via
	  _WritePartialTransactionToLog(), using several descriptor blocks if one
	  is not enough,
	- write the commit block, advance fLogEnd and save the superblock,
	- register a LogEntry and end (or detach) the cache transaction.

	\return B_OK on success (also when the transaction contains no blocks),
		B_BUFFER_OVERFLOW if the transaction cannot fit into the log,
		B_NO_MEMORY, B_IO_ERROR, or an error from MapBlock() or
		_WritePartialTransactionToLog().
*/
status_t
Journal::_WriteTransactionToLog()
{
        TRACE("Journal::_WriteTransactionToLog()\n");
        // Transaction enters the Flush state
        bool detached = false;
        TRACE("Journal::_WriteTransactionToLog(): Attempting to get transaction "
                "size\n");
        size_t size = _FullTransactionSize();
        TRACE("Journal::_WriteTransactionToLog(): transaction size: %" B_PRIuSIZE
                "\n", size);

        if (size > fMaxTransactionSize) {
                TRACE("Journal::_WriteTransactionToLog(): not enough free space "
                        "for the transaction. Attempting to free some space.\n");
                size = _MainTransactionSize();
                TRACE("Journal::_WriteTransactionToLog(): main transaction size: %"
                        B_PRIuSIZE "\n", size);

                if (fHasSubTransaction && size < fMaxTransactionSize) {
                        TRACE("Journal::_WriteTransactionToLog(): transaction doesn't fit, "
                                "but it can be separated\n");
                        detached = true;
                } else {
                        // Error: transaction can't fit in log
                        panic("transaction too large (size: %" B_PRIuSIZE ", max size: %"
                                B_PRIu32 ", log size: %" B_PRIu32 ")\n", size,
                                fMaxTransactionSize, fLogSize);
                        return B_BUFFER_OVERFLOW;
                }
        }

        TRACE("Journal::_WriteTransactionToLog(): free log blocks: %" B_PRIu32
                "\n", FreeLogBlocks());
        if (size > FreeLogBlocks()) {
                TRACE("Journal::_WriteTransactionToLog(): Syncing block cache\n");
                cache_sync_transaction(fFilesystemBlockCache, fTransactionID);

                if (size > FreeLogBlocks()) {
                        panic("Transaction fits, but sync didn't result in enough "
                                "free space.\n\tGot %" B_PRIu32 " when at least %" B_PRIuSIZE
                                " was expected.", FreeLogBlocks(), size);
                }
        }

        TRACE("Journal::_WriteTransactionToLog(): finished managing space for "
                "the transaction\n");

        fHasSubTransaction = false;
        // NOTE(review): the status of StartLog() is dropped here - confirm that
        // a failed superblock write at this point may really be ignored.
        if (!fIsStarted)
                StartLog();

        // Prepare Descriptor block
        TRACE("Journal::_WriteTransactionToLog(): attempting to allocate space for "
                "the descriptor block, block size %" B_PRIu32 "\n", fBlockSize);
        JournalHeader* descriptorBlock =
                (JournalHeader*)new(std::nothrow) uint8[fBlockSize];
        if (descriptorBlock == NULL) {
                TRACE("Journal::_WriteTransactionToLog(): Failed to allocate a buffer "
                        "for the descriptor block\n");
                return B_NO_MEMORY;
        }
        ArrayDeleter<uint8> descriptorBlockDeleter((uint8*)descriptorBlock);

        descriptorBlock->MakeDescriptor(fCurrentCommitID);

        // Prepare Commit block
        TRACE("Journal::_WriteTransactionToLog(): attempting to allocate space for "
                "the commit block, block size %" B_PRIu32 "\n", fBlockSize);
        JournalHeader* commitBlock =
                (JournalHeader*)new(std::nothrow) uint8[fBlockSize];
        if (commitBlock == NULL) {
                TRACE("Journal::_WriteTransactionToLog(): Failed to allocate a buffer "
                        "for the commit block\n");
                return B_NO_MEMORY;
        }
        ArrayDeleter<uint8> commitBlockDeleter((uint8*)commitBlock);

        commitBlock->MakeCommit(fCurrentCommitID + 1);
        memset(commitBlock->data, 0, fBlockSize - sizeof(JournalHeader));
                // TODO: This probably isn't necessary

        uint8* escapedData = NULL;
        ArrayDeleter<uint8> escapedDataDeleter;

        off_t blockNumber;
        long cookie = 0;

        status_t status = cache_next_block_in_transaction(fFilesystemBlockCache,
                fTransactionID, detached, &cookie, &blockNumber, NULL, NULL);
        if (status != B_OK) {
                TRACE("Journal::_WriteTransactionToLog(): Transaction has no blocks to "
                        "write\n");
                return B_OK;
        }

        uint32 blockCount = 0;

        uint32 logBlock = _WrapAroundLog(fLogEnd);

        bool finished = false;

        status = _WritePartialTransactionToLog(descriptorBlock, detached,
                &escapedData, logBlock, blockNumber, cookie, escapedDataDeleter,
                blockCount, finished);
        if (!finished && status != B_OK)
                return status;

        // The commit block of the first part is written last (see below); only
        // its position is remembered here.
        // NOTE(review): logBlock is not advanced past commitBlockPos before the
        // loop below, so a second descriptor block appears to be written to
        // this very position - confirm whether that is intended.
        uint32 commitBlockPos = logBlock;

        while (!finished) {
                descriptorBlock->IncrementSequence();

                status = _WritePartialTransactionToLog(descriptorBlock, detached,
                        &escapedData, logBlock, blockNumber, cookie, escapedDataDeleter,
                        blockCount, finished);
                if (!finished && status != B_OK)
                        return status;

                // It is okay to write the commit blocks of the partial transactions
                // as long as the commit block of the first partial transaction isn't
                // written. When recovery reaches the position where the first commit
                // block should be and doesn't find it, it assumes it has found the
                // end of the log.

                fsblock_t physicalBlock;
                status = MapBlock(logBlock, physicalBlock);
                if (status != B_OK)
                        return status;

                off_t logOffset = physicalBlock * fBlockSize;

                TRACE("Journal::_WriteTransactionToLog(): Writting commit block to "
                        "%" B_PRIdOFF "\n", logOffset);
                off_t written = write_pos(fJournalVolume->Device(), logOffset,
                        commitBlock, fBlockSize);
                if (written != fBlockSize) {
                        TRACE("Failed to write journal commit block.\n");
                        return B_IO_ERROR;
                }

                commitBlock->IncrementSequence();
                blockCount++;

                logBlock = _WrapAroundLog(logBlock + 1);
        }

        // Transaction will enter the Commit state
        fsblock_t physicalBlock;
        status = MapBlock(commitBlockPos, physicalBlock);
        if (status != B_OK)
                return status;

        off_t logOffset = physicalBlock * fBlockSize;

        TRACE("Journal::_WriteTransactionToLog(): Writing to: %" B_PRIdOFF "\n",
                logOffset);
        off_t written = write_pos(fJournalVolume->Device(), logOffset, commitBlock,
                fBlockSize);
        if (written != fBlockSize) {
                TRACE("Failed to write journal commit block.\n");
                return B_IO_ERROR;
        }

        blockCount++;
        fLogEnd = _WrapAroundLog(fLogEnd + blockCount);

        // The result is only evaluated in the detached branch below.
        status = _SaveSuperBlock();

        // Transaction will enter Finished state
        // Use the non-throwing form so that the NULL check below is meaningful,
        // as with every other allocation in this file.
        LogEntry *logEntry = new(std::nothrow) LogEntry(this, fLogEnd,
                fCurrentCommitID++);
        TRACE("Journal::_WriteTransactionToLog(): Allocating log entry at %p\n",
                logEntry);
        if (logEntry == NULL) {
                panic("no memory to allocate log entries!");
                return B_NO_MEMORY;
        }

        mutex_lock(&fLogEntriesLock);
        fLogEntries.Add(logEntry);
        mutex_unlock(&fLogEntriesLock);

        if (detached) {
                fTransactionID = cache_detach_sub_transaction(fFilesystemBlockCache,
                        fTransactionID, _TransactionWritten, logEntry);
                fUnwrittenTransactions = 1;

                if (status == B_OK && _FullTransactionSize() > fLogSize) {
                        // If the transaction is too large after writing, there is no way to
                        // recover, so let this transaction fail.
                        ERROR("transaction too large (%" B_PRIuSIZE " blocks, log size %"
                                B_PRIu32 ")!\n", _FullTransactionSize(), fLogSize);
                        return B_BUFFER_OVERFLOW;
                }
        } else {
                cache_end_transaction(fFilesystemBlockCache, fTransactionID,
                        _TransactionWritten, logEntry);
                fUnwrittenTransactions = 0;
        }

        return B_OK;
}


/*!	Updates the on-disk journal superblock with the current first commit ID
	and log start.

	The superblock is read from disk, patched and written back, so all other
	fields keep their on-disk values. The checksum is recomputed when
	checksums are enabled.

	\return B_OK, the error from MapBlock(), or B_IO_ERROR if the superblock
		could not be read or written completely.
*/
status_t
Journal::_SaveSuperBlock()
{
        TRACE("Journal::_SaveSuperBlock()\n");

        fsblock_t superblockBlock;
        status_t mapStatus = MapBlock(0, superblockBlock);
        if (mapStatus != B_OK)
                return mapStatus;

        off_t superblockPos = superblockBlock * fBlockSize;

        // Read-modify-write: fetch the current on-disk contents first.
        JournalSuperBlock diskSuperBlock;
        size_t readCount = read_pos(fJournalVolume->Device(), superblockPos,
                &diskSuperBlock, sizeof(diskSuperBlock));
        if (readCount != sizeof(diskSuperBlock))
                return B_IO_ERROR;

        diskSuperBlock.SetFirstCommitID(fFirstCommitID);
        diskSuperBlock.SetLogStart(fLogStart);

        if (fChecksumEnabled) {
                uint32 freshChecksum = _Checksum(&diskSuperBlock);
                diskSuperBlock.SetChecksum(freshChecksum);
        }

        TRACE("Journal::SaveSuperBlock(): Write to %" B_PRIdOFF "\n",
                superblockPos);
        size_t writeCount = write_pos(fJournalVolume->Device(), superblockPos,
                &diskSuperBlock, sizeof(diskSuperBlock));
        if (writeCount != sizeof(diskSuperBlock))
                return B_IO_ERROR;

        TRACE("Journal::_SaveSuperBlock(): Done\n");

        return B_OK;
}


/*!	Reads the journal superblock from disk and initializes the journal's
	geometry from it.

	Validates the magic and the superblock version (1 or 2). For version 2
	superblocks the feature flags are checked via _CheckFeatures() and, when
	checksums are enabled, the superblock checksum is verified and the
	checksum seed is derived from the UUID. Afterwards block size, first
	commit ID, first log block, log start and log size are taken over and the
	maximum transaction size is computed.

	\return B_OK on success, the error from MapBlock() or _CheckFeatures(),
		B_IO_ERROR on a short read, B_BAD_VALUE for a wrong magic or unknown
		version, B_BAD_DATA for a checksum mismatch.
*/
status_t
Journal::_LoadSuperBlock()
{
        // The code below relies on the exact on-disk sizes of these structures.
        STATIC_ASSERT(sizeof(struct JournalHeader) == 12);
        STATIC_ASSERT(sizeof(struct JournalSuperBlock) == 1024);

        TRACE("Journal::_LoadSuperBlock()\n");
        fsblock_t superblockPos;

        status_t status = MapBlock(0, superblockPos);
        if (status != B_OK)
                return status;

        TRACE("Journal::_LoadSuperBlock(): superblock physical block: %" B_PRIu64
                "\n", superblockPos);

        // fBlockSize is not known yet, so the offset is computed with the
        // journal volume's block size here.
        JournalSuperBlock superblock;
        size_t bytesRead = read_pos(fJournalVolume->Device(), superblockPos
                * fJournalVolume->BlockSize(), &superblock, sizeof(superblock));

        if (bytesRead != sizeof(superblock)) {
                ERROR("Journal::_LoadSuperBlock(): failed to read superblock\n");
                return B_IO_ERROR;
        }

        if (!superblock.header.CheckMagic()) {
                ERROR("Journal::_LoadSuperBlock(): Invalid superblock magic %" B_PRIx32
                        "\n", superblock.header.Magic());
                return B_BAD_VALUE;
        }

        if (superblock.header.BlockType() == JOURNAL_SUPERBLOCK_V1) {
                TRACE("Journal::_LoadSuperBlock(): Journal superblock version 1\n");
                fVersion = 1;
        } else if (superblock.header.BlockType() == JOURNAL_SUPERBLOCK_V2) {
                TRACE("Journal::_LoadSuperBlock(): Journal superblock version 2\n");
                fVersion = 2;
        } else {
                ERROR("Journal::_LoadSuperBlock(): Invalid superblock version\n");
                return B_BAD_VALUE;
        }

        // Feature flags are only evaluated for version 2 superblocks.
        if (fVersion >= 2) {
                TRACE("Journal::_LoadSuperBlock(): incompatible features %" B_PRIx32
                        ", read-only features %" B_PRIx32 "\n",
                        superblock.IncompatibleFeatures(),
                        superblock.ReadOnlyCompatibleFeatures());

                status = _CheckFeatures(&superblock);

                if (status != B_OK)
                        return status;

                if (fChecksumEnabled) {
                        if (superblock.Checksum() != _Checksum(&superblock)) {
                                ERROR("Journal::_LoadSuperBlock(): Invalid checksum\n");
                                return B_BAD_DATA;
                        }
                        fChecksumSeed = calculate_crc32c(0xffffffff, (uint8*)superblock.uuid,
                                sizeof(superblock.uuid));
                }
        }

        // NOTE(review): none of the values below are range checked; a block
        // size or log size of 0 on disk would propagate into the arithmetic
        // that follows - confirm whether validation happens elsewhere.
        fBlockSize = superblock.BlockSize();
        fFirstCommitID = superblock.FirstCommitID();
        fFirstLogBlock = superblock.FirstLogBlock();
        fLogStart = superblock.LogStart();
        fLogSize = superblock.NumBlocks();

        uint32 descriptorTags = (fBlockSize - sizeof(JournalHeader))
                / sizeof(JournalBlockTag);
                // Maximum tags per descriptor block
        uint32 maxDescriptors = (fLogSize - 1) / (descriptorTags + 2);
                // Maximum number of full journal transactions
        // Taken together, the two statements below leave fMaxTransactionSize at
        // fLogSize - 3; the intermediate product cancels out.
        fMaxTransactionSize = maxDescriptors * descriptorTags;
        fMaxTransactionSize += (fLogSize - 1) - fMaxTransactionSize - 2;
                // Maximum size of a "logical" transaction
                // TODO: Why is "superblock.MaxTransactionBlocks();" zero?
        //fFirstCacheCommitID = fFirstCommitID - fTransactionID /*+ 1*/;

        TRACE("Journal::_LoadSuperBlock(): block size: %" B_PRIu32 ", first commit"
                " id: %" B_PRIu32 ", first log block: %" B_PRIu32 ", log start: %"
                B_PRIu32 ", log size: %" B_PRIu32 ", max transaction size: %" B_PRIu32
                "\n", fBlockSize, fFirstCommitID, fFirstLogBlock, fLogStart,
                fLogSize, fMaxTransactionSize);

        return B_OK;
}


/*!	Validates the feature flags of \a superblock and caches the ones this
	implementation cares about.

	\return B_UNSUPPORTED if the superblock carries a read-only compatible or
		incompatible feature outside the known sets, B_BAD_VALUE if both
		checksum versions (v2 and v3) are flagged at once, B_OK otherwise.
		On B_OK, fChecksumEnabled, fChecksumV3Enabled and fFeature64bits
		reflect the superblock.
*/
status_t
Journal::_CheckFeatures(JournalSuperBlock* superblock)
{
        uint32 readonly = superblock->ReadOnlyCompatibleFeatures();
        uint32 incompatible = superblock->IncompatibleFeatures();
        bool hasReadonly = (readonly & ~JOURNAL_KNOWN_READ_ONLY_COMPATIBLE_FEATURES)
                != 0;
        bool hasIncompatible = (incompatible
                & ~JOURNAL_KNOWN_INCOMPATIBLE_FEATURES) != 0;
        if (hasReadonly || hasIncompatible ) {
                ERROR("Journal::_CheckFeatures(): Unsupported features: %" B_PRIx32
                        " %" B_PRIx32 "\n", readonly, incompatible);
                return B_UNSUPPORTED;
        }

        bool hasCsumV2
                = (incompatible & JOURNAL_FEATURE_INCOMPATIBLE_CSUM_V2) != 0;
        bool hasCsumV3
                = (incompatible & JOURNAL_FEATURE_INCOMPATIBLE_CSUM_V3) != 0;
        // The two checksum versions are mutually exclusive.
        if (hasCsumV2 && hasCsumV3) {
                return B_BAD_VALUE;
        }

        // Checksums are in use if either version is flagged. (An "and" here
        // could never be true after the check above and would silently disable
        // all checksum handling.)
        fChecksumEnabled = hasCsumV2 || hasCsumV3;
        fChecksumV3Enabled = hasCsumV3;
        fFeature64bits
                = (incompatible & JOURNAL_FEATURE_INCOMPATIBLE_64BIT) != 0;
        return B_OK;
}


/*!	Computes the CRC32C of \a superblock with its checksum field treated as
	zero. The checksum field holds its original value again on return.
*/
uint32
Journal::_Checksum(JournalSuperBlock* superblock)
{
        // Temporarily blank the stored checksum so it does not feed into itself.
        const uint32 savedChecksum = superblock->checksum;
        superblock->checksum = 0;

        const uint32 computed = calculate_crc32c(0xffffffff, (uint8*)superblock,
                sizeof(JournalSuperBlock));

        superblock->checksum = savedChecksum;
        return computed;
}


/*!	Computes the CRC32C over the journal block \a block (fBlockSize bytes)
	with the checksum in its JournalBlockTail treated as zero.

	\param set if \c true the freshly computed checksum is stored in the
		tail, otherwise the tail keeps its previous value.
	\return \c true if the checksum found in the tail on entry equals the
		computed one.
*/
bool
Journal::_Checksum(uint8* block, bool set)
{
        // The tail occupies the last bytes of the block.
        JournalBlockTail* blockTail = (JournalBlockTail*)(block + fBlockSize
                - sizeof(JournalBlockTail));

        const uint32 storedChecksum = blockTail->checksum;
        blockTail->checksum = 0;
        const uint32 computed = calculate_crc32c(0xffffffff, block, fBlockSize);

        blockTail->checksum = set ? computed : storedChecksum;

        return computed == storedChecksum;
}


/*!	Counts the block tags stored in \a descriptorBlock.

	Tags are walked from the start of the block's data area until a tag with
	JOURNAL_FLAG_LAST_TAG is found or the end of the usable area is reached.
	The usable area is the block minus the checksum tail, if checksums are
	enabled. A tag without JOURNAL_FLAG_SAME_UUID is followed by 16 additional
	bytes that are skipped.

	\return the number of tags, including the one flagged as last.
*/
uint32
Journal::_CountTags(JournalHeader* descriptorBlock)
{
        uint32 count = 0;
        size_t tagSize = _TagSize();
        size_t size = fBlockSize;

        if (fChecksumEnabled)
                size -= sizeof(JournalBlockTail);

        JournalBlockTag* tags = (JournalBlockTag*)descriptorBlock->data;
                // Skip the header
        // Address of the last position at which a whole tag still fits. The
        // offset is in bytes, so it must be applied to a byte pointer; applied
        // to the JournalHeader pointer it would be scaled by the header size
        // and end up far beyond the block.
        JournalBlockTag* lastTag = (JournalBlockTag*)
                ((uint8*)descriptorBlock + size - tagSize);

        while (tags < lastTag && (tags->Flags() & JOURNAL_FLAG_LAST_TAG) == 0) {
                if ((tags->Flags() & JOURNAL_FLAG_SAME_UUID) == 0)
                        tags = (JournalBlockTag*)((uint8*)tags + 16); // Skip new UUID

                TRACE("Journal::_CountTags(): Tag block: %" B_PRIu32 "\n",
                        tags->BlockNumber());

                tags = (JournalBlockTag*)((uint8*)tags + tagSize); // Go to next tag
                count++;
        }

        // Only look at the final tag if it lies completely inside the block;
        // a UUID skip may have moved the cursor past the last valid position.
        if (tags <= lastTag && (tags->Flags() & JOURNAL_FLAG_LAST_TAG) != 0)
                count++;

        TRACE("Journal::_CountTags(): counted tags: %" B_PRIu32 "\n", count);

        return count;
}


/*!	Returns the size in bytes of one block tag in a descriptor block, which
	depends on the enabled journal features.
*/
size_t
Journal::_TagSize()
{
        if (!fChecksumV3Enabled) {
                // Start from the full tag structure and adjust it for the
                // features in use.
                size_t tagSize = sizeof(JournalBlockTag);

                if (!fFeature64bits)
                        tagSize -= sizeof(uint32);
                if (fChecksumEnabled)
                        tagSize += sizeof(uint16);

                return tagSize;
        }

        // Version 3 checksums come with their own tag layout.
        return sizeof(JournalBlockTagV3);
}


/*!	Replays the log if the journal was not shut down cleanly.

	A log start of 0 means there is nothing to recover. Otherwise the three
	recovery passes (scan, revoke, replay) are run in order; the first one
	that fails aborts the recovery.

	\return B_OK, or the error of the pass that failed.
*/
/*virtual*/ status_t
Journal::Recover()
{
        TRACE("Journal::Recover()\n");
        if (fLogStart == 0) {
                // Journal was cleanly unmounted
                return B_OK;
        }

        TRACE("Journal::Recover(): Journal needs recovery\n");

        // Filled in by the scan pass and handed on to the other two.
        uint32 lastCommitID;

        status_t passStatus = _RecoverPassScan(lastCommitID);
        if (passStatus == B_OK)
                passStatus = _RecoverPassRevoke(lastCommitID);
        if (passStatus == B_OK)
                passStatus = _RecoverPassReplay(lastCommitID);

        return passStatus;
}


// First pass: Find the end of the log
//
// Walks the log from fLogStart while the blocks carry the journal magic and
// the expected sequence number. Descriptor blocks are skipped together with
// the data blocks their tags describe, commit blocks advance the expected
// commit ID, revoke blocks are ignored. On success, lastCommitID is set to
// the commit ID following the last commit block that was found.
status_t
Journal::_RecoverPassScan(uint32& lastCommitID)
{
        TRACE("Journal Recover: 1st Pass: Scan\n");

        CachedBlock cached(fJournalVolume);
        JournalHeader* header;
        uint32 nextCommitID = fFirstCommitID;
        uint32 nextBlock = fLogStart;
        fsblock_t nextBlockPos;

        status_t status = MapBlock(nextBlock, nextBlockPos);
        if (status != B_OK)
                return status;

        header = (JournalHeader*)cached.SetTo(nextBlockPos);
        if (header == NULL) {
                // Don't dereference a block that could not be obtained
                ERROR("Journal::_RecoverPassScan(): Failed to get log block\n");
                return B_IO_ERROR;
        }

        while (header->CheckMagic() && header->Sequence() == nextCommitID) {
                uint32 blockType = header->BlockType();

                if (blockType == JOURNAL_DESCRIPTOR_BLOCK) {
                        if (fChecksumEnabled && !_Checksum((uint8*)header, false)) {
                                ERROR("Journal::_RecoverPassScan(): Invalid checksum\n");
                                return B_BAD_DATA;
                        }
                        // Skip the data blocks that follow the descriptor block
                        uint32 tags = _CountTags(header);
                        nextBlock += tags;
                        TRACE("Journal recover pass scan: Found a descriptor block with "
                                "%" B_PRIu32 " tags\n", tags);
                } else if (blockType == JOURNAL_COMMIT_BLOCK) {
                        nextCommitID++;
                        TRACE("Journal recover pass scan: Found a commit block. Next "
                                "commit ID: %" B_PRIu32 "\n", nextCommitID);
                } else if (blockType != JOURNAL_REVOKE_BLOCK) {
                        TRACE("Journal recover pass scan: Reached an unrecognized block, "
                                "assuming as log's end.\n");
                        break;
                } else {
                        TRACE("Journal recover pass scan: Found a revoke block, "
                                "skipping it\n");
                }

                nextBlock = _WrapAroundLog(nextBlock + 1);

                status = MapBlock(nextBlock, nextBlockPos);
                if (status != B_OK)
                        return status;

                header = (JournalHeader*)cached.SetTo(nextBlockPos);
                if (header == NULL) {
                        ERROR("Journal::_RecoverPassScan(): Failed to get log block\n");
                        return B_IO_ERROR;
                }
        }

        TRACE("Journal Recovery pass scan: Last detected transaction ID: %"
                B_PRIu32 "\n", nextCommitID);

        lastCommitID = nextCommitID;
        return B_OK;
}


// Second pass: Collect all revoked blocks
//
// Walks the log from fLogStart again, up to (but not including) the commit
// ID determined by the scan pass, and hands every revoke block to the revoke
// manager. Returns B_ERROR if the log no longer matches what the scan pass
// saw, B_IO_ERROR if a log block could not be obtained.
status_t
Journal::_RecoverPassRevoke(uint32 lastCommitID)
{
        TRACE("Journal Recover: 2nd Pass: Revoke\n");

        CachedBlock cached(fJournalVolume);
        JournalHeader* header;
        uint32 nextCommitID = fFirstCommitID;
        uint32 nextBlock = fLogStart;
        fsblock_t nextBlockPos;

        status_t status = MapBlock(nextBlock, nextBlockPos);
        if (status != B_OK)
                return status;

        header = (JournalHeader*)cached.SetTo(nextBlockPos);

        while (nextCommitID < lastCommitID) {
                if (header == NULL) {
                        // Don't dereference a block that could not be obtained
                        ERROR("Journal::_RecoverPassRevoke(): Failed to get log "
                                "block\n");
                        return B_IO_ERROR;
                }

                if (!header->CheckMagic() || header->Sequence() != nextCommitID) {
                        // Somehow the log is different than the expected
                        return B_ERROR;
                }

                uint32 blockType = header->BlockType();

                if (blockType == JOURNAL_DESCRIPTOR_BLOCK) {
                        // Skip the data blocks that follow the descriptor block
                        nextBlock += _CountTags(header);
                } else if (blockType == JOURNAL_COMMIT_BLOCK)
                        nextCommitID++;
                else if (blockType == JOURNAL_REVOKE_BLOCK) {
                        TRACE("Journal::_RecoverPassRevoke(): Found a revoke block\n");
                        status = fRevokeManager->ScanRevokeBlock(
                                (JournalRevokeHeader*)header, nextCommitID);

                        if (status != B_OK)
                                return status;
                } else {
                        WARN("Journal::_RecoverPassRevoke(): Found an unrecognized block\n");
                        break;
                }

                nextBlock = _WrapAroundLog(nextBlock + 1);

                status = MapBlock(nextBlock, nextBlockPos);
                if (status != B_OK)
                        return status;

                header = (JournalHeader*)cached.SetTo(nextBlockPos);
        }

        if (nextCommitID != lastCommitID) {
                // Possibly because of some sort of IO error
                TRACE("Journal::_RecoverPassRevoke(): Incompatible commit IDs\n");
                return B_ERROR;
        }

        TRACE("Journal recovery pass revoke: Revoked blocks: %" B_PRIu32 "\n",
                fRevokeManager->NumRevokes());

        return B_OK;
}


// Third pass: Replay log
//
// Walks the log from fLogStart up to (but not including) lastCommitID. For
// every tag of every descriptor block, the matching log block is read from
// the journal volume's device and - unless the revoke manager reports the
// target block as revoked for the current commit ID - written to the file
// system volume's device at the block number stored in the tag.
//
// Returns B_OK on success, B_NO_MEMORY if the bounce buffer cannot be
// allocated, B_IO_ERROR on read/write failures or if a log block could not be
// obtained, and B_ERROR if the log does not match what the scan pass saw.
status_t
Journal::_RecoverPassReplay(uint32 lastCommitID)
{
        TRACE("Journal Recover: 3rd Pass: Replay\n");

        uint32 nextCommitID = fFirstCommitID;
        uint32 nextBlock = fLogStart;
        fsblock_t nextBlockPos;

        status_t status = MapBlock(nextBlock, nextBlockPos);
        if (status != B_OK)
                return status;

        CachedBlock cached(fJournalVolume);
        JournalHeader* header = (JournalHeader*)cached.SetTo(nextBlockPos);

        int count = 0;

        uint8* data = new(std::nothrow) uint8[fBlockSize];
        if (data == NULL) {
                TRACE("Journal::_RecoverPassReplay(): Failed to allocate memory for "
                        "data\n");
                return B_NO_MEMORY;
        }

        ArrayDeleter<uint8> dataDeleter(data);

        // Use the same tag geometry as _CountTags(), so that this pass consumes
        // exactly as many log blocks per descriptor as the other passes skipped.
        const size_t tagSize = _TagSize();
        size_t tagAreaSize = fBlockSize;
        if (fChecksumEnabled)
                tagAreaSize -= sizeof(JournalBlockTail);

        while (nextCommitID < lastCommitID) {
                if (header == NULL) {
                        // Don't dereference a block that could not be obtained
                        ERROR("Journal::_RecoverPassReplay(): Failed to get log "
                                "block\n");
                        return B_IO_ERROR;
                }

                if (!header->CheckMagic() || header->Sequence() != nextCommitID) {
                        // Somehow the log is different than the expected
                        ERROR("Journal::_RecoverPassReplay(): Weird problem with block\n");
                        return B_ERROR;
                }

                uint32 blockType = header->BlockType();

                if (blockType == JOURNAL_DESCRIPTOR_BLOCK) {
                        // Tags are walked bytewise, since their on-disk size
                        // (_TagSize()) isn't necessarily sizeof(JournalBlockTag).
                        uint8* tagPosition = (uint8*)header->data;
                        uint8* lastTagPosition
                                = (uint8*)header + tagAreaSize - tagSize;

                        while (tagPosition <= lastTagPosition) {
                                JournalBlockTag* tag = (JournalBlockTag*)tagPosition;

                                // Each tag describes the next block in the log
                                nextBlock = _WrapAroundLog(nextBlock + 1);

                                status = MapBlock(nextBlock, nextBlockPos);
                                if (status != B_OK)
                                        return status;

                                if (!fRevokeManager->Lookup(tag->BlockNumber(),
                                                nextCommitID)) {
                                        // Block isn't revoked
                                        ssize_t bytesRead = read_pos(
                                                fJournalVolume->Device(),
                                                nextBlockPos * fBlockSize, data,
                                                fBlockSize);
                                        if (bytesRead != (ssize_t)fBlockSize)
                                                return B_IO_ERROR;

                                        if ((tag->Flags() & JOURNAL_FLAG_ESCAPED) != 0) {
                                                // Block is escaped
                                                ((int32*)data)[0]
                                                        = B_HOST_TO_BENDIAN_INT32(JOURNAL_MAGIC);
                                        }

                                        // Compute the byte offset in 64 bit, a 32 bit
                                        // product would wrap for targets beyond 4 GiB.
                                        // NOTE(review): only the value returned by
                                        // BlockNumber() is used; confirm whether a high
                                        // block number part needs to be honored with
                                        // fFeature64bits.
                                        off_t targetOffset
                                                = (off_t)tag->BlockNumber() * fBlockSize;

                                        TRACE("Journal::_RevoverPassReplay(): Write to %"
                                                B_PRIdOFF "\n", targetOffset);
                                        ssize_t bytesWritten = write_pos(
                                                fFilesystemVolume->Device(), targetOffset,
                                                data, fBlockSize);

                                        if (bytesWritten != (ssize_t)fBlockSize)
                                                return B_IO_ERROR;

                                        ++count;
                                }

                                if ((tag->Flags() & JOURNAL_FLAG_LAST_TAG) != 0)
                                        break;

                                tagPosition += tagSize;
                                if ((tag->Flags() & JOURNAL_FLAG_SAME_UUID) == 0) {
                                        // TODO: Check new UUID with file system UUID
                                        tagPosition += 16;
                                                // sizeof(UUID) = 16
                                }
                        }
                } else if (blockType == JOURNAL_COMMIT_BLOCK)
                        nextCommitID++;
                else if (blockType != JOURNAL_REVOKE_BLOCK) {
                        WARN("Journal::_RecoverPassReplay(): Found an unrecognized block\n");
                        break;
                } // If blockType == JOURNAL_REVOKE_BLOCK we just skip it

                nextBlock = _WrapAroundLog(nextBlock + 1);

                status = MapBlock(nextBlock, nextBlockPos);
                if (status != B_OK)
                        return status;

                header = (JournalHeader*)cached.SetTo(nextBlockPos);
        }

        if (nextCommitID != lastCommitID) {
                // Possibly because of some sort of IO error
                return B_ERROR;
        }

        TRACE("Journal recovery pass replay: Replayed blocks: %u\n", count);

        return B_OK;
}


/*!	Writes any pending transaction to the log and optionally flushes the
	file system volume.

	\param canWait If \c true, fLock is acquired with recursive_lock_lock(),
		otherwise only recursive_lock_trylock() is attempted.
	\param flushBlocks If \c true, FlushDevice() is called on the file system
		volume afterwards.
	\return The locking error if fLock could not be acquired, B_OK if called
		from within a transaction (nothing is done then), otherwise the
		status of the last operation performed.
*/
status_t
Journal::_FlushLog(bool canWait, bool flushBlocks)
{
        TRACE("Journal::_FlushLog()\n");

        status_t status;
        if (canWait)
                status = recursive_lock_lock(&fLock);
        else
                status = recursive_lock_trylock(&fLock);

        TRACE("Journal::_FlushLog(): Acquired fLock, recursion: %" B_PRId32 "\n",
                recursive_lock_get_recursion(&fLock));
        if (status != B_OK)
                return status;

        // A recursion count above one means the lock was already held
        const bool insideTransaction = recursive_lock_get_recursion(&fLock) > 1;
        if (insideTransaction) {
                // Called from inside a transaction
                recursive_lock_unlock(&fLock);
                TRACE("Journal::_FlushLog(): Called from a transaction. Leaving...\n");
                return B_OK;
        }

        const bool hasPendingBlocks = fUnwrittenTransactions != 0
                && _FullTransactionSize() != 0;
        if (hasPendingBlocks) {
                status = _WriteTransactionToLog();
                if (status < B_OK)
                        panic("Failed flushing transaction: %s\n", strerror(status));
        }

        TRACE("Journal::_FlushLog(): Attempting to flush journal volume at %p\n",
                fJournalVolume);

        // TODO: Not sure this is correct. Need to review...
        // NOTE: Not correct. Causes double lock of a block cache mutex
        // TODO: Need some other way to synchronize the journal...
        /*status = fJournalVolume->FlushDevice();
        if (status != B_OK)
                return status;*/

        TRACE("Journal::_FlushLog(): Flushed journal volume\n");

        if (flushBlocks) {
                TRACE("Journal::_FlushLog(): Attempting to flush file system volume "
                        "at %p\n", fFilesystemVolume);

                status = fFilesystemVolume->FlushDevice();
                if (status == B_OK)
                        TRACE("Journal::_FlushLog(): Flushed file system volume\n");
        }

        TRACE("Journal::_FlushLog(): Finished. Releasing lock\n");

        recursive_lock_unlock(&fLock);

        TRACE("Journal::_FlushLog(): Done, final status: %s\n", strerror(status));
        return status;
}


/*!	Maps a log block index that ran past the end of the log back to its
	beginning.

	\param block The block index to normalize.
	\return \a block itself if it is below fLogSize, otherwise
		\a block - fLogSize + fFirstLogBlock.
*/
inline uint32
Journal::_WrapAroundLog(uint32 block)
{
        TRACE("Journal::_WrapAroundLog()\n");

        // Indices below fLogSize are returned untouched
        if (block < fLogSize)
                return block;

        return block - fLogSize + fFirstLogBlock;
}


/*!	Returns the number of blocks the block cache reports for the current
	transaction; if a sub transaction is active, only that one is counted.
*/
size_t
Journal::_CurrentTransactionSize() const
{
        TRACE("Journal::_CurrentTransactionSize(): transaction %" B_PRIu32 "\n",
                fTransactionID);

        if (!fHasSubTransaction) {
                size_t blocks = cache_blocks_in_transaction(fFilesystemBlockCache,
                        fTransactionID);

                TRACE("\tTransaction size: %" B_PRIuSIZE "\n", blocks);
                return blocks;
        }

        size_t blocks = cache_blocks_in_sub_transaction(fFilesystemBlockCache,
                fTransactionID);

        TRACE("\tSub transaction size: %" B_PRIuSIZE "\n", blocks);
        return blocks;
}


/*!	Returns the block count cache_blocks_in_transaction() reports for the
	current transaction ID.
*/
size_t
Journal::_FullTransactionSize() const
{
        TRACE("Journal::_FullTransactionSize(): transaction %" B_PRIu32 "\n",
                fTransactionID);
        TRACE("\tFile sytem block cache: %p\n", fFilesystemBlockCache);

        const size_t blocks
                = cache_blocks_in_transaction(fFilesystemBlockCache, fTransactionID);

        TRACE("\tFull transaction size: %" B_PRIuSIZE "\n", blocks);

        return blocks;
}


/*!	Returns the block count cache_blocks_in_main_transaction() reports for
	the current transaction ID.
*/
size_t
Journal::_MainTransactionSize() const
{
        TRACE("Journal::_MainTransactionSize(): transaction %" B_PRIu32 "\n",
                fTransactionID);

        const size_t blocks = cache_blocks_in_main_transaction(
                fFilesystemBlockCache, fTransactionID);

        TRACE("\tMain transaction size: %" B_PRIuSIZE "\n", blocks);

        return blocks;
}


/*!	Finishes the current transaction.

	\param success If \c false, the transaction (or only the sub transaction,
		if one is active) is aborted in the block cache. If \c true, the
		transaction is either kept for a later flush or written to the log
		right away.
	\return B_OK if the transaction was aborted or its flush was delayed,
		otherwise the result of _WriteTransactionToLog().
*/
status_t
Journal::_TransactionDone(bool success)
{
        if (success) {
                // If possible, delay flushing the transaction
                uint32 size = _FullTransactionSize();
                TRACE("Journal::_TransactionDone(): full transaction size: %" B_PRIu32
                        ", max transaction size: %" B_PRIu32 ", free log blocks: %" B_PRIu32
                        "\n", size, fMaxTransactionSize, FreeLogBlocks());

                const bool canDelay = fMaxTransactionSize > 0
                        && size < fMaxTransactionSize;
                if (!canDelay)
                        return _WriteTransactionToLog();

                TRACE("Journal::_TransactionDone(): delaying flush of transaction "
                        "%" B_PRIu32 "\n", fTransactionID);

                // Make sure the transaction fits in the log
                // NOTE(review): this syncs when the transaction is smaller than the
                // free log space; confirm the comparison direction is intended.
                if (size < FreeLogBlocks())
                        cache_sync_transaction(fFilesystemBlockCache, fTransactionID);

                fUnwrittenTransactions++;
                TRACE("Journal::_TransactionDone(): returning B_OK\n");
                return B_OK;
        }

        // The transaction failed: discard its changes
        if (fHasSubTransaction) {
                TRACE("Journal::_TransactionDone(): transaction %" B_PRIu32
                        " failed, aborting subtransaction\n", fTransactionID);
                cache_abort_sub_transaction(fFilesystemBlockCache, fTransactionID);
                // parent is unaffected
        } else {
                TRACE("Journal::_TransactionDone(): transaction %" B_PRIu32
                        " failed, aborting\n", fTransactionID);
                cache_abort_transaction(fFilesystemBlockCache, fTransactionID);
                fUnwrittenTransactions = 0;
        }

        TRACE("Journal::_TransactionDone(): returning B_OK\n");
        return B_OK;
}


/*!	Callback invoked with a LogEntry as cookie.

	If the entry is the first one in its journal's list of log entries, the
	journal's log start and first commit ID are taken from it and the journal
	superblock is saved. In any case the entry is removed from the list and
	deleted.

	\param transactionID Only used for tracing.
	\param event Unused.
	\param _logEntry The LogEntry this callback was registered with.
*/
/*static*/ void
Journal::_TransactionWritten(int32 transactionID, int32 event, void* _logEntry)
{
        LogEntry* entry = (LogEntry*)_logEntry;

        TRACE("Journal::_TransactionWritten(): Transaction %" B_PRIu32
                " checkpointed\n", transactionID);

        Journal* owner = entry->GetJournal();

        TRACE("Journal::_TransactionWritten(): log entry: %p, journal: %p\n",
                entry, owner);
        TRACE("Journal::_TransactionWritten(): log entries: %p\n",
                &owner->fLogEntries);

        // The list of log entries is only touched with its lock held
        mutex_lock(&owner->fLogEntriesLock);

        TRACE("Journal::_TransactionWritten(): first log entry: %p\n",
                owner->fLogEntries.First());

        const bool isFirstEntry = owner->fLogEntries.First() == entry;
        if (isFirstEntry) {
                TRACE("Journal::_TransactionWritten(): Moving start of log to %"
                        B_PRIu32 "\n", entry->Start());
                owner->fLogStart = entry->Start();
                owner->fFirstCommitID = entry->CommitID();
                TRACE("Journal::_TransactionWritten(): Setting commit ID to %" B_PRIu32
                        "\n", entry->CommitID());

                status_t saved = owner->_SaveSuperBlock();
                if (saved != B_OK)
                        panic("ext2: Failed to write journal superblock\n");
        }

        TRACE("Journal::_TransactionWritten(): Removing log entry\n");
        owner->fLogEntries.Remove(entry);

        TRACE("Journal::_TransactionWritten(): Unlocking entries list\n");
        mutex_unlock(&owner->fLogEntriesLock);

        TRACE("Journal::_TransactionWritten(): Deleting log entry at %p\n", entry);
        delete entry;
}


/*!	Callback invoked with a Journal as cookie.

	Asks the journal to flush its log without waiting for the journal lock
	(canWait = false) and without flushing the file system volume
	(flushBlocks = false). The result of the flush is ignored.
*/
/*static*/ void
Journal::_TransactionIdle(int32 transactionID, int32 event, void* _journal)
{
        ((Journal*)_journal)->_FlushLog(false, false);
}