root/src/kits/storage/mime/SnifferRules.cpp
/*
 * Copyright 2002-2006, Haiku Inc.
 * Distributed under the terms of the MIT License.
 *
 * Authors:
 *              Tyler Dauwalder
 *              Ingo Weinhold, bonefish@users.sf.net
 */

/*!
        \file SnifferRules.cpp
        SnifferRules class implementation
*/

#include <mime/SnifferRules.h>

#include <stdio.h>
#include <sys/stat.h>

#include <Directory.h>
#include <Entry.h>
#include <File.h>
#include <MimeType.h>
#include <mime/database_support.h>
#include <mime/DatabaseDirectory.h>
#include <mime/DatabaseLocation.h>
#include <mime/MimeSniffer.h>
#include <StorageDefs.h>
#include <storage_support.h>
#include <String.h>

#include "../sniffer/Parser.h"
#include "../sniffer/Rule.h"
#include "../sniffer/Data.h"


#define DBG(x) x
//#define DBG(x)
#define OUT printf

namespace BPrivate {
namespace Storage {
namespace Mime {

using namespace BPrivate::Storage;

/*!
        \struct SnifferRules::sniffer_rule
        \brief A parsed sniffer rule and its corresponding mime type and rule string

        The parsed sniffer rule is stored in the \c rule member, which is a pointer
        to a \c Sniffer::Rule object. This design was chosen to allow \c sniffer_rule
        objects (as opposed to \c sniffer_rule pointers) to be used with STL objects
        without unnecessary copying. As a consequence of this decision, the
        \c SnifferRules object managing the rule list is responsible for actually
        deleting each \c sniffer_rule's \c Sniffer::Rule object.
*/

// sniffer_rule Constructor
/*! \brief Creates a new \c sniffer_rule object referring to the given rule.

        Only the pointer is stored, without any check; the remaining members
        are left default-initialized.

        \param rule The parsed \c Sniffer::Rule this entry refers to
*/
SnifferRules::sniffer_rule::sniffer_rule(Sniffer::Rule *rule)
        :
        rule(rule)
{
}

// sniffer_rule Destructor
//! Destroys the \c sniffer_rule object.
/*! \note The \c Sniffer::Rule object pointed to by the \c sniffer_rule
        object's \c rule member is *NOT* deleted by this function. The body
        is intentionally empty, so copies of a \c sniffer_rule may share the
        same \c rule pointer without the rule being freed when one of the
        copies goes out of scope.
*/
SnifferRules::sniffer_rule::~sniffer_rule()
{
}

// private functions
/*! \brief Ordering predicate for \c sniffer_rule objects: \a left sorts
        before \a right if \a left's priority is greater than \a right's.

        This is deliberately "backwards": sorting with operator<() yields
        ascending order, so reporting "left < right" whenever
        "left.priority > right.priority" produces a list ordered from highest
        to lowest priority.

        An entry whose \c rule member is \c NULL never sorts before another
        entry, and every entry with a non-NULL rule sorts before it, so such
        entries end up at the end of the list.

        Entries with identical priorities are ordered in reverse alphabetic
        order of their type strings, which places the rule for a supertype
        *after* the rules for its subtypes.

        \param left Left hand operand
        \param right Right hand operand
        \return \c true if \a left is to be placed before \a right.
*/
bool operator<(const SnifferRules::sniffer_rule &left, const SnifferRules::sniffer_rule &right)
{
        // An entry without a rule never precedes anything
        if (left.rule == NULL)
                return false;

        // left has a rule and right has none: left goes first
        if (right.rule == NULL)
                return true;

        const double leftPriority = left.rule->Priority();
        const double rightPriority = right.rule->Priority();

        if (leftPriority > rightPriority)
                return true;    // higher priority sorts first
        if (rightPriority > leftPriority)
                return false;

        // Same priority: reverse alphabetic order of the type strings
        return left.type > right.type;
}

/*!
        \class SnifferRules
        \brief Manages the sniffer rules for the entire database
*/

// Constructor
/*! \brief Constructs a new SnifferRules object.

        Both pointers are merely stored. The rule list starts out empty and
        unbuilt (\c fHaveDoneFullBuild is \c false); it is built on demand by
        MaxBytesNeeded() and GuessMimeType().

        \param databaseLocation The MIME database location the rules are
               read from when the rule list is built.
        \param mimeSniffer Additional sniffer consulted by GuessMimeType()
               and MaxBytesNeeded(); both check it against \c NULL before use.
*/
SnifferRules::SnifferRules(DatabaseLocation* databaseLocation,
        MimeSniffer* mimeSniffer)
        :
        fDatabaseLocation(databaseLocation),
        fMimeSniffer(mimeSniffer),
        fMaxBytesNeeded(0),
        fHaveDoneFullBuild(false)
{
}

// Destructor
/*! \brief Destroys the \c SnifferRules object together with every
        dynamically allocated \c Sniffer::Rule object referenced from the
        \c sniffer_rule::rule members of the rule list.
*/
SnifferRules::~SnifferRules()
{
        std::list<sniffer_rule>::iterator entry = fRuleList.begin();
        while (entry != fRuleList.end()) {
                // The list entries do not free their rule themselves
                delete entry->rule;
                entry->rule = NULL;
                ++entry;
        }
}

// GuessMimeType
/*!     \brief Guesses a MIME type for the supplied entry_ref.

        Only the data in the given entry is considered, not the filename or
        its extension. Please see GuessMimeType(BFile*, const void *, int32,
        BString*) for more details.

        \param ref The entry to sniff
        \param type Pointer to a pre-allocated BString which is set to the
                   resulting MIME type.
        \return
        - \c B_OK: success
        - \c Mime::kMimeGuessFailureError: no match found (\a type is left
          unmodified)
        - error code: failure
*/
status_t
SnifferRules::GuessMimeType(const entry_ref *ref, BString *type)
{
        if (ref == NULL || type == NULL)
                return B_BAD_VALUE;

        // Find out how many bytes have to be read from the file to fully
        // accommodate all of the currently installed sniffer rules
        const ssize_t bytesWanted = MaxBytesNeeded();
        if (bytesWanted < 0)
                return bytesWanted;

        char *data = new(std::nothrow) char[bytesWanted];
        if (data == NULL)
                return B_NO_MEMORY;

        // Read that many bytes (or fewer, if the file isn't that long)
        // and sniff whatever has been read
        BFile file;
        status_t result = file.SetTo(ref, B_READ_ONLY);
        if (result == B_OK) {
                const ssize_t bytesRead = file.Read(data, bytesWanted);
                if (bytesRead < 0)
                        result = bytesRead;
                else
                        result = GuessMimeType(&file, data, bytesRead, type);
        }

        delete[] data;
        return result;
}

// GuessMimeType
/*!     \brief Guesses a MIME type for the given chunk of data.

        Simply forwards to GuessMimeType(BFile*, const void *, int32, BString*)
        without a file; please see that method for more details.

        \param buffer Pointer to a data buffer to sniff
        \param length The length of the data buffer pointed to by \a buffer
        \param type Pointer to a pre-allocated BString which is set to the
                   resulting MIME type.
        \return
        - \c B_OK: success
        - \c Mime::kMimeGuessFailureError: no match found (\a type is left
          unmodified)
        - error code: failure
*/
status_t
SnifferRules::GuessMimeType(const void *buffer, int32 length, BString *type)
{
        // There is no file backing the data
        BFile *noFile = NULL;
        return GuessMimeType(noFile, buffer, length, type);
}

// SetSnifferRule
/*! \brief Updates the sniffer rule for the given type

        If a rule currently exists in the rule list for the given type,
        it is removed before the new rule is inserted.

        The new rule is inserted in its proper, sorted position in the list.

        As long as the rule list has not been fully built yet, nothing is
        done and \c B_OK is returned.

        The freshly allocated \c Sniffer::Rule is handed over to the rule
        list on success and freed again on every failure path.

        \param type The type of interest
        \param rule The new sniffer rule
        \return
        - \c B_OK: success
        - \c B_BAD_VALUE: \a type or \a rule is \c NULL
        - \c B_NO_MEMORY: the rule object could not be allocated
        - other error code: failure (e.g. the rule string could not be parsed)
*/
status_t
SnifferRules::SetSnifferRule(const char *type, const char *rule)
{
        if (type == NULL || rule == NULL)
                return B_BAD_VALUE;

        // No rule list has been built yet, so there is nothing to update
        if (!fHaveDoneFullBuild)
                return B_OK;

        // NOTE(review): fMaxBytesNeeded is not adjusted here, although the new
        // rule might need more bytes than any rule seen by BuildRuleList() --
        // confirm whether that is intended.

        // nothrow, so that the NULL check below is meaningful
        sniffer_rule item(new(std::nothrow) Sniffer::Rule());
        if (item.rule == NULL)
                return B_NO_MEMORY;

        // Prepare the sniffer_rule
        item.type = type;
        item.rule_string = rule;

        BString parseError;
        status_t err = Sniffer::parse(rule, item.rule, &parseError);
        if (err != B_OK) {
                DBG(OUT("ERROR: SnifferRules::SetSnifferRule(): rule parsing error:\n%s\n",
                        parseError.String()));
        }

        // Remove any previous rule for this type
        if (err == B_OK)
                err = DeleteSnifferRule(type);

        if (err != B_OK) {
                // The item never made it into the list, so nobody else would
                // free its rule (sniffer_rule's destructor does not)
                delete item.rule;
                return err;
        }

        // Insert the new rule at the proper position in the sorted rule list,
        // i.e. in front of the first entry it compares less than (remembering
        // that our list is sorted in ascending order using
        // operator<(sniffer_rule&, sniffer_rule&)), or at the end if there
        // is no such entry
        std::list<sniffer_rule>::iterator position = fRuleList.begin();
        while (position != fRuleList.end() && !(item < *position))
                ++position;
        fRuleList.insert(position, item);

        return B_OK;
}

// DeleteSnifferRule
/*! \brief Removes the sniffer rule for the given type from the rule list

        The \c Sniffer::Rule object referenced by the removed entry is deleted
        as well, since the entry itself does not free it.

        As long as the rule list has not been fully built yet, nothing is
        done and \c B_OK is returned.

        \param type The type of interest
        \return
        - \c B_OK: success (even if no rule existed for the given type)
        - \c B_BAD_VALUE: \a type is \c NULL
*/
status_t
SnifferRules::DeleteSnifferRule(const char *type)
{
        // Bail out before a NULL pointer can reach the string comparison below
        if (type == NULL)
                return B_BAD_VALUE;

        if (!fHaveDoneFullBuild)
                return B_OK;

        // Find the rule in the list and remove it
        for (std::list<sniffer_rule>::iterator i = fRuleList.begin();
                   i != fRuleList.end(); ++i) {
                if (i->type == type) {
                        // Free the parsed rule before dropping its list entry
                        delete i->rule;
                        fRuleList.erase(i);
                        break;
                }
        }

        return B_OK;
}

// PrintToStream
//! Dumps the list of sniffer rules in sorted order to standard output
void
SnifferRules::PrintToStream() const
{
        printf("\n");
        printf("--------------\n");
        printf("Sniffer Rules:\n");
        printf("--------------\n");

        if (fHaveDoneFullBuild) {
                for (std::list<sniffer_rule>::const_iterator i = fRuleList.begin();
                           i != fRuleList.end(); i++) {
                        printf("%s: '%s'\n", i->type.c_str(), i->rule_string.c_str());
                }
        } else {
                printf("You haven't built your rule list yet, chump. ;-)\n");
        }
}

// BuildRuleList
/*! \brief Crawls through the database, parses each sniffer rule it finds, adds
        each parsed rule to the rule list, and sorts the list by priority, largest first.

        Initial MaxBytesNeeded() info is compiled by this function as well.

        Any entries still in the rule list (e.g. left over from an earlier
        build that failed partway) are discarded first, including their
        \c Sniffer::Rule objects.

        Only a failure to open or iterate the database root directory makes
        the build fail. Problems with an individual supertype directory or an
        individual type are not propagated.

        \return
        - \c B_OK: success; the list is sorted, and \c fMaxBytesNeeded and
          \c fHaveDoneFullBuild are updated
        - other error code: failure; \c fMaxBytesNeeded and
          \c fHaveDoneFullBuild are left untouched
*/
status_t
SnifferRules::BuildRuleList()
{
        // The list entries do not free their rule themselves, so do it here
        // before dropping them
        for (std::list<sniffer_rule>::iterator i = fRuleList.begin();
                   i != fRuleList.end(); ++i) {
                delete i->rule;
        }
        fRuleList.clear();

        ssize_t maxBytesNeeded = 0;
        ssize_t bytesNeeded = 0;
        DatabaseDirectory root;

        status_t err = root.Init(fDatabaseLocation);
        if (err != B_OK) {
                DBG(OUT("Mime::SnifferRules::BuildRuleList(): "
                          "Failed opening mime database directory.\n"));
        } else {
                root.Rewind();
                while (true) {
                        BEntry entry;
                        err = root.GetNextEntry(&entry);
                        if (err != B_OK) {
                                // If we've come to the end of list, it's not an error
                                if (err == B_ENTRY_NOT_FOUND)
                                        err = B_OK;
                                break;
                        }

                        // Skip entries that are not both a directory and a valid
                        // MIME string
                        char supertype[B_PATH_NAME_LENGTH];
                        if (!entry.IsDirectory()
                              || entry.GetName(supertype) != B_OK
                                 || !BMimeType::IsValid(supertype)) {
                                continue;
                        }

                        // Make sure the supertype string is all lowercase
                        BPrivate::Storage::to_lower(supertype);

                        // First, iterate through this supertype directory and process
                        // all of its subtypes
                        DatabaseDirectory dir;
                        if (dir.Init(fDatabaseLocation, supertype) == B_OK) {
                                dir.Rewind();
                                while (true) {
                                        // Any failure (including reaching the end of the
                                        // directory) just ends the subtype iteration; it
                                        // does not abort the build
                                        BEntry subEntry;
                                        if (dir.GetNextEntry(&subEntry) != B_OK)
                                                break;

                                        // Get the subtype's name
                                        char subtype[B_PATH_NAME_LENGTH];
                                        if (subEntry.GetName(subtype) != B_OK)
                                                continue;

                                        BPrivate::Storage::to_lower(subtype);

                                        BString fulltype;
                                        fulltype.SetToFormat("%s/%s", supertype, subtype);

                                        // Process the subtype
                                        ProcessType(fulltype, &bytesNeeded);
                                        if (bytesNeeded > maxBytesNeeded)
                                                maxBytesNeeded = bytesNeeded;
                                }
                        } else {
                                DBG(OUT("Mime::SnifferRules::BuildRuleList(): "
                                          "Failed opening supertype directory '%s'\n",
                                            supertype));
                        }

                        // Second, process the supertype
                        ProcessType(supertype, &bytesNeeded);
                        if (bytesNeeded > maxBytesNeeded)
                                maxBytesNeeded = bytesNeeded;
                }
        }

        if (err == B_OK) {
                fRuleList.sort();
                fMaxBytesNeeded = maxBytesNeeded;
                fHaveDoneFullBuild = true;
//              PrintToStream();
        } else {
                DBG(OUT("Mime::SnifferRules::BuildRuleList() failed, error code == 0x%"
                        B_PRIx32 "\n", err));
        }
        return err;
}

// GuessMimeType
/*!     \brief Guesses a MIME type for the supplied chunk of data.

        This is accomplished by searching through the currently installed
        list of sniffer rules for a rule that matches on the given data buffer.
        Rules are searched in order of priority (higher priority first). Rules
        of equal priority are searched in reverse-alphabetical order (that way
        "supertype/subtype" form rules are checked before "supertype-only" form
        rules if their priorities happen to be identical).

        If a MIME sniffer has been supplied, it is asked first; its result is
        used as soon as its priority is at least as great as that of the next
        rule to be tried, or if none of the rules matches.

        The rule list is built first, if that hasn't happened yet.

        \param file The file to sniff. May be \c NULL. \a buffer is always given.
        \param buffer Pointer to a data buffer to sniff
        \param length The length of the data buffer pointed to by \a buffer
        \param type Pointer to a pre-allocated BString which is set to the
                   resulting MIME type.
        \return
        - \c B_OK: success
        - \c Mime::kMimeGuessFailureError: no match found (\a type is left
          unmodified)
        - error code: failure
*/
status_t
SnifferRules::GuessMimeType(BFile* file, const void *buffer, int32 length,
        BString *type)
{
        if (buffer == NULL || type == NULL)
                return B_BAD_VALUE;

        // Make sure there is a rule list to search through
        if (!fHaveDoneFullBuild) {
                status_t buildError = BuildRuleList();
                if (buildError != B_OK)
                        return buildError;
        }

        // wrap the buffer in Sniffer::Data
        Sniffer::Data data;
        data.from = 0;
        data.buffer = (const uint8*)buffer;
        data.length = length;

        // first ask the MIME sniffer for a suitable type
        float addonPriority = -1;
        BMimeType mimeType;
        if (fMimeSniffer != NULL) {
                addonPriority = fMimeSniffer->GuessMimeType(file, buffer, length,
                        &mimeType);
        }

        // Run through our rule list, which is sorted in order of
        // decreasing priority, and see if one of the rules sniffs
        // out a match
        std::list<sniffer_rule>::const_iterator entry = fRuleList.begin();
        for (; entry != fRuleList.end(); ++entry) {
                if (!entry->rule) {
                        DBG(OUT("WARNING: Mime::SnifferRules::GuessMimeType(BPositionIO*,BString*): "
                                "NULL sniffer_rule::rule member found in rule list for type == '%s', "
                                "rule_string == '%s'\n",
                                entry->type.c_str(), entry->rule_string.c_str()));
                        continue;
                }

                // If an add-on identified the type with a priority at least
                // as great as the remaining rules, we can stop further
                // processing and return the type found by the add-on.
                if (entry->rule->Priority() <= addonPriority) {
                        *type = mimeType.Type();
                        return B_OK;
                }

                if (entry->rule->Sniff(data)) {
                        type->SetTo(entry->type.c_str());
                        return B_OK;
                }
        }

        // The sniffer add-on might have returned a priority lower than that
        // of every rule in the list.
        if (addonPriority >= 0) {
                *type = mimeType.Type();
                return B_OK;
        }

        // If we get here, nothing matched at all
        return kMimeGuessFailureError;
}

// MaxBytesNeeded
/*! \brief Returns the maximum number of bytes needed in a data buffer for
        all the currently installed rules to be able to perform a complete sniff,
        or an error code if something goes wrong.

        If the internal rule list has not yet been built (this includes parsing
        all the installed rules), it will be.

        If a MIME sniffer has been supplied, its minimal buffer size is taken
        into account as well: \c fMaxBytesNeeded is raised to it if necessary
        *before* the value is returned, so that the very first call already
        reports the full amount.

        \return If the return value is non-negative, it represents the max number
        of bytes needed to do a complete sniff. Otherwise, the number returned is
        an error code.
*/
ssize_t
SnifferRules::MaxBytesNeeded()
{
        if (!fHaveDoneFullBuild) {
                status_t err = BuildRuleList();
                if (err != B_OK)
                        return err;
        }

        if (fMimeSniffer != NULL) {
                fMaxBytesNeeded = max_c(fMaxBytesNeeded,
                        (ssize_t)fMimeSniffer->MinimalBufferSize());
        }

        return fMaxBytesNeeded;
}

// ProcessType
/*! \brief Handles a portion of the initial rule list construction for
        the given mime type.

        Reads the type's sniffer rule attribute, parses it and, on success,
        appends the parsed rule to the (still unsorted) rule list.

        \note To be called by BuildRuleList() *ONLY*. :-)

        \param type The mime type of interest. The mime string is expected to be valid
                    and lowercase. Both "supertype" and "supertype/subtype" mime types
                    are allowed.
        \param bytesNeeded Returns the minimum number of bytes needed for this rule to
                           perform a complete sniff; set to 0 if no rule was added.
                           Must not be \c NULL.
        \return
        The return value is essentially ignored (as this function prints out the
        debug warning if a parse fails), but that being said:
        - \c B_OK: success
        - \c B_BAD_VALUE: \a type or \a bytesNeeded is \c NULL
        - \c B_NO_MEMORY: the rule object could not be allocated
        - \c other error code: failure
*/
status_t
SnifferRules::ProcessType(const char *type, ssize_t *bytesNeeded)
{
        if (type == NULL || bytesNeeded == NULL)
                return B_BAD_VALUE;

        *bytesNeeded = 0;

        // Read the attr first, so that nothing needs to be allocated (and
        // freed again) when the read fails -- presumably the common case of a
        // type that has no sniffer rule
        BString str;
        status_t err = fDatabaseLocation->ReadStringAttribute(type,
                kSnifferRuleAttr, str);
        if (err != B_OK)
                return err;

        // nothrow, so that the NULL check below is meaningful
        sniffer_rule rule(new(std::nothrow) Sniffer::Rule());
        if (rule.rule == NULL)
                return B_NO_MEMORY;

        // Parse the rule
        BString errorMsg;
        err = Sniffer::parse(str.String(), rule.rule, &errorMsg);
        if (err != B_OK) {
                DBG(OUT("WARNING: SnifferRules::ProcessType(): Parse failure:\n%s\n", errorMsg.String()));
                // The rule never made it into the list, so nobody else would
                // free it (sniffer_rule's destructor does not)
                delete rule.rule;
                return err;
        }

        // Note the bytes needed
        *bytesNeeded = rule.rule->BytesNeeded();

        // Add the rule to the list, which is responsible for the
        // Sniffer::Rule object from now on
        rule.type = type;
        rule.rule_string = str.String();
        fRuleList.push_back(rule);

        return B_OK;
}

} // namespace Mime
} // namespace Storage
} // namespace BPrivate