root/fs/ntfs3/dir.c
// SPDX-License-Identifier: GPL-2.0
/*
 *
 * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
 *
 *  Directory handling functions for NTFS-based filesystems.
 *
 */

#include <linux/fs.h>
#include <linux/filelock.h>
#include <linux/nls.h>

#include "debug.h"
#include "ntfs.h"
#include "ntfs_fs.h"

/*
 * ntfs_utf16_to_nls - Convert little endian UTF-16 to NLS string.
 * @sbi:        Filesystem info; sbi->options->nls selects the target charset
 *              (NULL selects UTF-8).
 * @name:       Little endian UTF-16 input.
 * @len:        Number of UTF-16 units in @name.
 * @buf:        Destination buffer.
 * @buf_len:    Size of @buf in bytes, including room for the trailing '\0'.
 *
 * The output is always NUL terminated inside @buf. With an NLS table, a
 * unit that uni2char() cannot convert is replaced by '_' and reported once
 * per call; the name is truncated (with a warning) when fewer than
 * NLS_MAX_CHARSET_SIZE bytes of payload space remain.
 *
 * Return: number of bytes stored in @buf, not counting the trailing '\0'.
 */
int ntfs_utf16_to_nls(struct ntfs_sb_info *sbi, const __le16 *name, u32 len,
                      u8 *buf, int buf_len)
{
        int ret, warn;
        u8 *op;
        struct nls_table *nls = sbi->options->nls;

        static_assert(sizeof(wchar_t) == sizeof(__le16));

        /* There is no room even for the terminator. */
        if (buf_len <= 0)
                return 0;

        /*
         * Keep one byte back for the trailing '\0'. Both converters below
         * may consume every byte they are offered, and the terminator must
         * not land past the end of 'buf' in that case.
         */
        buf_len -= 1;

        if (!nls) {
                /* UTF-16 -> UTF-8 */
                ret = utf16s_to_utf8s((wchar_t *)name, len, UTF16_LITTLE_ENDIAN,
                                      buf, buf_len);
                buf[ret] = '\0';
                return ret;
        }

        op = buf;
        warn = 0;

        while (len--) {
                u16 ec;
                int charlen;
                char dump[5];

                /* Stop while a worst-case character still fits. */
                if (buf_len < NLS_MAX_CHARSET_SIZE) {
                        ntfs_warn(sbi->sb,
                                  "filename was truncated while converting.");
                        break;
                }

                ec = le16_to_cpu(*name++);
                charlen = nls->uni2char(ec, op, buf_len);

                if (charlen > 0) {
                        op += charlen;
                        buf_len -= charlen;
                        continue;
                }

                /* Unconvertible unit: substitute a placeholder. */
                *op++ = '_';
                buf_len -= 1;
                if (warn)
                        continue;

                /* Report only the first failure, as four hex digits. */
                warn = 1;
                hex_byte_pack(&dump[0], ec >> 8);
                hex_byte_pack(&dump[2], ec);
                dump[4] = 0;

                ntfs_err(sbi->sb, "failed to convert \"%s\" to %s", dump,
                         nls->charset);
        }

        *op = '\0';
        return op - buf;
}

// clang-format off
/*
 * Code points at or above this value do not fit in one UTF-16 unit;
 * _utf8s_to_utf16s() subtracts it and emits a surrogate pair instead.
 */
#define PLANE_SIZE      0x00010000

/*
 * Surrogate pair construction, as used by _utf8s_to_utf16s():
 *   first unit  = SURROGATE_PAIR | (upper 10 bits of the reduced code point)
 *   second unit = SURROGATE_PAIR | SURROGATE_LOW | (lower 10 bits)
 * SURROGATE_BITS masks one 10-bit half.
 */
#define SURROGATE_PAIR  0x0000d800
#define SURROGATE_LOW   0x00000400
#define SURROGATE_BITS  0x000003ff
// clang-format on

/*
 * put_utf16 - Store one UTF-16 unit with the requested byte order.
 * @s:          Destination unit.
 * @c:          Value to store (truncated to 16 bits).
 * @endian:     Byte order of the stored unit.
 *
 * Modified version of put_utf16 from fs/nls/nls_base.c.
 * The casts keep the function free of sparse warnings.
 */
static inline void put_utf16(wchar_t *s, unsigned int c,
                             enum utf16_endian endian)
{
        static_assert(sizeof(wchar_t) == sizeof(__le16));
        static_assert(sizeof(wchar_t) == sizeof(__be16));

        if (endian == UTF16_LITTLE_ENDIAN) {
                *(__le16 *)s = __cpu_to_le16(c);
                return;
        }

        if (endian == UTF16_BIG_ENDIAN) {
                *(__be16 *)s = __cpu_to_be16(c);
                return;
        }

        /* Any other value: store the unit without byte swapping. */
        *s = (wchar_t)c;
}

/*
 * _utf8s_to_utf16s - Convert a UTF-8 string to UTF-16.
 * @s:          UTF-8 input; conversion also stops at a NUL byte.
 * @inlen:      Input length in bytes.
 * @endian:     Byte order of the produced UTF-16 units.
 * @pwcs:       Destination buffer.
 * @maxout:     Capacity of @pwcs in UTF-16 units.
 *
 * Modified version of 'utf8s_to_utf16s': room is checked before every
 * store, so -ENAMETOOLONG is detected without writing beyond @maxout.
 *
 * Return: number of UTF-16 units written, -EINVAL on malformed UTF-8,
 * -ENAMETOOLONG if the output does not fit.
 */
static int _utf8s_to_utf16s(const u8 *s, int inlen, enum utf16_endian endian,
                            wchar_t *pwcs, int maxout)
{
        u16 *out = pwcs;

        while (inlen > 0 && *s) {
                unicode_t cp;
                int nbytes;

                if (!(*s & 0x80)) {
                        /* Single byte (7-bit) character. */
                        if (maxout < 1)
                                return -ENAMETOOLONG;

                        put_utf16(out++, *s++, endian);
                        inlen -= 1;
                        maxout -= 1;
                        continue;
                }

                /* Multi-byte sequence: decode it first, then check room. */
                nbytes = utf8_to_utf32(s, inlen, &cp);
                if (nbytes < 0)
                        return -EINVAL;
                s += nbytes;
                inlen -= nbytes;

                if (cp < PLANE_SIZE) {
                        /* Fits in one UTF-16 unit. */
                        if (maxout < 1)
                                return -ENAMETOOLONG;

                        put_utf16(out++, cp, endian);
                        maxout -= 1;
                        continue;
                }

                /* Needs a surrogate pair: two units. */
                if (maxout < 2)
                        return -ENAMETOOLONG;

                cp -= PLANE_SIZE;
                put_utf16(out++,
                          SURROGATE_PAIR | ((cp >> 10) & SURROGATE_BITS),
                          endian);
                put_utf16(out++,
                          SURROGATE_PAIR | SURROGATE_LOW |
                                  (cp & SURROGATE_BITS),
                          endian);
                maxout -= 2;
        }

        return out - pwcs;
}

/*
 * ntfs_nls_to_utf16 - Convert input string to UTF-16.
 * @sbi:        Filesystem info; sbi->options->nls selects the source charset
 *              (NULL means the input is UTF-8).
 * @name:       Input name.
 * @name_len:   Input name length in bytes.
 * @uni:        Destination: uni->name receives the UTF-16 units and
 *              uni->len their count.
 * @max_ulen:   Maximum number of UTF-16 units that may be stored in @uni.
 * @endian:     Endian of target UTF-16 string.
 *
 * This function is called:
 * - to create NTFS name
 * - to create symlink
 *
 * Return: UTF-16 string length or error (if negative).
 * uni->len is updated only on success.
 */
int ntfs_nls_to_utf16(struct ntfs_sb_info *sbi, const u8 *name, u32 name_len,
                      struct cpu_str *uni, u32 max_ulen,
                      enum utf16_endian endian)
{
        int ret, slen;
        const u8 *end;
        struct nls_table *nls = sbi->options->nls;
        u16 *uname = uni->name;

        static_assert(sizeof(wchar_t) == sizeof(u16));

        if (!nls) {
                /* utf8 -> utf16 */
                ret = _utf8s_to_utf16s(name, name_len, endian, uname, max_ulen);
                /*
                 * Do not store a negative errno as a length; on failure
                 * leave uni->len untouched, like the NLS path below.
                 */
                if (ret >= 0)
                        uni->len = ret;
                return ret;
        }

        /* Convert one character per iteration; 'ret' counts UTF-16 units. */
        for (ret = 0, end = name + name_len; name < end; ret++, name += slen) {
                if (ret >= max_ulen)
                        return -ENAMETOOLONG;

                slen = nls->char2uni(name, end - name, uname + ret);
                if (!slen)
                        return -EINVAL;
                if (slen < 0)
                        return slen;
        }

        /* char2uni() produced CPU-order units; swap if the target differs. */
#ifdef __BIG_ENDIAN
        if (endian == UTF16_LITTLE_ENDIAN) {
                int i = ret;

                while (i--) {
                        __cpu_to_le16s(uname);
                        uname++;
                }
        }
#else
        if (endian == UTF16_BIG_ENDIAN) {
                int i = ret;

                while (i--) {
                        __cpu_to_be16s(uname);
                        uname++;
                }
        }
#endif

        uni->len = ret;
        return ret;
}

/*
 * dir_search_u - Helper function: look up a name in a directory index.
 * @dir:        Directory to search.
 * @uni:        Name to look for.
 * @fnd:        Optional search context; when NULL a temporary one is
 *              obtained with fnd_get() and released before returning.
 *
 * Return: the inode on success, NULL if the name is not present (-ENOENT),
 * ERR_PTR() for any other failure.
 */
struct inode *dir_search_u(struct inode *dir, const struct cpu_str *uni,
                           struct ntfs_fnd *fnd)
{
        struct super_block *sb = dir->i_sb;
        struct ntfs_sb_info *sbi = sb->s_fs_info;
        struct ntfs_inode *ni = ntfs_i(dir);
        struct ntfs_fnd *own_fnd = NULL;
        struct inode *found = NULL;
        struct NTFS_DE *de;
        int cmp;
        int rc = 0;

        if (!fnd) {
                own_fnd = fnd_get();
                if (!own_fnd) {
                        rc = -ENOMEM;
                        goto done;
                }
                fnd = own_fnd;
        }

        rc = indx_find(&ni->dir, ni, NULL, uni, 0, sbi, &cmp, &de, fnd);
        if (rc)
                goto done;

        /* A non-zero comparison result means no exact match. */
        if (cmp) {
                rc = -ENOENT;
                goto done;
        }

        found = ntfs_iget5(sb, &de->ref, uni);
        if (!IS_ERR(found) && is_bad_inode(found)) {
                iput(found);
                rc = -EINVAL;
        }

done:
        fnd_put(own_fnd);

        if (rc == -ENOENT)
                return NULL;
        if (rc)
                return ERR_PTR(rc);

        /* May itself be an ERR_PTR() coming from ntfs_iget5(). */
        return found;
}

/*
 * ntfs_dir_emit - Hand one index entry to the readdir callback.
 *
 * Entries that are filtered out (see the checks below) and names that
 * cannot be converted are skipped and count as success.
 *
 * Return: false if 'ctx' is full, true otherwise.
 */
static inline bool ntfs_dir_emit(struct ntfs_sb_info *sbi,
                                 struct ntfs_inode *ni, const struct NTFS_DE *e,
                                 u8 *name, struct dir_context *ctx)
{
        /* The file name attribute directly follows the entry header. */
        const struct ATTR_FILE_NAME *fname =
                Add2Ptr(e, sizeof(struct NTFS_DE));
        unsigned long ino;
        u32 dtype;
        int nbytes;

        /* Skip FILE_NAME_DOS names and names not homed in this directory. */
        if (fname->type == FILE_NAME_DOS ||
            !mi_is_ref(&ni->mi, &fname->home))
                return true;

        ino = ino_get(&e->ref);

        /*
         * Skip the root record, and skip meta files unless the option to
         * show metafiles is set.
         */
        if (ino == MFT_REC_ROOT ||
            (!sbi->options->showmeta && ntfs_is_meta_file(sbi, ino)))
                return true;

        if (sbi->options->nohidden && (fname->dup.fa & FILE_ATTRIBUTE_HIDDEN))
                return true;

        /* Sanity check of the name length against the entry size. */
        if (fname->name_len + sizeof(struct NTFS_DE) > le16_to_cpu(e->size))
                return true;

        nbytes = ntfs_utf16_to_nls(sbi, fname->name, fname->name_len, name,
                                   PATH_MAX);
        if (nbytes <= 0) {
                ntfs_warn(sbi->sb, "failed to convert name for inode %lx.",
                          ino);
                return true;
        }

        /*
         * NTFS: symlinks are "dir + reparse" or "file + reparse".
         * The reparse attribute serves several dozen purposes, so it is
         * not possible to tell here whether this name is a symlink.
         * The exact type requires opening the inode (reading the MFT);
         * getattr on an opened file (fstat) reports symlinks correctly.
         */
        if (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY)
                dtype = DT_DIR;
        else
                dtype = DT_REG;

        /*
         * The duplicated information stored in the parent directory is not
         * a reliable source for the type of a name. The only correct way
         * is to read the MFT record and find ATTR_STD.
         * The branch below does that at the price of extra locks/reads
         * just to get the type; it is not a good idea in general.
         * Should an additional mount option enable it?
         */
        if (fname->dup.extend_data && ino != ni->mi.rno) {
                struct inode *inode = ntfs_iget5(sbi->sb, &e->ref, NULL);

                if (!IS_ERR_OR_NULL(inode)) {
                        dtype = fs_umode_to_dtype(inode->i_mode);
                        iput(inode);
                }
        }

        return dir_emit(ctx, (s8 *)name, nbytes, ino, dtype);
}

/*
 * ntfs_read_hdr - Helper function for ntfs_readdir().
 *
 * Walks the entries of one index header and emits every entry whose
 * position (@vbo + offset inside the header) is not below @pos.
 *
 * Return:
 *  0        the last entry of the header was reached.
 *  -EINVAL  the directory is corrupted.
 *  +1       'ctx' is full.
 */
static int ntfs_read_hdr(struct ntfs_sb_info *sbi, struct ntfs_inode *ni,
                         const struct INDEX_HDR *hdr, u64 vbo, u64 pos,
                         u8 *name, struct dir_context *ctx)
{
        u32 used = le32_to_cpu(hdr->used);
        u32 off = le32_to_cpu(hdr->de_off);

        while (off + sizeof(struct NTFS_DE) <= used) {
                const struct NTFS_DE *de = Add2Ptr(hdr, off);
                u32 de_size = le16_to_cpu(de->size);

                /* The entry must be well formed and lie inside the header. */
                if (de_size < sizeof(struct NTFS_DE) || off + de_size > used)
                        return -EINVAL;

                if (de_is_last(de))
                        return 0;

                /* Entries below 'pos' are already enumerated. */
                if (vbo + off >= pos) {
                        if (le16_to_cpu(de->key_size) <
                            SIZEOF_ATTRIBUTE_FILENAME)
                                return -EINVAL;

                        ctx->pos = vbo + off;

                        /* Submit the name to the filldir callback. */
                        if (!ntfs_dir_emit(sbi, ni, de, name, ctx))
                                return 1; /* ctx is full. */
                }

                off += de_size;
        }

        /* Ran off the used area without meeting the last entry. */
        return -EINVAL;
}

/*
 * ntfs_readdir - file_operations::iterate_shared
 *
 * Use non sorted enumeration.
 * Sorted enumeration may result in an infinite loop if the names tree
 * contains a loop.
 *
 * Meaning of ctx->pos as used below:
 * - positions below sbi->record_size address entries of the index root
 *   (offset inside the root header);
 * - positions at or above sbi->record_size address entries of index
 *   buffers: (bit << index_bits) + sbi->record_size + offset in the buffer;
 * - i_size + sbi->record_size marks the end of the directory.
 *
 * Return: 0 on success (also when 'ctx' is full), negative error otherwise.
 */
static int ntfs_readdir(struct file *file, struct dir_context *ctx)
{
        const struct INDEX_ROOT *root;
        size_t bit;
        int err = 0;
        struct inode *dir = file_inode(file);
        struct ntfs_inode *ni = ntfs_i(dir);
        struct super_block *sb = dir->i_sb;
        struct ntfs_sb_info *sbi = sb->s_fs_info;
        loff_t i_size = i_size_read(dir);
        u64 pos = ctx->pos;
        u8 *name = NULL;
        struct indx_node *node = NULL;
        u8 index_bits = ni->dir.index_bits;
        size_t max_bit = i_size >> ni->dir.index_bits;
        loff_t eod = i_size + sbi->record_size;

        /* Name is a buffer of PATH_MAX length. */
        static_assert(NTFS_NAME_LEN * 4 < PATH_MAX);

        if (!pos) {
                /*
                 * ni->dir.version increments each directory change.
                 * Save the initial value of ni->dir.version.
                 */
                file->private_data = (void *)ni->dir.version;
        }

        if (pos >= eod) {
                if (file->private_data == (void *)ni->dir.version) {
                        /* No changes since first readdir. */
                        return 0;
                }

                /*
                 * Handle directories that changed after the initial readdir().
                 *
                 * Some user space code implements recursive removal like this instead
                 * of calling rmdir(2) directly:
                 *
                 *      fd = opendir(path);
                 *      while ((dent = readdir(fd)))
                 *              unlinkat(dirfd(fd), dent->d_name, 0);
                 *      closedir(fd);
                 *
                 * POSIX leaves unspecified what readdir() should return once the
                 * directory has been modified after opendir()/rewinddir(), so this
                 * pattern is not guaranteed to work on all filesystems or platforms.
                 *
                 * In ntfs3 the internal name tree may be reshaped while entries are
                 * being removed, so there is no stable anchor for continuing a
                 * single-pass walk based on the original readdir() order.
                 *
                 * In practice some widely used tools (for example certain rm(1)
                 * implementations) have used this readdir()/unlink() loop, and some
                 * filesystems behave in a way that effectively makes it work in the
                 * common case.
                 *
                 * The code below follows that practice and tries to provide
                 * "rmdir-like" behaviour for such callers on ntfs3, even though the
                 * situation is not strictly defined by the APIs.
                 *
                 * Apple documents the same readdir()/unlink() issue and a workaround
                 * for HFS file systems in:
                 * https://web.archive.org/web/20220122122948/https:/support.apple.com/kb/TA21420?locale=en_US
                 */
                /*
                 * Restart the enumeration from position 3 (presumably just past
                 * the "." and ".." slots handled by dir_emit_dots() - TODO
                 * confirm) and remember the current directory version.
                 */
                ctx->pos = pos = 3;
                file->private_data = (void *)ni->dir.version;
        }

        if (!dir_emit_dots(file, ctx))
                return 0;

        /* Scratch buffer that receives each converted name. */
        name = kmalloc(PATH_MAX, GFP_KERNEL);
        if (!name)
                return -ENOMEM;

        if (!ni->mi_loaded && ni->attr_list.size) {
                /*
                 * Directory inode is locked for read.
                 * Load all subrecords to avoid 'write' access to 'ni' during
                 * directory reading.
                 */
                ni_lock(ni);
                /* Re-check under the lock: the flag may have been set meanwhile. */
                if (!ni->mi_loaded && ni->attr_list.size) {
                        err = ni_load_all_mi(ni);
                        if (!err)
                                ni->mi_loaded = true;
                }
                ni_unlock(ni);
                if (err)
                        goto out;
        }

        root = indx_get_root(&ni->dir, ni, NULL, NULL);
        if (!root) {
                err = -EINVAL;
                goto out;
        }

        if (pos >= sbi->record_size) {
                /* Resume inside the index buffers: recover the buffer number. */
                bit = (pos - sbi->record_size) >> index_bits;
        } else {
                /*
                 * Add each name from root in 'ctx'.
                 */
                err = ntfs_read_hdr(sbi, ni, &root->ihdr, 0, pos, name, ctx);
                if (err)
                        goto out;
                bit = 0;
        }

        /*
         * Enumerate indexes until the end of dir.
         */
        for (; bit < max_bit; bit += 1) {
                /* Get the next used index. */
                err = indx_used_bit(&ni->dir, ni, &bit);
                if (err)
                        goto out;

                if (bit == MINUS_ONE_T) {
                        /* no more used indexes. end of dir. */
                        break;
                }

                if (bit >= max_bit) {
                        /* Corrupted directory. */
                        err = -EINVAL;
                        goto out;
                }

                err = indx_read_ra(&ni->dir, ni, bit << ni->dir.idx2vbn_bits,
                                   &node, &file->f_ra);
                if (err)
                        goto out;

                /*
                 * Add each name from index in 'ctx'.
                 */
                err = ntfs_read_hdr(sbi, ni, &node->index->ihdr,
                                    ((u64)bit << index_bits) + sbi->record_size,
                                    pos, name, ctx);
                if (err)
                        goto out;
        }

out:
        kfree(name);
        put_indx_node(node);

        /* Translate the internal result into ctx->pos and the return code. */
        if (!err) {
                /* End of directory. */
                ctx->pos = eod;
        } else if (err == 1) {
                /* 'ctx' is full. */
                err = 0;
        } else if (err == -ENOENT) {
                /* Not reported as an error; keep the position we started from. */
                err = 0;
                ctx->pos = pos;
        } else if (err < 0) {
                /* -EINVAL means corruption: mark the directory inode as bad. */
                if (err == -EINVAL)
                        _ntfs_bad_inode(dir);
                ctx->pos = eod;
        }

        return err;
}

/*
 * ntfs_dir_count - Walk the directory index and count its names.
 * @dir:        Directory inode.
 * @is_empty:   Optional. Set to true on entry and to false as soon as a
 *              name that is not of type FILE_NAME_DOS is found.
 * @dirs:       Optional. Receives the number of names with
 *              FILE_ATTRIBUTE_DIRECTORY set.
 * @files:      Optional. Receives the number of all other names.
 *
 * When only @is_empty is requested the walk stops at the first name.
 * A malformed entry ends the scan of the current header only.
 *
 * Return: 0 on success, -EINVAL if the index root is missing, or the error
 * returned by indx_used_bit()/indx_read().
 */
static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs,
                          size_t *files)
{
        int err = 0;
        struct ntfs_inode *ni = ntfs_i(dir);
        struct INDEX_ROOT *root;
        struct INDEX_HDR *hdr;
        size_t n_dirs = 0, n_files = 0, bit = 0;
        struct indx_node *node = NULL;
        size_t max_indx = i_size_read(&ni->vfs_inode) >> ni->dir.index_bits;

        if (is_empty)
                *is_empty = true;

        root = indx_get_root(&ni->dir, ni, NULL, NULL);
        if (!root)
                return -EINVAL;

        /* Start with the index root, then visit every used index buffer. */
        hdr = &root->ihdr;

        for (;;) {
                u32 end = le32_to_cpu(hdr->used);
                u32 off = le32_to_cpu(hdr->de_off);

                while (off + sizeof(struct NTFS_DE) <= end) {
                        struct NTFS_DE *e = Add2Ptr(hdr, off);
                        u32 e_size = le16_to_cpu(e->size);
                        const struct ATTR_FILE_NAME *fname;

                        /* Looks like corruption. */
                        if (e_size < sizeof(struct NTFS_DE) ||
                            off + e_size > end)
                                break;

                        if (de_is_last(e))
                                break;

                        /* Advance now, so 'continue' moves to the next entry. */
                        off += e_size;

                        fname = de_get_fname(e);
                        if (!fname || fname->type == FILE_NAME_DOS)
                                continue;

                        if (is_empty) {
                                *is_empty = false;
                                /* Nothing to count: the answer is known. */
                                if (!dirs && !files)
                                        goto out;
                        }

                        if (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY)
                                n_dirs++;
                        else
                                n_files++;
                }

                if (bit >= max_indx)
                        break;

                /* Find the next used index buffer, if any. */
                err = indx_used_bit(&ni->dir, ni, &bit);
                if (err || bit == MINUS_ONE_T || bit >= max_indx)
                        break;

                err = indx_read(&ni->dir, ni, bit << ni->dir.idx2vbn_bits,
                                &node);
                if (err)
                        break;

                hdr = &node->index->ihdr;
                bit++;
        }

out:
        put_indx_node(node);
        if (dirs)
                *dirs = n_dirs;
        if (files)
                *files = n_files;

        return err;
}

/*
 * dir_is_empty - Tell whether a directory holds no names.
 *
 * The return code of ntfs_dir_count() is not checked; the result is
 * whatever that function stored in its 'is_empty' argument.
 */
bool dir_is_empty(struct inode *dir)
{
        bool empty = false;

        ntfs_dir_count(dir, &empty, NULL, NULL);
        return empty;
}

// clang-format off
/*
 * File operations table for directories.
 * Enumeration is done by ntfs_readdir() through .iterate_shared.
 */
const struct file_operations ntfs_dir_operations = {
        .llseek         = generic_file_llseek,
        .read           = generic_read_dir,
        .iterate_shared = ntfs_readdir,
        .fsync          = ntfs_file_fsync,
        .open           = ntfs_file_open,
        .unlocked_ioctl = ntfs_ioctl,
#ifdef CONFIG_COMPAT
        .compat_ioctl   = ntfs_compat_ioctl,
#endif
        .setlease       = generic_setlease,
};

#if IS_ENABLED(CONFIG_NTFS_FS)
/*
 * Reduced table built only when CONFIG_NTFS_FS is enabled: same as
 * ntfs_dir_operations but without the fsync and ioctl hooks.
 */
const struct file_operations ntfs_legacy_dir_operations = {
        .llseek         = generic_file_llseek,
        .read           = generic_read_dir,
        .iterate_shared = ntfs_readdir,
        .open           = ntfs_file_open,
        .setlease       = generic_setlease,
};
#endif
// clang-format on