#include "physical-zone.h"
#include <linux/list.h>
#include "logger.h"
#include "memory-alloc.h"
#include "permassert.h"
#include "block-map.h"
#include "completion.h"
#include "constants.h"
#include "data-vio.h"
#include "dedupe.h"
#include "encodings.h"
#include "flush.h"
#include "int-map.h"
#include "slab-depot.h"
#include "status-codes.h"
#include "vdo.h"
/*
 * Number of PBN locks in each physical zone's lock pool (see initialize_zone()):
 * twice MAXIMUM_VDO_USER_VIOS.
 */
#define LOCK_POOL_CAPACITY (2 * MAXIMUM_VDO_USER_VIOS)
/*
 * Constant, per-type data for a PBN lock. A lock's type is determined by which
 * entry of LOCK_IMPLEMENTATIONS its implementation pointer refers to.
 */
struct pbn_lock_implementation {
/* The lock type this entry describes */
enum pbn_lock_type type;
/* A short name for the lock type */
const char *name;
/* Text inserted into the error logged when releasing the provisional reference fails */
const char *release_reason;
};
/*
 * The table of lock implementations, indexed by enum pbn_lock_type. Locks point
 * into this table, so comparing a lock's implementation pointer against an
 * entry's address identifies the lock's type.
 */
static const struct pbn_lock_implementation LOCK_IMPLEMENTATIONS[] = {
[VIO_READ_LOCK] = {
.type = VIO_READ_LOCK,
.name = "read",
.release_reason = "candidate duplicate",
},
[VIO_WRITE_LOCK] = {
.type = VIO_WRITE_LOCK,
.name = "write",
.release_reason = "newly allocated",
},
[VIO_BLOCK_MAP_WRITE_LOCK] = {
.type = VIO_BLOCK_MAP_WRITE_LOCK,
.name = "block map write",
.release_reason = "block map write",
},
};
/**
 * has_lock_type() - Check whether a PBN lock is of a given type.
 * @lock: The lock to examine.
 * @type: The lock type to test for.
 *
 * Return: true if the lock's implementation is the table entry for @type.
 */
static inline bool has_lock_type(const struct pbn_lock *lock, enum pbn_lock_type type)
{
	bool matches = (lock->implementation == (LOCK_IMPLEMENTATIONS + type));

	return matches;
}
/**
 * vdo_is_pbn_read_lock() - Check whether a PBN lock is a read lock.
 * @lock: The lock to examine.
 *
 * Return: true if the lock's type is VIO_READ_LOCK.
 */
bool vdo_is_pbn_read_lock(const struct pbn_lock *lock)
{
	bool is_read = has_lock_type(lock, VIO_READ_LOCK);

	return is_read;
}
/**
 * set_pbn_lock_type() - Set the type of a PBN lock.
 * @lock: The lock to modify.
 * @type: The type the lock should have; selects the matching entry of
 *        LOCK_IMPLEMENTATIONS.
 */
static inline void set_pbn_lock_type(struct pbn_lock *lock, enum pbn_lock_type type)
{
	lock->implementation = LOCK_IMPLEMENTATIONS + type;
}
/**
 * vdo_downgrade_pbn_write_lock() - Convert a PBN write lock into a read lock in place.
 * @lock: The write lock to downgrade. It is expected (and asserted, log-only)
 *        not to be a read lock already, not to be a block map write lock, and
 *        to have exactly one holder.
 * @compressed_write: Whether the lock was used for a compressed write.
 *
 * The lock's increment limit is set to MAXIMUM_REFERENCE_COUNT for a compressed
 * write and to one less than that otherwise, and the lock's type becomes
 * VIO_READ_LOCK.
 */
void vdo_downgrade_pbn_write_lock(struct pbn_lock *lock, bool compressed_write)
{
	VDO_ASSERT_LOG_ONLY(!vdo_is_pbn_read_lock(lock),
			    "PBN lock must not already have been downgraded");
	VDO_ASSERT_LOG_ONLY(!has_lock_type(lock, VIO_BLOCK_MAP_WRITE_LOCK),
			    "must not downgrade block map write locks");
	VDO_ASSERT_LOG_ONLY(lock->holder_count == 1,
			    "PBN write lock should have one holder but has %u",
			    lock->holder_count);

	/*
	 * NOTE(review): presumably the non-compressed limit is one lower because
	 * the writer has already used one reference itself -- confirm.
	 */
	if (compressed_write)
		lock->increment_limit = MAXIMUM_REFERENCE_COUNT;
	else
		lock->increment_limit = MAXIMUM_REFERENCE_COUNT - 1;

	set_pbn_lock_type(lock, VIO_READ_LOCK);
}
/**
 * vdo_claim_pbn_lock_increment() - Try to claim one of the increments available on a lock.
 * @lock: The PBN lock to claim an increment from.
 *
 * The claim counter is bumped with an atomic add, and the claim succeeds only
 * if the resulting claim number does not exceed the lock's increment limit.
 * The counter is incremented even when the claim fails.
 *
 * Return: true if the claim succeeded.
 */
bool vdo_claim_pbn_lock_increment(struct pbn_lock *lock)
{
	u32 claimed = (u32) atomic_add_return(1, &lock->increments_claimed);

	if (claimed > lock->increment_limit)
		return false;

	return true;
}
/**
 * vdo_assign_pbn_lock_provisional_reference() - Record that a lock holds a provisional reference.
 * @lock: The PBN lock to mark. It is asserted (log-only) that the lock is not
 *        already marked as having a provisional reference.
 */
void vdo_assign_pbn_lock_provisional_reference(struct pbn_lock *lock)
{
VDO_ASSERT_LOG_ONLY(!lock->has_provisional_reference,
"lock does not have a provisional reference");
lock->has_provisional_reference = true;
}
/**
 * vdo_unassign_pbn_lock_provisional_reference() - Record that a lock no longer holds a
 *                                                 provisional reference.
 * @lock: The PBN lock to unmark.
 *
 * This only clears the flag on the lock; it does not release any block reference.
 */
void vdo_unassign_pbn_lock_provisional_reference(struct pbn_lock *lock)
{
lock->has_provisional_reference = false;
}
/**
 * release_pbn_lock_provisional_reference() - Release the provisional reference held by a
 *                                            lock, if it has one.
 * @lock: The lock whose provisional reference should be released.
 * @locked_pbn: The physical block number the lock covers.
 * @allocator: The block allocator from which to release the reference.
 *
 * Does nothing if the lock has no provisional reference. A failure to release
 * the reference is logged but otherwise ignored; in either case the lock is
 * then marked as no longer having a provisional reference.
 */
static void release_pbn_lock_provisional_reference(struct pbn_lock *lock,
						   physical_block_number_t locked_pbn,
						   struct block_allocator *allocator)
{
	int result;

	if (!vdo_pbn_lock_has_provisional_reference(lock))
		return;

	result = vdo_release_block_reference(allocator, locked_pbn);
	if (result != VDO_SUCCESS)
		vdo_log_error_strerror(result,
				       "Failed to release reference to %s physical block %llu",
				       lock->implementation->release_reason,
				       (unsigned long long) locked_pbn);

	/* The flag is cleared whether or not the release succeeded. */
	vdo_unassign_pbn_lock_provisional_reference(lock);
}
/*
 * One slot of lock storage in a pbn_lock_pool. The same memory is used as a
 * list entry while the lock sits on the pool's idle list, and as a pbn_lock
 * once it has been borrowed.
 */
typedef union {
/* Linkage on the pool's idle list; only meaningful while the lock is idle */
struct list_head entry;
/* The lock itself; only meaningful while the lock is borrowed */
struct pbn_lock lock;
} idle_pbn_lock;
/*
 * A fixed-size pool of PBN locks. All of the lock storage is allocated along
 * with the pool itself; locks are handed out and taken back via the idle list.
 */
struct pbn_lock_pool {
/* The total number of locks in the pool */
size_t capacity;
/* The number of locks currently borrowed from the pool */
size_t borrowed;
/* The list of locks which are available to be borrowed */
struct list_head idle_list;
/* The storage for all of the locks, capacity entries long */
idle_pbn_lock locks[];
};
/**
 * return_pbn_lock_to_pool() - Give a PBN lock back to its pool.
 * @pool: The pool the lock was borrowed from.
 * @lock: The lock to return. It must not be used by the caller afterwards,
 *        since its storage is reused as idle-list linkage.
 *
 * The lock is zeroed, appended to the tail of the pool's idle list, and the
 * pool's borrowed count is decremented.
 */
static void return_pbn_lock_to_pool(struct pbn_lock_pool *pool, struct pbn_lock *lock)
{
	idle_pbn_lock *slot = container_of(lock, idle_pbn_lock, lock);

	/* Clear the old lock state before the storage is reused as a list entry. */
	memset(lock, 0, sizeof(*lock));

	INIT_LIST_HEAD(&slot->entry);
	list_add_tail(&slot->entry, &pool->idle_list);

	VDO_ASSERT_LOG_ONLY(pool->borrowed > 0, "shouldn't return more than borrowed");
	pool->borrowed--;
}
/**
 * make_pbn_lock_pool() - Create a new PBN lock pool with all of its locks idle.
 * @capacity: The number of locks to allocate for the pool.
 * @pool_ptr: Where to store the new pool on success.
 *
 * Return: VDO_SUCCESS or the error code from the allocation.
 */
static int make_pbn_lock_pool(size_t capacity, struct pbn_lock_pool **pool_ptr)
{
	struct pbn_lock_pool *pool;
	size_t next = 0;
	int result;

	result = vdo_allocate_extended(struct pbn_lock_pool, capacity, idle_pbn_lock,
				       __func__, &pool);
	if (result != VDO_SUCCESS)
		return result;

	INIT_LIST_HEAD(&pool->idle_list);
	pool->capacity = capacity;

	/*
	 * Start with every lock counted as borrowed, then "return" each one so
	 * that the normal return path builds the idle list and brings the
	 * borrowed count down to zero.
	 */
	pool->borrowed = capacity;
	while (next < capacity) {
		return_pbn_lock_to_pool(pool, &pool->locks[next].lock);
		next++;
	}

	*pool_ptr = pool;
	return VDO_SUCCESS;
}
/**
 * free_pbn_lock_pool() - Free a PBN lock pool.
 * @pool: The pool to free; may be NULL, in which case nothing is done.
 *
 * It is asserted (log-only) that no locks are still on loan; the pool is freed
 * regardless.
 */
static void free_pbn_lock_pool(struct pbn_lock_pool *pool)
{
	if (pool != NULL) {
		VDO_ASSERT_LOG_ONLY(pool->borrowed == 0,
				    "All PBN locks must be returned to the pool before it is freed, but %zu locks are still on loan",
				    pool->borrowed);
		vdo_free(pool);
	}
}
/**
 * borrow_pbn_lock_from_pool() - Borrow a PBN lock from a pool and initialize it.
 * @pool: The pool to borrow from.
 * @type: The type with which to initialize the lock.
 * @lock_ptr: Where to store the borrowed lock on success.
 *
 * The lock is taken from the tail of the idle list, i.e. the most recently
 * returned lock is reused first. The borrowed lock has a holder count of zero.
 *
 * Return: VDO_SUCCESS, VDO_LOCK_ERROR if every lock is already on loan, or the
 *         assertion's error code if the idle list is unexpectedly empty.
 */
static int __must_check borrow_pbn_lock_from_pool(struct pbn_lock_pool *pool,
						  enum pbn_lock_type type,
						  struct pbn_lock **lock_ptr)
{
	struct list_head *tail;
	idle_pbn_lock *slot;
	int result;

	if (pool->borrowed >= pool->capacity) {
		return vdo_log_error_strerror(VDO_LOCK_ERROR,
					      "no free PBN locks left to borrow");
	}

	/* Note that the count is bumped before the idle list is checked. */
	pool->borrowed++;

	result = VDO_ASSERT(!list_empty(&pool->idle_list),
			    "idle list should not be empty if pool not at capacity");
	if (result != VDO_SUCCESS)
		return result;

	tail = pool->idle_list.prev;
	list_del(tail);
	slot = list_entry(tail, idle_pbn_lock, entry);

	/* Erase the list linkage, which overlays the start of the lock. */
	memset(tail, 0, sizeof(*tail));

	slot->lock.holder_count = 0;
	set_pbn_lock_type(&slot->lock, type);

	*lock_ptr = &slot->lock;
	return VDO_SUCCESS;
}
/**
 * initialize_zone() - Initialize the next physical zone of a vdo.
 * @vdo: The vdo to which the zone will belong.
 * @zones: The physical_zones being built; zones->zone_count is the index of
 *         the zone to initialize.
 *
 * Creates the zone's PBN lock map and lock pool, fills in its number, thread,
 * allocator and successor zone, and makes the zone's thread.
 *
 * On failure, everything allocated here for the zone is freed and the zone's
 * pointers to it are cleared, so no stale pointers are left behind in the zone.
 *
 * Return: VDO_SUCCESS or an error code.
 */
static int initialize_zone(struct vdo *vdo, struct physical_zones *zones)
{
	int result;
	zone_count_t zone_number = zones->zone_count;
	struct physical_zone *zone = &zones->zones[zone_number];

	result = vdo_int_map_create(VDO_LOCK_MAP_CAPACITY, &zone->pbn_operations);
	if (result != VDO_SUCCESS)
		return result;

	result = make_pbn_lock_pool(LOCK_POOL_CAPACITY, &zone->lock_pool);
	if (result != VDO_SUCCESS) {
		vdo_int_map_free(vdo_forget(zone->pbn_operations));
		return result;
	}

	zone->zone_number = zone_number;
	zone->thread_id = vdo->thread_config.physical_threads[zone_number];
	zone->allocator = &vdo->depot->allocators[zone_number];
	/* The zones form a ring: the last zone's successor is the first zone. */
	zone->next = &zones->zones[(zone_number + 1) % vdo->thread_config.physical_zone_count];

	result = vdo_make_default_thread(vdo, zone->thread_id);
	if (result != VDO_SUCCESS) {
		free_pbn_lock_pool(vdo_forget(zone->lock_pool));
		vdo_int_map_free(vdo_forget(zone->pbn_operations));
		return result;
	}

	return VDO_SUCCESS;
}
/**
 * vdo_make_physical_zones() - Make the physical zones for a vdo.
 * @vdo: The vdo being constructed.
 * @zones_ptr: Where to store the new zones on success.
 *
 * If the thread configuration has no physical zones, this succeeds without
 * allocating anything and without writing to @zones_ptr.
 *
 * Return: VDO_SUCCESS or an error code.
 */
int vdo_make_physical_zones(struct vdo *vdo, struct physical_zones **zones_ptr)
{
	struct physical_zones *zones;
	zone_count_t zone_count = vdo->thread_config.physical_zone_count;
	int result;

	if (zone_count == 0)
		return VDO_SUCCESS;

	result = vdo_allocate_extended(struct physical_zones, zone_count,
				       struct physical_zone, __func__, &zones);
	if (result != VDO_SUCCESS)
		return result;

	/*
	 * zones->zone_count doubles as the loop counter, so on failure it
	 * records exactly how many zones were fully initialized and therefore
	 * need to be torn down.
	 */
	zones->zone_count = 0;
	while (zones->zone_count < zone_count) {
		result = initialize_zone(vdo, zones);
		if (result != VDO_SUCCESS) {
			vdo_free_physical_zones(zones);
			return result;
		}

		zones->zone_count++;
	}

	*zones_ptr = zones;
	return VDO_SUCCESS;
}
/**
 * vdo_free_physical_zones() - Destroy the physical zones.
 * @zones: The zones to free; may be NULL, in which case nothing is done.
 *
 * Frees the lock pool and PBN lock map of each of the first zones->zone_count
 * zones, then the zones structure itself.
 */
void vdo_free_physical_zones(struct physical_zones *zones)
{
	zone_count_t index = 0;

	if (zones == NULL)
		return;

	while (index < zones->zone_count) {
		struct physical_zone *zone = &zones->zones[index];

		free_pbn_lock_pool(vdo_forget(zone->lock_pool));
		vdo_int_map_free(vdo_forget(zone->pbn_operations));
		index++;
	}

	vdo_free(zones);
}
/**
 * vdo_get_physical_zone_pbn_lock() - Look up the lock on a physical block, if any.
 * @zone: The physical zone responsible for the block; may be NULL.
 * @pbn: The physical block number to look up.
 *
 * Return: The value stored for @pbn in the zone's lock map, or NULL if @zone
 *         is NULL.
 */
struct pbn_lock *vdo_get_physical_zone_pbn_lock(struct physical_zone *zone,
						physical_block_number_t pbn)
{
	if (zone == NULL)
		return NULL;

	return vdo_int_map_get(zone->pbn_operations, pbn);
}
/**
 * vdo_attempt_physical_zone_pbn_lock() - Attempt to lock a physical block in its zone.
 * @zone: The physical zone responsible for the block.
 * @pbn: The physical block number to lock.
 * @type: The type with which to initialize a new lock.
 * @lock_ptr: Where to store the lock covering @pbn: either a newly created
 *            lock, or the lock which was already registered for the block.
 *
 * A lock is borrowed from the pool up front and offered to the lock map
 * without replacing any existing entry. If the block already had a lock, the
 * borrowed one is handed straight back and the existing lock is returned
 * instead. A new lock is returned with a holder count of zero; it is up to the
 * caller to examine the lock and decide whether it may be shared.
 *
 * Return: VDO_SUCCESS or an error code; @lock_ptr is only written on success.
 */
int vdo_attempt_physical_zone_pbn_lock(struct physical_zone *zone,
				       physical_block_number_t pbn,
				       enum pbn_lock_type type,
				       struct pbn_lock **lock_ptr)
{
	struct pbn_lock *lock;
	struct pbn_lock *spare = NULL;
	int result;

	result = borrow_pbn_lock_from_pool(zone->lock_pool, type, &spare);
	if (result != VDO_SUCCESS) {
		VDO_ASSERT_LOG_ONLY(false, "must always be able to borrow a PBN lock");
		return result;
	}

	result = vdo_int_map_put(zone->pbn_operations, pbn, spare, false,
				 (void **) &lock);
	if (result != VDO_SUCCESS) {
		return_pbn_lock_to_pool(zone->lock_pool, spare);
		return result;
	}

	if (lock == NULL) {
		/* No previous lock, so the borrowed one is now registered in the map. */
		*lock_ptr = spare;
		return VDO_SUCCESS;
	}

	/* The block already has a lock, so the borrowed one is not needed. */
	return_pbn_lock_to_pool(zone->lock_pool, vdo_forget(spare));

	result = VDO_ASSERT(lock->holder_count > 0, "physical block %llu lock held",
			    (unsigned long long) pbn);
	if (result != VDO_SUCCESS)
		return result;

	*lock_ptr = lock;
	return VDO_SUCCESS;
}
/**
 * allocate_and_lock_block() - Allocate a block from the allocation's current zone and
 *                             take a lock on it.
 * @allocation: The allocation state; on success its pbn and lock fields
 *              describe the newly allocated and locked block.
 *
 * The lock is taken with the allocation's write_lock_type, gets a single
 * holder, and is marked as holding a provisional reference.
 *
 * NOTE(review): both error paths after vdo_allocate_block() succeeds return
 * without releasing the block that was just allocated -- confirm whether that
 * is cleaned up elsewhere.
 *
 * Return: VDO_SUCCESS, an error from the allocator or the lock attempt, or
 *         VDO_LOCK_ERROR if the new block turned out to be locked already.
 */
static int allocate_and_lock_block(struct allocation *allocation)
{
	struct pbn_lock *lock;
	int result;

	VDO_ASSERT_LOG_ONLY(allocation->lock == NULL,
			    "must not allocate a block while already holding a lock on one");

	result = vdo_allocate_block(allocation->zone->allocator, &allocation->pbn);
	if (result == VDO_SUCCESS) {
		result = vdo_attempt_physical_zone_pbn_lock(allocation->zone,
							    allocation->pbn,
							    allocation->write_lock_type,
							    &lock);
	}

	if (result != VDO_SUCCESS)
		return result;

	if (lock->holder_count > 0) {
		/* A freshly allocated block should never have a holder already. */
		return vdo_log_error_strerror(VDO_LOCK_ERROR,
					      "Newly allocated block %llu was spuriously locked (holder_count=%u)",
					      (unsigned long long) allocation->pbn,
					      lock->holder_count);
	}

	/* The lock is new, so claim it for this allocation. */
	lock->holder_count++;
	allocation->lock = lock;
	vdo_assign_pbn_lock_provisional_reference(lock);
	return VDO_SUCCESS;
}
/**
 * retry_allocation() - Waiter callback which restarts an allocation.
 * @waiter: The waiter embedded in the data_vio whose allocation is retried.
 * @context: Unused.
 *
 * Installed by continue_allocating() as the clean slab waiter callback. It
 * takes the data_vio out of wait-for-clean-slab mode, makes its current zone
 * the new starting zone, and continues the data_vio.
 */
static void retry_allocation(struct vdo_waiter *waiter, void __always_unused *context)
{
	struct data_vio *data_vio = vdo_waiter_as_data_vio(waiter);

	/* Start a fresh pass over the zones, beginning with the current one. */
	data_vio->allocation.first_allocation_zone = data_vio->allocation.zone->zone_number;
	data_vio->allocation.wait_for_clean_slab = false;

	continue_data_vio(data_vio);
}
/**
 * continue_allocating() - Decide how to proceed after an allocation attempt found no
 *                         space in the current zone.
 * @data_vio: The data_vio which is trying to allocate a block.
 *
 * Either enqueues the data_vio to wait for a clean slab in the current zone,
 * or moves the allocation to the next zone and relaunches the data_vio's
 * completion on that zone's thread.
 *
 * Return: true if the data_vio was enqueued as a waiter or relaunched in the
 *         next zone; false if the allocation cannot continue, in which case
 *         the completion's result has been set.
 */
static bool continue_allocating(struct data_vio *data_vio)
{
struct allocation *allocation = &data_vio->allocation;
struct physical_zone *zone = allocation->zone;
struct vdo_completion *completion = &data_vio->vio.completion;
int result = VDO_SUCCESS;
/* Both flags are captured before the allocation state is modified below. */
bool was_waiting = allocation->wait_for_clean_slab;
/* True when the next zone in the ring is the one this pass started from. */
bool tried_all = (allocation->first_allocation_zone == zone->next->zone_number);
vdo_reset_completion(completion);
if (tried_all && !was_waiting) {
/*
 * A full pass over the zones without waiting found nothing, so begin a second
 * pass, starting from this zone, which waits for a clean slab in each zone.
 */
allocation->wait_for_clean_slab = true;
allocation->first_allocation_zone = zone->zone_number;
}
if (allocation->wait_for_clean_slab) {
data_vio->waiter.callback = retry_allocation;
result = vdo_enqueue_clean_slab_waiter(zone->allocator,
&data_vio->waiter);
if (result == VDO_SUCCESS) {
/* Enqueued; retry_allocation() will be invoked via the waiter callback. */
return true;
}
/*
 * Give up on any error other than VDO_NO_SPACE, or when this was already the
 * last zone of a waiting pass.
 */
if ((result != VDO_NO_SPACE) || (was_waiting && tried_all)) {
vdo_set_completion_result(completion, result);
return false;
}
}
/* Move on to the next zone and resume the data_vio on that zone's thread. */
allocation->zone = zone->next;
completion->callback_thread_id = allocation->zone->thread_id;
vdo_launch_completion(completion);
return true;
}
/**
 * vdo_allocate_block_in_zone() - Attempt to allocate a block in the data_vio's current
 *                                allocation zone.
 * @data_vio: The data_vio needing an allocation.
 *
 * If the zone has no space, the allocation is handed to continue_allocating().
 * Any other error, or a failure to continue, is reported by continuing the
 * data_vio with the original allocation error.
 *
 * Return: true if a block was allocated and locked; false if the data_vio has
 *         been continued with an error, enqueued to wait, or relaunched in
 *         another zone.
 */
bool vdo_allocate_block_in_zone(struct data_vio *data_vio)
{
	int result = allocate_and_lock_block(&data_vio->allocation);

	if (result == VDO_SUCCESS)
		return true;

	/* Only an out-of-space condition is worth trying to continue from. */
	if ((result == VDO_NO_SPACE) && continue_allocating(data_vio))
		return false;

	continue_data_vio_with_error(data_vio, result);
	return false;
}
/**
 * vdo_release_physical_zone_pbn_lock() - Release one hold on a physical block lock.
 * @zone: The physical zone in which the lock was obtained.
 * @locked_pbn: The physical block number covered by the lock.
 * @lock: The lock being released; may be NULL, in which case nothing is done.
 *
 * Drops one holder from the lock. When the last holder is gone, the lock is
 * removed from the zone's lock map, any provisional reference it still holds
 * is released, and the lock is returned to the zone's pool. The caller must
 * not use @lock after this call.
 */
void vdo_release_physical_zone_pbn_lock(struct physical_zone *zone,
					physical_block_number_t locked_pbn,
					struct pbn_lock *lock)
{
	struct pbn_lock *holder;

	if (lock == NULL)
		return;

	VDO_ASSERT_LOG_ONLY(lock->holder_count > 0,
			    "should not be releasing a lock that is not held");

	/* Other holders remain, so the lock must stay in place. */
	if (--lock->holder_count > 0)
		return;

	holder = vdo_int_map_remove(zone->pbn_operations, locked_pbn);
	VDO_ASSERT_LOG_ONLY((lock == holder), "physical block lock mismatch for block %llu",
			    (unsigned long long) locked_pbn);

	release_pbn_lock_provisional_reference(lock, locked_pbn, zone->allocator);
	return_pbn_lock_to_pool(zone->lock_pool, lock);
}
/**
 * vdo_dump_physical_zone() - Dump information about a physical zone.
 * @zone: The zone to dump.
 *
 * Currently this dumps only the zone's block allocator.
 */
void vdo_dump_physical_zone(const struct physical_zone *zone)
{
	struct block_allocator *allocator = zone->allocator;

	vdo_dump_block_allocator(allocator);
}