diff --git a/Makefile b/Makefile
index 4d229a0..156223b 100644
--- a/Makefile
+++ b/Makefile
@@ -28,6 +28,8 @@ obj-y += source/core/flash.o
 obj-y += source/fs/block.o
 obj-y += source/fs/part.o
 
+obj-y += source/ftl/ftl.o
+
 obj = $(addprefix $(BUILD)/, $(obj-y))
 
 # Include the dependencies.
diff --git a/include/ftl.h b/include/ftl.h
new file mode 100644
index 0000000..f0ba44f
--- /dev/null
+++ b/include/ftl.h
@@ -0,0 +1,44 @@
+#pragma once
+
+#include <stdint.h>
+
+struct flash_dev;
+
+/* Header written at offset zero of the last page of a page group. */
+struct ftl_page_group {
+	uint8_t magic[3];
+	uint8_t epoch;
+	uint32_t tail;
+} __attribute__((packed));
+
+/* Descriptor kept for every user page: its virtual address and one subtree
+ * link per address bit.
+ */
+struct ftl_page_desc {
+	uint32_t va;
+	uint32_t subtrees[32];
+} __attribute__((packed));
+
+/* Run-time state of a journal on top of a flash device. */
+struct ftl_journal {
+	struct flash_dev *dev;
+	uint32_t head, tail;
+	uint32_t root;
+	uint32_t nblocks;
+	uint8_t log2_groups_per_block;
+	uint8_t log2_pages_per_group;
+	uint8_t log2_page_size;
+	uint8_t log2_block_size;
+	uint8_t log2_erase_size;
+	uint8_t epoch;
+};
+
+#define FTL_MAX_ATTEMPTS 8
+
+int ftl_init_journal(struct ftl_journal *j, struct flash_dev *dev);
+int ftl_resume_journal(struct ftl_journal *j);
+int ftl_find(struct ftl_journal *j, uint32_t *loc, uint32_t va);
+int ftl_write(struct ftl_journal *j, uint32_t va, const uint8_t *data);
+int ftl_read(struct ftl_journal *j, uint8_t *data, uint32_t va);
+int ftl_trim(struct ftl_journal *j, uint32_t va);
+uint32_t ftl_get_capacity(const struct ftl_journal *j);
diff --git a/include/macros.h b/include/macros.h
index ad809a5..7b67055 100644
--- a/include/macros.h
+++ b/include/macros.h
@@ -21,6 +21,10 @@
 #define ROUND_DOWN(x, k) ((x) & ~((k) - 1))
 #define ROUND_UP(x, k) (((x) + (k) - 1) & ~((k) - 1))
 
+/* Alignment helpers; k is the log2 of the alignment. */
+#define is_aligned(x, k) (!((x) & ((1 << (k)) - 1)))
+#define align_eq(x, y, k) (!(((x) ^ (y)) >> (k)))
+
 /* Bit manipulation */
 #define BIT_SIZE(t) (CHAR_BIT * sizeof(t))
 #define BIT(n) (1 << (n))
diff --git a/source/ftl/ftl.c b/source/ftl/ftl.c
new file mode 100644
index 0000000..3c178f0
--- /dev/null
+++ b/source/ftl/ftl.c
@@ -0,0 +1,642 @@
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#define ERR_NOT_FOUND 2
+
+/* Computes the page number of the user page that follows user page p. The
+ * page number is incremented once; if that lands on the last page of a page
+ * group, which holds the page descriptors, it is incremented once more so that
+ * it refers to the first user page of the next page group. A result at or
+ * beyond the total number of pages on the device wraps around to page zero.
+ */
+static uint32_t next_upage(struct ftl_journal *j, uint32_t p)
+{
+	size_t block_shift = j->log2_pages_per_group + j->log2_groups_per_block;
+	uint32_t total_pages = j->nblocks << block_shift;
+	uint32_t next = p + 1;
+
+	/* Step over the page that contains the page descriptors. */
+	if (is_aligned(next + 1, j->log2_pages_per_group))
+		++next;
+
+	return (next >= total_pages) ? 0 : next;
+}
+
+/* Determines the amount of user pages to store in a page group by determining
+ * how many page descriptors the last page of the page group can contain at
+ * most. Because the page group consists of $2^n$ pages, $2^{n - 1}$ of those
+ * pages will end up becoming user pages. Once the amount of pages in a page
+ * group has been determined, the amount of page groups within an erase block
+ * can also be determined, as a single page group may not cover a whole erase
+ * block.
+ */
+static int find_block_div(struct ftl_journal *j)
+{
+	const size_t desc_size = sizeof(struct ftl_page_desc);
+	size_t block_shift = j->log2_block_size - j->log2_page_size;
+	size_t budget = (1 << j->log2_page_size) - sizeof(struct ftl_page_group);
+	size_t needed = desc_size;
+
+	/* Keep doubling the group size for as long as the descriptors of all
+	 * its user pages still fit in one page next to the group header.
+	 */
+	for (j->log2_pages_per_group = 1;
+		j->log2_pages_per_group < block_shift;
+		++j->log2_pages_per_group) {
+		needed = 2 * needed + desc_size;
+
+		if (needed > budget)
+			break;
+	}
+
+	j->log2_groups_per_block = block_shift - j->log2_pages_per_group;
+
+	return 0;
+}
+
+/* Erases a whole block by calling flash_erase() once for every erase unit
+ * that the block spans. Returns 0 on success and -1 as soon as one of the
+ * erase operations fails.
+ */
+static int erase_block(struct ftl_journal *j, uint32_t block)
+{
+	uint32_t unit_shift = j->log2_block_size - j->log2_erase_size;
+	uint32_t nunits = UINT32_C(1) << unit_shift;
+	uint32_t unit_size = UINT32_C(1) << j->log2_erase_size;
+	uint32_t first_unit = block << unit_shift;
+	uint32_t n;
+
+	for (n = 0; n < nunits; ++n) {
+		if (flash_erase(j->dev, first_unit + n, unit_size) < 0)
+			return -1;
+	}
+
+	return 0;
+}
+
+/* Puts the journal back into its initial, empty state: the geometry is set to
+ * 4 KiB pages and erase units with 64 KiB blocks, the page group layout and
+ * the number of blocks are derived from it, and the head, tail, root and epoch
+ * are cleared.
+ */
+static void reset_journal(struct ftl_journal *j)
+{
+	j->log2_page_size = ilog2(4 * KIB);
+	j->log2_erase_size = ilog2(4 * KIB);
+	j->log2_block_size = ilog2(64 * KIB);
+
+	find_block_div(j);
+	j->nblocks = flash_get_size(j->dev) >> j->log2_block_size;
+
+	j->epoch = 0;
+	j->root = UINT32_MAX;
+	j->tail = 0;
+	j->head = 0;
+}
+
+/* Binds the journal to the given flash device and resets it to the empty
+ * state. Always returns 0.
+ */
+int ftl_init_journal(struct ftl_journal *j, struct flash_dev *dev)
+{
+	j->dev = dev;
+	reset_journal(j);
+
+	return 0;
+}
+
+/* Given a block number, this function attempts to find the first block that is
+ * in use. A block is considered to be in use when the first page group is in
+ * use, as a block can only be erased as a whole. Therefore, if the first page
+ * group is not in use, neither will the other page groups in a block.
+ */
+static int find_block(struct ftl_journal *j, struct ftl_page_group *group,
+	uint32_t *where, uint32_t block)
+{
+	uint32_t addr;
+	unsigned attempt;
+
+	/* At most FTL_MAX_ATTEMPTS consecutive blocks are probed. */
+	for (attempt = 0; block < j->nblocks && attempt < FTL_MAX_ATTEMPTS;
+		++attempt, ++block) {
+		/* Byte address of the last page of the block's first page group,
+		 * which is where the page group header is written.
+		 */
+		addr = block << j->log2_block_size;
+		addr |= ((UINT32_C(1) << j->log2_pages_per_group) - 1) <<
+			j->log2_page_size;
+
+		if (flash_read(j->dev, addr, group, sizeof *group) < 0)
+			continue;
+
+		if (memcmp(group->magic, "FTL", sizeof group->magic) != 0)
+			continue;
+
+		*where = block;
+
+		return 0;
+	}
+
+	return -1;
+}
+
+/* Given the block number of the first block in use, performs a binary search
+ * for the last block that is in use, i.e. the last block whose page group
+ * header carries the magic and the epoch of the journal.
+ *
+ * Returns the block number that was found, or first if the search comes up
+ * empty.
+ */
+static uint32_t find_last_block(struct ftl_journal *j, uint32_t first)
+{
+	struct ftl_page_group group;
+	uint32_t mid, low = first, high = j->nblocks - 1;
+	uint32_t found, next;
+
+	while (low <= high) {
+		mid = low + (high - low) / 2;
+
+		if (find_block(j, &group, &found, mid) < 0 ||
+			group.epoch != j->epoch) {
+			/* Avoid wrapping high around below block zero. */
+			if (!mid)
+				return first;
+
+			high = mid - 1;
+
+			continue;
+		}
+
+		/* found is the last block in use when it is the last block of the
+		 * device or when no block of the same epoch follows it.
+		 */
+		if (((found + 1) >= j->nblocks) ||
+			find_block(j, &group, &next, found + 1) < 0 ||
+			group.epoch != j->epoch)
+			return found;
+
+		low = next;
+	}
+
+	return first;
+}
+
+/* Given a page number, this function checks whether the page is fully erased
+ * by checking if all bits are set to ones. The page is read in chunks of at
+ * most 64 bytes.
+ *
+ * Returns 1 if the page is erased, and 0 if it is not or if reading fails.
+ */
+static int is_page_erased(struct ftl_journal *j, uint32_t page)
+{
+	uint8_t data[64];
+	size_t i, nbytes;
+	/* The page size in bytes rather than its log2, so that the whole page
+	 * is inspected instead of only its first few bytes.
+	 */
+	size_t len = (size_t)1 << j->log2_page_size;
+	uint32_t addr = page << j->log2_page_size;
+
+	while (len) {
+		nbytes = min(sizeof data, len);
+
+		if (flash_read(j->dev, addr, data, nbytes) < 0)
+			return 0;
+
+		for (i = 0; i < nbytes; ++i) {
+			if (data[i] != 0xff)
+				return 0;
+		}
+
+		addr += nbytes;
+		len -= nbytes;
+	}
+
+	return 1;
+}
+
+/* Given the group number, this function checks if a page group is erased by
+ * checking if the pages that compose the page group are erased.
+ */
+static int is_group_erased(struct ftl_journal *j, uint32_t group)
+{
+	uint32_t npages = UINT32_C(1) << j->log2_pages_per_group;
+	uint32_t page = group << j->log2_pages_per_group;
+	uint32_t i;
+
+	for (i = 0; i < npages; ++i) {
+		if (!is_page_erased(j, page + i))
+			return 0;
+	}
+
+	return 1;
+}
+
+/* Attempts to find the last page group that is in use within a block by
+ * performing a binary search on the page groups of that block.
+ *
+ * Returns the device-wide group number of the last group in use, or the
+ * number of the first group of the block if no group in use could be found.
+ */
+static uint32_t find_last_group(struct ftl_journal *j, uint32_t block)
+{
+	uint32_t ngroups = UINT32_C(1) << j->log2_groups_per_block;
+	/* is_group_erased() takes a device-wide group number, so every probe
+	 * has to be offset by the first group of the block.
+	 */
+	uint32_t base = block << j->log2_groups_per_block;
+	/* The search range is [low, high); an exclusive upper bound cannot
+	 * wrap around when the group at index zero turns out to be erased.
+	 */
+	uint32_t mid, low = 0, high = ngroups;
+
+	while (low < high) {
+		mid = low + (high - low) / 2;
+
+		if (is_group_erased(j, base + mid)) {
+			high = mid;
+			continue;
+		}
+
+		if (((mid + 1) >= ngroups) ||
+			is_group_erased(j, base + mid + 1))
+			return base + mid;
+
+		low = mid + 1;
+	}
+
+	return base;
+}
+
+int read_page_desc(struct ftl_journal *j,
+	struct ftl_page_desc *page_desc, uint32_t upage);
+
+/* Starting at the first page of the given page group, walks the user pages in
+ * journal order and records each one as the root until the page descriptor of
+ * the next user page has a virtual address of UINT32_MAX.
+ *
+ * Returns 0 on success and -1 if a page descriptor could not be read.
+ */
+static int find_root(struct ftl_journal *j, uint32_t group)
+{
+	struct ftl_page_desc page_desc;
+	uint32_t upage;
+
+	upage = group << j->log2_pages_per_group;
+
+	do {
+		j->root = upage;
+		upage = next_upage(j, upage);
+
+		if (read_page_desc(j, &page_desc, upage) < 0)
+			return -1;
+		/* TODO: better condition? */
+	} while (page_desc.va != UINT32_MAX);
+
+	return 0;
+}
+
+/* Attempts to find the first free page within a page group by looking for the
+ * first page that is considered to be erased. If no such page could be found
+ * within the page group, the first user page of the next page group should be
+ * used as that page group should not be in use.
+ */
+static int find_head(struct ftl_journal *j)
+{
+	size_t block_shift = j->log2_pages_per_group + j->log2_groups_per_block;
+
+	j->head = j->root;
+
+	/* Advance past the root until the head either reaches the start of a
+	 * block or lands on an erased page.
+	 */
+	for (;;) {
+		j->head = next_upage(j, j->head);
+
+		if (is_aligned(j->head, block_shift) ||
+			is_page_erased(j, j->head))
+			break;
+	}
+
+	return 0;
+}
+
+/* Resumes the journal from flash: locates the first block in use, adopts its
+ * epoch, locates the last block and the last page group in use, and then
+ * derives the root and the head from them.
+ *
+ * Returns 0 on success and -1 on failure. If no block in use can be found,
+ * the journal is reset before -1 is returned.
+ */
+int ftl_resume_journal(struct ftl_journal *j)
+{
+	struct ftl_page_group hdr;
+	uint32_t first_block, last_block, last_group;
+
+	if (!j)
+		return -1;
+
+	if (find_block(j, &hdr, &first_block, 0) < 0) {
+		reset_journal(j);
+
+		return -1;
+	}
+
+	j->epoch = hdr.epoch;
+	last_block = find_last_block(j, first_block);
+	last_group = find_last_group(j, last_block);
+
+	if (find_root(j, last_group) < 0 || find_head(j) < 0)
+		return -1;
+
+	return 0;
+}
+
+/* Stores page_desc in the footer of the page group that contains the head and
+ * then advances the head to the next user page. The page group header is
+ * written first if the footer page is still erased. The epoch is incremented
+ * whenever the head wraps around.
+ *
+ * Returns 0 on success and -1 if writing to flash fails.
+ */
+static int ftl_write_page_desc(struct ftl_journal *j,
+	const struct ftl_page_desc *page_desc)
+{
+	struct ftl_page_group hdr;
+	uint32_t group_idx = j->head >> j->log2_pages_per_group;
+	uint32_t footer = ((group_idx + 1) << j->log2_pages_per_group) - 1;
+	uint32_t footer_addr = footer << j->log2_page_size;
+	uint32_t slot = j->head & ((1 << j->log2_pages_per_group) - 1);
+	uint32_t desc_off = sizeof hdr + slot * sizeof *page_desc;
+	uint32_t prev_head;
+
+	/* Write the page group header. */
+	if (is_page_erased(j, footer)) {
+		hdr.epoch = j->epoch;
+		hdr.tail = j->tail;
+		memcpy(&hdr.magic, "FTL", sizeof hdr.magic);
+
+		if (flash_write(j->dev, footer_addr, &hdr, sizeof hdr) < 0)
+			return -1;
+	}
+
+	if (flash_write(j->dev, footer_addr + desc_off, page_desc,
+		sizeof *page_desc) < 0)
+		return -1;
+
+	/* The page that was just described becomes the new root. */
+	prev_head = j->head;
+	j->root = prev_head;
+	j->head = next_upage(j, prev_head);
+
+	if (j->head < prev_head)
+		++j->epoch;
+
+	return 0;
+}
+
+/* Reads the page descriptor that belongs to user page upage. The descriptor
+ * lives in the last page of the page group of upage, behind the page group
+ * header, at the slot given by the position of upage within its group.
+ *
+ * Returns the result of flash_read().
+ */
+int read_page_desc(struct ftl_journal *j,
+	struct ftl_page_desc *page_desc, uint32_t upage)
+{
+	uint32_t group_idx = upage >> j->log2_pages_per_group;
+	uint32_t footer = ((group_idx + 1) << j->log2_pages_per_group) - 1;
+	uint32_t footer_addr = footer << j->log2_page_size;
+	uint32_t slot = upage & ((1 << j->log2_pages_per_group) - 1);
+	uint32_t desc_off = sizeof(struct ftl_page_group) +
+		slot * sizeof *page_desc;
+
+	return flash_read(j->dev, footer_addr + desc_off, page_desc,
+		sizeof *page_desc);
+}
+
+/* Trace a path for a given virtual target address by comparing each of the
+ * bits in the target address with the virtual address of our root. In case of
+ * a mismatch, we proceed our traversal with the given subtree at the current
+ * depth until we have either found that there is no further subtree to
+ * traverse or until we have found the actual user page.
+ */
+static int trace_path(struct ftl_journal *j,
+	struct ftl_page_desc *new_page_desc, uint32_t *loc, uint32_t va)
+{
+	struct ftl_page_desc page_desc;
+	uint8_t depth = 0;
+	uint32_t upage = j->root;
+
+	if (new_page_desc)
+		new_page_desc->va = va;
+
+	if (upage == UINT32_MAX)
+		goto err_not_found;
+
+	if (read_page_desc(j, &page_desc, upage) < 0)
+		return -1;
+
+	for (; depth < 32; ++depth) {
+		if (page_desc.va == UINT32_MAX)
+			goto err_not_found;
+
+		/* Bits are compared from the most significant one downwards. The
+		 * mask is built from an unsigned constant, as shifting a signed
+		 * one into the sign bit is undefined.
+		 */
+		if (!((va ^ page_desc.va) & (UINT32_C(1) << (32 - depth - 1)))) {
+			if (new_page_desc)
+				new_page_desc->subtrees[depth] = page_desc.subtrees[depth];
+
+			continue;
+		}
+
+		/* Mismatch: the page we are leaving becomes the subtree of the new
+		 * descriptor at this depth.
+		 */
+		if (new_page_desc)
+			new_page_desc->subtrees[depth] = upage;
+
+		if ((upage = page_desc.subtrees[depth]) == UINT32_MAX) {
+			++depth;
+			goto err_not_found;
+		}
+
+		if (read_page_desc(j, &page_desc, upage) < 0)
+			return -1;
+	}
+
+	if (loc)
+		*loc = upage;
+
+	return 0;
+
+err_not_found:
+	/* Mark the remaining depths of the new descriptor as having no subtree.
+	 * Note that *loc is left untouched on this path.
+	 */
+	if (new_page_desc) {
+		for (; depth < 32; ++depth) {
+			new_page_desc->subtrees[depth] = UINT32_MAX;
+		}
+	}
+
+	return -ERR_NOT_FOUND;
+}
+
+static int ftl_write_upage(struct ftl_journal *j, const uint8_t *page,
+	const struct ftl_page_desc *page_desc);
+
+/* Frees a single user page. If the page still holds the current copy of its
+ * virtual address, its contents are copied to the head and a new page
+ * descriptor is written for it. A page whose virtual address cannot be found,
+ * or is mapped to a different user page, needs no work.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+static int free_page(struct ftl_journal *j, uint32_t upage)
+{
+	struct ftl_page_desc page_desc;
+	uint32_t found_upage, va;
+	int ret;
+
+	if (read_page_desc(j, &page_desc, upage) < 0)
+		return -1;
+
+	va = page_desc.va;
+
+	ret = trace_path(j, &page_desc, &found_upage, va);
+
+	/* A virtual address that cannot be found means the page was never
+	 * written or is no longer mapped; that is not an error.
+	 */
+	if (ret == -ERR_NOT_FOUND)
+		return 0;
+
+	if (ret < 0)
+		return -1;
+
+	if (upage != found_upage)
+		return 0;
+
+	/* NOTE(review): the copy targets the head before ftl_write_upage() has
+	 * called prepare_head(), which may erase the block of the head --
+	 * confirm that this ordering is intended.
+	 */
+	if (flash_copy(j->dev, j->head << j->log2_page_size,
+		upage << j->log2_page_size, 1 << j->log2_page_size) < 0)
+		return -1;
+
+	return ftl_write_upage(j, NULL, &page_desc);
+}
+
+/* Frees all user pages of the given page group. Returns 0 on success and -1
+ * as soon as freeing one of the pages fails.
+ */
+static int free_group(struct ftl_journal *j, uint32_t group)
+{
+	/* The last page of a group holds the page descriptors and has no
+	 * descriptor slot of its own, so only the pages before it are visited.
+	 */
+	uint32_t nupages = (UINT32_C(1) << j->log2_pages_per_group) - 1;
+	uint32_t page = group << j->log2_pages_per_group;
+	uint32_t i;
+
+	for (i = 0; i < nupages; ++i) {
+		if (free_page(j, page + i) < 0)
+			return -1;
+	}
+
+	return 0;
+}
+
+/* Frees all page groups of the given block. Returns 0 on success and -1 as
+ * soon as freeing one of the groups fails.
+ */
+static int free_block(struct ftl_journal *j, uint32_t block)
+{
+	uint32_t ngroups = UINT32_C(1) << j->log2_groups_per_block;
+	uint32_t group = block << j->log2_groups_per_block;
+	uint32_t i;
+
+	for (i = 0; i < ngroups; ++i) {
+		if (free_group(j, group + i) < 0)
+			return -1;
+	}
+
+	return 0;
+}
+
+/* Frees the block at the tail and moves the tail one block forward, wrapping
+ * around at the end of the device, unless the distance between the head and
+ * the tail is still larger than one block worth of pages.
+ *
+ * Returns 0 on success and -1 if the block could not be freed.
+ */
+static int free_tail(struct ftl_journal *j)
+{
+	size_t log2_pages_per_block = j->log2_pages_per_group +
+		j->log2_groups_per_block;
+	size_t npages = j->nblocks << log2_pages_per_block;
+	size_t dist;
+
+	if (j->tail < j->head)
+		dist = npages - j->head + j->tail;
+	else
+		dist = j->tail - j->head;
+
+	if (dist > (UINT32_C(1) << log2_pages_per_block))
+		return 0;
+
+	if (free_block(j, j->tail >> log2_pages_per_block) < 0)
+		return -1;
+
+	j->tail += 1 << log2_pages_per_block;
+
+	if (j->tail >= npages)
+		j->tail -= npages;
+
+	return 0;
+}
+
+/* Makes sure the head can be written to. Nothing is done unless the head is
+ * at the start of a block, in which case the tail is freed if necessary and
+ * the block of the head is erased.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+static int prepare_head(struct ftl_journal *j)
+{
+	size_t log2_pages_per_block = j->log2_pages_per_group +
+		j->log2_groups_per_block;
+
+	if (!is_aligned(j->head, log2_pages_per_block))
+		return 0;
+
+	if (free_tail(j) < 0)
+		return -1;
+
+	return erase_block(j, j->head >> log2_pages_per_block);
+}
+
+/* Prepares the head for writing, writes the user page to the current available
+ * user page and finally writes the page descriptor to the footer of the page
+ * group, whereupon the head is incremented to point to the next available user
+ * page.
+ */
+static int ftl_write_upage(struct ftl_journal *j, const uint8_t *page,
+	const struct ftl_page_desc *page_desc)
+{
+	if (prepare_head(j) < 0)
+		return -1;
+
+	/* A whole page is written, so the length is the page size in bytes and
+	 * not its log2. The write is skipped when no data is given.
+	 */
+	if (page && flash_write(j->dev, j->head << j->log2_page_size, page,
+		UINT32_C(1) << j->log2_page_size) < 0)
+		return -1;
+
+	return ftl_write_page_desc(j, page_desc);
+}
+
+/* Looks up the user page that holds virtual address va and stores its page
+ * number in *page.
+ *
+ * Returns 0 if found, -ERR_NOT_FOUND if va is not mapped, in which case *page
+ * is left untouched, and -1 if reading from flash fails.
+ */
+int ftl_find(struct ftl_journal *j, uint32_t *page, uint32_t va)
+{
+	return trace_path(j, NULL, page, va);
+}
+
+/* Reads the page stored at virtual address va into data, which must have room
+ * for one full page. A virtual address that is not mapped reads back as all
+ * zeroes.
+ *
+ * Returns -1 if the lookup fails, 0 for an unmapped address, and the result
+ * of flash_read() otherwise.
+ */
+int ftl_read(struct ftl_journal *j, uint8_t *data, uint32_t va)
+{
+	size_t page_size = (size_t)1 << j->log2_page_size;
+	int ret;
+	uint32_t page;
+
+	if ((ret = ftl_find(j, &page, va)) < 0) {
+		if (ret != -ERR_NOT_FOUND)
+			return -1;
+
+		memset(data, 0, page_size);
+		return 0;
+	}
+
+	return flash_read(j->dev, page << j->log2_page_size, data, page_size);
+}
+
+/* Writes one page of data to virtual address va.
+ *
+ * Returns -1 if va lies at or beyond the capacity, if va is not aligned to
+ * the page size, or if tracing the path for va fails; otherwise returns the
+ * result of writing the page and its descriptor.
+ */
+int ftl_write(struct ftl_journal *j, uint32_t va, const uint8_t *data)
+{
+	struct ftl_page_desc page_desc;
+	int ret;
+
+	/* Either condition alone makes the address invalid. is_aligned() takes
+	 * the log2 of the alignment rather than the alignment itself.
+	 */
+	if (va >= ftl_get_capacity(j) ||
+		!is_aligned(va, j->log2_page_size))
+		return -1;
+
+	/* Not finding va is fine: the descriptor is then set up for a new
+	 * mapping.
+	 */
+	if ((ret = trace_path(j, &page_desc, NULL, va)) < 0) {
+		if (ret != -ERR_NOT_FOUND)
+			return -1;
+	}
+
+	return ftl_write_upage(j, data, &page_desc);
+}
+
+/* Removes the mapping for virtual address va. Trimming an address that is not
+ * mapped is not an error.
+ *
+ * Returns 0 on success and a negative value on failure.
+ */
+int ftl_trim(struct ftl_journal *j, uint32_t va)
+{
+	struct ftl_page_desc page_desc, alt_page_desc;
+	size_t level, i;
+	uint32_t alt_va, page;
+	int ret;
+
+	if ((ret = trace_path(j, &page_desc, &page, va)) < 0) {
+		if (ret == -ERR_NOT_FOUND)
+			return 0;
+
+		return ret;
+	}
+
+	/* Find the deepest level that has a subtree; alt_va receives the user
+	 * page number stored in that subtree link.
+	 */
+	for (i = 0; i < 32; ++i) {
+		level = 32 - i - 1;
+
+		if ((alt_va = page_desc.subtrees[level]) != UINT32_MAX)
+			break;
+	}
+
+	/* No subtree at all: va was the only mapping left. */
+	if (i == 32) {
+		j->root = UINT32_MAX;
+		/* TODO: how do we clean the FTL? */
+
+		return 0;
+	}
+
+	if (read_page_desc(j, &alt_page_desc, alt_va) < 0)
+		return -1;
+
+	page_desc.va = alt_page_desc.va;
+	page_desc.subtrees[level] = UINT32_MAX;
+
+	for (i = level + 1; i < 32; ++i) {
+		page_desc.subtrees[i] = alt_page_desc.subtrees[i];
+	}
+
+	/* NOTE(review): the copy targets the head before ftl_write_upage() has
+	 * called prepare_head(), which may erase the block of the head --
+	 * confirm that this ordering is intended.
+	 */
+	if (flash_copy(j->dev, j->head << j->log2_page_size,
+		page << j->log2_page_size, 1 << j->log2_page_size) < 0)
+		return -1;
+
+	return ftl_write_upage(j, NULL, &page_desc);
+}
+
+/* The capacity of the device is the total amount of user pages minus a block
+ * worth of user pages for garbage collection. Every page group gives up its
+ * last page to the page descriptors, so one page per page group is subtracted
+ * for each of the remaining blocks. The result is in bytes, and is zero for a
+ * device without any blocks.
+ */
+uint32_t ftl_get_capacity(const struct ftl_journal *j)
+{
+	uint32_t nblocks;
+
+	if (!j->nblocks)
+		return 0;
+
+	nblocks = j->nblocks - 1;
+
+	return (nblocks << j->log2_block_size) -
+		(nblocks << (j->log2_groups_per_block + j->log2_page_size));
+}