#include <stdint.h>
#include <stddef.h>
#include <string.h>

/* The remaining include targets were lost in extraction; the flash device
 * interface, the FTL journal types, constants such as FTL_MAX_ATTEMPTS, and
 * helpers such as ilog2(), is_aligned(), min() and KIB live in
 * project-specific headers.
 */

#define ERR_NOT_FOUND 2

/* Given the current user page, this function computes the page number of the
 * next user page by incrementing the page number. However, if incrementing
 * the page number yields the page number of a page containing page
 * descriptors, the page number is incremented again so that it points to the
 * first user page of the next page group. Finally, if incrementing the page
 * number yields a page number beyond the last page of the device, the page
 * number of the very first user page is returned instead.
 */
static uint32_t next_upage(struct ftl_journal *j, uint32_t p)
{
	size_t log2_pages_per_block = j->log2_pages_per_group +
	    j->log2_groups_per_block;

	++p;

	/* The last page of every page group holds the descriptors: skip it. */
	if (is_aligned(p + 1, j->log2_pages_per_group))
		++p;

	if (p >= (j->nblocks << log2_pages_per_block))
		p = 0;

	return p;
}

/* Determines the number of user pages to store in a page group by determining
 * how many page descriptors the last page of the page group can contain at
 * most. Because the page group consists of $2^n$ pages, $2^n - 1$ of those
 * pages end up becoming user pages. Once the number of pages in a page group
 * has been determined, the number of page groups within an erase block can
 * also be determined, as a single page group may not cover a whole erase
 * block.
 */
static int find_block_div(struct ftl_journal *j)
{
	size_t log2_pages_per_block = j->log2_block_size - j->log2_page_size;
	size_t nbytes_avail = (1 << j->log2_page_size) -
	    sizeof(struct ftl_page_group);
	size_t nbytes = sizeof(struct ftl_page_desc);

	j->log2_pages_per_group = 1;

	/* Doubling the group size doubles the descriptors needed so far and
	 * adds one more, i.e. a group of 2^n pages needs 2^n - 1 descriptors.
	 */
	while (j->log2_pages_per_group < log2_pages_per_block) {
		nbytes = 2 * nbytes + sizeof(struct ftl_page_desc);

		if (nbytes > nbytes_avail)
			break;

		++j->log2_pages_per_group;
	}

	j->log2_groups_per_block = log2_pages_per_block -
	    j->log2_pages_per_group;

	return 0;
}

/* Erases an entire block by erasing the smallest units that can be erased
 * that span the block.
 */
static int erase_block(struct ftl_journal *j, uint32_t block)
{
	uint32_t log2_units_per_block = j->log2_block_size -
	    j->log2_erase_size;
	uint32_t unit = block << log2_units_per_block;
	uint32_t i;

	for (i = 0; i < UINT32_C(1) << log2_units_per_block; ++i) {
		if (flash_erase(j->dev, unit + i,
		    UINT32_C(1) << j->log2_erase_size) < 0)
			return -1;
	}

	return 0;
}

static void reset_journal(struct ftl_journal *j)
{
	j->log2_erase_size = ilog2(4 * KIB);
	j->log2_page_size = ilog2(4 * KIB);
	j->log2_block_size = ilog2(64 * KIB);

	find_block_div(j);

	j->nblocks = flash_get_size(j->dev) >> j->log2_block_size;
	j->head = 0;
	j->tail = 0;
	j->root = UINT32_MAX;
	j->nused_pages = 0;
	j->epoch = 0;
}

int ftl_init_journal(struct ftl_journal *j, struct flash_dev *dev)
{
	j->dev = dev;

	reset_journal(j);

	return 0;
}
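/* Worked example (added for illustration; the exact descriptor size is
 * project-specific): with the 4 KiB pages and 64 KiB blocks configured in
 * reset_journal(), log2_pages_per_block = 16 - 12 = 4. find_block_div()
 * grows the group while the 2^n - 1 descriptors, plus the group header,
 * still fit in one 4096-byte page. If sizeof(struct ftl_page_desc) were,
 * say, 144 bytes, the 15 descriptors of a 16-page group occupy 2160 bytes
 * and still fit, so a single group spans the whole block:
 * log2_pages_per_group = 4 and log2_groups_per_block = 0.
 */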
/* Given a block number, this function attempts to find the first block that
 * is in use. A block is considered to be in use when its first page group is
 * in use, as a block can only be erased as a whole: if the first page group
 * is not in use, neither are the other page groups in the block.
 */
static int find_block(struct ftl_journal *j, struct ftl_page_group *group,
    uint32_t *where, uint32_t block)
{
	uint32_t addr;
	unsigned attempt;

	for (attempt = 0; block < j->nblocks && attempt < FTL_MAX_ATTEMPTS;
	    ++attempt, ++block) {
		/* Byte address of the last page of the first page group,
		 * which holds the group header. */
		addr = block << j->log2_block_size;
		addr |= ((UINT32_C(1) << j->log2_pages_per_group) - 1) <<
		    j->log2_page_size;

		if (flash_read(j->dev, addr, group, sizeof *group) < 0)
			continue;

		if (memcmp(group->magic, "FTL", sizeof group->magic) != 0)
			continue;

		*where = block;

		return 0;
	}

	return -1;
}

/* Given the block number of the first block in use, attempts to use binary
 * search to find the last block that is in use.
 */
static uint32_t find_last_block(struct ftl_journal *j, uint32_t first)
{
	struct ftl_page_group group;
	uint32_t mid, low = first, high = j->nblocks - 1;
	uint32_t found, next;

	while (low <= high) {
		mid = (low + high) / 2;

		if (find_block(j, &group, &found, mid) < 0 ||
		    group.epoch != j->epoch) {
			if (!mid)
				return first;

			high = mid - 1;
			continue;
		}

		if (((found + 1) >= j->nblocks) ||
		    find_block(j, &group, &next, found + 1) < 0 ||
		    group.epoch != j->epoch)
			return found;

		low = next;
	}

	return first;
}

/* Given a page number, this function checks whether the page is fully erased
 * by checking whether all bits are set to ones.
 */
static int is_page_erased(struct ftl_journal *j, uint32_t page)
{
	uint8_t data[64];
	size_t i, nbytes, len = (size_t)1 << j->log2_page_size;
	uint32_t addr = page << j->log2_page_size;

	while (len) {
		nbytes = min(sizeof data, len);

		if (flash_read(j->dev, addr, data, nbytes) < 0)
			return 0;

		for (i = 0; i < nbytes; ++i) {
			if (data[i] != 0xff)
				return 0;
		}

		addr += nbytes;
		len -= nbytes;
	}

	return 1;
}

/* Given the group number, this function checks whether a page group is erased
 * by checking whether the pages that compose the page group are erased.
 */
static int is_group_erased(struct ftl_journal *j, uint32_t group)
{
	uint32_t npages = UINT32_C(1) << j->log2_pages_per_group;
	uint32_t page = group << j->log2_pages_per_group;
	uint32_t i;

	for (i = 0; i < npages; ++i) {
		if (!is_page_erased(j, page + i))
			return 0;
	}

	return 1;
}

/* Attempts to find the last page group that is in use within a block by
 * performing a binary search on the page groups.
 */
static uint32_t find_last_group(struct ftl_journal *j, uint32_t block)
{
	uint32_t ngroups = UINT32_C(1) << j->log2_groups_per_block;
	uint32_t base = block << j->log2_groups_per_block;
	uint32_t mid, low = 0, high = ngroups - 1;

	while (low <= high) {
		mid = (low + high) / 2;

		/* Page groups are numbered globally, so the block offset has
		 * to be added to the group index within the block. */
		if (is_group_erased(j, base + mid)) {
			if (!mid)
				break;

			high = mid - 1;
			continue;
		}

		if (((mid + 1) >= ngroups) ||
		    is_group_erased(j, base + mid + 1))
			return base + mid;

		low = mid + 1;
	}

	return base;
}

int read_page_desc(struct ftl_journal *j, struct ftl_page_desc *page_desc,
    uint32_t upage);

/* Given the number of the last page group in use, finds the root, i.e. the
 * most recently written user page, by walking the user pages of the group
 * until a descriptor is found that has not been written yet.
 */
static int find_root(struct ftl_journal *j, uint32_t group)
{
	struct ftl_page_desc page_desc;
	uint32_t upage;

	upage = group << j->log2_pages_per_group;

	do {
		j->root = upage;
		upage = next_upage(j, upage);

		if (read_page_desc(j, &page_desc, upage) < 0)
			return -1;
	} while (page_desc.va != UINT32_MAX || page_desc.nused_pages == 0);

	return 0;
}
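/* On-flash layout (added for illustration, assuming the default geometry of
 * a single 16-page group per 64 KiB block):
 *
 *   block: | upage 0 | upage 1 | ... | upage 14 | header + 15 descriptors |
 *
 * The last page of every group carries the struct ftl_page_group header that
 * find_block() probes for the "FTL" magic, followed by one
 * struct ftl_page_desc per user page, which is what the binary searches
 * above inspect.
 */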
/* Attempts to find the first free user page after the root by looking for
 * the first page that is considered to be erased. The search stops early
 * when the head reaches a block boundary, as such a block is erased before
 * it is written to.
 */
static int find_head(struct ftl_journal *j)
{
	size_t log2_pages_per_block = j->log2_pages_per_group +
	    j->log2_groups_per_block;

	j->head = j->root;

	do {
		j->head = next_upage(j, j->head);

		if (is_aligned(j->head, log2_pages_per_block))
			return 0;
	} while (!is_page_erased(j, j->head));

	return 0;
}

int read_page_group(struct ftl_journal *j, struct ftl_page_group *group,
    uint32_t group_no);

/* Resumes the journal by finding the first block that is in use, the last
 * block that is in use, and the last page group that is in use, and by
 * setting the head to the first free user page.
 */
int ftl_resume_journal(struct ftl_journal *j)
{
	struct ftl_page_group group;
	struct ftl_page_desc page_desc;
	uint32_t first, last, group_no;

	if (!j)
		return -1;

	if (find_block(j, &group, &first, 0) < 0) {
		reset_journal(j);
		return -1;
	}

	j->epoch = group.epoch;
	last = find_last_block(j, first);
	group_no = find_last_group(j, last);

	if (find_root(j, group_no) < 0)
		return -1;

	if (find_head(j) < 0)
		return -1;

	if (read_page_group(j, &group, j->root >> j->log2_pages_per_group) < 0)
		return -1;

	if (read_page_desc(j, &page_desc, j->root) < 0)
		return -1;

	j->tail = group.tail;
	j->nused_pages = page_desc.nused_pages;

	return 0;
}

/* Writes the page descriptor to the footer of the current page group and
 * increments the head to point to the next free user page.
 */
static int ftl_write_page_desc(struct ftl_journal *j,
    const struct ftl_page_desc *page_desc)
{
	struct ftl_page_group group;
	uint32_t group_no, page, addr, offset, head;

	group_no = j->head >> j->log2_pages_per_group;
	page = ((group_no + 1) << j->log2_pages_per_group) - 1;
	addr = page << j->log2_page_size;

	/* Write the page group header if this is the first descriptor to be
	 * written to the group. */
	if (is_page_erased(j, page)) {
		memcpy(&group.magic, "FTL", sizeof group.magic);
		group.epoch = j->epoch;
		group.tail = j->tail;

		if (flash_write(j->dev, addr, &group, sizeof group) < 0)
			return -1;
	}

	offset = sizeof group +
	    (j->head & ((1 << j->log2_pages_per_group) - 1)) *
	    sizeof *page_desc;

	if (flash_write(j->dev, addr + offset, page_desc,
	    sizeof *page_desc) < 0)
		return -1;

	j->root = j->head;

	head = j->head;
	j->head = next_upage(j, j->head);

	/* The head wrapped around: a new epoch begins. */
	if (j->head < head)
		++j->epoch;

	return 0;
}

/* Given the page number of a user page, reads the page descriptor associated
 * with the user page by locating the footer, and more specifically the page
 * descriptor, within the page group.
 */
int read_page_desc(struct ftl_journal *j, struct ftl_page_desc *page_desc,
    uint32_t upage)
{
	uint32_t group_no, page, addr, offset;

	group_no = upage >> j->log2_pages_per_group;
	page = ((group_no + 1) << j->log2_pages_per_group) - 1;
	addr = page << j->log2_page_size;
	offset = sizeof(struct ftl_page_group) +
	    (upage & ((1 << j->log2_pages_per_group) - 1)) *
	    sizeof *page_desc;

	return flash_read(j->dev, addr + offset, page_desc,
	    sizeof *page_desc);
}

int read_page_group(struct ftl_journal *j, struct ftl_page_group *group,
    uint32_t group_no)
{
	uint32_t page, addr;

	page = ((group_no + 1) << j->log2_pages_per_group) - 1;
	addr = page << j->log2_page_size;

	return flash_read(j->dev, addr, group, sizeof *group);
}
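/* Worked example (added for illustration, assuming 4 KiB pages and 16 pages
 * per group): for upage 5, group_no = 5 >> 4 = 0, the descriptor page is
 * page ((0 + 1) << 4) - 1 = 15, its byte address is 15 << 12 = 0xf000, and
 * the descriptor lives at offset sizeof(struct ftl_page_group) +
 * 5 * sizeof(struct ftl_page_desc) within that page.
 */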
/* Traces a path for a given virtual target address by comparing each of the
 * bits in the target address with the virtual address of our root. In case
 * of a mismatch, we proceed our traversal with the given subtree at the
 * current depth, until we have either found that there is no further subtree
 * to traverse or until we have found the actual user page.
 */
static int trace_path(struct ftl_journal *j,
    struct ftl_page_desc *new_page_desc, uint32_t *loc, uint32_t va)
{
	struct ftl_page_desc page_desc;
	uint8_t depth = 0;
	uint32_t upage = j->root;

	if (new_page_desc)
		new_page_desc->va = va;

	if (upage == UINT32_MAX)
		goto err_not_found;

	if (read_page_desc(j, &page_desc, upage) < 0)
		return -1;

	for (; depth < 32; ++depth) {
		if (page_desc.va == UINT32_MAX)
			goto err_not_found;

		/* Compare the bits most-significant first; use an unsigned
		 * constant, as shifting a signed 1 into bit 31 is undefined.
		 */
		if (!((va ^ page_desc.va) & (UINT32_C(1) << (31 - depth)))) {
			if (new_page_desc)
				new_page_desc->subtrees[depth] =
				    page_desc.subtrees[depth];

			continue;
		}

		if (new_page_desc)
			new_page_desc->subtrees[depth] = upage;

		if ((upage = page_desc.subtrees[depth]) == UINT32_MAX) {
			++depth;
			goto err_not_found;
		}

		if (read_page_desc(j, &page_desc, upage) < 0)
			return -1;
	}

	if (loc)
		*loc = upage;

	return 0;

err_not_found:
	if (new_page_desc) {
		for (; depth < 32; ++depth) {
			new_page_desc->subtrees[depth] = UINT32_MAX;
		}
	}

	return -ERR_NOT_FOUND;
}

static int ftl_write_upage(struct ftl_journal *j, const uint8_t *page,
    const struct ftl_page_desc *page_desc);

/* For a given user page, attempts to claim more free space by checking
 * whether a more recent mapping has obsoleted the older mapping. If a more
 * recent mapping exists, the page can be safely ignored and erased.
 * Otherwise, we preserve the page by copying it to the head to create a new
 * mapping, such that the old page can be ignored and erased.
 */
static int free_page(struct ftl_journal *j, uint32_t upage)
{
	struct ftl_page_desc page_desc;
	uint32_t found_upage, va;
	int ret;

	if (read_page_desc(j, &page_desc, upage) < 0)
		return -1;

	va = page_desc.va;

	/* A virtual address that no longer maps to anything has been trimmed
	 * or was never written: the page needs no copying. */
	if ((ret = trace_path(j, &page_desc, &found_upage, va)) < 0) {
		if (ret == -ERR_NOT_FOUND)
			return 0;

		return -1;
	}

	if (upage != found_upage)
		return 0;

	page_desc.nused_pages = j->nused_pages;

	if (flash_copy(j->dev, j->head << j->log2_page_size,
	    upage << j->log2_page_size, 1 << j->log2_page_size) < 0)
		return -1;

	return ftl_write_upage(j, NULL, &page_desc);
}

/* Claims more free space by checking which user pages in a page group are
 * mapped and for which the mappings have been obsoleted by a more recent
 * mapping. The mapped user pages are preserved by copying.
 */
static int free_group(struct ftl_journal *j, uint32_t group)
{
	uint32_t npages = UINT32_C(1) << j->log2_pages_per_group;
	uint32_t page = group << j->log2_pages_per_group;
	uint32_t i;

	for (i = 0; i < npages; ++i) {
		if (free_page(j, page + i) < 0)
			return -1;
	}

	return 0;
}

/* Claims more free space by checking which user pages in a block are mapped
 * and for which the mappings have been obsoleted by a more recent mapping.
 * The mapped user pages are preserved by copying.
 */
static int free_block(struct ftl_journal *j, uint32_t block)
{
	uint32_t ngroups = UINT32_C(1) << j->log2_groups_per_block;
	uint32_t group = block << j->log2_groups_per_block;
	uint32_t i;

	for (i = 0; i < ngroups; ++i) {
		if (free_group(j, group + i) < 0)
			return -1;
	}

	return 0;
}
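/* Worked example (added for illustration): suppose the root maps va
 * 0x00001000 and we trace va 0x00003000. The two addresses first differ at
 * bit 13 (mask UINT32_C(1) << 13), i.e. at depth 31 - 13 = 18, so the trace
 * copies subtrees[0..17] from the root descriptor and continues at
 * root.subtrees[18]; if that entry is UINT32_MAX, the lookup reports
 * -ERR_NOT_FOUND.
 */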
/* Checks whether there are sufficient pages available for writing. Otherwise
 * this function attempts to claim more free space from unmapped pages for
 * which newer pages have obsoleted the mapping. Furthermore, we move the
 * user pages that are still mapped, as these should be preserved.
 */
static int free_tail(struct ftl_journal *j)
{
	size_t log2_pages_per_block = j->log2_pages_per_group +
	    j->log2_groups_per_block;
	size_t npages = j->nblocks << log2_pages_per_block;
	size_t dist;

	if (j->head < j->tail)
		dist = j->tail - j->head;
	else
		dist = npages - j->head + j->tail;

	if (dist > (UINT32_C(1) << log2_pages_per_block))
		return 0;

	if (free_block(j, j->tail >> log2_pages_per_block) < 0)
		return -1;

	j->tail += UINT32_C(1) << log2_pages_per_block;

	if (j->tail >= npages)
		j->tail -= npages;

	return 0;
}

/* Prepares the head for writing. If the user page to be written to is not
 * aligned on a block boundary, the block must already have been erased and
 * there is nothing to be done. Otherwise, we free the tail if necessary and
 * erase the block for writing.
 */
static int prepare_head(struct ftl_journal *j)
{
	size_t log2_pages_per_block = j->log2_pages_per_group +
	    j->log2_groups_per_block;

	if (!is_aligned(j->head, log2_pages_per_block))
		return 0;

	if (free_tail(j) < 0)
		return -1;

	return erase_block(j, j->head >> log2_pages_per_block);
}

/* Prepares the head for writing, writes the user page to the currently
 * available user page, and finally writes the page descriptor to the footer
 * of the page group, whereupon the head is incremented to point to the next
 * available user page.
 */
static int ftl_write_upage(struct ftl_journal *j, const uint8_t *page,
    const struct ftl_page_desc *page_desc)
{
	if (prepare_head(j) < 0)
		return -1;

	if (page && flash_write(j->dev, j->head << j->log2_page_size, page,
	    UINT32_C(1) << j->log2_page_size) < 0)
		return -1;

	return ftl_write_page_desc(j, page_desc);
}

int ftl_find(struct ftl_journal *j, uint32_t *page, uint32_t va)
{
	return trace_path(j, NULL, page, va);
}

int ftl_read(struct ftl_journal *j, uint8_t *data, uint32_t va)
{
	int ret;
	uint32_t page;

	if ((ret = ftl_find(j, &page, va)) < 0) {
		if (ret != -ERR_NOT_FOUND)
			return -1;

		/* Unmapped pages read as zeroes. */
		memset(data, 0, UINT32_C(1) << j->log2_page_size);

		return 0;
	}

	return flash_read(j->dev, page << j->log2_page_size, data,
	    UINT32_C(1) << j->log2_page_size);
}

int ftl_write(struct ftl_journal *j, uint32_t va, const uint8_t *data)
{
	struct ftl_page_desc page_desc;
	int ret;

	/* Reject writes beyond the device capacity as well as writes that
	 * are not aligned to a page boundary. */
	if (va >= ftl_get_capacity(j) || !is_aligned(va, j->log2_page_size))
		return -1;

	if ((ret = trace_path(j, &page_desc, NULL, va)) < 0 &&
	    ret != -ERR_NOT_FOUND)
		return -1;

	if (ret == -ERR_NOT_FOUND)
		++j->nused_pages;

	page_desc.nused_pages = j->nused_pages;

	return ftl_write_upage(j, data, &page_desc);
}

int ftl_trim(struct ftl_journal *j, uint32_t va)
{
	struct ftl_page_desc page_desc, alt_page_desc;
	size_t level, i;
	uint32_t alt_page, page;
	int ret;

	if ((ret = trace_path(j, &page_desc, &page, va)) < 0) {
		if (ret == -ERR_NOT_FOUND)
			return 0;

		return ret;
	}

	--j->nused_pages;

	/* Find the deepest subtree of the page being trimmed. */
	for (i = 0; i < 32; ++i) {
		level = 32 - i - 1;

		if ((alt_page = page_desc.subtrees[level]) != UINT32_MAX)
			break;
	}

	/* No subtrees are left: the tree is now empty. */
	if (i == 32) {
		j->root = UINT32_MAX;

		/* TODO: how do we clean the FTL? */

		return 0;
	}

	if (read_page_desc(j, &alt_page_desc, alt_page) < 0)
		return -1;

	/* Replace the trimmed page with the root of its deepest subtree: the
	 * new descriptor takes over the subtree root's mapping and merges the
	 * remaining subtrees. */
	page_desc.va = alt_page_desc.va;
	page_desc.nused_pages = j->nused_pages;
	page_desc.subtrees[level] = UINT32_MAX;

	for (i = level + 1; i < 32; ++i) {
		page_desc.subtrees[i] = alt_page_desc.subtrees[i];
	}

	/* Preserve the content of the subtree root whose mapping the new
	 * descriptor takes over. */
	if (flash_copy(j->dev, j->head << j->log2_page_size,
	    alt_page << j->log2_page_size, 1 << j->log2_page_size) < 0)
		return -1;

	return ftl_write_upage(j, NULL, &page_desc);
}
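/* Worked example (added for illustration): with 16 blocks of 16 pages each
 * (npages = 256), head = 250 and tail = 10 give dist = 256 - 250 + 10 = 16,
 * which is not greater than one block's worth of pages, so free_tail()
 * garbage-collects block 10 >> 4 = 0 and advances the tail to page 26.
 */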
/* Returns the number of used pages with a unique virtual address multiplied
 * by the page size as the size of the used space on the device.
 */
uint32_t ftl_get_size(const struct ftl_journal *j)
{
	return j->nused_pages << j->log2_page_size;
}

/* The capacity of the device is the total number of user pages minus a block
 * worth of user pages that is reserved for garbage collection. Every page
 * group contributes one descriptor page, so those pages are subtracted from
 * the raw block size as well.
 */
uint32_t ftl_get_capacity(const struct ftl_journal *j)
{
	return ((j->nblocks - 1) << j->log2_block_size) -
	    (((j->nblocks - 1) << j->log2_groups_per_block) <<
	    j->log2_page_size);
}
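#ifdef FTL_JOURNAL_EXAMPLE
/* A minimal usage sketch (not part of the original source): it assumes a
 * project-specific way to obtain a struct flash_dev handle, here a
 * hypothetical flash_open(), and shows the intended call order of the API
 * above. Guarded so that it does not affect normal builds.
 */
#include <stdio.h>

int main(void)
{
	struct flash_dev *dev = flash_open();	/* hypothetical helper */
	struct ftl_journal j;
	uint8_t page[4096];			/* matches the 4 KiB default */

	ftl_init_journal(&j, dev);

	/* Resuming fails on a blank device, in which case the journal is
	 * reset and ready for first use. */
	(void)ftl_resume_journal(&j);

	memset(page, 0xab, sizeof page);

	if (ftl_write(&j, 0, page) < 0)		/* va 0 is page-aligned */
		return 1;

	if (ftl_read(&j, page, 0) < 0)
		return 1;

	printf("size: %u bytes, capacity: %u bytes\n",
	    (unsigned)ftl_get_size(&j), (unsigned)ftl_get_capacity(&j));

	return 0;
}
#endif /* FTL_JOURNAL_EXAMPLE */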