/* * Copyright (c) 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009, 2014 * The President and Fellows of Harvard College. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * SFS filesystem * * I/O plumbing. */ #include #include #include #include #include #include #include #include "sfsprivate.h" //////////////////////////////////////////////////////////// // // Basic block-level I/O routines /* * Note: sfs_readblock is used to read the superblock * early in mount, before sfs is fully (or even mostly) * initialized, and so may not use anything from sfs * except sfs_device. 
*/ /* * Read or write a block, retrying I/O errors. */ static int sfs_rwblock(struct sfs_fs *sfs, struct uio *uio) { int result; int tries=0; KASSERT(vfs_biglock_do_i_hold()); DEBUG(DB_SFS, "sfs: %s %llu\n", uio->uio_rw == UIO_READ ? "read" : "write", uio->uio_offset / SFS_BLOCKSIZE); retry: result = DEVOP_IO(sfs->sfs_device, uio); if (result == EINVAL) { /* * This means the sector we requested was out of range, * or the seek address we gave wasn't sector-aligned, * or a couple of other things that are our fault. */ panic("sfs: DEVOP_IO returned EINVAL\n"); } if (result == EIO) { if (tries == 0) { tries++; kprintf("sfs: block %llu I/O error, retrying\n", uio->uio_offset / SFS_BLOCKSIZE); goto retry; } else if (tries < 10) { tries++; goto retry; } else { kprintf("sfs: block %llu I/O error, giving up after " "%d retries\n", uio->uio_offset / SFS_BLOCKSIZE, tries); } } return result; } /* * Read a block. */ int sfs_readblock(struct sfs_fs *sfs, daddr_t block, void *data, size_t len) { struct iovec iov; struct uio ku; KASSERT(len == SFS_BLOCKSIZE); SFSUIO(&iov, &ku, data, block, UIO_READ); return sfs_rwblock(sfs, &ku); } /* * Write a block. */ int sfs_writeblock(struct sfs_fs *sfs, daddr_t block, void *data, size_t len) { struct iovec iov; struct uio ku; KASSERT(len == SFS_BLOCKSIZE); SFSUIO(&iov, &ku, data, block, UIO_WRITE); return sfs_rwblock(sfs, &ku); } //////////////////////////////////////////////////////////// // // File-level I/O /* * Do I/O to a block of a file that doesn't cover the whole block. We * need to read in the original block first, even if we're writing, so * we don't clobber the portion of the block we're not intending to * write over. * * SKIPSTART is the number of bytes to skip past at the beginning of * the sector; LEN is the number of bytes to actually read or write. * UIO is the area to do the I/O into. 
*/
static
int
sfs_partialio(struct sfs_vnode *sv, struct uio *uio,
	      uint32_t skipstart, uint32_t len)
{
	/*
	 * Bounce buffer for partial-sector transfers.
	 *
	 * Note: in real life (and when you've done the fs assignment)
	 * this space would come from the disk buffer cache rather
	 * than a static area.
	 */
	static char iobuf[SFS_BLOCKSIZE];

	struct sfs_fs *sfs = sv->sv_absvn.vn_fs->fs_data;
	/* Missing blocks get allocated if and only if we're writing. */
	bool alloc_missing = (uio->uio_rw == UIO_WRITE);
	daddr_t dblock;
	uint32_t fblock;
	int err;

	KASSERT(skipstart + len <= SFS_BLOCKSIZE);

	/* The static buffer is shared; the big lock must protect it. */
	KASSERT(vfs_biglock_do_i_hold());

	/* Which block of the file are we touching, and where is it on disk? */
	fblock = uio->uio_offset / SFS_BLOCKSIZE;
	err = sfs_bmap(sv, fblock, alloc_missing, &dblock);
	if (err) {
		return err;
	}

	if (dblock != 0) {
		/* Pull in the existing contents of the block. */
		err = sfs_readblock(sfs, dblock, iobuf, sizeof(iobuf));
		if (err) {
			return err;
		}
	}
	else {
		/*
		 * Nothing is mapped here, which can only happen on a
		 * read. Present the hole as zeros.
		 */
		KASSERT(uio->uio_rw == UIO_READ);
		bzero(iobuf, sizeof(iobuf));
	}

	/* Transfer the requested span between the buffer and the uio. */
	err = uiomove(iobuf + skipstart, len, uio);
	if (err) {
		return err;
	}

	if (uio->uio_rw != UIO_WRITE) {
		/* Reads are finished once the data has been copied out. */
		return 0;
	}

	/* Writes must push the modified block back to disk. */
	return sfs_writeblock(sfs, dblock, iobuf, sizeof(iobuf));
}

/*
 * Do I/O (either read or write) of a single whole block.
*/ static int sfs_blockio(struct sfs_vnode *sv, struct uio *uio) { struct sfs_fs *sfs = sv->sv_absvn.vn_fs->fs_data; daddr_t diskblock; uint32_t fileblock; int result; bool doalloc = (uio->uio_rw==UIO_WRITE); off_t saveoff; off_t diskoff; off_t saveres; off_t diskres; /* Get the block number within the file */ fileblock = uio->uio_offset / SFS_BLOCKSIZE; /* Look up the disk block number */ result = sfs_bmap(sv, fileblock, doalloc, &diskblock); if (result) { return result; } if (diskblock == 0) { /* * No block - fill with zeros. * * We must be reading, or sfs_bmap would have * allocated a block for us. */ KASSERT(uio->uio_rw == UIO_READ); return uiomovezeros(SFS_BLOCKSIZE, uio); } /* * Do the I/O directly to the uio region. Save the uio_offset, * and substitute one that makes sense to the device. */ saveoff = uio->uio_offset; diskoff = diskblock * SFS_BLOCKSIZE; uio->uio_offset = diskoff; /* * Temporarily set the residue to be one block size. */ KASSERT(uio->uio_resid >= SFS_BLOCKSIZE); saveres = uio->uio_resid; diskres = SFS_BLOCKSIZE; uio->uio_resid = diskres; result = sfs_rwblock(sfs, uio); /* * Now, restore the original uio_offset and uio_resid and update * them by the amount of I/O done. */ uio->uio_offset = (uio->uio_offset - diskoff) + saveoff; uio->uio_resid = (uio->uio_resid - diskres) + saveres; return result; } /* * Do I/O of a whole region of data, whether or not it's block-aligned. */ int sfs_io(struct sfs_vnode *sv, struct uio *uio) { uint32_t blkoff; uint32_t nblocks, i; int result = 0; uint32_t origresid, extraresid = 0; origresid = uio->uio_resid; /* * If reading, check for EOF. If we can read a partial area, * remember how much extra there was in EXTRARESID so we can * add it back to uio_resid at the end. 
*/ if (uio->uio_rw == UIO_READ) { off_t size = sv->sv_i.sfi_size; off_t endpos = uio->uio_offset + uio->uio_resid; if (uio->uio_offset >= size) { /* At or past EOF - just return */ return 0; } if (endpos > size) { extraresid = endpos - size; KASSERT(uio->uio_resid > extraresid); uio->uio_resid -= extraresid; } } /* * First, do any leading partial block. */ blkoff = uio->uio_offset % SFS_BLOCKSIZE; if (blkoff != 0) { /* Number of bytes at beginning of block to skip */ uint32_t skip = blkoff; /* Number of bytes to read/write after that point */ uint32_t len = SFS_BLOCKSIZE - blkoff; /* ...which might be less than the rest of the block */ if (len > uio->uio_resid) { len = uio->uio_resid; } /* Call sfs_partialio() to do it. */ result = sfs_partialio(sv, uio, skip, len); if (result) { goto out; } } /* If we're done, quit. */ if (uio->uio_resid==0) { goto out; } /* * Now we should be block-aligned. Do the remaining whole blocks. */ KASSERT(uio->uio_offset % SFS_BLOCKSIZE == 0); nblocks = uio->uio_resid / SFS_BLOCKSIZE; for (i=0; iuio_resid < SFS_BLOCKSIZE); if (uio->uio_resid > 0) { result = sfs_partialio(sv, uio, 0, uio->uio_resid); if (result) { goto out; } } out: /* If writing and we did anything, adjust file length */ if (uio->uio_resid != origresid && uio->uio_rw == UIO_WRITE && uio->uio_offset > (off_t)sv->sv_i.sfi_size) { sv->sv_i.sfi_size = uio->uio_offset; sv->sv_dirty = true; } /* Add in any extra amount we couldn't read because of EOF */ uio->uio_resid += extraresid; /* Done */ return result; } //////////////////////////////////////////////////////////// // Metadata I/O /* * This is much the same as sfs_partialio, but intended for use with * metadata (e.g. directory entries). It assumes the objects being * handled are smaller than whole blocks, do not cross block * boundaries, and originate in the kernel. 
* * It is separate from sfs_partialio because, although there is no * such code in this version of SFS, it is often desirable when doing * more advanced things to handle metadata and user data I/O * differently. */ int sfs_metaio(struct sfs_vnode *sv, off_t actualpos, void *data, size_t len, enum uio_rw rw) { struct sfs_fs *sfs = sv->sv_absvn.vn_fs->fs_data; off_t endpos; uint32_t vnblock; uint32_t blockoffset; daddr_t diskblock; bool doalloc; int result; /* * I/O buffer for metadata ops. * * Note: in real life (and when you've done the fs assignment) you * would get space from the disk buffer cache for this, not use a * static area. */ static char metaiobuf[SFS_BLOCKSIZE]; /* We're using a global static buffer; it had better be locked */ KASSERT(vfs_biglock_do_i_hold()); /* Figure out which block of the vnode (directory, whatever) this is */ vnblock = actualpos / SFS_BLOCKSIZE; blockoffset = actualpos % SFS_BLOCKSIZE; /* Get the disk block number */ doalloc = (rw == UIO_WRITE); result = sfs_bmap(sv, vnblock, doalloc, &diskblock); if (result) { return result; } if (diskblock == 0) { /* Should only get block 0 back if doalloc is false */ KASSERT(rw == UIO_READ); /* Sparse file, read as zeros. */ bzero(data, len); return 0; } /* Read the block */ result = sfs_readblock(sfs, diskblock, metaiobuf, sizeof(metaiobuf)); if (result) { return result; } if (rw == UIO_READ) { /* Copy out the selected region */ memcpy(data, metaiobuf + blockoffset, len); } else { /* Update the selected region */ memcpy(metaiobuf + blockoffset, data, len); /* Write the block back */ result = sfs_writeblock(sfs, diskblock, metaiobuf, sizeof(metaiobuf)); if (result) { return result; } /* Update the vnode size if needed */ endpos = actualpos + len; if (endpos > (off_t)sv->sv_i.sfi_size) { sv->sv_i.sfi_size = endpos; sv->sv_dirty = true; } } /* Done */ return 0; }