From 0b3cf046cb5b65ccaf22687e105a4380533c0305 Mon Sep 17 00:00:00 2001 From: behlendo Date: Fri, 7 Mar 2008 23:07:02 +0000 Subject: [PATCH] Add the initial vestigates of vnode support git-svn-id: https://outreach.scidac.gov/svn/spl/trunk@30 7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c --- include/sys/sysmacros.h | 1 + include/sys/vnode.h | 97 +++++++++++++++++++++++ modules/spl/Makefile.in | 1 + modules/spl/spl-vnode.c | 168 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 267 insertions(+) create mode 100644 include/sys/vnode.h create mode 100644 modules/spl/spl-vnode.c diff --git a/include/sys/sysmacros.h b/include/sys/sysmacros.h index 3bc9f7a37..b65a5797c 100644 --- a/include/sys/sysmacros.h +++ b/include/sys/sysmacros.h @@ -131,6 +131,7 @@ extern int highbit(unsigned long i); #define makedevice(maj,min) makedev(maj,min) #define zone_dataset_visible(x, y) (1) #define INGLOBALZONE(z) (1) +#define utsname system_utsname /* XXX - Borrowed from zfs project libsolcompat/include/sys/sysmacros.h */ /* common macros */ diff --git a/include/sys/vnode.h b/include/sys/vnode.h new file mode 100644 index 000000000..9afac4c25 --- /dev/null +++ b/include/sys/vnode.h @@ -0,0 +1,97 @@ +#ifndef _SPL_VNODE_H +#define _SPL_VNODE_H + +#define XVA_MAPSIZE 3 +#define XVA_MAGIC 0x78766174 + +typedef struct vnode { + uint64_t v_size; + int v_fd; + mode_t v_mode; + char *v_path; +} vnode_t; + + +typedef struct xoptattr { + timestruc_t xoa_createtime; /* Create time of file */ + uint8_t xoa_archive; + uint8_t xoa_system; + uint8_t xoa_readonly; + uint8_t xoa_hidden; + uint8_t xoa_nounlink; + uint8_t xoa_immutable; + uint8_t xoa_appendonly; + uint8_t xoa_nodump; + uint8_t xoa_settable; + uint8_t xoa_opaque; + uint8_t xoa_av_quarantined; + uint8_t xoa_av_modified; +} xoptattr_t; + +typedef struct vattr { + uint_t va_mask; /* bit-mask of attributes */ + u_offset_t va_size; /* file size in bytes */ +} vattr_t; + + +typedef struct xvattr { + vattr_t xva_vattr; /* Embedded vattr structure */ 
+ uint32_t xva_magic; /* Magic Number */ + uint32_t xva_mapsize; /* Size of attr bitmap (32-bit words) */ + uint32_t *xva_rtnattrmapp; /* Ptr to xva_rtnattrmap[] */ + uint32_t xva_reqattrmap[XVA_MAPSIZE]; /* Requested attrs */ + uint32_t xva_rtnattrmap[XVA_MAPSIZE]; /* Returned attrs */ + xoptattr_t xva_xoptattrs; /* Optional attributes */ +} xvattr_t; + +typedef struct vsecattr { + uint_t vsa_mask; /* See below */ + int vsa_aclcnt; /* ACL entry count */ + void *vsa_aclentp; /* pointer to ACL entries */ + int vsa_dfaclcnt; /* default ACL entry count */ + void *vsa_dfaclentp; /* pointer to default ACL entries */ + size_t vsa_aclentsz; /* ACE size in bytes of vsa_aclentp */ +} vsecattr_t; + +#define AT_TYPE 0x00001 +#define AT_MODE 0x00002 +// #define AT_UID 0x00004 /* Conflicts with linux/auxvec.h */ +// #define AT_GID 0x00008 /* Conflicts with linux/auxvec.h */ +#define AT_FSID 0x00010 +#define AT_NODEID 0x00020 +#define AT_NLINK 0x00040 +#define AT_SIZE 0x00080 +#define AT_ATIME 0x00100 +#define AT_MTIME 0x00200 +#define AT_CTIME 0x00400 +#define AT_RDEV 0x00800 +#define AT_BLKSIZE 0x01000 +#define AT_NBLOCKS 0x02000 +#define AT_SEQ 0x08000 +#define AT_XVATTR 0x10000 + +#define CRCREAT 0 + +#define VOP_CLOSE(vp, f, c, o, cr, ct) 0 +#define VOP_PUTPAGE(vp, of, sz, fl, cr, ct) 0 +#define VOP_GETATTR(vp, vap, fl, cr, ct) ((vap)->va_size = (vp)->v_size, 0) + +#define VOP_FSYNC(vp, f, cr, ct) fsync((vp)->v_fd) + +#define VN_RELE(vp) vn_close(vp) + +extern int vn_open(char *path, int x1, int oflags, int mode, vnode_t **vpp, + int x2, int x3); +extern int vn_openat(char *path, int x1, int oflags, int mode, vnode_t **vpp, + int x2, int x3, vnode_t *vp, int fd); +extern int vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, + offset_t offset, int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp); +extern void vn_close(vnode_t *vp); + +#define vn_remove(path, x1, x2) remove(path) +#define vn_rename(from, to, seg) rename((from), (to)) +#define vn_is_readonly(vp) 
B_FALSE + +extern vnode_t *rootdir; + +#endif /* _SPL_VNODE_H */ diff --git a/modules/spl/Makefile.in b/modules/spl/Makefile.in index 09e934e4f..667858aa9 100644 --- a/modules/spl/Makefile.in +++ b/modules/spl/Makefile.in @@ -13,6 +13,7 @@ spl-objs += spl-kmem.o spl-objs += spl-thread.o spl-objs += spl-taskq.o spl-objs += spl-rwlock.o +spl-objs += spl-vnode.o spl-objs += spl-generic.o splmodule := spl.ko diff --git a/modules/spl/spl-vnode.c b/modules/spl/spl-vnode.c new file mode 100644 index 000000000..5089f8567 --- /dev/null +++ b/modules/spl/spl-vnode.c @@ -0,0 +1,168 @@ +#include +#include "config.h" + +/* + * XXX: currently borrowed from libsolcompat until this + * can be adapted to the linux kernel interfaces. + */ +#if 0 +/* + * ========================================================================= + * vnode operations + * ========================================================================= + */ +/* + * Note: for the xxxat() versions of these functions, we assume that the + * starting vp is always rootdir (which is true for spa_directory.c, the only + * ZFS consumer of these interfaces). We assert this is true, and then emulate + * them by adding '/' in front of the path. + */ + +/*ARGSUSED*/ +int +vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) +{ + int fd; + vnode_t *vp; + int old_umask; + char realpath[MAXPATHLEN]; + struct stat64 st; + + /* + * If we're accessing a real disk from userland, we need to use + * the character interface to avoid caching. This is particularly + * important if we're trying to look at a real in-kernel storage + * pool from userland, e.g. via zdb, because otherwise we won't + * see the changes occurring under the segmap cache. + * On the other hand, the stupid character device returns zero + * for its size. So -- gag -- we open the block device to get + * its size, and remember it for subsequent VOP_GETATTR(). 
+ */ +#if defined(__sun__) || defined(__sun) + if (strncmp(path, "/dev/", 5) == 0) { +#else + if (0) { +#endif + char *dsk; + fd = open64(path, O_RDONLY); + if (fd == -1) + return (errno); + if (fstat64(fd, &st) == -1) { + close(fd); + return (errno); + } + close(fd); + (void) sprintf(realpath, "%s", path); + dsk = strstr(path, "/dsk/"); + if (dsk != NULL) + (void) sprintf(realpath + (dsk - path) + 1, "r%s", + dsk + 1); + } else { + (void) sprintf(realpath, "%s", path); + if (!(flags & FCREAT) && stat64(realpath, &st) == -1) + return (errno); + } + +#ifdef __linux__ + if (!(flags & FCREAT) && S_ISBLK(st.st_mode)) { + flags |= O_DIRECT; + if (flags & FWRITE) + flags |= O_EXCL; + } +#endif + + if (flags & FCREAT) + old_umask = umask(0); + + /* + * The construct 'flags - FREAD' conveniently maps combinations of + * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR. + */ + fd = open64(realpath, flags - FREAD, mode); + + if (flags & FCREAT) + (void) umask(old_umask); + + if (fd == -1) + return (errno); + + if (fstat64(fd, &st) == -1) { + close(fd); + return (errno); + } + + (void) fcntl(fd, F_SETFD, FD_CLOEXEC); + + *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL); + + vp->v_fd = fd; + vp->v_size = st.st_size; + vp->v_mode = st.st_mode; + vp->v_path = spa_strdup(path); + + return (0); +} + +/*ARGSUSED*/ +int +vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, + int x3, vnode_t *startvp, int fd) +{ + char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL); + int ret; + + ASSERT(startvp == rootdir); + (void) sprintf(realpath, "/%s", path); + + /* fd ignored for now, need if want to simulate nbmand support */ + ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3); + + umem_free(realpath, strlen(path) + 2); + + return (ret); +} + +/*ARGSUSED*/ +int +vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset, + int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp) +{ + ssize_t iolen, split; + + if (uio == 
UIO_READ) { + iolen = pread64(vp->v_fd, addr, len, offset); + } else { + /* + * To simulate partial disk writes, we split writes into two + * system calls so that the process can be killed in between. + */ +#ifdef ZFS_DEBUG + if (!S_ISBLK(vp->v_mode) && !S_ISCHR(vp->v_mode)) { + split = (len > 0 ? rand() % len : 0); + iolen = pwrite64(vp->v_fd, addr, split, offset); + iolen += pwrite64(vp->v_fd, (char *)addr + split, + len - split, offset + split); + } else + iolen = pwrite64(vp->v_fd, addr, len, offset); +#else + iolen = pwrite64(vp->v_fd, addr, len, offset); +#endif + } + + if (iolen < 0) + return (errno); + if (residp) + *residp = len - iolen; + else if (iolen != len) + return (EIO); + return (0); +} + +void +vn_close(vnode_t *vp) +{ + close(vp->v_fd); + spa_strfree(vp->v_path); + umem_free(vp, sizeof (vnode_t)); +} +#endif -- 2.40.0