Skip to content

Commit

Permalink
vfs: use 'unsigned long' accesses for dcache name comparison and hashing
Browse files Browse the repository at this point in the history
Ok, this is hacky, and only works on little-endian machines with good
unaligned handling.  And even then only with CONFIG_DEBUG_PAGEALLOC
disabled, since it can access up to 7 bytes after the pathname.

But it runs like a bat out of hell.

Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
torvalds committed Mar 9, 2012
1 parent 9f8050c commit bfcfaa7
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 0 deletions.
1 change: 1 addition & 0 deletions arch/x86/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ config X86
select CLKEVT_I8253
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select GENERIC_IOMAP
select DCACHE_WORD_ACCESS if !DEBUG_PAGEALLOC

config INSTRUCTION_DECODER
def_bool (KPROBES || PERF_EVENTS)
Expand Down
4 changes: 4 additions & 0 deletions fs/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@

menu "File systems"

# Use unaligned word dcache accesses
config DCACHE_WORD_ACCESS
bool

if BLOCK

source "fs/ext2/Kconfig"
Expand Down
23 changes: 23 additions & 0 deletions fs/dcache.c
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,28 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
/*
 * Compare two names, each given as a (pointer, length) pair.
 * The returns visible here yield 0 for a match and non-zero for a mismatch.
 */
static inline int dentry_cmp(const unsigned char *cs, size_t scount,
const unsigned char *ct, size_t tcount)
{
#ifdef CONFIG_DCACHE_WORD_ACCESS
/* Word-at-a-time path: compares sizeof(unsigned long) bytes per step */
unsigned long a,b,mask;

/* Names of different length can never match */
if (unlikely(scount != tcount))
return 1;

for (;;) {
/*
 * Both words are loaded before the remaining length is checked, so
 * the last load can read up to sizeof(unsigned long)-1 bytes beyond
 * the end of each name. Those extra bytes are masked off below.
 */
a = *(unsigned long *)cs;
b = *(unsigned long *)ct;
/* Fewer than a full word left: finish with the masked compare */
if (tcount < sizeof(unsigned long))
break;
if (unlikely(a != b))
return 1;
cs += sizeof(unsigned long);
ct += sizeof(unsigned long);
tcount -= sizeof(unsigned long);
/* Length was an exact multiple of the word size and all words matched */
if (!tcount)
return 0;
}
/*
 * Keep only the low tcount bytes of the difference. With a
 * little-endian load these are the bytes that still belong to the
 * name; for tcount == 0 the mask is 0 and the result is "equal".
 */
mask = ~(~0ul << tcount*8);
return unlikely(!!((a ^ b) & mask));
#else
if (scount != tcount)
return 1;

Expand All @@ -155,6 +177,7 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount,
tcount--;
} while (tcount);
return 0;
#endif
}

static void __d_free(struct rcu_head *head)
Expand Down
122 changes: 122 additions & 0 deletions fs/namei.c
Original file line number Diff line number Diff line change
Expand Up @@ -1374,6 +1374,126 @@ static inline int can_lookup(struct inode *inode)
return 1;
}

/*
* We can do the critical dentry name comparison and hashing
* operations one word at a time, but we are limited to:
*
* - Architectures with fast unaligned word accesses. We could
* do a "get_unaligned()" if this helps and is sufficiently
* fast.
*
* - Little-endian machines (so that we can generate the mask
* of low bytes efficiently). Again, we *could* do a byte
* swapping load on big-endian architectures if that is not
* expensive enough to make the optimization worthless.
*
* - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we
* do not trap on the (extremely unlikely) case of a page
* crossing operation).
*
* - Furthermore, we need an efficient 64-bit compile for the
* 64-bit case in order to generate the "number of bytes in
* the final mask". Again, that could be replaced with an
* efficient population count instruction or similar.
*/
#ifdef CONFIG_DCACHE_WORD_ACCESS

#ifdef CONFIG_64BIT

/*
 * Number of bytes covered by a byte mask.
 *
 * For a mask whose k low bytes are 0xff and whose remaining bytes are
 * zero (k = 0..7), the result is k.
 *
 * Jan Achrenius on G+: microoptimized version of the simpler
 * "(mask & ONEBYTES) * ONEBYTES >> 56" that works for the
 * bytemasks without having to mask them first.
 */
static inline long count_masked_bytes(unsigned long mask)
{
	/* The count ends up in the most significant byte of the product */
	return (mask * 0x0001020304050608) >> 56;
}

/*
 * Fold a word-sized hash into an 'unsigned int': the bits above the
 * width of an int are added onto the low bits, and the sum is then
 * truncated by the return type.
 */
static inline unsigned int fold_hash(unsigned long hash)
{
	const unsigned long upper = hash >> (8*sizeof(int));

	return hash + upper;
}

#else /* 32-bit case */

/*
 * Number of bytes covered by a 32-bit byte mask
 * (0x000000, 0x0000ff, 0x00ffff or 0xffffff -> 0, 1, 2 or 3).
 *
 * Carl Chatfield / Jan Achrenius G+ version for 32-bit.
 */
static inline long count_masked_bytes(long mask)
{
	/* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
	const long biased = 0x0ff0001 + mask;

	/* And-ing with the mask turns the 1 of the all-zero case into 0 */
	return (biased >> 23) & mask;
}

/* 32-bit case: no folding, the hash value is passed through unchanged */
#define fold_hash(x) (x)

#endif

/*
 * Hash the first 'len' bytes of 'name', one 'unsigned long' at a time.
 *
 * A whole word is loaded on every pass before the remaining length is
 * examined, so the last load may read up to sizeof(unsigned long)-1
 * bytes beyond the name; only the low 'len' bytes of that word are
 * mixed into the hash.
 *
 * Returns the accumulated hash after fold_hash().
 */
unsigned int full_name_hash(const unsigned char *name, unsigned int len)
{
	unsigned long word;
	unsigned long acc = 0;

	for (;;) {
		word = *(unsigned long *)name;
		acc *= 9;
		if (len < sizeof(unsigned long)) {
			/* Partial tail: keep only the bytes that belong to the name */
			acc += word & ~(~0ul << len*8);
			break;
		}
		acc += word;
		name += sizeof(unsigned long);
		len -= sizeof(unsigned long);
		/* Length was an exact multiple of the word size */
		if (!len)
			break;
	}
	return fold_hash(acc);
}
EXPORT_SYMBOL(full_name_hash);

/*
 * Byte patterns replicated across a whole 'unsigned long'.
 *
 * They are derived from the word size rather than written as 64-bit
 * literals, so a 32-bit build gets constants that fit in an
 * 'unsigned long'. On a 64-bit build the values are the same as
 * 0x0101010101010101, 0x2f2f2f2f2f2f2f2f and 0x8080808080808080.
 */
#define ONEBYTES	(~0ul / 0xff)		/* 0x01 in every byte */
#define SLASHBYTES	(ONEBYTES * 0x2f)	/* '/' in every byte */
#define HIGHBITS	(ONEBYTES * 0x80)	/* 0x80 in every byte */

/*
 * Return a word with the high bit set in the first (lowest) byte of 'a'
 * that is zero, or 0 if 'a' contains no zero byte.
 *
 * Only the lowest set bit of the result is exact: the borrow out of a
 * zero byte can also flag a 0x01 byte sitting above it.
 */
static inline unsigned long has_zero(unsigned long a)
{
	const unsigned long borrowed = a - ONEBYTES;
	const unsigned long inverted = ~a;

	return borrowed & inverted & HIGHBITS;
}

/*
 * Hash one path component and measure it in the same pass.
 *
 * The component ends at the first NUL or '/' byte. The hash is stored
 * through 'hashp' after fold_hash(), and the component length in bytes
 * is returned. The name is read a whole 'unsigned long' at a time.
 */
static inline unsigned long hash_name(const char *name, unsigned int *hashp)
{
	unsigned long word = 0, acc = 0, offset = 0, stop;

	for (;;) {
		/* Mix in the previous full word (zero on the first pass) */
		acc = (acc + word) * 9;
		word = *(unsigned long *)(name + offset);
		/* Do we have any NUL or '/' bytes in this word? */
		stop = has_zero(word) | has_zero(word ^ SLASHBYTES);
		if (stop)
			break;
		offset += sizeof(unsigned long);
	}

	/* Turn the marker into a mask of the bytes *below* the terminator */
	stop = (stop - 1) & ~stop;
	stop >>= 7;
	acc += word & stop;
	*hashp = fold_hash(acc);

	return offset + count_masked_bytes(stop);
}

#else

unsigned int full_name_hash(const unsigned char *name, unsigned int len)
{
unsigned long hash = init_name_hash();
Expand Down Expand Up @@ -1402,6 +1522,8 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp)
return len;
}

#endif

/*
* Name resolution.
* This is the basic name resolution function, turning a pathname into
Expand Down

0 comments on commit bfcfaa7

Please sign in to comment.