Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ustring hash improvements #1

Closed
wants to merge 9 commits into from
22 changes: 12 additions & 10 deletions src/include/strutil.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,21 +109,23 @@ std::string DLLPUBLIC timeintervalformat (double secs, int digits=1);
bool DLLPUBLIC get_rest_arguments (const std::string &str, std::string &base,
std::map<std::string, std::string> &result);

/// Beautiful little string hasher from Aho, Sethi, and Ullman's 1986
/// Dragon compiler book. This depends on sizeof(unsigned int) == 4.
inline unsigned int
/// Hash a string without pre-known length. We use the Jenkins
/// one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function),
/// which seems to be a good speed/quality/requirements compromise.
inline size_t
strhash (const char *s)
{
if (!s) return 0;
unsigned int h=0, g;
if (! s) return 0;
size_t h = 0;
while (*s) {
h = (h<<4) + (unsigned char)(*s);
if ((g = (h & 0xf0000000))) {
h ^= g>>24;
h ^= g;
}
h += (unsigned char)(*s);
h += h << 10;
h ^= h >> 6;
++s;
}
h += h << 3;
h ^= h >> 11;
h += h << 15;
return h;
}

Expand Down
201 changes: 96 additions & 105 deletions src/libtexture/imagecache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,19 +280,8 @@ ImageCacheFile::ImageCacheFile (ImageCacheImpl &imagecache,
m_imagecache(imagecache), m_duplicate(NULL)
{
m_filename = imagecache.resolve_filename (m_filename.string());
recursive_lock_guard guard (m_input_mutex);
open (thread_info);
#if 0
static int x=0;
if ((++x % 16) == 0) {
std::cerr << "Opened " << filename ;
std::cerr << ", now mem is "
<< Strutil::memformat (Sysutil::memory_used())
<< " virtual, resident = "
<< Strutil::memformat (Sysutil::memory_used(true))
<< "\n";
}
#endif
// N.B. the file is not opened, the ImageInput is NULL. This is
// reflected by the fact that m_validspec is false.
}


Expand Down Expand Up @@ -358,7 +347,6 @@ ImageCacheFile::open (ImageCachePerThreadInfo *thread_info)
// From here on, we know that we've opened this file for the very
// first time. So read all the subimages, fill out all the fields
// of the ImageCacheFile.
m_validspec = true;
m_subimages.clear ();
int nsubimages = 0;
do {
Expand Down Expand Up @@ -542,6 +530,7 @@ ImageCacheFile::open (ImageCachePerThreadInfo *thread_info)
m_mod_time = boost::filesystem::last_write_time (m_filename.string());

DASSERT (! m_broken);
m_validspec = true;
return true;
}

Expand Down Expand Up @@ -894,6 +883,11 @@ ImageCacheImpl::find_file (ustring filename,
ImageCachePerThreadInfo *thread_info)
{
ImageCacheStatistics &stats (thread_info->m_stats);
ImageCacheFile *tf = NULL;
bool newfile = false;

// Part 1 - make sure the ImageCacheFile entry exists and is in the
// file cache. For this part, we need to lock the file cache.
{
#if IMAGECACHE_TIME_STATS
Timer timer;
Expand All @@ -913,110 +907,110 @@ ImageCacheImpl::find_file (ustring filename,
#endif

if (found != m_files.end()) {
ImageCacheFile *tf = found->second.get();
// if this is a duplicate texture, switch to the canonical copy
if (tf->duplicate())
tf = tf->duplicate();
tf->use ();
// If the ICF exists but has no spec, it's because it was
// broken and then subsequently invalidated. Downstream we
// assume that the spec exists even for an invalid file, so try
// opening it again, it'll either succeed or re-mark as broken.
if (! tf->validspec()) {
recursive_lock_guard guard (tf->m_input_mutex);
tf->open (thread_info);
DASSERT (tf->m_broken || tf->validspec());
}
filemutex_holder (NULL);
return tf;
tf = found->second.get();
} else {
// No such entry in the file cache. Add it, but don't open yet.
tf = new ImageCacheFile (*this, thread_info, filename);
check_max_files (thread_info);
safe_insert (m_files, filename, tf, m_file_sweep);
newfile = true;
}
filemutex_holder (NULL);
}

// We don't already have this file in the table. Try to
// open it and create a record.

// Yes, we're creating an ImageCacheFile with no lock -- this is to
// prevent all the other threads from blocking because of our
// expensive disk read. We believe this is safe, since underneath
// the ImageCacheFile will lock itself for the open and there are
// no other non-threadsafe side effects.
Timer timer;
ImageCacheFile *tf = new ImageCacheFile (*this, thread_info, filename);
double createtime = timer();
stats.fileio_time += createtime;
stats.fileopen_time += createtime;
tf->iotime() += createtime;

DASSERT (m_filemutex_holder != thread_info); // we better not already hold
ic_write_lock writeguard (m_filemutex);
filemutex_holder (thread_info);
filemutex_holder (NULL);
#if IMAGECACHE_TIME_STATS
double donelocking = timer();
stats.file_locking_time += donelocking-createtime;
stats.find_file_time += timer()-donelocking;
#endif
}
DASSERT (m_filemutex_holder != thread_info); // we better not hold

// Another thread may have created and added the file earlier while
// we were unlocked.
if (m_files.find (filename) != m_files.end()) {
delete tf; // Don't need that one after all
tf = m_files[filename].get();
// if this is a duplicate texture, switch to the canonical copy
if (tf->duplicate())
tf = tf->duplicate();
tf->use();
filemutex_holder (NULL);
return tf;
}

// What if we've opened another file, with a different name, but the
// SAME pixels? It can happen! Bad user, bad! But let's save them
// from their own foolishness.
if (tf->fingerprint ()) {
// std::cerr << filename << " hash=" << tf->fingerprint() << "\n";
FilenameMap::iterator fingerfound = m_fingerprints.find (tf->fingerprint());
if (fingerfound == m_fingerprints.end()) {
// Not already in the fingerprint list -- add it
m_fingerprints[tf->fingerprint()] = tf;
} else {
// Already in fingerprints -- mark this one as a duplicate, but
// ONLY if we don't have other reasons not to consider them true
// duplicates (the fingerprint only considers source image
// pixel values).
// FIXME -- be sure to add extra tests here if more metadata
// have significance later!
ImageCacheFile *dup = fingerfound->second.get();
if (tf->m_swrap == dup->m_swrap && tf->m_twrap == dup->m_twrap &&
tf->m_rwrap == dup->m_rwrap &&
tf->m_datatype == dup->m_datatype &&
tf->m_cubelayout == dup->m_cubelayout &&
tf->m_y_up == dup->m_y_up) {
tf->duplicate (dup);
tf->close ();
// std::cerr << " duplicates "
// << fingerfound->second.get()->filename() << "\n";
// Part 2 - open tihe file if it's never been opened before.
// No need to have the file cache locked for this, though we lock
// the tf->m_input_mutex if we need to open it.
if (! tf->validspec()) {
Timer timer;
recursive_lock_guard guard (tf->m_input_mutex);
if (! tf->validspec()) {
tf->open (thread_info);
DASSERT (tf->m_broken || tf->validspec());
double createtime = timer();
stats.fileio_time += createtime;
stats.fileopen_time += createtime;
tf->iotime() += createtime;

// What if we've opened another file, with a different name,
// but the SAME pixels? It can happen! Bad user, bad! But
// let's save them from their own foolishness.
bool was_duplicate = false;
if (tf->fingerprint ()) {
// std::cerr << filename << " hash=" << tf->fingerprint() << "\n";
ImageCacheFile *dup = find_fingerprint (tf->fingerprint(), tf);
if (dup != tf) {
// Already in fingerprints -- mark this one as a
// duplicate, but ONLY if we don't have other
// reasons not to consider them true duplicates (the
// fingerprint only considers source image pixel values.
// FIXME -- be sure to add extra tests
// here if more metadata have significance later!
if (tf->m_swrap == dup->m_swrap && tf->m_twrap == dup->m_twrap &&
tf->m_rwrap == dup->m_rwrap &&
tf->m_datatype == dup->m_datatype &&
tf->m_cubelayout == dup->m_cubelayout &&
tf->m_y_up == dup->m_y_up) {
tf->duplicate (dup);
tf->close ();
was_duplicate = true;
// std::cerr << " duplicates "
// << fingerfound->second.get()->filename() << "\n";
}
}
}
#if IMAGECACHE_TIME_STATS
stats.find_file_time += timer()-createtime;
#endif
}
}

check_max_files (thread_info);
safe_insert (m_files, filename, tf, m_file_sweep);
// if this is a duplicate texture, switch to the canonical copy
if (tf->duplicate())
tf = tf->duplicate();
else
++stats.unique_files;
tf->use ();

#if IMAGECACHE_TIME_STATS
stats.find_file_time += timer()-donelocking;
#endif
else {
// not a duplicate -- if opening the first time, count as unique
if (newfile)
++stats.unique_files;
}

filemutex_holder (NULL);
tf->use (); // Mark it as recently used
return tf;
}



ImageCacheFile *
ImageCacheImpl::find_fingerprint (ustring finger, ImageCacheFile *file)
{
spin_lock lock (m_fingerprints_mutex);
FilenameMap::iterator found = m_fingerprints.find (finger);
if (found == m_fingerprints.end()) {
// Not already in the fingerprint list -- add it
m_fingerprints[finger] = file;
} else {
// In the list -- return its mapping
file = found->second.get();
}
return file;
}



void
ImageCacheImpl::clear_fingerprints ()
{
spin_lock lock (m_fingerprints_mutex);
m_fingerprints.clear ();
}



void
ImageCacheImpl::check_max_files (ImageCachePerThreadInfo *thread_info)
{
Expand Down Expand Up @@ -2226,10 +2220,7 @@ ImageCacheImpl::invalidate_all (bool force)
invalidate (f);
}

{
ic_write_lock fileguard (m_filemutex);
m_fingerprints.clear (); // Clear fingerprint database
}
clear_fingerprints ();

// Mark the per-thread microcaches as invalid
lock_guard lock (m_perthread_info_mutex);
Expand Down
49 changes: 29 additions & 20 deletions src/libtexture/imagecache_pvt.h
Original file line number Diff line number Diff line change
Expand Up @@ -211,22 +211,20 @@ class ImageCacheFile : public RefCnt {
SubimageInfo &subimageinfo (int subimage) { return m_subimages[subimage]; }

const LevelInfo &levelinfo (int subimage, int miplevel) const {
DASSERT (validspec());
DASSERT ((int)m_subimages.size() > subimage);
DASSERT ((int)m_subimages[subimage].levels.size() > miplevel);
return m_subimages[subimage].levels[miplevel];
}
LevelInfo &levelinfo (int subimage, int miplevel) {
DASSERT (validspec());
DASSERT ((int)m_subimages.size() > subimage);
DASSERT ((int)m_subimages[subimage].levels.size() > miplevel);
return m_subimages[subimage].levels[miplevel];
}

/// Do we currently have a valid spec?
bool validspec () const {
DASSERT ((m_subimages.size() > 0) == m_validspec &&
"subimages array size doesn't match validity");
DASSERT ((m_validspec == false || m_subimages.size() > 0) &&
"validspec is true, but subimages are empty");
return m_validspec;
}

Expand Down Expand Up @@ -262,7 +260,7 @@ class ImageCacheFile : public RefCnt {
size_t m_timesopened; ///< Separate times we opened this file
double m_iotime; ///< I/O time for this file
bool m_mipused; ///< MIP level >0 accessed
bool m_validspec; ///< If false, reread spec upon open
volatile bool m_validspec; ///< If false, reread spec upon open
ImageCacheImpl &m_imagecache; ///< Back pointer for ImageCache
mutable recursive_mutex m_input_mutex; ///< Mutex protecting the ImageInput
std::time_t m_mod_time; ///< Time file was last updated
Expand Down Expand Up @@ -871,6 +869,22 @@ class ImageCacheImpl : public ImageCache {
std::string onefile_stat_line (const ImageCacheFileRef &file,
int i, bool includestats=true) const;

/// Search the fingerprint table for the given fingerprint. If it
/// doesn't already have an entry in the fingerprint map, then add
/// one, mapping the it to file. In either case, return the file it
/// maps to (the caller can tell if it was newly added to the table
/// by whether the return value is the same as the passed-in file).
/// All the while, properly maintain thread safety on the
/// fingerprint table.
ImageCacheFile *find_fingerprint (ustring finger, ImageCacheFile *file);

/// Clear the fingerprint list, thread-safe.
void clear_fingerprints ();

typedef spin_mutex ic_mutex;
typedef spin_lock ic_read_lock;
typedef spin_lock ic_write_lock;

thread_specific_ptr< ImageCachePerThreadInfo > m_perthread_info;
std::vector<ImageCachePerThreadInfo *> m_all_perthread_info;
static mutex m_perthread_info_mutex; ///< Thread safety for perthread
Expand All @@ -886,33 +900,28 @@ class ImageCacheImpl : public ImageCache {
int m_failure_retries; ///< Times to re-try disk failures
Imath::M44f m_Mw2c; ///< world-to-"common" matrix
Imath::M44f m_Mc2w; ///< common-to-world matrix

mutable ic_mutex m_filemutex; ///< Thread safety for file cache
FilenameMap m_files; ///< Map file names to ImageCacheFile's
FilenameMap::iterator m_file_sweep; ///< Sweeper for "clock" paging algorithm
ImageCachePerThreadInfo *m_filemutex_holder; // debugging

spin_mutex m_fingerprints_mutex; ///< Protect m_fingerprints
FilenameMap m_fingerprints; ///< Map fingerprints to files

mutable ic_mutex m_tilemutex; ///< Thread safety for tile cache
TileCache m_tilecache; ///< Our in-memory tile cache
TileCache::iterator m_tile_sweep; ///< Sweeper for "clock" paging algorithm
ImageCachePerThreadInfo *m_tilemutex_holder; // debugging

atomic_ll m_mem_used; ///< Memory being used for tiles
int m_statslevel; ///< Statistics level

/// Saved error string, per-thread
///
mutable thread_specific_ptr< std::string > m_errormessage;
#if 0
// This approach uses regular shared mutexes to protect the caches.
typedef shared_mutex ic_mutex;
typedef shared_lock ic_read_lock;
typedef unique_lock ic_write_lock;
#else
// This alternate approach uses spin locks.
typedef spin_mutex ic_mutex;
typedef spin_lock ic_read_lock;
typedef spin_lock ic_write_lock;
#endif
mutable ic_mutex m_filemutex; ///< Thread safety for file cache
mutable ic_mutex m_tilemutex; ///< Thread safety for tile cache

// For debugging -- keep track of who holds the tile and file mutex
ImageCachePerThreadInfo *m_tilemutex_holder;
ImageCachePerThreadInfo *m_filemutex_holder;

private:
// Statistics that are really hard to track per-thread
Expand Down
Loading