symspell6.h

#ifndef SYMSPELL6_H
#define SYMSPELL6_H

#include <stdint.h>
#include <vector>
#include <functional>
#include <string>
#include <cstring>
#include <exception>
#include <limits>
#include <stdio.h>
#include <algorithm>
#include <queue>
#include <mutex>
#include <tuple>
#include <iostream>
#include <fstream>
#include <sstream>

#define _SILENCE_STDEXT_HASH_DEPRECATION_WARNINGS 1

#ifdef _MSC_VER
#   include <windows/port.h>
 //typedef __int8 int8_t;
typedef unsigned __int8 u_int8_t;

typedef __int32 int32_t;
typedef unsigned __int32 u_int32_t;

typedef __int64 int64_t;
typedef unsigned __int64 u_int64_t;

char *strndup(const char *s1, size_t n)
{
    char *copy = (char*)malloc(n + 1);
    memcpy(copy, s1, n);
    copy[n] = 0;
    return copy;
};
#else
#   define _strdup strdup
#endif

char *dupstring(const char *string)
{
  size_t len = strlen(string);
  char *new_string = new char[len+1]();
  memcpy(new_string,string,len+1);
  return new_string;
}

#ifdef USE_SPARSEPP
/* #   define SPP_USE_SPP_ALLOC 1 */
#   define CUSTOM_MAP sparse_hash_map
#   define CUSTOM_SET sparse_hash_set
#   include <sparsepp/spp.h>

using spp::sparse_hash_map;
using spp::sparse_hash_set;
#else
#   define CUSTOM_MAP unordered_map
#   define CUSTOM_SET unordered_set
#   include <unordered_map>
#   include <unordered_set>
#endif

using namespace std;

namespace symspell {
#define defaultMaxEditDistance 2
#define defaultPrefixLength 7
#define defaultCountThreshold 1
#define defaultInitialCapacity 16
#define defaultCompactLevel 5
#define mini(a, b, c) (min(a, min(b, c)))

    namespace {
	int32_t findCharLocation(const char * text, char ch)
	{
	    const char* finded = strchr(text, ch);
	    if (finded == nullptr)
		return -1;

	    return finded - text + 1;
	}
    }

    struct Hash64 {
	size_t operator()(uint64_t k) const { return (k ^ 14695981039346656037ULL) * 1099511628211ULL; }
    };

    struct comp_c_string {
	bool operator()(const char *s1, const char *s2) const {
	    return (s1 == s2) || (s1 && s2 && strcmp(s1, s2) == 0);
	}
    };

    struct hash_c_string {
	void hash_combine(size_t& seed, const char& v)
	{
	    seed ^= v + 0x9e3779b9 + (seed << 6) + (seed >> 2);
	}

	std::size_t operator() (const char* p) const
	{
	    size_t hash = 0;
	    for (; *p; ++p)
		hash ^= *p + 0x9e3779b9 + (hash << 6) + (hash >> 2);
	    return hash;
	}
    };

    /*
 * Copied from https://github.com/PierreBoyeau/levenshtein_distance
 * ########## BEGIN ##########
 */

    int levenshtein_dist(char const* word1, char const* word2) {
	///
	///  Please use lower-case strings
	/// word1 : first word
	/// word2 : second word
	/// getPath : bool. If True, sequence of operations to do to go from
	///	      word1 to word2
	///
	int size1 = strlen(word1) + 1, size2 = strlen(word2) + 1;
	int suppr_dist, insert_dist, subs_dist;
	int* dist = new int[(size1)*size2];

	for (int i = 0; i < size1; ++i)
	    dist[size2*i] = i;
	for (int j = 0; j < size2; ++j)
	    dist[j] = j;
	for (int i = 1; i < size1; ++i) {
	    for (int j = 1; j < size2; ++j) {
		suppr_dist = dist[size2*(i - 1) + j] + 1;
		insert_dist = dist[size2*i + j - 1] + 1;
		subs_dist = dist[size2*(i - 1) + j - 1];
		if (word1[i - 1] != word2[j - 1]) {  // word indexes are implemented differently.
		    subs_dist += 1;
		}
		dist[size2*i + j] = mini(suppr_dist, insert_dist, subs_dist);
	    }
	}

	// --------------------------------------------------------
	int res = dist[size1 * size2 - 1];
	delete[] dist;
	return(res);
    }

    int dl_dist(char const* word1, char const* word2) {
	/// Damerau-Levenshtein distance
	///  Please use lower-case strings
	/// word1 : first word
	/// word2 : second word
	///
	int size1 = strlen(word1) + 1, size2 = strlen(word2) + 1;
	int suppr_dist, insert_dist, subs_dist, val;
	int* dist = new int[size1*size2];

	for (int i = 0; i < size1; ++i)
	    dist[size2*i] = i;
	for (int j = 0; j < size2; ++j)
	    dist[j] = j;
	for (int i = 1; i < size1; ++i) {
	    for (int j = 1; j < size2; ++j) {
		suppr_dist = dist[size2*(i - 1) + j] + 1;
		insert_dist = dist[size2*i + j - 1] + 1;
		subs_dist = dist[size2*(i - 1) + j - 1];
		if (word1[i - 1] != word2[j - 1])  // word indexes are implemented differently.
		    subs_dist += 1;
		val = mini(suppr_dist, insert_dist, subs_dist);
		if (((i >= 2) && (j >= 2)) && ((word1[i - 1] == word2[j - 2]) && (word1[i - 2] == word2[j - 1])))
		    val = min(dist[size2*(i - 2) + j - 2] + 1, val);
		dist[size2*i + j] = val;
	    }
	}

	int res = dist[size1*size2 - 1];
	delete[] dist;
	return(res);
    }

    /*
	 * ########## END ##########
	 */

    class SuggestionStage;
    template<typename T> class ChunkArray;
    enum class Verbosity
    {
	/// <summary>Top suggestion with the highest term frequency of the suggestions of smallest edit distance found.</summary>
	Top,
	/// <summary>All suggestions of smallest edit distance found, suggestions ordered by term frequency.</summary>
	Closest,
	/// <summary>All suggestions within maxEditDistance, suggestions ordered by edit distance
	/// , then by term frequency (slower, no early termination).</summary>
	All
    };

    class EditDistance {
    public:
	/// <summary>Wrapper for third party edit distance algorithms.</summary>

	/// <summary>Supported edit distance algorithms.</summary>
	enum class DistanceAlgorithm {
	    /// <summary>Levenshtein algorithm.</summary>
	    Levenshtein,
	    /// <summary>Damerau optimal string alignment algorithm.</summary>
	    DamerauOSA
	};

	EditDistance(DistanceAlgorithm algorithm) {
	    this->algorithm = algorithm;
	    switch (algorithm) {
	    case DistanceAlgorithm::DamerauOSA: this->distanceComparer = dl_dist; break;
	    case DistanceAlgorithm::Levenshtein: this->distanceComparer = levenshtein_dist; break;
	    default: throw std::invalid_argument("Unknown distance algorithm.");
	    }
	}

	int Compare(char const* string1, char const* string2, int maxDistance) {
	    return this->distanceComparer(string1, string2); // todo: max distance
	}

    private:
	DistanceAlgorithm algorithm;
	int(*distanceComparer)(char const*, char const*);
    };

    class SuggestItem
    {
    public:
	/// <summary>The suggested correctly spelled word.</summary>
	const char* term;
	/// <summary>Edit distance between searched for word and suggestion.</summary>
	u_int8_t distance = 0;
	/// <summary>Frequency of suggestion in the dictionary (a measure of how common the word is).</summary>
	int64_t count = 0;

	SuggestItem() { }
	SuggestItem(const symspell::SuggestItem & p)
	{
	    this->count = p.count;
	    this->distance = p.distance;
	    this->term = p.term;
	}

	SuggestItem(const char* term, int32_t distance, int64_t count)
	{
	    this->term = term;
	    this->distance = distance;
	    this->count = count;
	}

	~SuggestItem()
	{
	    delete[] term;
	}

	bool CompareTo(SuggestItem const& other)
	{
	    // order by distance ascending, then by frequency count descending
	    if (this->distance == other.distance)
	    {
		if (other.count == this->count)
		    return false;
		else if (other.count > this->count)
		    return true;
		return false;
	    }

	    if (other.distance > this->distance)
		return false;
	    return true;
	}

	bool operator == (const SuggestItem &ref) const
	{
	    return strcmp(this->term, ref.term) == 0;
	}

	std::size_t GetHashCode()
	{
	    return hash_c_string{}(term);
	}

	SuggestItem& ShallowCopy()
	{
	    SuggestItem item;
	    item.count = this->count;
	    item.distance = this->distance;
	    item.term = this->term;

	    return item;
	}
    };

    class WordSegmentationItem
    {
    public:
	const char* segmentedString{ nullptr };
	const char* correctedString{ nullptr };
	u_int8_t distanceSum = 0;
	double probabilityLogSum = 0;

	WordSegmentationItem() { }
	WordSegmentationItem(const symspell::WordSegmentationItem & p)
	{
	    this->segmentedString = p.segmentedString;
	    this->correctedString = p.correctedString;
	    this->distanceSum = p.distanceSum;
	    this->probabilityLogSum = p.probabilityLogSum;
	}

	WordSegmentationItem& operator=(const WordSegmentationItem&) { return *this; }
	WordSegmentationItem& operator=(WordSegmentationItem&&) { return *this; }

	void set(const char* pSegmentedString, const char* pCorrectedString, u_int8_t pDistanceSum, double pProbabilityLogSum)
	{
	    this->segmentedString = pSegmentedString;
	    this->correctedString = pCorrectedString;
	    this->distanceSum = pDistanceSum;
	    this->probabilityLogSum = pProbabilityLogSum;
	}

	~WordSegmentationItem()
	{
	    delete[] segmentedString;
	    delete[] correctedString;
	}
    };

    template<typename T>
    class ChunkArray
    {
    public:
	vector<vector<T>> Values; //todo: use pointer array
	size_t Count;

	ChunkArray()
	{
	    Count = 0;
	}

	void Reserve(size_t initialCapacity)
	{
	    size_t chunks = (initialCapacity + ChunkSize - 1) / ChunkSize;
	    Values.resize(chunks);
	    for (size_t i = 0; i < chunks; ++i)
	    {
		Values[i].resize(ChunkSize);
	    }
	}

	size_t Add(T & value)
	{
	    if (Count == Capacity())
	    {
		Values.push_back(vector<T>());
		Values[Values.size() - 1].resize(ChunkSize);
	    }

	    int row = Row(Count);
	    int col = Col(Count);

	    Values[row][col] = value;
	    return Count++;
	}

	void Clear()
	{
	    Count = 0;
	}

	T& at(size_t index)
	{
	    return Values[Row(index)][Col(index)];
	}

	void set(size_t index, T &value)
	{
	    Values[Row(index)][Col(index)] = value;
	}

    private:
	const int32_t ChunkSize = 4096; //this must be a power of 2, otherwise can't optimize Row and Col functions
	const int32_t DivShift = 12; // number of bits to shift right to do division by ChunkSize (the bit position of ChunkSize)
	int Row(uint32_t index) { return index >> DivShift; } // same as index / ChunkSize
	int32_t Col(uint32_t index) { return index & (ChunkSize - 1); } //same as index % ChunkSize
	int32_t Capacity() { return Values.size() * ChunkSize; }
    };


    class SuggestionStage
    {
    public:
	class Node;
	class Entry;

	CUSTOM_MAP<size_t, Entry*> Deletes;
	CUSTOM_MAP<size_t, Entry*>::iterator DeletesEnd;

	ChunkArray<Node> Nodes;
	SuggestionStage(size_t initialCapacity)
	{
#ifdef USE_SPARSEPP
	    Deletes.resize(initialCapacity);
#else
	    Deletes.reserve(initialCapacity);
#endif
	    Nodes.Reserve(initialCapacity * 2);
	}
	size_t DeleteCount() { return Deletes.size(); }
	size_t NodeCount() { return Nodes.Count; }
	void Clear()
	{
	    Deletes.clear();
	    Nodes.Clear();

	    DeletesEnd = Deletes.end();
	}

	void Add(size_t deleteHash, const char* suggestion)
	{
	    auto deletesFinded = Deletes.find(deleteHash);
	    Entry* entry = nullptr;
	    if (deletesFinded == DeletesEnd) {
		entry = new Entry;
		entry->count = 0;
		entry->first = -1;
	    }
	    else
		entry = deletesFinded->second;

	    int64_t next = entry->first;
	    ++entry->count;
	    entry->first = Nodes.Count;
	    Deletes[deleteHash] = entry;
	    Node item;
	    item.next = next;
	    item.suggestion = suggestion;
	    Nodes.Add(item);
	}

	void CommitTo(CUSTOM_MAP<size_t, vector<const char*>> & permanentDeletes)
	{
	    auto permanentDeletesEnd = permanentDeletes.end();
	    for (auto it = Deletes.begin(); it != DeletesEnd; ++it)
	    {
		auto permanentDeletesFinded = permanentDeletes.find(it->first);
		vector<const char*>* suggestions = nullptr;
		size_t i;
		if (permanentDeletesFinded != permanentDeletesEnd)
		{
		    suggestions = &permanentDeletesFinded->second;
		    i = suggestions->size();
		    vector<const char*> newSuggestions;
		    newSuggestions.reserve(suggestions->size() + it->second->count);

		    std::copy(suggestions->begin(), suggestions->end(), back_inserter(newSuggestions));
		    permanentDeletes[it->first] = newSuggestions;
		}
		else
		{
		    i = 0;
		    suggestions = new vector<const char*>;
		    int32_t count = it->second->count;
		    suggestions->reserve(count);
		    permanentDeletes[it->first] = *suggestions;
		}

		int next = it->second->first;
		while (next >= 0)
		{
		    auto node = Nodes.at(next);
		    (*suggestions)[i] = node.suggestion;
		    next = node.next;
		    ++i;
		}
	    }
	}

    public:
	class Node
	{
	public:
	    const char* suggestion;
	    int64_t next;
	};

	class Entry
	{
	public:
	    int64_t count;
	    int64_t first;
	};
    };


    class SymSpell {
    public:
	SymSpell(int32_t initialCapacity = defaultInitialCapacity, int32_t maxDictionaryEditDistance = defaultMaxEditDistance, int32_t prefixLength = defaultPrefixLength, int32_t countThreshold = defaultCountThreshold, int32_t compactLevel = defaultCompactLevel)
	{
	    if (initialCapacity < 0) throw std::invalid_argument("initialCapacity");
	    if (maxDictionaryEditDistance < 0) throw std::invalid_argument("maxDictionaryEditDistance");
	    if (prefixLength < 1 || prefixLength <= maxDictionaryEditDistance) throw std::invalid_argument("prefixLength");
	    if (countThreshold < 0) throw std::invalid_argument("countThreshold");
	    if (compactLevel > 16) throw std::invalid_argument("compactLevel");

#ifdef USE_SPARSEPP
	    this->words.resize(initialCapacity);
	    this->deletes.resize(initialCapacity);
#else
	    this->words.reserve(initialCapacity);
	    this->deletes.reserve(initialCapacity);
#endif
	    this->initialCapacity = initialCapacity;
	    this->distanceComparer = new EditDistance(this->distanceAlgorithm);

	    this->maxDictionaryEditDistance = maxDictionaryEditDistance;
	    this->prefixLength = prefixLength;
	    this->countThreshold = countThreshold;
	    if (compactLevel > 16) compactLevel = 16;
	    this->compactMask = ((std::numeric_limits<uint32_t>::max)() >> (3 + compactLevel)) << 2;
	    this->deletesEnd = this->deletes.end();
	    this->wordsEnd = this->words.end();
	    this->belowThresholdWordsEnd = this->belowThresholdWords.end();
	    this->candidates.reserve(32);
	    //this->maxDictionaryWordLength = 0;
	}

	~SymSpell()
	{
	    vector<const char*>::iterator vecEnd;
	    auto deletesEnd = this->deletes.end();
	    for (auto it = this->deletes.begin(); it != deletesEnd; ++it)
	    {
		vecEnd = it->second.end();
		for (auto vecIt = it->second.begin(); vecIt != vecEnd; ++vecIt)
		{
		    delete[] * vecIt;
		}
	    }

	    delete this->distanceComparer;
	}

	bool CreateDictionaryEntry(const char * key, int64_t count, SuggestionStage * staging = nullptr)
	{
	    int keyLen = strlen(key);
	    if (count <= 0)
	    {
		if (this->countThreshold > 0) return false; // no point doing anything if count is zero, as it can't change anything
		count = 0;
	    }

	    int64_t countPrevious = -1;
	    auto belowThresholdWordsFinded = belowThresholdWords.find(key);
	    auto wordsFinded = words.find(key);

	    // look first in below threshold words, update count, and allow promotion to correct spelling word if count reaches threshold
	    // threshold must be >1 for there to be the possibility of low threshold words
	    if (countThreshold > 1 && belowThresholdWordsFinded != belowThresholdWordsEnd)
	    {
		countPrevious = belowThresholdWordsFinded->second;
		// calculate new count for below threshold word
		count = ((std::numeric_limits<int64_t>::max)() - countPrevious > count) ? countPrevious + count : (std::numeric_limits<int64_t>::max)();
		// has reached threshold - remove from below threshold collection (it will be added to correct words below)
		if (count >= countThreshold)
		{
		    belowThresholdWords.erase(key);
		    belowThresholdWordsEnd = belowThresholdWords.end();
		}
		else
		{
		    belowThresholdWords[key] = count;
		    belowThresholdWordsEnd = belowThresholdWords.end();
		    return false;
		}
	    }
	    else if (wordsFinded != wordsEnd)
	    {
		countPrevious = wordsFinded->second;
		count = ((std::numeric_limits<int64_t>::max)() - countPrevious > count) ? countPrevious + count : (std::numeric_limits<int64_t>::max)();
		words[key] = count;
		return false;
	    }
	    else if (count < CountThreshold())
	    {
		belowThresholdWords[key] = count;
		belowThresholdWordsEnd = belowThresholdWords.end();
		return false;
	    }

	    words[key] = count;

	    wordsEnd = words.end();

	    if (keyLen > maxDictionaryWordLength)
		maxDictionaryWordLength = keyLen;

	    EditsPrefix(key, edits);

	    if (staging != nullptr)
	    {
		auto editsEnd = edits.end();
		for (auto it = edits.begin(); it != editsEnd; ++it)
		{
		    staging->Add(*it, dupstring(key));
		}
	    }
	    else
	    {
		auto editsEnd = edits.end();
		for (auto it = edits.begin(); it != editsEnd; ++it)
		{
		    size_t deleteHash = *it;
		    auto deletesFinded = deletes.find(deleteHash);
		    if (deletesFinded != deletesEnd)
		    {
			char* tmp = new char[keyLen + 1];
			std::memcpy(tmp, key, keyLen);
			tmp[keyLen] = '\0';

			//delete[] deletes[deleteHash][deletesFinded->second.size() - 1];
			deletes[deleteHash].push_back(tmp);
			deletesEnd = deletes.end();
		    }
		    else
		    {
			char* tmp = new char[keyLen + 1];
			std::memcpy(tmp, key, keyLen);
			tmp[keyLen] = '\0';

			deletes[deleteHash] = vector<const char*>();
			//deletes[deleteHash].resize(1);
			deletes[deleteHash].push_back(tmp);
			deletesEnd = deletes.end();
		    }
		}
	    }

	    edits.clear();
	    return true;
	}

	void EditsPrefix(const char* key, CUSTOM_SET<size_t>& hashSet)
	{
	    size_t len = strlen(key);
	    char* tmp = nullptr;
	    /*if (len <= maxDictionaryEditDistance) //todo fix
		hashSet.insert("");*/

	    if (len > prefixLength)
	    {
		tmp = new char[prefixLength + 1];
		std::memcpy(tmp, key, prefixLength);
		tmp[prefixLength] = '\0';
	    }
	    else
	    {
		tmp = new char[len + 1];
		std::memcpy(tmp, key, len);
		tmp[len] = '\0';
	    }

	    hashSet.insert(stringHash(tmp));
	    Edits(tmp, 0, hashSet);
	}

	void Edits(const char * word, int32_t editDistance, CUSTOM_SET<size_t> & deleteWords)
	{
	    auto deleteWordsEnd = deleteWords.end();
	    ++editDistance;
	    size_t wordLen = strlen(word);
	    if (wordLen > 1)
	    {
		for (size_t i = 0; i < wordLen; ++i)
		{
		    char* tmp = new char[wordLen];
		    std::memcpy(tmp, word, i);
		    std::memcpy(tmp + i, word + i + 1, wordLen - 1 - i);
		    tmp[wordLen - 1] = '\0';

		    if (deleteWords.insert(stringHash(tmp)).second)
		    {
			//recursion, if maximum edit distance not yet reached
			if (editDistance < maxDictionaryEditDistance && (wordLen - 1) > 1)
			    Edits(tmp, editDistance, deleteWords);
		    }
		    else {
			delete[] tmp;
		    }
		}
	    }
	}

	void PurgeBelowThresholdWords()
	{
	    belowThresholdWords.clear();
	    belowThresholdWordsEnd = belowThresholdWords.end();
	}

	void CommitStaged(SuggestionStage staging)
	{
	    staging.CommitTo(deletes);
	}

	void Lookup(const char * input, Verbosity verbosity, vector<std::unique_ptr<symspell::SuggestItem>> & items)
	{
	    this->Lookup(input, verbosity, this->maxDictionaryEditDistance, false, items);
	}

	void Lookup(const char * input, Verbosity verbosity, int32_t maxEditDistance, vector<std::unique_ptr<symspell::SuggestItem>> & items)
	{
	    this->Lookup(input, verbosity, maxEditDistance, false, items);
	}

	void Lookup(const char * input, Verbosity verbosity, int32_t maxEditDistance, bool includeUnknown, vector<std::unique_ptr<symspell::SuggestItem>> & suggestions)
	{
	    mtx.lock();
	    suggestions.clear();
	    edits.clear();
	    candidates.reserve(32);

	    //verbosity=Top: the suggestion with the highest term frequency of the suggestions of smallest edit distance found
	    //verbosity=Closest: all suggestions of smallest edit distance found, the suggestions are ordered by term frequency
	    //verbosity=All: all suggestions <= maxEditDistance, the suggestions are ordered by edit distance, then by term frequency (slower, no early termination)

	    // maxEditDistance used in Lookup can't be bigger than the maxDictionaryEditDistance
	    // used to construct the underlying dictionary structure.
	    if (maxEditDistance > MaxDictionaryEditDistance())	throw std::invalid_argument("maxEditDistance");
	    int64_t suggestionCount = 0;
	    size_t suggestionsLen = 0;
	    auto wordsFinded = words.find(input);
	    int inputLen = strlen(input);
	    // early exit - word is too big to possibly match any words
	    if (inputLen - maxEditDistance > maxDictionaryWordLength)
	    {
		if (includeUnknown && (suggestionsLen == 0))
		{
		    std::unique_ptr<SuggestItem> unq(new SuggestItem(dupstring(input), maxEditDistance + 1, 0));
		    suggestions.push_back(std::move(unq));
		}

		mtx.unlock();
		return;
	    }

	    // quick look for exact match

	    if (wordsFinded != wordsEnd)
	    {
		suggestionCount = wordsFinded->second;

		{
		    std::unique_ptr<SuggestItem> unq(new SuggestItem(dupstring(input), 0, suggestionCount));
		    suggestions.push_back(std::move(unq));
		}

		++suggestionsLen;
		// early exit - return exact match, unless caller wants all matches
		if (verbosity != Verbosity::All)
		{
		    if (includeUnknown && (suggestionsLen == 0))
		    {
			std::unique_ptr<SuggestItem> unq(new SuggestItem(dupstring(input), maxEditDistance + 1, 0));
			suggestions.push_back(std::move(unq));
			++suggestionsLen;
		    }

		    mtx.unlock();
		    return;
		}
	    }

	    //early termination, if we only want to check if word in dictionary or get its frequency e.g. for word segmentation
	    if (maxEditDistance == 0)
	    {
		if (includeUnknown && (suggestionsLen == 0))
		{
		    std::unique_ptr<SuggestItem> unq(new SuggestItem(dupstring(input), maxEditDistance + 1, 0));
		    suggestions.push_back(std::move(unq));

		    ++suggestionsLen;
		}

		mtx.unlock();
		return;
	    }


	    hashset2.insert(stringHash(input));

	    int maxEditDistance2 = maxEditDistance;
	    int candidatePointer = 0;

	    //add original prefix
	    int inputPrefixLen = inputLen;
	    if (inputPrefixLen > prefixLength)
	    {
		inputPrefixLen = prefixLength;
		candidates.push_back(strndup(input, inputPrefixLen));
	    }
	    else
	    {
		candidates.push_back(dupstring(input));
	    }

	    size_t candidatesLen = 1; // candidates.size();
	    while (candidatePointer < candidatesLen)
	    {
		const char* candidate = candidates[candidatePointer++];
		int candidateLen = strlen(candidate);
		int lengthDiff = inputPrefixLen - candidateLen;

		//save some time - early termination
		//if canddate distance is already higher than suggestion distance, than there are no better suggestions to be expected
		if (lengthDiff > maxEditDistance2)
		{
		    // skip to next candidate if Verbosity.All, look no further if Verbosity.Top or Closest
		    // (candidates are ordered by delete distance, so none are closer than current)
		    if (verbosity == Verbosity::All) continue;
		    break;
		}

		auto deletesFinded = deletes.find(stringHash(candidate));
		vector<const char*>* dictSuggestions = nullptr;

		//read candidate entry from dictionary
		if (deletesFinded != deletesEnd)
		{
		    dictSuggestions = &deletesFinded->second;
		    size_t dictSuggestionsLen = dictSuggestions->size();
		    //iterate through suggestions (to other correct dictionary items) of delete item and add them to suggestion list
		    for (int i = 0; i < dictSuggestionsLen; ++i)
		    {
			const char* suggestion = dictSuggestions->at(i);
			int suggestionLen = strlen(suggestion);
			if (strcmp(suggestion, input) == 0) continue;
			if ((abs(suggestionLen - inputLen) > maxEditDistance2) // input and sugg lengths diff > allowed/current best distance
			    || (suggestionLen < candidateLen) // sugg must be for a different delete string, in same bin only because of hash collision
			    || (suggestionLen == candidateLen && strcmp(suggestion, candidate) != 0)) // if sugg len = delete len, then it either equals delete or is in same bin only because of hash collision
			    continue;
			auto suggPrefixLen = min(suggestionLen, prefixLength);
			if (suggPrefixLen > inputPrefixLen && (suggPrefixLen - candidateLen) > maxEditDistance2) continue;

			//True Damerau-Levenshtein Edit Distance: adjust distance, if both distances>0
			//We allow simultaneous edits (deletes) of maxEditDistance on on both the dictionary and the input term.
			//For replaces and adjacent transposes the resulting edit distance stays <= maxEditDistance.
			//For inserts and deletes the resulting edit distance might exceed maxEditDistance.
			//To prevent suggestions of a higher edit distance, we need to calculate the resulting edit distance, if there are simultaneous edits on both sides.
			//Example: (bank==bnak and bank==bink, but bank!=kanb and bank!=xban and bank!=baxn for maxEditDistance=1)
			//Two deletes on each side of a pair makes them all equal, but the first two pairs have edit distance=1, the others edit distance=2.
			int distance = 0;
			int _min = 0;
			if (candidateLen == 0)
			{
			    //suggestions which have no common chars with input (inputLen<=maxEditDistance && suggestionLen<=maxEditDistance)
			    distance = max(inputLen, suggestionLen);
			    if (distance > maxEditDistance2 || !hashset2.insert(stringHash(suggestion)).second)
				continue;
			}
			else if (suggestionLen == 1)
			{
			    if (findCharLocation(input, suggestion[0]) < 0) distance = inputLen; else distance = inputLen - 1;
			    distance = max(inputLen, suggestionLen);
			    if (distance > maxEditDistance2 || !hashset2.insert(stringHash(suggestion)).second)
				continue;
			}
			else
			    if ((prefixLength - maxEditDistance == candidateLen)
				&& (((_min = min(inputLen, suggestionLen) - prefixLength) > 1)
				    && (std::strncmp(input, suggestion, max(inputLen + 1 - _min, suggestionLen + 1 - _min)) != 0) /*(input.substr(inputLen + 1 - _min) != suggestion.substr(suggestionLen + 1 - _min))*/)
				|| ((_min > 0) && (input[inputLen - _min] != suggestion[suggestionLen - _min])
				    && ((input[inputLen - _min - 1] != suggestion[suggestionLen - _min])
					|| (input[inputLen - _min] != suggestion[suggestionLen - _min - 1]))))
			    {
				continue;
			    }
			    else
			    {
				if ((verbosity != Verbosity::All && !DeleteInSuggestionPrefix(candidate, candidateLen, suggestion, suggestionLen)) ||
				    !hashset2.insert(stringHash(suggestion)).second) continue;

				distance = distanceComparer->Compare(input, suggestion, maxEditDistance2);
				if (distance < 0) continue;
			    }

			if (distance <= maxEditDistance2)
			{
			    suggestionCount = words[suggestion];

			    std::unique_ptr<SuggestItem> si(new SuggestItem(dupstring(suggestion), distance, suggestionCount));

			    if (suggestionsLen > 0)
			    {
				switch (verbosity)
				{
				case Verbosity::Closest:
				{
				    //we will calculate DamLev distance only to the smallest found distance so far
				    if (distance < maxEditDistance2)
				    {
					suggestions.clear();
					suggestionsLen = 0;
				    }
				    break;
				}
				case Verbosity::Top:
				{
				    if (distance < maxEditDistance2 || suggestionCount > suggestions[0]->count)
				    {
					maxEditDistance2 = distance;
					suggestions[0] = std::move(si);
				    }
				    continue;
				}
				case Verbosity::All:
				{
				    break;
				}
				}
			    }
			    if (verbosity != Verbosity::All) maxEditDistance2 = distance;
			    suggestions.push_back(std::move(si));
			    ++suggestionsLen;
			}
		    }//end foreach
		}//end if

		//add edits
		//derive edits (deletes) from candidate (input) and add them to candidates list
		//this is a recursive process until the maximum edit distance has been reached
		if ((lengthDiff < maxEditDistance) && (candidateLen <= prefixLength))
		{
		    //save some time
		    //do not create edits with edit distance smaller than suggestions already found
		    if (verbosity != Verbosity::All && lengthDiff >= maxEditDistance2) continue;

		    for (int i = 0; i < candidateLen; ++i)
		    {
			char* tmp = new char[candidateLen];
			std::memcpy(tmp, candidate, i);
			std::memcpy(tmp + i, candidate + i + 1, candidateLen - 1 - i);
			tmp[candidateLen - 1] = '\0';

			if (hashset1.insert(stringHash(tmp)).second)
			{
			    candidates.push_back(tmp);
			    ++candidatesLen;
			}
			else
			    delete[] tmp;
		    }
		}
	    }//end while

	    //sort by ascending edit distance, then by descending word frequency
	    if (suggestionsLen > 1)
		std::sort(suggestions.begin(), suggestions.end(), [](std::unique_ptr<symspell::SuggestItem> &l, std::unique_ptr<symspell::SuggestItem> & r)
	    {
		return r->CompareTo(*l);
	    });


	    //cleaning

	    //std::cout << hashset2.size() << std::endl;

	    auto candidatesEnd = candidates.end();
	    for (auto it = candidates.begin(); it != candidatesEnd; ++it)
		delete[] * it;


	    candidates.clear();
	    hashset1.clear();
	    hashset2.clear();

	    mtx.unlock();

	}//end if

	bool LoadDictionary(char* corpus, int termIndex, int countIndex)
	{
	    ifstream stream;
	    stream.open(corpus);
	    if (!stream.is_open())
		return false;

	    char a, b, c;
	    a = stream.get();
	    b = stream.get();
	    c = stream.get();
	    if (a != (char)0xEF || b != (char)0xBB || c != (char)0xBF) {
		stream.seekg(0);
	    }

	    SuggestionStage staging(16384);

	    string line;
	    while (getline(stream, line))
	    {
		vector<const char*> lineParts;
		std::stringstream ss(line);
		std::string token;
		while (std::getline(ss, token, ' ')) {
		    size_t len = token.size();
		    char* tmp = new char[len + 1];
		    std::memcpy(tmp, token.c_str(), len);
		    tmp[len] = '\0';
		    lineParts.push_back(tmp);
		}

		if (lineParts.size() >= 2)
		{
		    int64_t count = stoll(lineParts[countIndex]);
		    CreateDictionaryEntry(lineParts[termIndex], count/*, &staging*/);
		}

		auto linePartsEnd = lineParts.end();
		for (auto it = lineParts.begin(); it != linePartsEnd; ++it)
		    delete[] * it;
	    }

	    stream.close();


	    //CommitStaged(staging);
	    return true;
	}

	void rempaceSpaces(char* source)
	{
	    char* i = source;
	    char* j = source;

	    do
	    {
		*i = *j;
		if (*i != ' ')
		    ++i;
	    } while (*j++ != 0);
	}

	shared_ptr<WordSegmentationItem> WordSegmentation(const char* input)
	{
	    return WordSegmentation(input, this->maxDictionaryEditDistance, this->maxDictionaryWordLength);
	}

	shared_ptr<WordSegmentationItem> WordSegmentation(const char* input, size_t maxEditDistance)
	{
	    return WordSegmentation(input, maxEditDistance, this->maxDictionaryWordLength);
	}

	shared_ptr<WordSegmentationItem> WordSegmentation(const char* input, size_t maxEditDistance, size_t maxSegmentationWordLength)
	{
	    size_t inputLen = strlen(input);
	    int arraySize = min(maxSegmentationWordLength, strlen(input));
	    std::vector<shared_ptr<WordSegmentationItem>> compositions;
	    compositions.resize(arraySize);

	    for (size_t i = 0; i < arraySize; ++i)
	    {
		std::shared_ptr<WordSegmentationItem> unq(new WordSegmentationItem());
		compositions[i] = std::move(unq);
	    }

	    int circularIndex = -1;

	    for (int j = 0; j < inputLen; ++j)
	    {
		//inner loop (row): all possible part lengths (from start position): part can't be bigger than longest word in dictionary (other than long unknown word)
		int imax = min(inputLen - j, maxSegmentationWordLength);
		for (int i = 1; i <= imax; ++i)
		{
		    char* part = new char[i + 1];
		    std::memcpy(part, input + j, i);
		    part[i] = '\0';

		    int separatorLength = 0;
		    int topEd = 0;
		    double topProbabilityLog = 0;
		    char* topResult = nullptr;

		    if (isspace(part[0]))
		    {
			size_t partLen = strlen(part);
			char* tmp = new char[partLen];
			std::memcpy(tmp, part + 1, partLen - 1);
			tmp[(i - j)] = '\0';

			delete[] part;
			part = tmp;
		    }
		    else
		    {
			//add ed+1: space did not exist, had to be inserted
			separatorLength = 1;
		    }

		    //remove space from part1, add number of removed spaces to topEd
		    topEd += strlen(part);
		    //remove space
		    rempaceSpaces(part);
		    //add number of removed spaces to ed
		    topEd -= strlen(part);
		    vector<std::unique_ptr<symspell::SuggestItem>> results;
		    Lookup(part, symspell::Verbosity::Top, maxEditDistance, results);
		    if (results.size() > 0)
		    {
			size_t termLen = strlen(results[0]->term);
			topResult = new char[termLen + 1];
			std::memcpy(topResult, results[0]->term, termLen);
			topResult[termLen] = '\0';

			topEd += results[0]->distance;
			//Naive Bayes Rule
			//we assume the word probabilities of two words to be independent
			//therefore the resulting probability of the word combination is the product of the two word probabilities

			//instead of computing the product of probabilities we are computing the sum of the logarithm of probabilities
			//because the probabilities of words are about 10^-10, the product of many such small numbers could exceed (underflow) the floating number range and become zero
			//log(ab)=log(a)+log(b)
			topProbabilityLog = (double)log10((double)results[0]->count / (double)N);
		    }
		    else
		    {
			delete[] topResult;
			topResult = part;
			//default, if word not found
			//otherwise long input text would win as long unknown word (with ed=edmax+1 ), although there there should many spaces inserted
			topEd += strlen(part);
			topProbabilityLog = (double)log10(10.0 / (N * pow(10.0, strlen(part))));
		    }

		    int destinationIndex = ((i + circularIndex) % arraySize);

		    //set values in first loop
		    if (j == 0)
		    {
			compositions[destinationIndex]->set(dupstring(part), dupstring(topResult), topEd, topProbabilityLog);
		    }
		    else if ((i == maxSegmentationWordLength)
			//replace values if better probabilityLogSum, if same edit distance OR one space difference
			|| (((compositions[circularIndex]->distanceSum + topEd == compositions[destinationIndex]->distanceSum) || (compositions[circularIndex]->distanceSum + separatorLength + topEd == compositions[destinationIndex]->distanceSum)) && (compositions[destinationIndex]->probabilityLogSum < compositions[circularIndex]->probabilityLogSum + topProbabilityLog))
			//replace values if smaller edit distance
			|| (compositions[circularIndex]->distanceSum + separatorLength + topEd < compositions[destinationIndex]->distanceSum))
		    {
			const char* segmented = compositions[circularIndex]->segmentedString;
			const char* corrected = compositions[circularIndex]->correctedString;

			size_t segmentedLen = strlen(segmented);
			size_t correctedLen = strlen(corrected);
			size_t partLen = strlen(part);
			size_t topResultLen = strlen(topResult);

			char* segmentedTmp = new char[segmentedLen + partLen + 2];
			char* correctedTmp = new char[correctedLen + topResultLen + 2];

			std::memcpy(segmentedTmp, segmented, segmentedLen);
			std::memcpy(segmentedTmp + segmentedLen, " ", 1);
			std::memcpy(segmentedTmp + segmentedLen + 1, part, partLen);
			segmentedTmp[segmentedLen + partLen + 1] = '\0';

			std::memcpy(correctedTmp, corrected, correctedLen);
			std::memcpy(correctedTmp + correctedLen, " ", 1);
			std::memcpy(correctedTmp + correctedLen + 1, topResult, topResultLen);
			correctedTmp[correctedLen + topResultLen + 1] = '\0';

			if (strlen(compositions[destinationIndex]->segmentedString) > 0)
			    delete[] compositions[destinationIndex]->segmentedString;

			if (strlen(compositions[destinationIndex]->correctedString) > 0)
			    delete[] compositions[destinationIndex]->correctedString;

			compositions[destinationIndex]->set(segmentedTmp,
			    correctedTmp,
			    compositions[circularIndex]->distanceSum + separatorLength + topEd,
			    compositions[circularIndex]->probabilityLogSum + topProbabilityLog);
		    }
		}
		++circularIndex; if (circularIndex == arraySize) circularIndex = 0;
	    }
	    return compositions[circularIndex];
	}

	/// <summary>Maximum edit distance for dictionary precalculation.</summary>
	size_t MaxDictionaryEditDistance() { return this->maxDictionaryEditDistance; }

	/// <summary>Length of prefix, from which deletes are generated.</summary>
	size_t PrefixLength() { return this->prefixLength; }

	/// <summary>Length of longest word in the dictionary.</summary>
	size_t MaxLength() { return this->maxDictionaryWordLength; }

	/// <summary>Count threshold for a word to be considered a valid word for spelling correction.</summary>
	int64_t CountThreshold() { return this->countThreshold; }

	/// <summary>Number of unique words in the dictionary.</summary>
	size_t WordCount() { return this->words.size(); }

	/// <summary>Number of word prefixes and intermediate word deletes encoded in the dictionary.</summary>
	size_t EntryCount() { return this->deletes.size(); }

    private:
	int32_t initialCapacity;
	int32_t maxDictionaryEditDistance;
	int32_t prefixLength; //prefix length  5..7
	int64_t countThreshold; //a treshold might be specifid, when a term occurs so frequently in the corpus that it is considered a valid word for spelling correction
	uint32_t compactMask;
	EditDistance::DistanceAlgorithm distanceAlgorithm = EditDistance::DistanceAlgorithm::DamerauOSA;
	size_t maxDictionaryWordLength; //maximum dictionary term length
	std::mutex mtx;

	vector<const char*> candidates;

	EditDistance* distanceComparer{ nullptr };
	CUSTOM_SET<size_t> edits;
	CUSTOM_SET<size_t> hashset1; //TODO: use CUSTOM_SET<size_t> hashset1;
	CUSTOM_SET<size_t> hashset2; //TODO: use CUSTOM_SET<size_t> hashset1;
	CUSTOM_SET<size_t>::iterator hashset2End;  //TODO: use CUSTOM_SET<size_t>::iterator hashset2End; 
	hash_c_string stringHash;
	long N = 1024908267229;

	CUSTOM_MAP<size_t, vector<const char*>> deletes;
	CUSTOM_MAP<size_t, vector<const char*>>::iterator deletesEnd;

	// Dictionary of unique correct spelling words, and the frequency count for each word.
	CUSTOM_MAP<const char*, int64_t, hash_c_string, comp_c_string> words;
	CUSTOM_MAP<const char*, int64_t, hash_c_string, comp_c_string>::iterator wordsEnd;

	// Dictionary of unique words that are below the count threshold for being considered correct spellings.
	CUSTOM_MAP<const char*, int64_t, hash_c_string, comp_c_string> belowThresholdWords;
	CUSTOM_MAP<const char*, int64_t, hash_c_string, comp_c_string>::iterator belowThresholdWordsEnd;

	bool DeleteInSuggestionPrefix(char const* del, int deleteLen, char const* suggestion, int suggestionLen)
	{
	    if (deleteLen == 0) return true;
	    if (prefixLength < suggestionLen) suggestionLen = prefixLength;
	    int j = 0;
	    for (int i = 0; i < deleteLen; ++i)
	    {
		char delChar = del[i];
		while (j < suggestionLen && delChar != suggestion[j]) ++j;
		if (j == suggestionLen) return false;
	    }
	    return true;
	}
    };
}

#endif // SYMSPELL6_H