diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
new file mode 100644
index 0000000..42c5028
--- /dev/null
+++ b/.github/workflows/build.yaml
@@ -0,0 +1,53 @@
+name: Build & Test Package
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+
+jobs:
+  linux-node10:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-node@v1
+        with:
+          node-version: '10.x'
+          registry-url: 'https://npm.pkg.github.com'
+      - run: npm install
+      - run: npm test
+
+  linux-node12:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-node@v1
+        with:
+          node-version: '12.x'
+          registry-url: 'https://npm.pkg.github.com'
+      - run: npm install
+      - run: npm test
+
+  windows-node10:
+    runs-on: windows-2016
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-node@v1
+        with:
+          node-version: '10.x'
+          registry-url: 'https://npm.pkg.github.com'
+      - run: npm install
+      - run: npm test
+
+  windows-node12:
+    runs-on: windows-2016
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-node@v1
+        with:
+          node-version: '12.x'
+          registry-url: 'https://npm.pkg.github.com'
+      - run: npm install
+      - run: npm test
\ No newline at end of file
diff --git a/.github/workflows/publish-release.yaml b/.github/workflows/publish-release.yaml
new file mode 100644
index 0000000..c32675b
--- /dev/null
+++ b/.github/workflows/publish-release.yaml
@@ -0,0 +1,17 @@
+name: Publish Package
+on:
+  release:
+    types: [created]
+jobs:
+  publish:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-node@v1
+        with:
+          node-version: '10.x'
+          registry-url: 'https://npm.pkg.github.com'
+      - run: npm install
+      - run: npm publish
+        env:
+          NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e84652d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+.vscode/
+.idea/
+*.iml
+node_modules/
+build/
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..29ebfa5
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,661 @@
+                    GNU AFFERO GENERAL PUBLIC LICENSE
+                       Version 3, 19 November 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU Affero General Public License is a free, copyleft license for
+software and other kinds of works, specifically designed to ensure
+cooperation with the community in the case of network server software.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+our General Public Licenses are intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  Developers that use our General Public Licenses protect your rights
+with two steps: (1) assert copyright on the software, and (2) offer
+you this License which gives you legal permission to copy, distribute
+and/or modify the software.
+
+  A secondary benefit of defending all users' freedom is that
+improvements made in alternate versions of the program, if they
+receive widespread use, become available for other developers to
+incorporate.  Many developers of free software are heartened and
+encouraged by the resulting cooperation.  However, in the case of
+software used on network servers, this result may fail to come about.
+The GNU General Public License permits making a modified version and
+letting the public access it on a server without ever releasing its
+source code to the public.
+
+  The GNU Affero General Public License is designed specifically to
+ensure that, in such cases, the modified source code becomes available
+to the community.  It requires the operator of a network server to
+provide the source code of the modified version running there to the
+users of that server.  Therefore, public use of a modified version, on
+a publicly accessible server, gives the public access to the source
+code of the modified version.
+
+  An older license, called the Affero General Public License and
+published by Affero, was designed to accomplish similar goals.  This is
+a different license, not a version of the Affero GPL, but Affero has
+released a new version of the Affero GPL which permits relicensing under
+this license.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                       TERMS AND CONDITIONS
+
+  0. Definitions.
+
+  "This License" refers to version 3 of the GNU Affero General Public License.
+
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+  1. Source Code.
+
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+  The Corresponding Source for a work in source code form is that
+same work.
+
+  2. Basic Permissions.
+
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+  4. Conveying Verbatim Copies.
+
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+  5. Conveying Modified Source Versions.
+
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+  6. Conveying Non-Source Forms.
+
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+  7. Additional Terms.
+
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+  8. Termination.
+
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+  9. Acceptance Not Required for Having Copies.
+
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+  10. Automatic Licensing of Downstream Recipients.
+
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+  11. Patents.
+
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+  12. No Surrender of Others' Freedom.
+
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+  13. Remote Network Interaction; Use with the GNU General Public License.
+
+  Notwithstanding any other provision of this License, if you modify the
+Program, your modified version must prominently offer all users
+interacting with it remotely through a computer network (if your version
+supports such interaction) an opportunity to receive the Corresponding
+Source of your version by providing access to the Corresponding Source
+from a network server at no charge, through some standard or customary
+means of facilitating copying of software.  This Corresponding Source
+shall include the Corresponding Source for any work covered by version 3
+of the GNU General Public License that is incorporated pursuant to the
+following paragraph.
+
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the work with which it is combined will remain governed by version
+3 of the GNU General Public License.
+
+  14. Revised Versions of this License.
+
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU Affero General Public License from time to time.  Such new versions
+will be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU Affero General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU Affero General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+  If the Program specifies that a proxy can decide which future
+versions of the GNU Affero General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+  15. Disclaimer of Warranty.
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published
+    by the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If your software can interact with users remotely through a computer
+network, you should also make sure that it provides a way for users to
+get its source.  For example, if your program is a web application, its
+interface could display a "Source" link that leads users to an archive
+of the code.  There are many ways you could offer source, and different
+solutions will be better for different programs; see section 13 for the
+specific requirements.
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU AGPL, see
+<https://www.gnu.org/licenses/>.
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..d815311
--- /dev/null
+++ b/README.md
@@ -0,0 +1,56 @@
+hasher-phi1
+===========
+
+This is a Node module for simple hashing of the phi1 (phi1612) algorithm. 
+Native code is adapted from [LUX](https://github.com/Lux-core/lux).
+
+This module has been developed and tested on [Node v10.16.3](https://nodejs.org/) and [Ubuntu 16.04](http://releases.ubuntu.com/16.04/)
+
+## Install ##
+__Install as Dependency in NodeJS Project__
+```bash
+# Install from Github git package
+
+sudo apt-get install build-essential
+npm install mintpond/hasher-phi1 --save
+```
+-or-
+```bash
+# Install from Github NPM repository
+
+sudo apt-get install build-essential
+npm config set @mintpond:registry https://npm.pkg.github.com/mintpond
+npm config set //npm.pkg.github.com/:_authToken <MY_GITHUB_AUTH_TOKEN>
+
+npm install @mintpond/hasher-phi1@1.0.0 --save
+```
+
+__Install & Test__
+```bash
+# Install nodejs v10
+curl -sL https://deb.nodesource.com/setup_10.x | sudo -E bash -
+sudo apt-get install nodejs -y
+
+# Download hasher-phi1
+git clone https://github.com/MintPond/hasher-phi1
+
+# build
+cd hasher-phi1
+sudo apt-get install build-essential
+npm install
+npm test
+```
+
+## Usage ##
+__Hash__
+```js
+const phi1 = require('hasher-phi1');
+
+/**
+ * Hash the data in a Buffer and return the result in a new Buffer.
+ *
+ * @param  input   {Buffer}  The data to hash.
+ * @returns {Buffer}
+ */
+const result = phi1.phi1612(input);
+```
\ No newline at end of file
diff --git a/binding.gyp b/binding.gyp
new file mode 100644
index 0000000..c44230d
--- /dev/null
+++ b/binding.gyp
@@ -0,0 +1,26 @@
+{
+    "targets": [
+        {
+            "target_name": "hasherphi1",
+            "sources": [
+                "hasherphi1.cc",
+                "phi1612.c",
+                "lyra2/lyra2.c",
+                "lyra2/sponge.c",
+                "sha3/aes_helper.c",
+                "sha3/gost_streebog.c",
+                "sha3/sph_cubehash.c",
+                "sha3/sph_echo.c",
+                "sha3/sph_fugue.c",
+                "sha3/sph_jh.c",
+                "sha3/sph_skein.c"
+            ],
+            "include_dirs": [
+                "<!(node -e \"require('nan')\")"
+            ],
+            "cflags_cc": [
+                "-std=c++0x"
+            ]
+        }
+    ]
+}
diff --git a/hasherphi1.cc b/hasherphi1.cc
new file mode 100644
index 0000000..47b78a0
--- /dev/null
+++ b/hasherphi1.cc
@@ -0,0 +1,42 @@
+#include <node.h>
+#include <node_buffer.h>
+#include <v8.h>
+#include <stdint.h>
+#include "nan.h"
+
+extern "C" {
+    #include "phi1612.h"
+}
+
+#define THROW_ERROR_EXCEPTION(x) Nan::ThrowError(x)
+#define THROW_ERROR_EXCEPTION_WITH_STATUS_CODE(x, y) NanThrowError(x, y)
+
+using namespace node;
+using namespace v8;
+
+NAN_METHOD(phi1612) {
+
+    if (info.Length() < 1)
+        return THROW_ERROR_EXCEPTION("You must provide one argument.");
+
+    Local<Object> target = Nan::To<Object>(info[0]).ToLocalChecked();
+
+    if(!Buffer::HasInstance(target))
+        return THROW_ERROR_EXCEPTION("Argument should be a buffer object.");
+
+    char* input = Buffer::Data(target);
+    char* output = (char*) malloc(sizeof(char) * 32);
+
+    uint32_t input_len = Buffer::Length(target);
+
+    phi1612_hash(input, output, input_len);
+
+    info.GetReturnValue().Set(Nan::NewBuffer(output, 32).ToLocalChecked());
+}
+
+
+NAN_MODULE_INIT(init) {
+    Nan::Set(target, Nan::New("phi1612").ToLocalChecked(), Nan::GetFunction(Nan::New<FunctionTemplate>(phi1612)).ToLocalChecked());
+}
+
+NODE_MODULE(hasherphi1, init)
diff --git a/index.js b/index.js
new file mode 100644
index 0000000..90de88c
--- /dev/null
+++ b/index.js
@@ -0,0 +1 @@
+module.exports = require('bindings')('hasherphi1.node');
\ No newline at end of file
diff --git a/lyra2/lyra2.c b/lyra2/lyra2.c
new file mode 100644
index 0000000..b36cc5c
--- /dev/null
+++ b/lyra2/lyra2.c
@@ -0,0 +1,214 @@
+/**
+ * Implementation of the Lyra2 Password Hashing Scheme (PHS).
+ *
+ * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
+ *
+ * This software is hereby placed in the public domain.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "lyra2.h"
+#include "sponge.h"
+
+/**
+ * Executes Lyra2 based on the G function from Blake2b. This version supports salts and passwords
+ * whose combined length is smaller than the size of the memory matrix, (i.e., (nRows x nCols x b) bits,
+ * where "b" is the underlying sponge's bitrate). In this implementation, the "basil" is composed by all
+ * integer parameters (treated as type "unsigned int") in the order they are provided, plus the value
+ * of nCols, (i.e., basil = kLen || pwdlen || saltlen || timeCost || nRows || nCols).
+ *
+ * @param K The derived key to be output by the algorithm
+ * @param kLen Desired key length
+ * @param pwd User password
+ * @param pwdlen Password length
+ * @param salt Salt
+ * @param saltlen Salt length
+ * @param timeCost Parameter to determine the processing time (T)
+ * @param nRows Number or rows of the memory matrix (R)
+ * @param nCols Number of columns of the memory matrix (C)
+ *
+ * @return 0 if the key is generated correctly; -1 if there is an error (usually due to lack of memory for allocation)
+ */
+int LYRA2(void *K, int64_t kLen, const void *pwd, int32_t pwdlen, const void *salt, int32_t saltlen, int64_t timeCost, const int16_t nRows, const int16_t nCols)
+{
+    //============================= Basic variables ============================//
+    int64_t row = 2; //index of row to be processed
+    int64_t prev = 1; //index of prev (last row ever computed/modified)
+    int64_t rowa = 0; //index of row* (a previous row, deterministically picked during Setup and randomly picked while Wandering)
+    int64_t tau; //Time Loop iterator
+    int64_t step = 1; //Visitation step (used during Setup and Wandering phases)
+    int64_t window = 2; //Visitation window (used to define which rows can be revisited during Setup)
+    int64_t gap = 1; //Modifier to the step, assuming the values 1 or -1
+    int64_t i; //auxiliary iteration counter
+    int64_t v64; // 64bit var for memcpy
+    //==========================================================================/
+
+    //========== Initializing the Memory Matrix and pointers to it =============//
+    //Tries to allocate enough space for the whole memory matrix
+
+    const int64_t ROW_LEN_INT64 = BLOCK_LEN_INT64 * nCols;
+    const int64_t ROW_LEN_BYTES = ROW_LEN_INT64 * 8;
+    // for Lyra2REv2, nCols = 4, v1 was using 8
+    const int64_t BLOCK_LEN = (nCols == 4) ? BLOCK_LEN_BLAKE2_SAFE_INT64 : BLOCK_LEN_BLAKE2_SAFE_BYTES;
+
+    i = (int64_t)ROW_LEN_BYTES * nRows;
+    uint64_t *wholeMatrix = malloc(i);
+    if (wholeMatrix == NULL) {
+        return -1;
+    }
+    memset(wholeMatrix, 0, i);
+
+    //Allocates pointers to each row of the matrix
+    uint64_t **memMatrix = malloc(sizeof(uint64_t*) * nRows);
+    if (memMatrix == NULL) {
+        return -1;
+    }
+    //Places the pointers in the correct positions
+    uint64_t *ptrWord = wholeMatrix;
+    for (i = 0; i < nRows; i++) {
+        memMatrix[i] = ptrWord;
+        ptrWord += ROW_LEN_INT64;
+    }
+    //==========================================================================/
+
+    //============= Getting the password + salt + basil padded with 10*1 ===============//
+    //OBS.:The memory matrix will temporarily hold the password: not for saving memory,
+    //but this ensures that the password copied locally will be overwritten as soon as possible
+
+    //First, we clean enough blocks for the password, salt, basil and padding
+    int64_t nBlocksInput = ((saltlen + pwdlen + 6 * sizeof(uint64_t)) / BLOCK_LEN_BLAKE2_SAFE_BYTES) + 1;
+
+    byte *ptrByte = (byte*) wholeMatrix;
+
+    //Prepends the password
+    memcpy(ptrByte, pwd, pwdlen);
+    ptrByte += pwdlen;
+
+    //Concatenates the salt
+    memcpy(ptrByte, salt, saltlen);
+    ptrByte += saltlen;
+
+    memset(ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES - (saltlen + pwdlen));
+
+    //Concatenates the basil: every integer passed as parameter, in the order they are provided by the interface
+    memcpy(ptrByte, &kLen, sizeof(int64_t));
+    ptrByte += sizeof(uint64_t);
+    v64 = pwdlen;
+    memcpy(ptrByte, &v64, sizeof(int64_t));
+    ptrByte += sizeof(uint64_t);
+    v64 = saltlen;
+    memcpy(ptrByte, &v64, sizeof(int64_t));
+    ptrByte += sizeof(uint64_t);
+    v64 = timeCost;
+    memcpy(ptrByte, &v64, sizeof(int64_t));
+    ptrByte += sizeof(uint64_t);
+    v64 = nRows;
+    memcpy(ptrByte, &v64, sizeof(int64_t));
+    ptrByte += sizeof(uint64_t);
+    v64 = nCols;
+    memcpy(ptrByte, &v64, sizeof(int64_t));
+    ptrByte += sizeof(uint64_t);
+
+    //Now comes the padding
+    *ptrByte = 0x80; //first byte of padding: right after the password
+    ptrByte = (byte*) wholeMatrix; //resets the pointer to the start of the memory matrix
+    ptrByte += nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES - 1; //sets the pointer to the correct position: end of incomplete block
+    *ptrByte ^= 0x01; //last byte of padding: at the end of the last incomplete block
+    //==========================================================================/
+
+    //======================= Initializing the Sponge State ====================//
+    //Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c)
+    uint64_t state[16];
+    initState(state);
+    //==========================================================================/
+
+    //================================ Setup Phase =============================//
+    //Absorbing salt, password and basil: this is the only place in which the block length is hard-coded to 512 bits
+    ptrWord = wholeMatrix;
+    for (i = 0; i < nBlocksInput; i++) {
+        absorbBlockBlake2Safe(state, ptrWord); //absorbs each block of pad(pwd || salt || basil)
+        ptrWord += BLOCK_LEN; //goes to next block of pad(pwd || salt || basil)
+    }
+
+    //Initializes M[0] and M[1]
+    reducedSqueezeRow0(state, memMatrix[0], nCols); //The locally copied password is most likely overwritten here
+
+    reducedDuplexRow1(state, memMatrix[0], memMatrix[1], nCols);
+
+    do {
+        //M[row] = rand; //M[row*] = M[row*] XOR rotW(rand)
+
+        reducedDuplexRowSetup(state, memMatrix[prev], memMatrix[rowa], memMatrix[row], nCols);
+
+        //updates the value of row* (deterministically picked during Setup))
+        rowa = (rowa + step) & (window - 1);
+        //update prev: it now points to the last row ever computed
+        prev = row;
+        //updates row: goes to the next row to be computed
+        row++;
+
+        //Checks if all rows in the window where visited.
+        if (rowa == 0) {
+            step = window + gap; //changes the step: approximately doubles its value
+            window *= 2; //doubles the size of the re-visitation window
+            gap = -gap; //inverts the modifier to the step
+        }
+
+    } while (row < nRows);
+    //==========================================================================/
+
+    //============================ Wandering Phase =============================//
+    row = 0; //Resets the visitation to the first row of the memory matrix
+    for (tau = 1; tau <= timeCost; tau++) {
+        //Step is approximately half the number of all rows of the memory matrix for an odd tau; otherwise, it is -1
+        step = (tau % 2 == 0) ? -1 : nRows / 2 - 1;
+        do {
+            //Selects a pseudorandom index row*
+            //------------------------------------------------------------------------------------------
+            rowa = state[0] & (unsigned int)(nRows-1);  //(USE THIS IF nRows IS A POWER OF 2)
+            //rowa = state[0] % nRows; //(USE THIS FOR THE "GENERIC" CASE)
+            //------------------------------------------------------------------------------------------
+
+            //Performs a reduced-round duplexing operation over M[row*] XOR M[prev], updating both M[row*] and M[row]
+            reducedDuplexRow(state, memMatrix[prev], memMatrix[rowa], memMatrix[row], nCols);
+
+            //update prev: it now points to the last row ever computed
+            prev = row;
+
+            //updates row: goes to the next row to be computed
+            //------------------------------------------------------------------------------------------
+            row = (row + step) & (unsigned int)(nRows-1); //(USE THIS IF nRows IS A POWER OF 2)
+            //row = (row + step) % nRows; //(USE THIS FOR THE "GENERIC" CASE)
+            //------------------------------------------------------------------------------------------
+
+        } while (row != 0);
+    }
+
+    //============================ Wrap-up Phase ===============================//
+    //Absorbs the last block of the memory matrix
+    absorbBlock(state, memMatrix[rowa]);
+
+    //Squeezes the key
+    squeeze(state, K, (unsigned int) kLen);
+
+    //========================= Freeing the memory =============================//
+    free(memMatrix);
+    free(wholeMatrix);
+
+    return 0;
+}
\ No newline at end of file
diff --git a/lyra2/lyra2.h b/lyra2/lyra2.h
new file mode 100644
index 0000000..7b97df3
--- /dev/null
+++ b/lyra2/lyra2.h
@@ -0,0 +1,42 @@
+/**
+ * Header file for the Lyra2 Password Hashing Scheme (PHS).
+ *
+ * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
+ *
+ * This software is hereby placed in the public domain.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef LYRA2_H_
+#define LYRA2_H_
+
+#include <stdint.h>
+
+typedef unsigned char byte;
+
+//Block length required so Blake2's Initialization Vector (IV) is not overwritten (THIS SHOULD NOT BE MODIFIED)
+#define BLOCK_LEN_BLAKE2_SAFE_INT64 8                                   //512 bits (=64 bytes, =8 uint64_t)
+#define BLOCK_LEN_BLAKE2_SAFE_BYTES (BLOCK_LEN_BLAKE2_SAFE_INT64 * 8)   //same as above, in bytes
+
+
+#ifdef BLOCK_LEN_BITS
+#define BLOCK_LEN_INT64 (BLOCK_LEN_BITS/64)      //Block length: 768 bits (=96 bytes, =12 uint64_t)
+        #define BLOCK_LEN_BYTES (BLOCK_LEN_BITS/8)       //Block length, in bytes
+#else   //default block lenght: 768 bits
+#define BLOCK_LEN_INT64 12                       //Block length: 768 bits (=96 bytes, =12 uint64_t)
+#define BLOCK_LEN_BYTES (BLOCK_LEN_INT64 * 8)    //Block length, in bytes
+#endif
+
+int LYRA2(void *K, int64_t kLen, const void *pwd, int32_t pwdlen, const void *salt, int32_t saltlen, int64_t timeCost, const int16_t nRows, const int16_t nCols);
+
+#endif /* LYRA2_H_ */
\ No newline at end of file
diff --git a/lyra2/sponge.c b/lyra2/sponge.c
new file mode 100644
index 0000000..ce7bf90
--- /dev/null
+++ b/lyra2/sponge.c
@@ -0,0 +1,410 @@
+/**
+ * A simple implementation of Blake2b's internal permutation
+ * in the form of a sponge.
+ *
+ * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
+ *
+ * This software is hereby placed in the public domain.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+#include <stdio.h>
+#include <time.h>
+#include "sponge.h"
+#include "lyra2.h"
+
+
+/**
+ * Initializes the Sponge State. The first 512 bits are set to zeros and the remainder
+ * receive Blake2b's IV as per Blake2b's specification. <b>Note:</b> Even though sponges
+ * typically have their internal state initialized with zeros, Blake2b's G function
+ * has a fixed point: if the internal state and message are both filled with zeros. the
+ * resulting permutation will always be a block filled with zeros; this happens because
+ * Blake2b does not use the constants originally employed in Blake2 inside its G function,
+ * relying on the IV for avoiding possible fixed points.
+ *
+ * @param state         The 1024-bit array to be initialized
+ */
+void initState(uint64_t state[/*16*/]) {
+    //First 512 bis are zeros
+    memset(state, 0, 64);
+    //Remainder BLOCK_LEN_BLAKE2_SAFE_BYTES are reserved to the IV
+    state[8] = blake2b_IV[0];
+    state[9] = blake2b_IV[1];
+    state[10] = blake2b_IV[2];
+    state[11] = blake2b_IV[3];
+    state[12] = blake2b_IV[4];
+    state[13] = blake2b_IV[5];
+    state[14] = blake2b_IV[6];
+    state[15] = blake2b_IV[7];
+}
+
+/**
+ * Execute Blake2b's G function, with all 12 rounds.
+ *
+ * @param v     A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function
+ */
+__inline static void blake2bLyra(uint64_t *v) {
+    ROUND_LYRA(0);
+    ROUND_LYRA(1);
+    ROUND_LYRA(2);
+    ROUND_LYRA(3);
+    ROUND_LYRA(4);
+    ROUND_LYRA(5);
+    ROUND_LYRA(6);
+    ROUND_LYRA(7);
+    ROUND_LYRA(8);
+    ROUND_LYRA(9);
+    ROUND_LYRA(10);
+    ROUND_LYRA(11);
+}
+
+/**
+ * Executes a reduced version of Blake2b's G function with only one round
+ * @param v     A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function
+ */
+__inline static void reducedBlake2bLyra(uint64_t *v) {
+    ROUND_LYRA(0);
+}
+
+/**
+ * Performs a squeeze operation, using Blake2b's G function as the
+ * internal permutation
+ *
+ * @param state      The current state of the sponge
+ * @param out        Array that will receive the data squeezed
+ * @param len        The number of bytes to be squeezed into the "out" array
+ */
+void squeeze(uint64_t *state, byte *out, unsigned int len)
+{
+    int fullBlocks = len / BLOCK_LEN_BYTES;
+    byte *ptr = out;
+    int i;
+    //Squeezes full blocks
+    for (i = 0; i < fullBlocks; i++) {
+        memcpy(ptr, state, BLOCK_LEN_BYTES);
+        blake2bLyra(state);
+        ptr += BLOCK_LEN_BYTES;
+    }
+
+    //Squeezes remaining bytes
+    memcpy(ptr, state, (len % BLOCK_LEN_BYTES));
+}
+
+/**
+ * Performs an absorb operation for a single block (BLOCK_LEN_INT64 words
+ * of type uint64_t), using Blake2b's G function as the internal permutation
+ *
+ * @param state The current state of the sponge
+ * @param in    The block to be absorbed (BLOCK_LEN_INT64 words)
+ */
+void absorbBlock(uint64_t *state, const uint64_t *in)
+{
+    //XORs the first BLOCK_LEN_INT64 words of "in" with the current state
+    state[0] ^= in[0];
+    state[1] ^= in[1];
+    state[2] ^= in[2];
+    state[3] ^= in[3];
+    state[4] ^= in[4];
+    state[5] ^= in[5];
+    state[6] ^= in[6];
+    state[7] ^= in[7];
+    state[8] ^= in[8];
+    state[9] ^= in[9];
+    state[10] ^= in[10];
+    state[11] ^= in[11];
+
+    //Applies the transformation f to the sponge's state
+    blake2bLyra(state);
+}
+
+/**
+ * Performs an absorb operation for a single block (BLOCK_LEN_BLAKE2_SAFE_INT64
+ * words of type uint64_t), using Blake2b's G function as the internal permutation
+ *
+ * @param state The current state of the sponge
+ * @param in    The block to be absorbed (BLOCK_LEN_BLAKE2_SAFE_INT64 words)
+ */
+void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in)
+{
+    //XORs the first BLOCK_LEN_BLAKE2_SAFE_INT64 words of "in" with the current state
+
+    state[0] ^= in[0];
+    state[1] ^= in[1];
+    state[2] ^= in[2];
+    state[3] ^= in[3];
+    state[4] ^= in[4];
+    state[5] ^= in[5];
+    state[6] ^= in[6];
+    state[7] ^= in[7];
+
+    //Applies the transformation f to the sponge's state
+    blake2bLyra(state);
+}
+
+/**
+ * Performs a reduced squeeze operation for a single row, from the highest to
+ * the lowest index, using the reduced-round Blake2b's G function as the
+ * internal permutation
+ *
+ * @param state     The current state of the sponge
+ * @param rowOut    Row to receive the data squeezed
+ */
+void reducedSqueezeRow0(uint64_t* state, uint64_t* rowOut, const uint32_t nCols)
+{
+    uint64_t* ptrWord = rowOut + (nCols-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to M[0][C-1]
+    unsigned int i;
+    //M[row][C-1-col] = H.reduced_squeeze()
+    for (i = 0; i < nCols; i++) {
+        ptrWord[0] = state[0];
+        ptrWord[1] = state[1];
+        ptrWord[2] = state[2];
+        ptrWord[3] = state[3];
+        ptrWord[4] = state[4];
+        ptrWord[5] = state[5];
+        ptrWord[6] = state[6];
+        ptrWord[7] = state[7];
+        ptrWord[8] = state[8];
+        ptrWord[9] = state[9];
+        ptrWord[10] = state[10];
+        ptrWord[11] = state[11];
+
+        //Goes to next block (column) that will receive the squeezed data
+        ptrWord -= BLOCK_LEN_INT64;
+
+        //Applies the reduced-round transformation f to the sponge's state
+        reducedBlake2bLyra(state);
+    }
+}
+
+/**
+ * Performs a reduced duplex operation for a single row, from the highest to
+ * the lowest index, using the reduced-round Blake2b's G function as the
+ * internal permutation
+ *
+ * @param state		The current state of the sponge
+ * @param rowIn		Row to feed the sponge
+ * @param rowOut	Row to receive the sponge's output
+ */
+void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut, const uint32_t nCols)
+{
+    uint64_t* ptrWordIn = rowIn;				//In Lyra2: pointer to prev
+    uint64_t* ptrWordOut = rowOut + (nCols-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row
+    unsigned int i;
+
+    for (i = 0; i < nCols; i++) {
+
+        //Absorbing "M[prev][col]"
+        state[0]  ^= (ptrWordIn[0]);
+        state[1]  ^= (ptrWordIn[1]);
+        state[2]  ^= (ptrWordIn[2]);
+        state[3]  ^= (ptrWordIn[3]);
+        state[4]  ^= (ptrWordIn[4]);
+        state[5]  ^= (ptrWordIn[5]);
+        state[6]  ^= (ptrWordIn[6]);
+        state[7]  ^= (ptrWordIn[7]);
+        state[8]  ^= (ptrWordIn[8]);
+        state[9]  ^= (ptrWordIn[9]);
+        state[10] ^= (ptrWordIn[10]);
+        state[11] ^= (ptrWordIn[11]);
+
+        //Applies the reduced-round transformation f to the sponge's state
+        reducedBlake2bLyra(state);
+
+        //M[row][C-1-col] = M[prev][col] XOR rand
+        ptrWordOut[0] = ptrWordIn[0]  ^ state[0];
+        ptrWordOut[1] = ptrWordIn[1]  ^ state[1];
+        ptrWordOut[2] = ptrWordIn[2]  ^ state[2];
+        ptrWordOut[3] = ptrWordIn[3]  ^ state[3];
+        ptrWordOut[4] = ptrWordIn[4]  ^ state[4];
+        ptrWordOut[5] = ptrWordIn[5]  ^ state[5];
+        ptrWordOut[6] = ptrWordIn[6]  ^ state[6];
+        ptrWordOut[7] = ptrWordIn[7]  ^ state[7];
+        ptrWordOut[8] = ptrWordIn[8]  ^ state[8];
+        ptrWordOut[9] = ptrWordIn[9]  ^ state[9];
+        ptrWordOut[10] = ptrWordIn[10] ^ state[10];
+        ptrWordOut[11] = ptrWordIn[11] ^ state[11];
+
+        //Input: next column (i.e., next block in sequence)
+        ptrWordIn += BLOCK_LEN_INT64;
+        //Output: goes to previous column
+        ptrWordOut -= BLOCK_LEN_INT64;
+    }
+}
+
+/**
+ * Performs a duplexing operation over "M[rowInOut][col] [+] M[rowIn][col]" (i.e.,
+ * the wordwise addition of two columns, ignoring carries between words). The
+ * output of this operation, "rand", is then used to make
+ * "M[rowOut][(N_COLS-1)-col] = M[rowIn][col] XOR rand" and
+ * "M[rowInOut][col] =  M[rowInOut][col] XOR rotW(rand)", where rotW is a 64-bit
+ * rotation to the left and N_COLS is a system parameter.
+ *
+ * @param state          The current state of the sponge
+ * @param rowIn          Row used only as input
+ * @param rowInOut       Row used as input and to receive output after rotation
+ * @param rowOut         Row receiving the output
+ *
+ */
+void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, const uint32_t nCols)
+{
+    uint64_t* ptrWordIn = rowIn;				//In Lyra2: pointer to prev
+    uint64_t* ptrWordInOut = rowInOut;				//In Lyra2: pointer to row*
+    uint64_t* ptrWordOut = rowOut + (nCols-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row
+    unsigned int i;
+
+    for (i = 0; i < nCols; i++) {
+
+        //Absorbing "M[prev] [+] M[row*]"
+        state[0]  ^= (ptrWordIn[0]  + ptrWordInOut[0]);
+        state[1]  ^= (ptrWordIn[1]  + ptrWordInOut[1]);
+        state[2]  ^= (ptrWordIn[2]  + ptrWordInOut[2]);
+        state[3]  ^= (ptrWordIn[3]  + ptrWordInOut[3]);
+        state[4]  ^= (ptrWordIn[4]  + ptrWordInOut[4]);
+        state[5]  ^= (ptrWordIn[5]  + ptrWordInOut[5]);
+        state[6]  ^= (ptrWordIn[6]  + ptrWordInOut[6]);
+        state[7]  ^= (ptrWordIn[7]  + ptrWordInOut[7]);
+        state[8]  ^= (ptrWordIn[8]  + ptrWordInOut[8]);
+        state[9]  ^= (ptrWordIn[9]  + ptrWordInOut[9]);
+        state[10] ^= (ptrWordIn[10] + ptrWordInOut[10]);
+        state[11] ^= (ptrWordIn[11] + ptrWordInOut[11]);
+
+        //Applies the reduced-round transformation f to the sponge's state
+        reducedBlake2bLyra(state);
+
+        //M[row][col] = M[prev][col] XOR rand
+        ptrWordOut[0] = ptrWordIn[0]  ^ state[0];
+        ptrWordOut[1] = ptrWordIn[1]  ^ state[1];
+        ptrWordOut[2] = ptrWordIn[2]  ^ state[2];
+        ptrWordOut[3] = ptrWordIn[3]  ^ state[3];
+        ptrWordOut[4] = ptrWordIn[4]  ^ state[4];
+        ptrWordOut[5] = ptrWordIn[5]  ^ state[5];
+        ptrWordOut[6] = ptrWordIn[6]  ^ state[6];
+        ptrWordOut[7] = ptrWordIn[7]  ^ state[7];
+        ptrWordOut[8] = ptrWordIn[8]  ^ state[8];
+        ptrWordOut[9] = ptrWordIn[9]  ^ state[9];
+        ptrWordOut[10] = ptrWordIn[10] ^ state[10];
+        ptrWordOut[11] = ptrWordIn[11] ^ state[11];
+
+        //M[row*][col] = M[row*][col] XOR rotW(rand)
+        ptrWordInOut[0]  ^= state[11];
+        ptrWordInOut[1]  ^= state[0];
+        ptrWordInOut[2]  ^= state[1];
+        ptrWordInOut[3]  ^= state[2];
+        ptrWordInOut[4]  ^= state[3];
+        ptrWordInOut[5]  ^= state[4];
+        ptrWordInOut[6]  ^= state[5];
+        ptrWordInOut[7]  ^= state[6];
+        ptrWordInOut[8]  ^= state[7];
+        ptrWordInOut[9]  ^= state[8];
+        ptrWordInOut[10] ^= state[9];
+        ptrWordInOut[11] ^= state[10];
+
+        //Inputs: next column (i.e., next block in sequence)
+        ptrWordInOut += BLOCK_LEN_INT64;
+        ptrWordIn += BLOCK_LEN_INT64;
+        //Output: goes to previous column
+        ptrWordOut -= BLOCK_LEN_INT64;
+    }
+}
+
+/**
+ * Performs a duplexing operation over "M[rowInOut][col] [+] M[rowIn][col]" (i.e.,
+ * the wordwise addition of two columns, ignoring carries between words). The
+ * output of this operation, "rand", is then used to make
+ * "M[rowOut][col] = M[rowOut][col] XOR rand" and
+ * "M[rowInOut][col] =  M[rowInOut][col] XOR rotW(rand)", where rotW is a 64-bit
+ * rotation to the left.
+ *
+ * @param state          The current state of the sponge
+ * @param rowIn          Row used only as input
+ * @param rowInOut       Row used as input and to receive output after rotation
+ * @param rowOut         Row receiving the output
+ *
+ */
+void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, const uint32_t nCols)
+{
+    uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row*
+    uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev
+    uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row
+    unsigned int i;
+
+    for (i = 0; i < nCols; i++) {
+
+        //Absorbing "M[prev] [+] M[row*]"
+        state[0]  ^= (ptrWordIn[0]  + ptrWordInOut[0]);
+        state[1]  ^= (ptrWordIn[1]  + ptrWordInOut[1]);
+        state[2]  ^= (ptrWordIn[2]  + ptrWordInOut[2]);
+        state[3]  ^= (ptrWordIn[3]  + ptrWordInOut[3]);
+        state[4]  ^= (ptrWordIn[4]  + ptrWordInOut[4]);
+        state[5]  ^= (ptrWordIn[5]  + ptrWordInOut[5]);
+        state[6]  ^= (ptrWordIn[6]  + ptrWordInOut[6]);
+        state[7]  ^= (ptrWordIn[7]  + ptrWordInOut[7]);
+        state[8]  ^= (ptrWordIn[8]  + ptrWordInOut[8]);
+        state[9]  ^= (ptrWordIn[9]  + ptrWordInOut[9]);
+        state[10] ^= (ptrWordIn[10] + ptrWordInOut[10]);
+        state[11] ^= (ptrWordIn[11] + ptrWordInOut[11]);
+
+        //Applies the reduced-round transformation f to the sponge's state
+        reducedBlake2bLyra(state);
+
+        //M[rowOut][col] = M[rowOut][col] XOR rand
+        ptrWordOut[0] ^= state[0];
+        ptrWordOut[1] ^= state[1];
+        ptrWordOut[2] ^= state[2];
+        ptrWordOut[3] ^= state[3];
+        ptrWordOut[4] ^= state[4];
+        ptrWordOut[5] ^= state[5];
+        ptrWordOut[6] ^= state[6];
+        ptrWordOut[7] ^= state[7];
+        ptrWordOut[8] ^= state[8];
+        ptrWordOut[9] ^= state[9];
+        ptrWordOut[10] ^= state[10];
+        ptrWordOut[11] ^= state[11];
+
+        //M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)
+        ptrWordInOut[0] ^= state[11];
+        ptrWordInOut[1] ^= state[0];
+        ptrWordInOut[2] ^= state[1];
+        ptrWordInOut[3] ^= state[2];
+        ptrWordInOut[4] ^= state[3];
+        ptrWordInOut[5] ^= state[4];
+        ptrWordInOut[6] ^= state[5];
+        ptrWordInOut[7] ^= state[6];
+        ptrWordInOut[8] ^= state[7];
+        ptrWordInOut[9] ^= state[8];
+        ptrWordInOut[10] ^= state[9];
+        ptrWordInOut[11] ^= state[10];
+
+        //Goes to next block
+        ptrWordOut += BLOCK_LEN_INT64;
+        ptrWordInOut += BLOCK_LEN_INT64;
+        ptrWordIn += BLOCK_LEN_INT64;
+    }
+}
+
+/**
+ * Prints an array of unsigned chars
+ */
+void printArray(unsigned char *array, unsigned int size, char *name)
+{
+    unsigned int i;
+    printf("%s: ", name);
+    for (i = 0; i < size; i++) {
+        printf("%2x|", array[i]);
+    }
+    printf("\n");
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////
\ No newline at end of file
diff --git a/lyra2/sponge.h b/lyra2/sponge.h
new file mode 100644
index 0000000..2d07dda
--- /dev/null
+++ b/lyra2/sponge.h
@@ -0,0 +1,88 @@
+/**
+ * Header file for Blake2b's internal permutation in the form of a sponge.
+ * This code is based on the original Blake2b's implementation provided by
+ * Samuel Neves (https://blake2.net/)
+ *
+ * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014.
+ *
+ * This software is hereby placed in the public domain.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef SPONGE_H_
+#define SPONGE_H_
+
+#include <stdint.h>
+
+/* Blake2b IV Array */
+static const uint64_t blake2b_IV[8] =
+        {
+                0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
+                0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
+                0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
+                0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
+        };
+
+/* Blake2b's rotation */
+static __inline uint64_t rotr64(const uint64_t w, const unsigned c) {
+#ifdef _MSC_VER
+    return _rotr64(w, c);
+#else
+    return ( w >> c ) | ( w << ( 64 - c ) );
+#endif
+}
+
+/* Blake2b's G function */
+#define G(r,i,a,b,c,d) do { \
+	a = a + b; \
+	d = rotr64(d ^ a, 32); \
+	c = c + d; \
+	b = rotr64(b ^ c, 24); \
+	a = a + b; \
+	d = rotr64(d ^ a, 16); \
+	c = c + d; \
+	b = rotr64(b ^ c, 63); \
+  } while(0)
+
+
+/*One Round of the Blake2b's compression function*/
+#define ROUND_LYRA(r) \
+	G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
+	G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
+	G(r,2,v[ 2],v[ 6],v[10],v[14]); \
+	G(r,3,v[ 3],v[ 7],v[11],v[15]); \
+	G(r,4,v[ 0],v[ 5],v[10],v[15]); \
+	G(r,5,v[ 1],v[ 6],v[11],v[12]); \
+	G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
+	G(r,7,v[ 3],v[ 4],v[ 9],v[14]);
+
+//---- Housekeeping
+void initState(uint64_t state[/*16*/]);
+
+//---- Squeezes
+void squeeze(uint64_t *state, unsigned char *out, unsigned int len);
+void reducedSqueezeRow0(uint64_t* state, uint64_t* row, const uint32_t nCols);
+
+//---- Absorbs
+void absorbBlock(uint64_t *state, const uint64_t *in);
+void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in);
+
+//---- Duplexes
+void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut, const uint32_t nCols);
+void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, const uint32_t nCols);
+void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut, const uint32_t nCols);
+
+//---- Misc
+void printArray(unsigned char *array, unsigned int size, char *name);
+
+#endif /* SPONGE_H_ */
\ No newline at end of file
diff --git a/package-lock.json b/package-lock.json
new file mode 100644
index 0000000..839fb08
--- /dev/null
+++ b/package-lock.json
@@ -0,0 +1,26 @@
+{
+    "name": "@mintpond/hasher-phi1",
+    "version": "1.0.0",
+    "lockfileVersion": 1,
+    "requires": true,
+    "dependencies": {
+        "bindings": {
+            "version": "1.5.0",
+            "resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz",
+            "integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==",
+            "requires": {
+                "file-uri-to-path": "1.0.0"
+            }
+        },
+        "file-uri-to-path": {
+            "version": "1.0.0",
+            "resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz",
+            "integrity": "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw=="
+        },
+        "nan": {
+            "version": "2.14.0",
+            "resolved": "https://registry.npmjs.org/nan/-/nan-2.14.0.tgz",
+            "integrity": "sha512-INOFj37C7k3AfaNTtX8RhsTw7qRy7eLET14cROi9+5HAVbbHuIWUHEauBv5qT4Av2tWasiTY1Jw6puUNqRJXQg=="
+        }
+    }
+}
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..3acccae
--- /dev/null
+++ b/package.json
@@ -0,0 +1,28 @@
+{
+    "name": "@mintpond/hasher-phi1",
+    "version": "1.0.0",
+    "description": "phi1612 algorithm hashing function for NodeJS.",
+    "author": "JCThePants",
+    "main": "index.js",
+    "dependencies": {
+        "bindings": "^1.3.0",
+        "nan": "^2.6.2"
+    },
+    "scripts": {
+        "test": "node test"
+    },
+    "homepage": "https://github.com/MintPond/hasher-phi1",
+    "bugs": {
+        "url": "https://github.com/MintPond/hasher-phi1/issues"
+    },
+    "repository": {
+        "type": "git",
+        "url": "https://github.com/MintPond/hasher-phi1.git"
+    },
+    "publishConfig": {
+        "registry": "https://npm.pkg.github.com/"
+    },
+    "engines": {
+        "node": ">=10.17.0"
+    }
+}
diff --git a/phi1612.c b/phi1612.c
new file mode 100644
index 0000000..4146746
--- /dev/null
+++ b/phi1612.c
@@ -0,0 +1,54 @@
+
+#include "phi1612.h"
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+
+// Hashing modules
+
+#include "sha3/sph_skein.h"
+#include "sha3/sph_jh.h"
+#include "sha3/sph_cubehash.h"
+#include "sha3/sph_fugue.h"
+#include "sha3/gost_streebog.h"
+#include "sha3/sph_echo.h"
+
+void phi1612_hash(const char* input, char* output, uint32_t len)
+{
+	sph_skein512_context        ctx_skein;
+	sph_jh512_context           ctx_jh;
+	sph_cubehash512_context     ctx_cubehash;
+	sph_fugue512_context        ctx_fugue;
+	sph_gost512_context         ctx_gost;
+	sph_echo512_context         ctx_echo;
+
+	//uint8_t _ALIGN(128) hash[64];
+	uint8_t hash[64];
+
+	sph_skein512_init(&ctx_skein);
+	sph_skein512(&ctx_skein, input, len);
+	sph_skein512_close(&ctx_skein, (void*) hash);
+
+	sph_jh512_init(&ctx_jh);
+	sph_jh512(&ctx_jh, (const void*) hash, 64);
+	sph_jh512_close(&ctx_jh, (void*) hash);
+
+	sph_cubehash512_init(&ctx_cubehash);
+	sph_cubehash512(&ctx_cubehash, (const void*) hash, 64);
+	sph_cubehash512_close(&ctx_cubehash, (void*) hash);
+
+	sph_fugue512_init(&ctx_fugue);
+	sph_fugue512(&ctx_fugue, (const void*) hash, 64);
+	sph_fugue512_close(&ctx_fugue, (void*) hash);
+
+	sph_gost512_init(&ctx_gost);
+	sph_gost512(&ctx_gost, (const void*) hash, 64);
+	sph_gost512_close(&ctx_gost, (void*) hash);
+
+	sph_echo512_init(&ctx_echo);
+	sph_echo512(&ctx_echo, (const void*) hash, 64);
+	sph_echo512_close(&ctx_echo, (void*) hash);
+
+	memcpy(output, hash, 32);
+}
\ No newline at end of file
diff --git a/phi1612.h b/phi1612.h
new file mode 100644
index 0000000..806d331
--- /dev/null
+++ b/phi1612.h
@@ -0,0 +1,16 @@
+#ifndef PHI_H
+#define PHI_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+void phi1612_hash(const char* input, char* output, uint32_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
\ No newline at end of file
diff --git a/sha3/aes_helper.c b/sha3/aes_helper.c
new file mode 100644
index 0000000..6d7838c
--- /dev/null
+++ b/sha3/aes_helper.c
@@ -0,0 +1,392 @@
+/* $Id: aes_helper.c 220 2010-06-09 09:21:50Z tp $ */
+/*
+ * AES tables. This file is not meant to be compiled by itself; it
+ * is included by some hash function implementations. It contains
+ * the precomputed tables and helper macros for evaluating an AES
+ * round, optionally with a final XOR with a subkey.
+ *
+ * By default, this file defines the tables and macros for little-endian
+ * processing (i.e. it is assumed that the input bytes have been read
+ * from memory and assembled with the little-endian convention). If
+ * the 'AES_BIG_ENDIAN' macro is defined (to a non-zero integer value)
+ * when this file is included, then the tables and macros for big-endian
+ * processing are defined instead. The big-endian tables and macros have
+ * names distinct from the little-endian tables and macros, hence it is
+ * possible to have both simultaneously, by including this file twice
+ * (with and without the AES_BIG_ENDIAN macro).
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#include "sph_types.h"
+#ifdef __cplusplus
+extern "C"{
+#endif
+#if AES_BIG_ENDIAN
+
+#define AESx(x)   ( ((SPH_C32(x) >> 24) & SPH_C32(0x000000FF)) \
+                  | ((SPH_C32(x) >>  8) & SPH_C32(0x0000FF00)) \
+                  | ((SPH_C32(x) <<  8) & SPH_C32(0x00FF0000)) \
+                  | ((SPH_C32(x) << 24) & SPH_C32(0xFF000000)))
+
+#define AES0      AES0_BE
+#define AES1      AES1_BE
+#define AES2      AES2_BE
+#define AES3      AES3_BE
+
+#define AES_ROUND_BE(X0, X1, X2, X3, K0, K1, K2, K3, Y0, Y1, Y2, Y3)   do { \
+		(Y0) = AES0[((X0) >> 24) & 0xFF] \
+			^ AES1[((X1) >> 16) & 0xFF] \
+			^ AES2[((X2) >> 8) & 0xFF] \
+			^ AES3[(X3) & 0xFF] ^ (K0); \
+		(Y1) = AES0[((X1) >> 24) & 0xFF] \
+			^ AES1[((X2) >> 16) & 0xFF] \
+			^ AES2[((X3) >> 8) & 0xFF] \
+			^ AES3[(X0) & 0xFF] ^ (K1); \
+		(Y2) = AES0[((X2) >> 24) & 0xFF] \
+			^ AES1[((X3) >> 16) & 0xFF] \
+			^ AES2[((X0) >> 8) & 0xFF] \
+			^ AES3[(X1) & 0xFF] ^ (K2); \
+		(Y3) = AES0[((X3) >> 24) & 0xFF] \
+			^ AES1[((X0) >> 16) & 0xFF] \
+			^ AES2[((X1) >> 8) & 0xFF] \
+			^ AES3[(X2) & 0xFF] ^ (K3); \
+	} while (0)
+
+#define AES_ROUND_NOKEY_BE(X0, X1, X2, X3, Y0, Y1, Y2, Y3) \
+	AES_ROUND_BE(X0, X1, X2, X3, 0, 0, 0, 0, Y0, Y1, Y2, Y3)
+
+#else
+
+#define AESx(x)   SPH_C32(x)
+#define AES0      AES0_LE
+#define AES1      AES1_LE
+#define AES2      AES2_LE
+#define AES3      AES3_LE
+
+#define AES_ROUND_LE(X0, X1, X2, X3, K0, K1, K2, K3, Y0, Y1, Y2, Y3)   do { \
+		(Y0) = AES0[(X0) & 0xFF] \
+			^ AES1[((X1) >> 8) & 0xFF] \
+			^ AES2[((X2) >> 16) & 0xFF] \
+			^ AES3[((X3) >> 24) & 0xFF] ^ (K0); \
+		(Y1) = AES0[(X1) & 0xFF] \
+			^ AES1[((X2) >> 8) & 0xFF] \
+			^ AES2[((X3) >> 16) & 0xFF] \
+			^ AES3[((X0) >> 24) & 0xFF] ^ (K1); \
+		(Y2) = AES0[(X2) & 0xFF] \
+			^ AES1[((X3) >> 8) & 0xFF] \
+			^ AES2[((X0) >> 16) & 0xFF] \
+			^ AES3[((X1) >> 24) & 0xFF] ^ (K2); \
+		(Y3) = AES0[(X3) & 0xFF] \
+			^ AES1[((X0) >> 8) & 0xFF] \
+			^ AES2[((X1) >> 16) & 0xFF] \
+			^ AES3[((X2) >> 24) & 0xFF] ^ (K3); \
+	} while (0)
+
+#define AES_ROUND_NOKEY_LE(X0, X1, X2, X3, Y0, Y1, Y2, Y3) \
+	AES_ROUND_LE(X0, X1, X2, X3, 0, 0, 0, 0, Y0, Y1, Y2, Y3)
+
+#endif
+
+/*
+ * The AES*[] tables allow us to perform a fast evaluation of an AES
+ * round; table AESi[] combines SubBytes for a byte at row i, and
+ * MixColumns for the column where that byte goes after ShiftRows.
+ */
+
+static const sph_u32 AES0[256] = {
+        AESx(0xA56363C6), AESx(0x847C7CF8), AESx(0x997777EE), AESx(0x8D7B7BF6),
+        AESx(0x0DF2F2FF), AESx(0xBD6B6BD6), AESx(0xB16F6FDE), AESx(0x54C5C591),
+        AESx(0x50303060), AESx(0x03010102), AESx(0xA96767CE), AESx(0x7D2B2B56),
+        AESx(0x19FEFEE7), AESx(0x62D7D7B5), AESx(0xE6ABAB4D), AESx(0x9A7676EC),
+        AESx(0x45CACA8F), AESx(0x9D82821F), AESx(0x40C9C989), AESx(0x877D7DFA),
+        AESx(0x15FAFAEF), AESx(0xEB5959B2), AESx(0xC947478E), AESx(0x0BF0F0FB),
+        AESx(0xECADAD41), AESx(0x67D4D4B3), AESx(0xFDA2A25F), AESx(0xEAAFAF45),
+        AESx(0xBF9C9C23), AESx(0xF7A4A453), AESx(0x967272E4), AESx(0x5BC0C09B),
+        AESx(0xC2B7B775), AESx(0x1CFDFDE1), AESx(0xAE93933D), AESx(0x6A26264C),
+        AESx(0x5A36366C), AESx(0x413F3F7E), AESx(0x02F7F7F5), AESx(0x4FCCCC83),
+        AESx(0x5C343468), AESx(0xF4A5A551), AESx(0x34E5E5D1), AESx(0x08F1F1F9),
+        AESx(0x937171E2), AESx(0x73D8D8AB), AESx(0x53313162), AESx(0x3F15152A),
+        AESx(0x0C040408), AESx(0x52C7C795), AESx(0x65232346), AESx(0x5EC3C39D),
+        AESx(0x28181830), AESx(0xA1969637), AESx(0x0F05050A), AESx(0xB59A9A2F),
+        AESx(0x0907070E), AESx(0x36121224), AESx(0x9B80801B), AESx(0x3DE2E2DF),
+        AESx(0x26EBEBCD), AESx(0x6927274E), AESx(0xCDB2B27F), AESx(0x9F7575EA),
+        AESx(0x1B090912), AESx(0x9E83831D), AESx(0x742C2C58), AESx(0x2E1A1A34),
+        AESx(0x2D1B1B36), AESx(0xB26E6EDC), AESx(0xEE5A5AB4), AESx(0xFBA0A05B),
+        AESx(0xF65252A4), AESx(0x4D3B3B76), AESx(0x61D6D6B7), AESx(0xCEB3B37D),
+        AESx(0x7B292952), AESx(0x3EE3E3DD), AESx(0x712F2F5E), AESx(0x97848413),
+        AESx(0xF55353A6), AESx(0x68D1D1B9), AESx(0x00000000), AESx(0x2CEDEDC1),
+        AESx(0x60202040), AESx(0x1FFCFCE3), AESx(0xC8B1B179), AESx(0xED5B5BB6),
+        AESx(0xBE6A6AD4), AESx(0x46CBCB8D), AESx(0xD9BEBE67), AESx(0x4B393972),
+        AESx(0xDE4A4A94), AESx(0xD44C4C98), AESx(0xE85858B0), AESx(0x4ACFCF85),
+        AESx(0x6BD0D0BB), AESx(0x2AEFEFC5), AESx(0xE5AAAA4F), AESx(0x16FBFBED),
+        AESx(0xC5434386), AESx(0xD74D4D9A), AESx(0x55333366), AESx(0x94858511),
+        AESx(0xCF45458A), AESx(0x10F9F9E9), AESx(0x06020204), AESx(0x817F7FFE),
+        AESx(0xF05050A0), AESx(0x443C3C78), AESx(0xBA9F9F25), AESx(0xE3A8A84B),
+        AESx(0xF35151A2), AESx(0xFEA3A35D), AESx(0xC0404080), AESx(0x8A8F8F05),
+        AESx(0xAD92923F), AESx(0xBC9D9D21), AESx(0x48383870), AESx(0x04F5F5F1),
+        AESx(0xDFBCBC63), AESx(0xC1B6B677), AESx(0x75DADAAF), AESx(0x63212142),
+        AESx(0x30101020), AESx(0x1AFFFFE5), AESx(0x0EF3F3FD), AESx(0x6DD2D2BF),
+        AESx(0x4CCDCD81), AESx(0x140C0C18), AESx(0x35131326), AESx(0x2FECECC3),
+        AESx(0xE15F5FBE), AESx(0xA2979735), AESx(0xCC444488), AESx(0x3917172E),
+        AESx(0x57C4C493), AESx(0xF2A7A755), AESx(0x827E7EFC), AESx(0x473D3D7A),
+        AESx(0xAC6464C8), AESx(0xE75D5DBA), AESx(0x2B191932), AESx(0x957373E6),
+        AESx(0xA06060C0), AESx(0x98818119), AESx(0xD14F4F9E), AESx(0x7FDCDCA3),
+        AESx(0x66222244), AESx(0x7E2A2A54), AESx(0xAB90903B), AESx(0x8388880B),
+        AESx(0xCA46468C), AESx(0x29EEEEC7), AESx(0xD3B8B86B), AESx(0x3C141428),
+        AESx(0x79DEDEA7), AESx(0xE25E5EBC), AESx(0x1D0B0B16), AESx(0x76DBDBAD),
+        AESx(0x3BE0E0DB), AESx(0x56323264), AESx(0x4E3A3A74), AESx(0x1E0A0A14),
+        AESx(0xDB494992), AESx(0x0A06060C), AESx(0x6C242448), AESx(0xE45C5CB8),
+        AESx(0x5DC2C29F), AESx(0x6ED3D3BD), AESx(0xEFACAC43), AESx(0xA66262C4),
+        AESx(0xA8919139), AESx(0xA4959531), AESx(0x37E4E4D3), AESx(0x8B7979F2),
+        AESx(0x32E7E7D5), AESx(0x43C8C88B), AESx(0x5937376E), AESx(0xB76D6DDA),
+        AESx(0x8C8D8D01), AESx(0x64D5D5B1), AESx(0xD24E4E9C), AESx(0xE0A9A949),
+        AESx(0xB46C6CD8), AESx(0xFA5656AC), AESx(0x07F4F4F3), AESx(0x25EAEACF),
+        AESx(0xAF6565CA), AESx(0x8E7A7AF4), AESx(0xE9AEAE47), AESx(0x18080810),
+        AESx(0xD5BABA6F), AESx(0x887878F0), AESx(0x6F25254A), AESx(0x722E2E5C),
+        AESx(0x241C1C38), AESx(0xF1A6A657), AESx(0xC7B4B473), AESx(0x51C6C697),
+        AESx(0x23E8E8CB), AESx(0x7CDDDDA1), AESx(0x9C7474E8), AESx(0x211F1F3E),
+        AESx(0xDD4B4B96), AESx(0xDCBDBD61), AESx(0x868B8B0D), AESx(0x858A8A0F),
+        AESx(0x907070E0), AESx(0x423E3E7C), AESx(0xC4B5B571), AESx(0xAA6666CC),
+        AESx(0xD8484890), AESx(0x05030306), AESx(0x01F6F6F7), AESx(0x120E0E1C),
+        AESx(0xA36161C2), AESx(0x5F35356A), AESx(0xF95757AE), AESx(0xD0B9B969),
+        AESx(0x91868617), AESx(0x58C1C199), AESx(0x271D1D3A), AESx(0xB99E9E27),
+        AESx(0x38E1E1D9), AESx(0x13F8F8EB), AESx(0xB398982B), AESx(0x33111122),
+        AESx(0xBB6969D2), AESx(0x70D9D9A9), AESx(0x898E8E07), AESx(0xA7949433),
+        AESx(0xB69B9B2D), AESx(0x221E1E3C), AESx(0x92878715), AESx(0x20E9E9C9),
+        AESx(0x49CECE87), AESx(0xFF5555AA), AESx(0x78282850), AESx(0x7ADFDFA5),
+        AESx(0x8F8C8C03), AESx(0xF8A1A159), AESx(0x80898909), AESx(0x170D0D1A),
+        AESx(0xDABFBF65), AESx(0x31E6E6D7), AESx(0xC6424284), AESx(0xB86868D0),
+        AESx(0xC3414182), AESx(0xB0999929), AESx(0x772D2D5A), AESx(0x110F0F1E),
+        AESx(0xCBB0B07B), AESx(0xFC5454A8), AESx(0xD6BBBB6D), AESx(0x3A16162C)
+};
+
+static const sph_u32 AES1[256] = {
+        AESx(0x6363C6A5), AESx(0x7C7CF884), AESx(0x7777EE99), AESx(0x7B7BF68D),
+        AESx(0xF2F2FF0D), AESx(0x6B6BD6BD), AESx(0x6F6FDEB1), AESx(0xC5C59154),
+        AESx(0x30306050), AESx(0x01010203), AESx(0x6767CEA9), AESx(0x2B2B567D),
+        AESx(0xFEFEE719), AESx(0xD7D7B562), AESx(0xABAB4DE6), AESx(0x7676EC9A),
+        AESx(0xCACA8F45), AESx(0x82821F9D), AESx(0xC9C98940), AESx(0x7D7DFA87),
+        AESx(0xFAFAEF15), AESx(0x5959B2EB), AESx(0x47478EC9), AESx(0xF0F0FB0B),
+        AESx(0xADAD41EC), AESx(0xD4D4B367), AESx(0xA2A25FFD), AESx(0xAFAF45EA),
+        AESx(0x9C9C23BF), AESx(0xA4A453F7), AESx(0x7272E496), AESx(0xC0C09B5B),
+        AESx(0xB7B775C2), AESx(0xFDFDE11C), AESx(0x93933DAE), AESx(0x26264C6A),
+        AESx(0x36366C5A), AESx(0x3F3F7E41), AESx(0xF7F7F502), AESx(0xCCCC834F),
+        AESx(0x3434685C), AESx(0xA5A551F4), AESx(0xE5E5D134), AESx(0xF1F1F908),
+        AESx(0x7171E293), AESx(0xD8D8AB73), AESx(0x31316253), AESx(0x15152A3F),
+        AESx(0x0404080C), AESx(0xC7C79552), AESx(0x23234665), AESx(0xC3C39D5E),
+        AESx(0x18183028), AESx(0x969637A1), AESx(0x05050A0F), AESx(0x9A9A2FB5),
+        AESx(0x07070E09), AESx(0x12122436), AESx(0x80801B9B), AESx(0xE2E2DF3D),
+        AESx(0xEBEBCD26), AESx(0x27274E69), AESx(0xB2B27FCD), AESx(0x7575EA9F),
+        AESx(0x0909121B), AESx(0x83831D9E), AESx(0x2C2C5874), AESx(0x1A1A342E),
+        AESx(0x1B1B362D), AESx(0x6E6EDCB2), AESx(0x5A5AB4EE), AESx(0xA0A05BFB),
+        AESx(0x5252A4F6), AESx(0x3B3B764D), AESx(0xD6D6B761), AESx(0xB3B37DCE),
+        AESx(0x2929527B), AESx(0xE3E3DD3E), AESx(0x2F2F5E71), AESx(0x84841397),
+        AESx(0x5353A6F5), AESx(0xD1D1B968), AESx(0x00000000), AESx(0xEDEDC12C),
+        AESx(0x20204060), AESx(0xFCFCE31F), AESx(0xB1B179C8), AESx(0x5B5BB6ED),
+        AESx(0x6A6AD4BE), AESx(0xCBCB8D46), AESx(0xBEBE67D9), AESx(0x3939724B),
+        AESx(0x4A4A94DE), AESx(0x4C4C98D4), AESx(0x5858B0E8), AESx(0xCFCF854A),
+        AESx(0xD0D0BB6B), AESx(0xEFEFC52A), AESx(0xAAAA4FE5), AESx(0xFBFBED16),
+        AESx(0x434386C5), AESx(0x4D4D9AD7), AESx(0x33336655), AESx(0x85851194),
+        AESx(0x45458ACF), AESx(0xF9F9E910), AESx(0x02020406), AESx(0x7F7FFE81),
+        AESx(0x5050A0F0), AESx(0x3C3C7844), AESx(0x9F9F25BA), AESx(0xA8A84BE3),
+        AESx(0x5151A2F3), AESx(0xA3A35DFE), AESx(0x404080C0), AESx(0x8F8F058A),
+        AESx(0x92923FAD), AESx(0x9D9D21BC), AESx(0x38387048), AESx(0xF5F5F104),
+        AESx(0xBCBC63DF), AESx(0xB6B677C1), AESx(0xDADAAF75), AESx(0x21214263),
+        AESx(0x10102030), AESx(0xFFFFE51A), AESx(0xF3F3FD0E), AESx(0xD2D2BF6D),
+        AESx(0xCDCD814C), AESx(0x0C0C1814), AESx(0x13132635), AESx(0xECECC32F),
+        AESx(0x5F5FBEE1), AESx(0x979735A2), AESx(0x444488CC), AESx(0x17172E39),
+        AESx(0xC4C49357), AESx(0xA7A755F2), AESx(0x7E7EFC82), AESx(0x3D3D7A47),
+        AESx(0x6464C8AC), AESx(0x5D5DBAE7), AESx(0x1919322B), AESx(0x7373E695),
+        AESx(0x6060C0A0), AESx(0x81811998), AESx(0x4F4F9ED1), AESx(0xDCDCA37F),
+        AESx(0x22224466), AESx(0x2A2A547E), AESx(0x90903BAB), AESx(0x88880B83),
+        AESx(0x46468CCA), AESx(0xEEEEC729), AESx(0xB8B86BD3), AESx(0x1414283C),
+        AESx(0xDEDEA779), AESx(0x5E5EBCE2), AESx(0x0B0B161D), AESx(0xDBDBAD76),
+        AESx(0xE0E0DB3B), AESx(0x32326456), AESx(0x3A3A744E), AESx(0x0A0A141E),
+        AESx(0x494992DB), AESx(0x06060C0A), AESx(0x2424486C), AESx(0x5C5CB8E4),
+        AESx(0xC2C29F5D), AESx(0xD3D3BD6E), AESx(0xACAC43EF), AESx(0x6262C4A6),
+        AESx(0x919139A8), AESx(0x959531A4), AESx(0xE4E4D337), AESx(0x7979F28B),
+        AESx(0xE7E7D532), AESx(0xC8C88B43), AESx(0x37376E59), AESx(0x6D6DDAB7),
+        AESx(0x8D8D018C), AESx(0xD5D5B164), AESx(0x4E4E9CD2), AESx(0xA9A949E0),
+        AESx(0x6C6CD8B4), AESx(0x5656ACFA), AESx(0xF4F4F307), AESx(0xEAEACF25),
+        AESx(0x6565CAAF), AESx(0x7A7AF48E), AESx(0xAEAE47E9), AESx(0x08081018),
+        AESx(0xBABA6FD5), AESx(0x7878F088), AESx(0x25254A6F), AESx(0x2E2E5C72),
+        AESx(0x1C1C3824), AESx(0xA6A657F1), AESx(0xB4B473C7), AESx(0xC6C69751),
+        AESx(0xE8E8CB23), AESx(0xDDDDA17C), AESx(0x7474E89C), AESx(0x1F1F3E21),
+        AESx(0x4B4B96DD), AESx(0xBDBD61DC), AESx(0x8B8B0D86), AESx(0x8A8A0F85),
+        AESx(0x7070E090), AESx(0x3E3E7C42), AESx(0xB5B571C4), AESx(0x6666CCAA),
+        AESx(0x484890D8), AESx(0x03030605), AESx(0xF6F6F701), AESx(0x0E0E1C12),
+        AESx(0x6161C2A3), AESx(0x35356A5F), AESx(0x5757AEF9), AESx(0xB9B969D0),
+        AESx(0x86861791), AESx(0xC1C19958), AESx(0x1D1D3A27), AESx(0x9E9E27B9),
+        AESx(0xE1E1D938), AESx(0xF8F8EB13), AESx(0x98982BB3), AESx(0x11112233),
+        AESx(0x6969D2BB), AESx(0xD9D9A970), AESx(0x8E8E0789), AESx(0x949433A7),
+        AESx(0x9B9B2DB6), AESx(0x1E1E3C22), AESx(0x87871592), AESx(0xE9E9C920),
+        AESx(0xCECE8749), AESx(0x5555AAFF), AESx(0x28285078), AESx(0xDFDFA57A),
+        AESx(0x8C8C038F), AESx(0xA1A159F8), AESx(0x89890980), AESx(0x0D0D1A17),
+        AESx(0xBFBF65DA), AESx(0xE6E6D731), AESx(0x424284C6), AESx(0x6868D0B8),
+        AESx(0x414182C3), AESx(0x999929B0), AESx(0x2D2D5A77), AESx(0x0F0F1E11),
+        AESx(0xB0B07BCB), AESx(0x5454A8FC), AESx(0xBBBB6DD6), AESx(0x16162C3A)
+};
+
+static const sph_u32 AES2[256] = {
+        AESx(0x63C6A563), AESx(0x7CF8847C), AESx(0x77EE9977), AESx(0x7BF68D7B),
+        AESx(0xF2FF0DF2), AESx(0x6BD6BD6B), AESx(0x6FDEB16F), AESx(0xC59154C5),
+        AESx(0x30605030), AESx(0x01020301), AESx(0x67CEA967), AESx(0x2B567D2B),
+        AESx(0xFEE719FE), AESx(0xD7B562D7), AESx(0xAB4DE6AB), AESx(0x76EC9A76),
+        AESx(0xCA8F45CA), AESx(0x821F9D82), AESx(0xC98940C9), AESx(0x7DFA877D),
+        AESx(0xFAEF15FA), AESx(0x59B2EB59), AESx(0x478EC947), AESx(0xF0FB0BF0),
+        AESx(0xAD41ECAD), AESx(0xD4B367D4), AESx(0xA25FFDA2), AESx(0xAF45EAAF),
+        AESx(0x9C23BF9C), AESx(0xA453F7A4), AESx(0x72E49672), AESx(0xC09B5BC0),
+        AESx(0xB775C2B7), AESx(0xFDE11CFD), AESx(0x933DAE93), AESx(0x264C6A26),
+        AESx(0x366C5A36), AESx(0x3F7E413F), AESx(0xF7F502F7), AESx(0xCC834FCC),
+        AESx(0x34685C34), AESx(0xA551F4A5), AESx(0xE5D134E5), AESx(0xF1F908F1),
+        AESx(0x71E29371), AESx(0xD8AB73D8), AESx(0x31625331), AESx(0x152A3F15),
+        AESx(0x04080C04), AESx(0xC79552C7), AESx(0x23466523), AESx(0xC39D5EC3),
+        AESx(0x18302818), AESx(0x9637A196), AESx(0x050A0F05), AESx(0x9A2FB59A),
+        AESx(0x070E0907), AESx(0x12243612), AESx(0x801B9B80), AESx(0xE2DF3DE2),
+        AESx(0xEBCD26EB), AESx(0x274E6927), AESx(0xB27FCDB2), AESx(0x75EA9F75),
+        AESx(0x09121B09), AESx(0x831D9E83), AESx(0x2C58742C), AESx(0x1A342E1A),
+        AESx(0x1B362D1B), AESx(0x6EDCB26E), AESx(0x5AB4EE5A), AESx(0xA05BFBA0),
+        AESx(0x52A4F652), AESx(0x3B764D3B), AESx(0xD6B761D6), AESx(0xB37DCEB3),
+        AESx(0x29527B29), AESx(0xE3DD3EE3), AESx(0x2F5E712F), AESx(0x84139784),
+        AESx(0x53A6F553), AESx(0xD1B968D1), AESx(0x00000000), AESx(0xEDC12CED),
+        AESx(0x20406020), AESx(0xFCE31FFC), AESx(0xB179C8B1), AESx(0x5BB6ED5B),
+        AESx(0x6AD4BE6A), AESx(0xCB8D46CB), AESx(0xBE67D9BE), AESx(0x39724B39),
+        AESx(0x4A94DE4A), AESx(0x4C98D44C), AESx(0x58B0E858), AESx(0xCF854ACF),
+        AESx(0xD0BB6BD0), AESx(0xEFC52AEF), AESx(0xAA4FE5AA), AESx(0xFBED16FB),
+        AESx(0x4386C543), AESx(0x4D9AD74D), AESx(0x33665533), AESx(0x85119485),
+        AESx(0x458ACF45), AESx(0xF9E910F9), AESx(0x02040602), AESx(0x7FFE817F),
+        AESx(0x50A0F050), AESx(0x3C78443C), AESx(0x9F25BA9F), AESx(0xA84BE3A8),
+        AESx(0x51A2F351), AESx(0xA35DFEA3), AESx(0x4080C040), AESx(0x8F058A8F),
+        AESx(0x923FAD92), AESx(0x9D21BC9D), AESx(0x38704838), AESx(0xF5F104F5),
+        AESx(0xBC63DFBC), AESx(0xB677C1B6), AESx(0xDAAF75DA), AESx(0x21426321),
+        AESx(0x10203010), AESx(0xFFE51AFF), AESx(0xF3FD0EF3), AESx(0xD2BF6DD2),
+        AESx(0xCD814CCD), AESx(0x0C18140C), AESx(0x13263513), AESx(0xECC32FEC),
+        AESx(0x5FBEE15F), AESx(0x9735A297), AESx(0x4488CC44), AESx(0x172E3917),
+        AESx(0xC49357C4), AESx(0xA755F2A7), AESx(0x7EFC827E), AESx(0x3D7A473D),
+        AESx(0x64C8AC64), AESx(0x5DBAE75D), AESx(0x19322B19), AESx(0x73E69573),
+        AESx(0x60C0A060), AESx(0x81199881), AESx(0x4F9ED14F), AESx(0xDCA37FDC),
+        AESx(0x22446622), AESx(0x2A547E2A), AESx(0x903BAB90), AESx(0x880B8388),
+        AESx(0x468CCA46), AESx(0xEEC729EE), AESx(0xB86BD3B8), AESx(0x14283C14),
+        AESx(0xDEA779DE), AESx(0x5EBCE25E), AESx(0x0B161D0B), AESx(0xDBAD76DB),
+        AESx(0xE0DB3BE0), AESx(0x32645632), AESx(0x3A744E3A), AESx(0x0A141E0A),
+        AESx(0x4992DB49), AESx(0x060C0A06), AESx(0x24486C24), AESx(0x5CB8E45C),
+        AESx(0xC29F5DC2), AESx(0xD3BD6ED3), AESx(0xAC43EFAC), AESx(0x62C4A662),
+        AESx(0x9139A891), AESx(0x9531A495), AESx(0xE4D337E4), AESx(0x79F28B79),
+        AESx(0xE7D532E7), AESx(0xC88B43C8), AESx(0x376E5937), AESx(0x6DDAB76D),
+        AESx(0x8D018C8D), AESx(0xD5B164D5), AESx(0x4E9CD24E), AESx(0xA949E0A9),
+        AESx(0x6CD8B46C), AESx(0x56ACFA56), AESx(0xF4F307F4), AESx(0xEACF25EA),
+        AESx(0x65CAAF65), AESx(0x7AF48E7A), AESx(0xAE47E9AE), AESx(0x08101808),
+        AESx(0xBA6FD5BA), AESx(0x78F08878), AESx(0x254A6F25), AESx(0x2E5C722E),
+        AESx(0x1C38241C), AESx(0xA657F1A6), AESx(0xB473C7B4), AESx(0xC69751C6),
+        AESx(0xE8CB23E8), AESx(0xDDA17CDD), AESx(0x74E89C74), AESx(0x1F3E211F),
+        AESx(0x4B96DD4B), AESx(0xBD61DCBD), AESx(0x8B0D868B), AESx(0x8A0F858A),
+        AESx(0x70E09070), AESx(0x3E7C423E), AESx(0xB571C4B5), AESx(0x66CCAA66),
+        AESx(0x4890D848), AESx(0x03060503), AESx(0xF6F701F6), AESx(0x0E1C120E),
+        AESx(0x61C2A361), AESx(0x356A5F35), AESx(0x57AEF957), AESx(0xB969D0B9),
+        AESx(0x86179186), AESx(0xC19958C1), AESx(0x1D3A271D), AESx(0x9E27B99E),
+        AESx(0xE1D938E1), AESx(0xF8EB13F8), AESx(0x982BB398), AESx(0x11223311),
+        AESx(0x69D2BB69), AESx(0xD9A970D9), AESx(0x8E07898E), AESx(0x9433A794),
+        AESx(0x9B2DB69B), AESx(0x1E3C221E), AESx(0x87159287), AESx(0xE9C920E9),
+        AESx(0xCE8749CE), AESx(0x55AAFF55), AESx(0x28507828), AESx(0xDFA57ADF),
+        AESx(0x8C038F8C), AESx(0xA159F8A1), AESx(0x89098089), AESx(0x0D1A170D),
+        AESx(0xBF65DABF), AESx(0xE6D731E6), AESx(0x4284C642), AESx(0x68D0B868),
+        AESx(0x4182C341), AESx(0x9929B099), AESx(0x2D5A772D), AESx(0x0F1E110F),
+        AESx(0xB07BCBB0), AESx(0x54A8FC54), AESx(0xBB6DD6BB), AESx(0x162C3A16)
+};
+
+static const sph_u32 AES3[256] = {
+        AESx(0xC6A56363), AESx(0xF8847C7C), AESx(0xEE997777), AESx(0xF68D7B7B),
+        AESx(0xFF0DF2F2), AESx(0xD6BD6B6B), AESx(0xDEB16F6F), AESx(0x9154C5C5),
+        AESx(0x60503030), AESx(0x02030101), AESx(0xCEA96767), AESx(0x567D2B2B),
+        AESx(0xE719FEFE), AESx(0xB562D7D7), AESx(0x4DE6ABAB), AESx(0xEC9A7676),
+        AESx(0x8F45CACA), AESx(0x1F9D8282), AESx(0x8940C9C9), AESx(0xFA877D7D),
+        AESx(0xEF15FAFA), AESx(0xB2EB5959), AESx(0x8EC94747), AESx(0xFB0BF0F0),
+        AESx(0x41ECADAD), AESx(0xB367D4D4), AESx(0x5FFDA2A2), AESx(0x45EAAFAF),
+        AESx(0x23BF9C9C), AESx(0x53F7A4A4), AESx(0xE4967272), AESx(0x9B5BC0C0),
+        AESx(0x75C2B7B7), AESx(0xE11CFDFD), AESx(0x3DAE9393), AESx(0x4C6A2626),
+        AESx(0x6C5A3636), AESx(0x7E413F3F), AESx(0xF502F7F7), AESx(0x834FCCCC),
+        AESx(0x685C3434), AESx(0x51F4A5A5), AESx(0xD134E5E5), AESx(0xF908F1F1),
+        AESx(0xE2937171), AESx(0xAB73D8D8), AESx(0x62533131), AESx(0x2A3F1515),
+        AESx(0x080C0404), AESx(0x9552C7C7), AESx(0x46652323), AESx(0x9D5EC3C3),
+        AESx(0x30281818), AESx(0x37A19696), AESx(0x0A0F0505), AESx(0x2FB59A9A),
+        AESx(0x0E090707), AESx(0x24361212), AESx(0x1B9B8080), AESx(0xDF3DE2E2),
+        AESx(0xCD26EBEB), AESx(0x4E692727), AESx(0x7FCDB2B2), AESx(0xEA9F7575),
+        AESx(0x121B0909), AESx(0x1D9E8383), AESx(0x58742C2C), AESx(0x342E1A1A),
+        AESx(0x362D1B1B), AESx(0xDCB26E6E), AESx(0xB4EE5A5A), AESx(0x5BFBA0A0),
+        AESx(0xA4F65252), AESx(0x764D3B3B), AESx(0xB761D6D6), AESx(0x7DCEB3B3),
+        AESx(0x527B2929), AESx(0xDD3EE3E3), AESx(0x5E712F2F), AESx(0x13978484),
+        AESx(0xA6F55353), AESx(0xB968D1D1), AESx(0x00000000), AESx(0xC12CEDED),
+        AESx(0x40602020), AESx(0xE31FFCFC), AESx(0x79C8B1B1), AESx(0xB6ED5B5B),
+        AESx(0xD4BE6A6A), AESx(0x8D46CBCB), AESx(0x67D9BEBE), AESx(0x724B3939),
+        AESx(0x94DE4A4A), AESx(0x98D44C4C), AESx(0xB0E85858), AESx(0x854ACFCF),
+        AESx(0xBB6BD0D0), AESx(0xC52AEFEF), AESx(0x4FE5AAAA), AESx(0xED16FBFB),
+        AESx(0x86C54343), AESx(0x9AD74D4D), AESx(0x66553333), AESx(0x11948585),
+        AESx(0x8ACF4545), AESx(0xE910F9F9), AESx(0x04060202), AESx(0xFE817F7F),
+        AESx(0xA0F05050), AESx(0x78443C3C), AESx(0x25BA9F9F), AESx(0x4BE3A8A8),
+        AESx(0xA2F35151), AESx(0x5DFEA3A3), AESx(0x80C04040), AESx(0x058A8F8F),
+        AESx(0x3FAD9292), AESx(0x21BC9D9D), AESx(0x70483838), AESx(0xF104F5F5),
+        AESx(0x63DFBCBC), AESx(0x77C1B6B6), AESx(0xAF75DADA), AESx(0x42632121),
+        AESx(0x20301010), AESx(0xE51AFFFF), AESx(0xFD0EF3F3), AESx(0xBF6DD2D2),
+        AESx(0x814CCDCD), AESx(0x18140C0C), AESx(0x26351313), AESx(0xC32FECEC),
+        AESx(0xBEE15F5F), AESx(0x35A29797), AESx(0x88CC4444), AESx(0x2E391717),
+        AESx(0x9357C4C4), AESx(0x55F2A7A7), AESx(0xFC827E7E), AESx(0x7A473D3D),
+        AESx(0xC8AC6464), AESx(0xBAE75D5D), AESx(0x322B1919), AESx(0xE6957373),
+        AESx(0xC0A06060), AESx(0x19988181), AESx(0x9ED14F4F), AESx(0xA37FDCDC),
+        AESx(0x44662222), AESx(0x547E2A2A), AESx(0x3BAB9090), AESx(0x0B838888),
+        AESx(0x8CCA4646), AESx(0xC729EEEE), AESx(0x6BD3B8B8), AESx(0x283C1414),
+        AESx(0xA779DEDE), AESx(0xBCE25E5E), AESx(0x161D0B0B), AESx(0xAD76DBDB),
+        AESx(0xDB3BE0E0), AESx(0x64563232), AESx(0x744E3A3A), AESx(0x141E0A0A),
+        AESx(0x92DB4949), AESx(0x0C0A0606), AESx(0x486C2424), AESx(0xB8E45C5C),
+        AESx(0x9F5DC2C2), AESx(0xBD6ED3D3), AESx(0x43EFACAC), AESx(0xC4A66262),
+        AESx(0x39A89191), AESx(0x31A49595), AESx(0xD337E4E4), AESx(0xF28B7979),
+        AESx(0xD532E7E7), AESx(0x8B43C8C8), AESx(0x6E593737), AESx(0xDAB76D6D),
+        AESx(0x018C8D8D), AESx(0xB164D5D5), AESx(0x9CD24E4E), AESx(0x49E0A9A9),
+        AESx(0xD8B46C6C), AESx(0xACFA5656), AESx(0xF307F4F4), AESx(0xCF25EAEA),
+        AESx(0xCAAF6565), AESx(0xF48E7A7A), AESx(0x47E9AEAE), AESx(0x10180808),
+        AESx(0x6FD5BABA), AESx(0xF0887878), AESx(0x4A6F2525), AESx(0x5C722E2E),
+        AESx(0x38241C1C), AESx(0x57F1A6A6), AESx(0x73C7B4B4), AESx(0x9751C6C6),
+        AESx(0xCB23E8E8), AESx(0xA17CDDDD), AESx(0xE89C7474), AESx(0x3E211F1F),
+        AESx(0x96DD4B4B), AESx(0x61DCBDBD), AESx(0x0D868B8B), AESx(0x0F858A8A),
+        AESx(0xE0907070), AESx(0x7C423E3E), AESx(0x71C4B5B5), AESx(0xCCAA6666),
+        AESx(0x90D84848), AESx(0x06050303), AESx(0xF701F6F6), AESx(0x1C120E0E),
+        AESx(0xC2A36161), AESx(0x6A5F3535), AESx(0xAEF95757), AESx(0x69D0B9B9),
+        AESx(0x17918686), AESx(0x9958C1C1), AESx(0x3A271D1D), AESx(0x27B99E9E),
+        AESx(0xD938E1E1), AESx(0xEB13F8F8), AESx(0x2BB39898), AESx(0x22331111),
+        AESx(0xD2BB6969), AESx(0xA970D9D9), AESx(0x07898E8E), AESx(0x33A79494),
+        AESx(0x2DB69B9B), AESx(0x3C221E1E), AESx(0x15928787), AESx(0xC920E9E9),
+        AESx(0x8749CECE), AESx(0xAAFF5555), AESx(0x50782828), AESx(0xA57ADFDF),
+        AESx(0x038F8C8C), AESx(0x59F8A1A1), AESx(0x09808989), AESx(0x1A170D0D),
+        AESx(0x65DABFBF), AESx(0xD731E6E6), AESx(0x84C64242), AESx(0xD0B86868),
+        AESx(0x82C34141), AESx(0x29B09999), AESx(0x5A772D2D), AESx(0x1E110F0F),
+        AESx(0x7BCBB0B0), AESx(0xA8FC5454), AESx(0x6DD6BBBB), AESx(0x2C3A1616)
+};
+
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/sha3/gost_streebog.c b/sha3/gost_streebog.c
new file mode 100644
index 0000000..678486f
--- /dev/null
+++ b/sha3/gost_streebog.c
@@ -0,0 +1,1023 @@
+/* GOST-R Streebog hash function for sib algo SibCoin */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <math.h>
+
+#include "gost_streebog.h"
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+
+#ifdef _MSC_VER
+#pragma warning (disable: 4146)
+#endif
+
+//--------------------------------------------------------------------------------------------
+//
+// streebog implementation
+//
+//--------------------------------------------------------------------------------------------
+
+
+// Tables for function F
+static const sph_u64 TG[8][256] = {{
+                                           0xE6F87E5C5B711FD0,0x258377800924FA16,0xC849E07E852EA4A8,0x5B4686A18F06C16A,
+                                           0x0B32E9A2D77B416E,0xABDA37A467815C66,0xF61796A81A686676,0xF5DC0B706391954B,
+                                           0x4862F38DB7E64BF1,0xFF5C629A68BD85C5,0xCB827DA6FCD75795,0x66D36DAF69B9F089,
+                                           0x356C9F74483D83B0,0x7CBCECB1238C99A1,0x36A702AC31C4708D,0x9EB6A8D02FBCDFD6,
+                                           0x8B19FA51E5B3AE37,0x9CCFB5408A127D0B,0xBC0C78B508208F5A,0xE533E3842288ECED,
+                                           0xCEC2C7D377C15FD2,0xEC7817B6505D0F5E,0xB94CC2C08336871D,0x8C205DB4CB0B04AD,
+                                           0x763C855B28A0892F,0x588D1B79F6FF3257,0x3FECF69E4311933E,0x0FC0D39F803A18C9,
+                                           0xEE010A26F5F3AD83,0x10EFE8F4411979A6,0x5DCDA10C7DE93A10,0x4A1BEE1D1248E92C,
+                                           0x53BFF2DB21847339,0xB4F50CCFA6A23D09,0x5FB4BC9CD84798CD,0xE88A2D8B071C56F9,
+                                           0x7F7771695A756A9C,0xC5F02E71A0BA1EBC,0xA663F9AB4215E672,0x2EB19E22DE5FBB78,
+                                           0x0DB9CE0F2594BA14,0x82520E6397664D84,0x2F031E6A0208EA98,0x5C7F2144A1BE6BF0,
+                                           0x7A37CB1CD16362DB,0x83E08E2B4B311C64,0xCF70479BAB960E32,0x856BA986B9DEE71E,
+                                           0xB5478C877AF56CE9,0xB8FE42885F61D6FD,0x1BDD0156966238C8,0x622157923EF8A92E,
+                                           0xFC97FF42114476F8,0x9D7D350856452CEB,0x4C90C9B0E0A71256,0x2308502DFBCB016C,
+                                           0x2D7A03FAA7A64845,0xF46E8B38BFC6C4AB,0xBDBEF8FDD477DEBA,0x3AAC4CEBC8079B79,
+                                           0xF09CB105E8879D0C,0x27FA6A10AC8A58CB,0x8960E7C1401D0CEA,0x1A6F811E4A356928,
+                                           0x90C4FB0773D196FF,0x43501A2F609D0A9F,0xF7A516E0C63F3796,0x1CE4A6B3B8DA9252,
+                                           0x1324752C38E08A9B,0xA5A864733BEC154F,0x2BF124575549B33F,0xD766DB15440DC5C7,
+                                           0xA7D179E39E42B792,0xDADF151A61997FD3,0x86A0345EC0271423,0x38D5517B6DA939A4,
+                                           0x6518F077104003B4,0x02791D90A5AEA2DD,0x88D267899C4A5D0A,0x930F66DF0A2865C2,
+                                           0x4EE9D4204509B08B,0x325538916685292A,0x412907BFC533A842,0xB27E2B62544DC673,
+                                           0x6C5304456295E007,0x5AF406E95351908A,0x1F2F3B6BC123616F,0xC37B09DC5255E5C6,
+                                           0x3967D133B1FE6844,0x298839C7F0E711E2,0x409B87F71964F9A2,0xE938ADC3DB4B0719,
+                                           0x0C0B4E47F9C3EBF4,0x5534D576D36B8843,0x4610A05AEB8B02D8,0x20C3CDF58232F251,
+                                           0x6DE1840DBEC2B1E7,0xA0E8DE06B0FA1D08,0x7B854B540D34333B,0x42E29A67BCCA5B7F,
+                                           0xD8A6088AC437DD0E,0xC63BB3A9D943ED81,0x21714DBD5E65A3B1,0x6761EDE7B5EEA169,
+                                           0x2431F7C8D573ABF6,0xD51FC685E1A3671A,0x5E063CD40410C92D,0x283AB98F2CB04002,
+                                           0x8FEBC06CB2F2F790,0x17D64F116FA1D33C,0xE07359F1A99EE4AA,0x784ED68C74CDC006,
+                                           0x6E2A19D5C73B42DA,0x8712B4161C7045C3,0x371582E4ED93216D,0xACE390414939F6FC,
+                                           0x7EC5F12186223B7C,0xC0B094042BAC16FB,0xF9D745379A527EBF,0x737C3F2EA3B68168,
+                                           0x33E7B8D9BAD278CA,0xA9A32A34C22FFEBB,0xE48163CCFEDFBD0D,0x8E5940246EA5A670,
+                                           0x51C6EF4B842AD1E4,0x22BAD065279C508C,0xD91488C218608CEE,0x319EA5491F7CDA17,
+                                           0xD394E128134C9C60,0x094BF43272D5E3B3,0x9BF612A5A4AAD791,0xCCBBDA43D26FFD0F,
+                                           0x34DE1F3C946AD250,0x4F5B5468995EE16B,0xDF9FAF6FEA8F7794,0x2648EA5870DD092B,
+                                           0xBFC7E56D71D97C67,0xDDE6B2FF4F21D549,0x3C276B463AE86003,0x91767B4FAF86C71F,
+                                           0x68A13E7835D4B9A0,0xB68C115F030C9FD4,0x141DD2C916582001,0x983D8F7DDD5324AC,
+                                           0x64AA703FCC175254,0xC2C989948E02B426,0x3E5E76D69F46C2DE,0x50746F03587D8004,
+                                           0x45DB3D829272F1E5,0x60584A029B560BF3,0xFBAE58A73FFCDC62,0xA15A5E4E6CAD4CE8,
+                                           0x4BA96E55CE1FB8CC,0x08F9747AAE82B253,0xC102144CF7FB471B,0x9F042898F3EB8E36,
+                                           0x068B27ADF2EFFB7A,0xEDCA97FE8C0A5EBE,0x778E0513F4F7D8CF,0x302C2501C32B8BF7,
+                                           0x8D92DDFC175C554D,0xF865C57F46052F5F,0xEAF3301BA2B2F424,0xAA68B7ECBBD60D86,
+                                           0x998F0F350104754C,0x0000000000000000,0xF12E314D34D0CCEC,0x710522BE061823B5,
+                                           0xAF280D9930C005C1,0x97FD5CE25D693C65,0x19A41CC633CC9A15,0x95844172F8C79EB8,
+                                           0xDC5432B7937684A9,0x9436C13A2490CF58,0x802B13F332C8EF59,0xC442AE397CED4F5C,
+                                           0xFA1CD8EFE3AB8D82,0xF2E5AC954D293FD1,0x6AD823E8907A1B7D,0x4D2249F83CF043B6,
+                                           0x03CB9DD879F9F33D,0xDE2D2F2736D82674,0x2A43A41F891EE2DF,0x6F98999D1B6C133A,
+                                           0xD4AD46CD3DF436FA,0xBB35DF50269825C0,0x964FDCAA813E6D85,0xEB41B0537EE5A5C4,
+                                           0x0540BA758B160847,0xA41AE43BE7BB44AF,0xE3B8C429D0671797,0x819993BBEE9FBEB9,
+                                           0xAE9A8DD1EC975421,0xF3572CDD917E6E31,0x6393D7DAE2AFF8CE,0x47A2201237DC5338,
+                                           0xA32343DEC903EE35,0x79FC56C4A89A91E6,0x01B28048DC5751E0,0x1296F564E4B7DB7B,
+                                           0x75F7188351597A12,0xDB6D9552BDCE2E33,0x1E9DBB231D74308F,0x520D7293FDD322D9,
+                                           0xE20A44610C304677,0xFEEEE2D2B4EAD425,0xCA30FDEE20800675,0x61EACA4A47015A13,
+                                           0xE74AFE1487264E30,0x2CC883B27BF119A5,0x1664CF59B3F682DC,0xA811AA7C1E78AF5B,
+                                           0x1D5626FB648DC3B2,0xB73E9117DF5BCE34,0xD05F7CF06AB56F5D,0xFD257F0ACD132718,
+                                           0x574DC8E676C52A9E,0x0739A7E52EB8AA9A,0x5486553E0F3CD9A3,0x56FF48AEAA927B7E,
+                                           0xBE756525AD8E2D87,0x7D0E6CF9FFDBC841,0x3B1ECCA31450CA99,0x6913BE30E983E840,
+                                           0xAD511009956EA71C,0xB1B5B6BA2DB4354E,0x4469BDCA4E25A005,0x15AF5281CA0F71E1,
+                                           0x744598CB8D0E2BF2,0x593F9B312AA863B7,0xEFB38A6E29A4FC63,0x6B6AA3A04C2D4A9D,
+                                           0x3D95EB0EE6BF31E3,0xA291C3961554BFD5,0x18169C8EEF9BCBF5,0x115D68BC9D4E2846,
+                                           0xBA875F18FACF7420,0xD1EDFCB8B6E23EBD,0xB00736F2F1E364AE,0x84D929CE6589B6FE,
+                                           0x70B7A2F6DA4F7255,0x0E7253D75C6D4929,0x04F23A3D574159A7,0x0A8069EA0B2C108E,
+                                           0x49D073C56BB11A11,0x8AAB7A1939E4FFD7,0xCD095A0B0E38ACEF,0xC9FB60365979F548,
+                                           0x92BDE697D67F3422,0xC78933E10514BC61,0xE1C1D9B975C9B54A,0xD2266160CF1BCD80,
+                                           0x9A4492ED78FD8671,0xB3CCAB2A881A9793,0x72CEBF667FE1D088,0xD6D45B5D985A9427
+                                   },{
+                                           0xC811A8058C3F55DE,0x65F5B43196B50619,0xF74F96B1D6706E43,0x859D1E8BCB43D336,
+                                           0x5AAB8A85CCFA3D84,0xF9C7BF99C295FCFD,0xA21FD5A1DE4B630F,0xCDB3EF763B8B456D,
+                                           0x803F59F87CF7C385,0xB27C73BE5F31913C,0x98E3AC6633B04821,0xBF61674C26B8F818,
+                                           0x0FFBC995C4C130C8,0xAAA0862010761A98,0x6057F342210116AA,0xF63C760C0654CC35,
+                                           0x2DDB45CC667D9042,0xBCF45A964BD40382,0x68E8A0C3EF3C6F3D,0xA7BD92D269FF73BC,
+                                           0x290AE20201ED2287,0xB7DE34CDE885818F,0xD901EEA7DD61059B,0xD6FA273219A03553,
+                                           0xD56F1AE874CCCEC9,0xEA31245C2E83F554,0x7034555DA07BE499,0xCE26D2AC56E7BEF7,
+                                           0xFD161857A5054E38,0x6A0E7DA4527436D1,0x5BD86A381CDE9FF2,0xCAF7756231770C32,
+                                           0xB09AAED9E279C8D0,0x5DEF1091C60674DB,0x111046A2515E5045,0x23536CE4729802FC,
+                                           0xC50CBCF7F5B63CFA,0x73A16887CD171F03,0x7D2941AFD9F28DBD,0x3F5E3EB45A4F3B9D,
+                                           0x84EEFE361B677140,0x3DB8E3D3E7076271,0x1A3A28F9F20FD248,0x7EBC7C75B49E7627,
+                                           0x74E5F293C7EB565C,0x18DCF59E4F478BA4,0x0C6EF44FA9ADCB52,0xC699812D98DAC760,
+                                           0x788B06DC6E469D0E,0xFC65F8EA7521EC4E,0x30A5F7219E8E0B55,0x2BEC3F65BCA57B6B,
+                                           0xDDD04969BAF1B75E,0x99904CDBE394EA57,0x14B201D1E6EA40F6,0xBBB0C08241284ADD,
+                                           0x50F20463BF8F1DFF,0xE8D7F93B93CBACB8,0x4D8CB68E477C86E8,0xC1DD1B3992268E3F,
+                                           0x7C5AA11209D62FCB,0x2F3D98ABDB35C9AE,0x671369562BFD5FF5,0x15C1E16C36CEE280,
+                                           0x1D7EB2EDF8F39B17,0xDA94D37DB00DFE01,0x877BC3EC760B8ADA,0xCB8495DFE153AE44,
+                                           0x05A24773B7B410B3,0x12857B783C32ABDF,0x8EB770D06812513B,0x536739B9D2E3E665,
+                                           0x584D57E271B26468,0xD789C78FC9849725,0xA935BBFA7D1AE102,0x8B1537A3DFA64188,
+                                           0xD0CD5D9BC378DE7A,0x4AC82C9A4D80CFB7,0x42777F1B83BDB620,0x72D2883A1D33BD75,
+                                           0x5E7A2D4BAB6A8F41,0xF4DAAB6BBB1C95D9,0x905CFFE7FD8D31B6,0x83AA6422119B381F,
+                                           0xC0AEFB8442022C49,0xA0F908C663033AE3,0xA428AF0804938826,0xADE41C341A8A53C7,
+                                           0xAE7121EE77E6A85D,0xC47F5C4A25929E8C,0xB538E9AA55CDD863,0x06377AA9DAD8EB29,
+                                           0xA18AE87BB3279895,0x6EDFDA6A35E48414,0x6B7D9D19825094A7,0xD41CFA55A4E86CBF,
+                                           0xE5CAEDC9EA42C59C,0xA36C351C0E6FC179,0x5181E4DE6FABBF89,0xFFF0C530184D17D4,
+                                           0x9D41EB1584045892,0x1C0D525028D73961,0xF178EC180CA8856A,0x9A0571018EF811CD,
+                                           0x4091A27C3EF5EFCC,0x19AF15239F6329D2,0x347450EFF91EB990,0xE11B4A078DD27759,
+                                           0xB9561DE5FC601331,0x912F1F5A2DA993C0,0x1654DCB65BA2191A,0x3E2DDE098A6B99EB,
+                                           0x8A66D71E0F82E3FE,0x8C51ADB7D55A08D7,0x4533E50F8941FF7F,0x02E6DD67BD4859EC,
+                                           0xE068AABA5DF6D52F,0xC24826E3FF4A75A5,0x6C39070D88ACDDF8,0x6486548C4691A46F,
+                                           0xD1BEBD26135C7C0C,0xB30F93038F15334A,0x82D9849FC1BF9A69,0x9C320BA85420FAE4,
+                                           0xFA528243AFF90767,0x9ED4D6CFE968A308,0xB825FD582C44B147,0x9B7691BC5EDCB3BB,
+                                           0xC7EA619048FE6516,0x1063A61F817AF233,0x47D538683409A693,0x63C2CE984C6DED30,
+                                           0x2A9FDFD86C81D91D,0x7B1E3B06032A6694,0x666089EBFBD9FD83,0x0A598EE67375207B,
+                                           0x07449A140AFC495F,0x2CA8A571B6593234,0x1F986F8A45BBC2FB,0x381AA4A050B372C2,
+                                           0x5423A3ADD81FAF3A,0x17273C0B8B86BB6C,0xFE83258DC869B5A2,0x287902BFD1C980F1,
+                                           0xF5A94BD66B3837AF,0x88800A79B2CABA12,0x55504310083B0D4C,0xDF36940E07B9EEB2,
+                                           0x04D1A7CE6790B2C5,0x612413FFF125B4DC,0x26F12B97C52C124F,0x86082351A62F28AC,
+                                           0xEF93632F9937E5E7,0x3507B052293A1BE6,0xE72C30AE570A9C70,0xD3586041AE1425E0,
+                                           0xDE4574B3D79D4CC4,0x92BA228040C5685A,0xF00B0CA5DC8C271C,0xBE1287F1F69C5A6E,
+                                           0xF39E317FB1E0DC86,0x495D114020EC342D,0x699B407E3F18CD4B,0xDCA3A9D46AD51528,
+                                           0x0D1D14F279896924,0x0000000000000000,0x593EB75FA196C61E,0x2E4E78160B116BD8,
+                                           0x6D4AE7B058887F8E,0xE65FD013872E3E06,0x7A6DDBBBD30EC4E2,0xAC97FC89CAAEF1B1,
+                                           0x09CCB33C1E19DBE1,0x89F3EAC462EE1864,0x7770CF49AA87ADC6,0x56C57ECA6557F6D6,
+                                           0x03953DDA6D6CFB9A,0x36928D884456E07C,0x1EEB8F37959F608D,0x31D6179C4EAAA923,
+                                           0x6FAC3AD7E5C02662,0x43049FA653991456,0xABD3669DC052B8EE,0xAF02C153A7C20A2B,
+                                           0x3CCB036E3723C007,0x93C9C23D90E1CA2C,0xC33BC65E2F6ED7D3,0x4CFF56339758249E,
+                                           0xB1E94E64325D6AA6,0x37E16D359472420A,0x79F8E661BE623F78,0x5214D90402C74413,
+                                           0x482EF1FDF0C8965B,0x13F69BC5EC1609A9,0x0E88292814E592BE,0x4E198B542A107D72,
+                                           0xCCC00FCBEBAFE71B,0x1B49C844222B703E,0x2564164DA840E9D5,0x20C6513E1FF4F966,
+                                           0xBAC3203F910CE8AB,0xF2EDD1C261C47EF0,0x814CB945ACD361F3,0x95FEB8944A392105,
+                                           0x5C9CF02C1622D6AD,0x971865F3F77178E9,0xBD87BA2B9BF0A1F4,0x444005B259655D09,
+                                           0xED75BE48247FBC0B,0x7596122E17CFF42A,0xB44B091785E97A15,0x966B854E2755DA9F,
+                                           0xEEE0839249134791,0x32432A4623C652B9,0xA8465B47AD3E4374,0xF8B45F2412B15E8B,
+                                           0x2417F6F078644BA3,0xFB2162FE7FDDA511,0x4BBBCC279DA46DC1,0x0173E0BDD024A276,
+                                           0x22208C59A2BCA08A,0x8FC4906DB836F34D,0xE4B90D743A6667EA,0x7147B5E0705F46EF,
+                                           0x2782CB2A1508B039,0xEC065EF5F45B1E7D,0x21B5B183CFD05B10,0xDBE733C060295C77,
+                                           0x9FA73672394C017E,0xCF55321186C31C81,0xD8720E1A0D45A7ED,0x3B8F997A3DDF8958,
+                                           0x3AFC79C7EDFB2B2E,0xE9A4198643EF0ECE,0x5F09CDF67B4E2D37,0x4F6A6BE9FA34DF04,
+                                           0xB6ADD47038A123F9,0x8D224D0A057EAAA1,0xC96248B85C1BF7A8,0xE3FD9760309A2EB5,
+                                           0x0B2A6E5BA351820D,0xEB42C4E1FEA75722,0x948D58299A1D8373,0x7FCF9CC864BAD451,
+                                           0xA55B4FB5D4B72A50,0x08BF5381CE3D7997,0x46A6D8D5E42D04E5,0xD22B80FC7E308796,
+                                           0x57B69E77B57354A0,0x3969441D8097D0B4,0x3330CAFBF3E2F0CF,0xE28E77DDE0BE8CC3,
+                                           0x62B12E259C494F46,0xA6CE726FB9DBD1CA,0x41E242C1EED14DBA,0x76032FF47AA30FB0
+                                   },{
+                                           0x45B268A93ACDE4CC,0xAF7F0BE884549D08,0x048354B3C1468263,0x925435C2C80EFED2,
+                                           0xEE4E37F27FDFFBA7,0x167A33920C60F14D,0xFB123B52EA03E584,0x4A0CAB53FDBB9007,
+                                           0x9DEAF6380F788A19,0xCB48EC558F0CB32A,0xB59DC4B2D6FEF7E0,0xDCDBCA22F4F3ECB6,
+                                           0x11DF5813549A9C40,0xE33FDEDF568ACED3,0xA0C1C8124322E9C3,0x07A56B8158FA6D0D,
+                                           0x77279579B1E1F3DD,0xD9B18B74422AC004,0xB8EC2D9FFFABC294,0xF4ACF8A82D75914F,
+                                           0x7BBF69B1EF2B6878,0xC4F62FAF487AC7E1,0x76CE809CC67E5D0C,0x6711D88F92E4C14C,
+                                           0x627B99D9243DEDFE,0x234AA5C3DFB68B51,0x909B1F15262DBF6D,0x4F66EA054B62BCB5,
+                                           0x1AE2CF5A52AA6AE8,0xBEA053FBD0CE0148,0xED6808C0E66314C9,0x43FE16CD15A82710,
+                                           0xCD049231A06970F6,0xE7BC8A6C97CC4CB0,0x337CE835FCB3B9C0,0x65DEF2587CC780F3,
+                                           0x52214EDE4132BB50,0x95F15E4390F493DF,0x870839625DD2E0F1,0x41313C1AFB8B66AF,
+                                           0x91720AF051B211BC,0x477D427ED4EEA573,0x2E3B4CEEF6E3BE25,0x82627834EB0BCC43,
+                                           0x9C03E3DD78E724C8,0x2877328AD9867DF9,0x14B51945E243B0F2,0x574B0F88F7EB97E2,
+                                           0x88B6FA989AA4943A,0x19C4F068CB168586,0x50EE6409AF11FAEF,0x7DF317D5C04EABA4,
+                                           0x7A567C5498B4C6A9,0xB6BBFB804F42188E,0x3CC22BCF3BC5CD0B,0xD04336EAAA397713,
+                                           0xF02FAC1BEC33132C,0x2506DBA7F0D3488D,0xD7E65D6BF2C31A1E,0x5EB9B2161FF820F5,
+                                           0x842E0650C46E0F9F,0x716BEB1D9E843001,0xA933758CAB315ED4,0x3FE414FDA2792265,
+                                           0x27C9F1701EF00932,0x73A4C1CA70A771BE,0x94184BA6E76B3D0E,0x40D829FF8C14C87E,
+                                           0x0FBEC3FAC77674CB,0x3616A9634A6A9572,0x8F139119C25EF937,0xF545ED4D5AEA3F9E,
+                                           0xE802499650BA387B,0x6437E7BD0B582E22,0xE6559F89E053E261,0x80AD52E305288DFC,
+                                           0x6DC55A23E34B9935,0xDE14E0F51AD0AD09,0xC6390578A659865E,0x96D7617109487CB1,
+                                           0xE2D6CB3A21156002,0x01E915E5779FAED1,0xADB0213F6A77DCB7,0x9880B76EB9A1A6AB,
+                                           0x5D9F8D248644CF9B,0xFD5E4536C5662658,0xF1C6B9FE9BACBDFD,0xEACD6341BE9979C4,
+                                           0xEFA7221708405576,0x510771ECD88E543E,0xC2BA51CB671F043D,0x0AD482AC71AF5879,
+                                           0xFE787A045CDAC936,0xB238AF338E049AED,0xBD866CC94972EE26,0x615DA6EBBD810290,
+                                           0x3295FDD08B2C1711,0xF834046073BF0AEA,0xF3099329758FFC42,0x1CAEB13E7DCFA934,
+                                           0xBA2307481188832B,0x24EFCE42874CE65C,0x0E57D61FB0E9DA1A,0xB3D1BAD6F99B343C,
+                                           0xC0757B1C893C4582,0x2B510DB8403A9297,0x5C7698C1F1DB614A,0x3E0D0118D5E68CB4,
+                                           0xD60F488E855CB4CF,0xAE961E0DF3CB33D9,0x3A8E55AB14A00ED7,0x42170328623789C1,
+                                           0x838B6DD19C946292,0x895FEF7DED3B3AEB,0xCFCBB8E64E4A3149,0x064C7E642F65C3DC,
+                                           0x3D2B3E2A4C5A63DA,0x5BD3F340A9210C47,0xB474D157A1615931,0xAC5934DA1DE87266,
+                                           0x6EE365117AF7765B,0xC86ED36716B05C44,0x9BA6885C201D49C5,0xB905387A88346C45,
+                                           0x131072C4BAB9DDFF,0xBF49461EA751AF99,0xD52977BC1CE05BA1,0xB0F785E46027DB52,
+                                           0x546D30BA6E57788C,0x305AD707650F56AE,0xC987C682612FF295,0xA5AB8944F5FBC571,
+                                           0x7ED528E759F244CA,0x8DDCBBCE2C7DB888,0xAA154ABE328DB1BA,0x1E619BE993ECE88B,
+                                           0x09F2BD9EE813B717,0x7401AA4B285D1CB3,0x21858F143195CAEE,0x48C381841398D1B8,
+                                           0xFCB750D3B2F98889,0x39A86A998D1CE1B9,0x1F888E0CE473465A,0x7899568376978716,
+                                           0x02CF2AD7EE2341BF,0x85C713B5B3F1A14E,0xFF916FE12B4567E7,0x7C1A0230B7D10575,
+                                           0x0C98FCC85ECA9BA5,0xA3E7F720DA9E06AD,0x6A6031A2BBB1F438,0x973E74947ED7D260,
+                                           0x2CF4663918C0FF9A,0x5F50A7F368678E24,0x34D983B4A449D4CD,0x68AF1B755592B587,
+                                           0x7F3C3D022E6DEA1B,0xABFC5F5B45121F6B,0x0D71E92D29553574,0xDFFDF5106D4F03D8,
+                                           0x081BA87B9F8C19C6,0xDB7EA1A3AC0981BB,0xBBCA12AD66172DFA,0x79704366010829C7,
+                                           0x179326777BFF5F9C,0x0000000000000000,0xEB2476A4C906D715,0x724DD42F0738DF6F,
+                                           0xB752EE6538DDB65F,0x37FFBC863DF53BA3,0x8EFA84FCB5C157E6,0xE9EB5C73272596AA,
+                                           0x1B0BDABF2535C439,0x86E12C872A4D4E20,0x9969A28BCE3E087A,0xFAFB2EB79D9C4B55,
+                                           0x056A4156B6D92CB2,0x5A3AE6A5DEBEA296,0x22A3B026A8292580,0x53C85B3B36AD1581,
+                                           0xB11E900117B87583,0xC51F3A4A3FE56930,0xE019E1EDCF3621BD,0xEC811D2591FCBA18,
+                                           0x445B7D4C4D524A1D,0xA8DA6069DCAEF005,0x58F5CC72309DE329,0xD4C062596B7FF570,
+                                           0xCE22AD0339D59F98,0x591CD99747024DF8,0x8B90C5AA03187B54,0xF663D27FC356D0F0,
+                                           0xD8589E9135B56ED5,0x35309651D3D67A1C,0x12F96721CD26732E,0xD28C1C3D441A36AC,
+                                           0x492A946164077F69,0x2D1D73DC6F5F514B,0x6F0A70F40D68D88A,0x60B4B30ECA1EAC41,
+                                           0xD36509D83385987D,0x0B3D97490630F6A8,0x9ECCC90A96C46577,0xA20EE2C5AD01A87C,
+                                           0xE49AB55E0E70A3DE,0xA4429CA182646BA0,0xDA97B446DB962F6A,0xCCED87D4D7F6DE27,
+                                           0x2AB8185D37A53C46,0x9F25DCEFE15BCBA6,0xC19C6EF9FEA3EB53,0xA764A3931BD884CE,
+                                           0x2FD2590B817C10F4,0x56A21A6D80743933,0xE573A0BB79EF0D0F,0x155C0CA095DC1E23,
+                                           0x6C2C4FC694D437E4,0x10364DF623053291,0xDD32DFC7836C4267,0x03263F3299BCEF6E,
+                                           0x66F8CD6AE57B6F9D,0x8C35AE2B5BE21659,0x31B3C2E21290F87F,0x93BD2027BF915003,
+                                           0x69460E90220D1B56,0x299E276FAE19D328,0x63928C3C53A2432F,0x7082FEF8E91B9ED0,
+                                           0xBC6F792C3EED40F7,0x4C40D537D2DE53DB,0x75E8BFAE5FC2B262,0x4DA9C0D2A541FD0A,
+                                           0x4E8FFFE03CFD1264,0x2620E495696FA7E3,0xE1F0F408B8A98F6C,0xD1AA230FDDA6D9C2,
+                                           0xC7D0109DD1C6288F,0x8A79D04F7487D585,0x4694579BA3710BA2,0x38417F7CFA834F68,
+                                           0x1D47A4DB0A5007E5,0x206C9AF1460A643F,0xA128DDF734BD4712,0x8144470672B7232D,
+                                           0xF2E086CC02105293,0x182DE58DBC892B57,0xCAA1F9B0F8931DFB,0x6B892447CC2E5AE9,
+                                           0xF9DD11850420A43B,0x4BE5BEB68A243ED6,0x5584255F19C8D65D,0x3B67404E633FA006,
+                                           0xA68DB6766C472A1F,0xF78AC79AB4C97E21,0xC353442E1080AAEC,0x9A4F9DB95782E714
+                                   },{
+                                           0x05BA7BC82C9B3220,0x31A54665F8B65E4F,0xB1B651F77547F4D4,0x8BFA0D857BA46682,
+                                           0x85A96C5AA16A98BB,0x990FAEF908EB79C9,0xA15E37A247F4A62D,0x76857DCD5D27741E,
+                                           0xF8C50B800A1820BC,0xBE65DCB201F7A2B4,0x666D1B986F9426E7,0x4CC921BF53C4E648,
+                                           0x95410A0F93D9CA42,0x20CDCCAA647BA4EF,0x429A4060890A1871,0x0C4EA4F69B32B38B,
+                                           0xCCDA362DDE354CD3,0x96DC23BC7C5B2FA9,0xC309BB68AA851AB3,0xD26131A73648E013,
+                                           0x021DC52941FC4DB2,0xCD5ADAB7704BE48A,0xA77965D984ED71E6,0x32386FD61734BBA4,
+                                           0xE82D6DD538AB7245,0x5C2147EA6177B4B1,0x5DA1AB70CF091CE8,0xAC907FCE72B8BDFF,
+                                           0x57C85DFD972278A8,0xA4E44C6A6B6F940D,0x3851995B4F1FDFE4,0x62578CCAED71BC9E,
+                                           0xD9882BB0C01D2C0A,0x917B9D5D113C503B,0xA2C31E11A87643C6,0xE463C923A399C1CE,
+                                           0xF71686C57EA876DC,0x87B4A973E096D509,0xAF0D567D9D3A5814,0xB40C2A3F59DCC6F4,
+                                           0x3602F88495D121DD,0xD3E1DD3D9836484A,0xF945E71AA46688E5,0x7518547EB2A591F5,
+                                           0x9366587450C01D89,0x9EA81018658C065B,0x4F54080CBC4603A3,0x2D0384C65137BF3D,
+                                           0xDC325078EC861E2A,0xEA30A8FC79573FF7,0x214D2030CA050CB6,0x65F0322B8016C30C,
+                                           0x69BE96DD1B247087,0xDB95EE9981E161B8,0xD1FC1814D9CA05F8,0x820ED2BBCC0DE729,
+                                           0x63D76050430F14C7,0x3BCCB0E8A09D3A0F,0x8E40764D573F54A2,0x39D175C1E16177BD,
+                                           0x12F5A37C734F1F4B,0xAB37C12F1FDFC26D,0x5648B167395CD0F1,0x6C04ED1537BF42A7,
+                                           0xED97161D14304065,0x7D6C67DAAB72B807,0xEC17FA87BA4EE83C,0xDFAF79CB0304FBC1,
+                                           0x733F060571BC463E,0x78D61C1287E98A27,0xD07CF48E77B4ADA1,0xB9C262536C90DD26,
+                                           0xE2449B5860801605,0x8FC09AD7F941FCFB,0xFAD8CEA94BE46D0E,0xA343F28B0608EB9F,
+                                           0x9B126BD04917347B,0x9A92874AE7699C22,0x1B017C42C4E69EE0,0x3A4C5C720EE39256,
+                                           0x4B6E9F5E3EA399DA,0x6BA353F45AD83D35,0xE7FEE0904C1B2425,0x22D009832587E95D,
+                                           0x842980C00F1430E2,0xC6B3C0A0861E2893,0x087433A419D729F2,0x341F3DADD42D6C6F,
+                                           0xEE0A3FAEFBB2A58E,0x4AEE73C490DD3183,0xAAB72DB5B1A16A34,0xA92A04065E238FDF,
+                                           0x7B4B35A1686B6FCC,0x6A23BF6EF4A6956C,0x191CB96B851AD352,0x55D598D4D6DE351A,
+                                           0xC9604DE5F2AE7EF3,0x1CA6C2A3A981E172,0xDE2F9551AD7A5398,0x3025AAFF56C8F616,
+                                           0x15521D9D1E2860D9,0x506FE31CFA45073A,0x189C55F12B647B0B,0x0180EC9AAE7EA859,
+                                           0x7CEC8B40050C105E,0x2350E5198BF94104,0xEF8AD33455CC0DD7,0x07A7BEE16D677F92,
+                                           0xE5E325B90DE76997,0x5A061591A26E637A,0xB611EF1618208B46,0x09F4DF3EB7A981AB,
+                                           0x1EBB078AE87DACC0,0xB791038CB65E231F,0x0FD38D4574B05660,0x67EDF702C1EA8EBE,
+                                           0xBA5F4BE0831238CD,0xE3C477C2CEFEBE5C,0x0DCE486C354C1BD2,0x8C5DB36416C31910,
+                                           0x26EA9ED1A7627324,0x039D29B3EF82E5EB,0x9F28FC82CBF2AE02,0xA8AAE89CF05D2786,
+                                           0x431AACFA2774B028,0xCF471F9E31B7A938,0x581BD0B8E3922EC8,0xBC78199B400BEF06,
+                                           0x90FB71C7BF42F862,0x1F3BEB1046030499,0x683E7A47B55AD8DE,0x988F4263A695D190,
+                                           0xD808C72A6E638453,0x0627527BC319D7CB,0xEBB04466D72997AE,0xE67E0C0AE2658C7C,
+                                           0x14D2F107B056C880,0x7122C32C30400B8C,0x8A7AE11FD5DACEDB,0xA0DEDB38E98A0E74,
+                                           0xAD109354DCC615A6,0x0BE91A17F655CC19,0x8DDD5FFEB8BDB149,0xBFE53028AF890AED,
+                                           0xD65BA6F5B4AD7A6A,0x7956F0882997227E,0x10E8665532B352F9,0x0E5361DFDACEFE39,
+                                           0xCEC7F3049FC90161,0xFF62B561677F5F2E,0x975CCF26D22587F0,0x51EF0F86543BAF63,
+                                           0x2F1E41EF10CBF28F,0x52722635BBB94A88,0xAE8DBAE73344F04D,0x410769D36688FD9A,
+                                           0xB3AB94DE34BBB966,0x801317928DF1AA9B,0xA564A0F0C5113C54,0xF131D4BEBDB1A117,
+                                           0x7F71A2F3EA8EF5B5,0x40878549C8F655C3,0x7EF14E6944F05DEC,0xD44663DCF55137D8,
+                                           0xF2ACFD0D523344FC,0x0000000000000000,0x5FBC6E598EF5515A,0x16CF342EF1AA8532,
+                                           0xB036BD6DDB395C8D,0x13754FE6DD31B712,0xBBDFA77A2D6C9094,0x89E7C8AC3A582B30,
+                                           0x3C6B0E09CDFA459D,0xC4AE0589C7E26521,0x49735A777F5FD468,0xCAFD64561D2C9B18,
+                                           0xDA1502032F9FC9E1,0x8867243694268369,0x3782141E3BAF8984,0x9CB5D53124704BE9,
+                                           0xD7DB4A6F1AD3D233,0xA6F989432A93D9BF,0x9D3539AB8A0EE3B0,0x53F2CAAF15C7E2D1,
+                                           0x6E19283C76430F15,0x3DEBE2936384EDC4,0x5E3C82C3208BF903,0x33B8834CB94A13FD,
+                                           0x6470DEB12E686B55,0x359FD1377A53C436,0x61CAA57902F35975,0x043A975282E59A79,
+                                           0xFD7F70482683129C,0xC52EE913699CCD78,0x28B9FF0E7DAC8D1D,0x5455744E78A09D43,
+                                           0xCB7D88CCB3523341,0x44BD121B4A13CFBA,0x4D49CD25FDBA4E11,0x3E76CB208C06082F,
+                                           0x3FF627BA2278A076,0xC28957F204FBB2EA,0x453DFE81E46D67E3,0x94C1E6953DA7621B,
+                                           0x2C83685CFF491764,0xF32C1197FC4DECA5,0x2B24D6BD922E68F6,0xB22B78449AC5113F,
+                                           0x48F3B6EDD1217C31,0x2E9EAD75BEB55AD6,0x174FD8B45FD42D6B,0x4ED4E4961238ABFA,
+                                           0x92E6B4EEFEBEB5D0,0x46A0D7320BEF8208,0x47203BA8A5912A51,0x24F75BF8E69E3E96,
+                                           0xF0B1382413CF094E,0xFEE259FBC901F777,0x276A724B091CDB7D,0xBDF8F501EE75475F,
+                                           0x599B3C224DEC8691,0x6D84018F99C1EAFE,0x7498B8E41CDB39AC,0xE0595E71217C5BB7,
+                                           0x2AA43A273C50C0AF,0xF50B43EC3F543B6E,0x838E3E2162734F70,0xC09492DB4507FF58,
+                                           0x72BFEA9FDFC2EE67,0x11688ACF9CCDFAA0,0x1A8190D86A9836B9,0x7ACBD93BC615C795,
+                                           0xC7332C3A286080CA,0x863445E94EE87D50,0xF6966A5FD0D6DE85,0xE9AD814F96D5DA1C,
+                                           0x70A22FB69E3EA3D5,0x0A69F68D582B6440,0xB8428EC9C2EE757F,0x604A49E3AC8DF12C,
+                                           0x5B86F90B0C10CB23,0xE1D9B2EB8F02F3EE,0x29391394D3D22544,0xC8E0A17F5CD0D6AA,
+                                           0xB58CC6A5F7A26EAD,0x8193FB08238F02C2,0xD5C68F465B2F9F81,0xFCFF9CD288FDBAC5,
+                                           0x77059157F359DC47,0x1D262E3907FF492B,0xFB582233E59AC557,0xDDB2BCE242F8B673,
+                                           0x2577B76248E096CF,0x6F99C4A6D83DA74C,0xC1147E41EB795701,0xF48BAF76912A9337
+                                   },{
+                                           0x3EF29D249B2C0A19,0xE9E16322B6F8622F,0x5536994047757F7A,0x9F4D56D5A47B0B33,
+                                           0x822567466AA1174C,0xB8F5057DEB082FB2,0xCC48C10BF4475F53,0x373088D4275DEC3A,
+                                           0x968F4325180AED10,0x173D232CF7016151,0xAE4ED09F946FCC13,0xFD4B4741C4539873,
+                                           0x1B5B3F0DD9933765,0x2FFCB0967B644052,0xE02376D20A89840C,0xA3AE3A70329B18D7,
+                                           0x419CBD2335DE8526,0xFAFEBF115B7C3199,0x0397074F85AA9B0D,0xC58AD4FB4836B970,
+                                           0xBEC60BE3FC4104A8,0x1EFF36DC4B708772,0x131FDC33ED8453B6,0x0844E33E341764D3,
+                                           0x0FF11B6EAB38CD39,0x64351F0A7761B85A,0x3B5694F509CFBA0E,0x30857084B87245D0,
+                                           0x47AFB3BD2297AE3C,0xF2BA5C2F6F6B554A,0x74BDC4761F4F70E1,0xCFDFC64471EDC45E,
+                                           0xE610784C1DC0AF16,0x7ACA29D63C113F28,0x2DED411776A859AF,0xAC5F211E99A3D5EE,
+                                           0xD484F949A87EF33B,0x3CE36CA596E013E4,0xD120F0983A9D432C,0x6BC40464DC597563,
+                                           0x69D5F5E5D1956C9E,0x9AE95F043698BB24,0xC9ECC8DA66A4EF44,0xD69508C8A5B2EAC6,
+                                           0xC40C2235C0503B80,0x38C193BA8C652103,0x1CEEC75D46BC9E8F,0xD331011937515AD1,
+                                           0xD8E2E56886ECA50F,0xB137108D5779C991,0x709F3B6905CA4206,0x4FEB50831680CAEF,
+                                           0xEC456AF3241BD238,0x58D673AFE181ABBE,0x242F54E7CAD9BF8C,0x0211F1810DCC19FD,
+                                           0x90BC4DBB0F43C60A,0x9518446A9DA0761D,0xA1BFCBF13F57012A,0x2BDE4F8961E172B5,
+                                           0x27B853A84F732481,0xB0B1E643DF1F4B61,0x18CC38425C39AC68,0xD2B7F7D7BF37D821,
+                                           0x3103864A3014C720,0x14AA246372ABFA5C,0x6E600DB54EBAC574,0x394765740403A3F3,
+                                           0x09C215F0BC71E623,0x2A58B947E987F045,0x7B4CDF18B477BDD8,0x9709B5EB906C6FE0,
+                                           0x73083C268060D90B,0xFEDC400E41F9037E,0x284948C6E44BE9B8,0x728ECAE808065BFB,
+                                           0x06330E9E17492B1A,0x5950856169E7294E,0xBAE4F4FCE6C4364F,0xCA7BCF95E30E7449,
+                                           0x7D7FD186A33E96C2,0x52836110D85AD690,0x4DFAA1021B4CD312,0x913ABB75872544FA,
+                                           0xDD46ECB9140F1518,0x3D659A6B1E869114,0xC23F2CABD719109A,0xD713FE062DD46836,
+                                           0xD0A60656B2FBC1DC,0x221C5A79DD909496,0xEFD26DBCA1B14935,0x0E77EDA0235E4FC9,
+                                           0xCBFD395B6B68F6B9,0x0DE0EAEFA6F4D4C4,0x0422FF1F1A8532E7,0xF969B85EDED6AA94,
+                                           0x7F6E2007AEF28F3F,0x3AD0623B81A938FE,0x6624EE8B7AADA1A7,0xB682E8DDC856607B,
+                                           0xA78CC56F281E2A30,0xC79B257A45FAA08D,0x5B4174E0642B30B3,0x5F638BFF7EAE0254,
+                                           0x4BC9AF9C0C05F808,0xCE59308AF98B46AE,0x8FC58DA9CC55C388,0x803496C7676D0EB1,
+                                           0xF33CAAE1E70DD7BA,0xBB6202326EA2B4BF,0xD5020F87201871CB,0x9D5CA754A9B712CE,
+                                           0x841669D87DE83C56,0x8A6184785EB6739F,0x420BBA6CB0741E2B,0xF12D5B60EAC1CE47,
+                                           0x76AC35F71283691C,0x2C6BB7D9FECEDB5F,0xFCCDB18F4C351A83,0x1F79C012C3160582,
+                                           0xF0ABADAE62A74CB7,0xE1A5801C82EF06FC,0x67A21845F2CB2357,0x5114665F5DF04D9D,
+                                           0xBF40FD2D74278658,0xA0393D3FB73183DA,0x05A409D192E3B017,0xA9FB28CF0B4065F9,
+                                           0x25A9A22942BF3D7C,0xDB75E22703463E02,0xB326E10C5AB5D06C,0xE7968E8295A62DE6,
+                                           0xB973F3B3636EAD42,0xDF571D3819C30CE5,0xEE549B7229D7CBC5,0x12992AFD65E2D146,
+                                           0xF8EF4E9056B02864,0xB7041E134030E28B,0xC02EDD2ADAD50967,0x932B4AF48AE95D07,
+                                           0x6FE6FB7BC6DC4784,0x239AACB755F61666,0x401A4BEDBDB807D6,0x485EA8D389AF6305,
+                                           0xA41BC220ADB4B13D,0x753B32B89729F211,0x997E584BB3322029,0x1D683193CEDA1C7F,
+                                           0xFF5AB6C0C99F818E,0x16BBD5E27F67E3A1,0xA59D34EE25D233CD,0x98F8AE853B54A2D9,
+                                           0x6DF70AFACB105E79,0x795D2E99B9BBA425,0x8E437B6744334178,0x0186F6CE886682F0,
+                                           0xEBF092A3BB347BD2,0xBCD7FA62F18D1D55,0xADD9D7D011C5571E,0x0BD3E471B1BDFFDE,
+                                           0xAA6C2F808EEAFEF4,0x5EE57D31F6C880A4,0xF50FA47FF044FCA0,0x1ADDC9C351F5B595,
+                                           0xEA76646D3352F922,0x0000000000000000,0x85909F16F58EBEA6,0x46294573AAF12CCC,
+                                           0x0A5512BF39DB7D2E,0x78DBD85731DD26D5,0x29CFBE086C2D6B48,0x218B5D36583A0F9B,
+                                           0x152CD2ADFACD78AC,0x83A39188E2C795BC,0xC3B9DA655F7F926A,0x9ECBA01B2C1D89C3,
+                                           0x07B5F8509F2FA9EA,0x7EE8D6C926940DCF,0x36B67E1AAF3B6ECA,0x86079859702425AB,
+                                           0xFB7849DFD31AB369,0x4C7C57CC932A51E2,0xD96413A60E8A27FF,0x263EA566C715A671,
+                                           0x6C71FC344376DC89,0x4A4F595284637AF8,0xDAF314E98B20BCF2,0x572768C14AB96687,
+                                           0x1088DB7C682EC8BB,0x887075F9537A6A62,0x2E7A4658F302C2A2,0x619116DBE582084D,
+                                           0xA87DDE018326E709,0xDCC01A779C6997E8,0xEDC39C3DAC7D50C8,0xA60A33A1A078A8C0,
+                                           0xC1A82BE452B38B97,0x3F746BEA134A88E9,0xA228CCBEBAFD9A27,0xABEAD94E068C7C04,
+                                           0xF48952B178227E50,0x5CF48CB0FB049959,0x6017E0156DE48ABD,0x4438B4F2A73D3531,
+                                           0x8C528AE649FF5885,0xB515EF924DFCFB76,0x0C661C212E925634,0xB493195CC59A7986,
+                                           0x9CDA519A21D1903E,0x32948105B5BE5C2D,0x194ACE8CD45F2E98,0x438D4CA238129CDB,
+                                           0x9B6FA9CABEFE39D4,0x81B26009EF0B8C41,0xDED1EBF691A58E15,0x4E6DA64D9EE6481F,
+                                           0x54B06F8ECF13FD8A,0x49D85E1D01C9E1F5,0xAFC826511C094EE3,0xF698A33075EE67AD,
+                                           0x5AC7822EEC4DB243,0x8DD47C28C199DA75,0x89F68337DB1CE892,0xCDCE37C57C21DDA3,
+                                           0x530597DE503C5460,0x6A42F2AA543FF793,0x5D727A7E73621BA9,0xE232875307459DF1,
+                                           0x56A19E0FC2DFE477,0xC61DD3B4CD9C227D,0xE5877F03986A341B,0x949EB2A415C6F4ED,
+                                           0x6206119460289340,0x6380E75AE84E11B0,0x8BE772B6D6D0F16F,0x50929091D596CF6D,
+                                           0xE86795EC3E9EE0DF,0x7CF927482B581432,0xC86A3E14EEC26DB4,0x7119CDA78DACC0F6,
+                                           0xE40189CD100CB6EB,0x92ADBC3A028FDFF7,0xB2A017C2D2D3529C,0x200DABF8D05C8D6B,
+                                           0x34A78F9BA2F77737,0xE3B4719D8F231F01,0x45BE423C2F5BB7C1,0xF71E55FEFD88E55D,
+                                           0x6853032B59F3EE6E,0x65B3E9C4FF073AAA,0x772AC3399AE5EBEC,0x87816E97F842A75B,
+                                           0x110E2DB2E0484A4B,0x331277CB3DD8DEDD,0xBD510CAC79EB9FA5,0x352179552A91F5C7
+                                   },{
+                                           0x8AB0A96846E06A6D,0x43C7E80B4BF0B33A,0x08C9B3546B161EE5,0x39F1C235EBA990BE,
+                                           0xC1BEF2376606C7B2,0x2C209233614569AA,0xEB01523B6FC3289A,0x946953AB935ACEDD,
+                                           0x272838F63E13340E,0x8B0455ECA12BA052,0x77A1B2C4978FF8A2,0xA55122CA13E54086,
+                                           0x2276135862D3F1CD,0xDB8DDFDE08B76CFE,0x5D1E12C89E4A178A,0x0E56816B03969867,
+                                           0xEE5F79953303ED59,0xAFED748BAB78D71D,0x6D929F2DF93E53EE,0xF5D8A8F8BA798C2A,
+                                           0xF619B1698E39CF6B,0x95DDAF2F749104E2,0xEC2A9C80E0886427,0xCE5C8FD8825B95EA,
+                                           0xC4E0D9993AC60271,0x4699C3A5173076F9,0x3D1B151F50A29F42,0x9ED505EA2BC75946,
+                                           0x34665ACFDC7F4B98,0x61B1FB53292342F7,0xC721C0080E864130,0x8693CD1696FD7B74,
+                                           0x872731927136B14B,0xD3446C8A63A1721B,0x669A35E8A6680E4A,0xCAB658F239509A16,
+                                           0xA4E5DE4EF42E8AB9,0x37A7435EE83F08D9,0x134E6239E26C7F96,0x82791A3C2DF67488,
+                                           0x3F6EF00A8329163C,0x8E5A7E42FDEB6591,0x5CAAEE4C7981DDB5,0x19F234785AF1E80D,
+                                           0x255DDDE3ED98BD70,0x50898A32A99CCCAC,0x28CA4519DA4E6656,0xAE59880F4CB31D22,
+                                           0x0D9798FA37D6DB26,0x32F968F0B4FFCD1A,0xA00F09644F258545,0xFA3AD5175E24DE72,
+                                           0xF46C547C5DB24615,0x713E80FBFF0F7E20,0x7843CF2B73D2AAFA,0xBD17EA36AEDF62B4,
+                                           0xFD111BACD16F92CF,0x4ABAA7DBC72D67E0,0xB3416B5DAD49FAD3,0xBCA316B24914A88B,
+                                           0x15D150068AECF914,0xE27C1DEBE31EFC40,0x4FE48C759BEDA223,0x7EDCFD141B522C78,
+                                           0x4E5070F17C26681C,0xE696CAC15815F3BC,0x35D2A64B3BB481A7,0x800CFF29FE7DFDF6,
+                                           0x1ED9FAC3D5BAA4B0,0x6C2663A91EF599D1,0x03C1199134404341,0xF7AD4DED69F20554,
+                                           0xCD9D9649B61BD6AB,0xC8C3BDE7EADB1368,0xD131899FB02AFB65,0x1D18E352E1FAE7F1,
+                                           0xDA39235AEF7CA6C1,0xA1BBF5E0A8EE4F7A,0x91377805CF9A0B1E,0x3138716180BF8E5B,
+                                           0xD9F83ACBDB3CE580,0x0275E515D38B897E,0x472D3F21F0FBBCC6,0x2D946EB7868EA395,
+                                           0xBA3C248D21942E09,0xE7223645BFDE3983,0xFF64FEB902E41BB1,0xC97741630D10D957,
+                                           0xC3CB1722B58D4ECC,0xA27AEC719CAE0C3B,0x99FECB51A48C15FB,0x1465AC826D27332B,
+                                           0xE1BD047AD75EBF01,0x79F733AF941960C5,0x672EC96C41A3C475,0xC27FEBA6524684F3,
+                                           0x64EFD0FD75E38734,0xED9E60040743AE18,0xFB8E2993B9EF144D,0x38453EB10C625A81,
+                                           0x6978480742355C12,0x48CF42CE14A6EE9E,0x1CAC1FD606312DCE,0x7B82D6BA4792E9BB,
+                                           0x9D141C7B1F871A07,0x5616B80DC11C4A2E,0xB849C198F21FA777,0x7CA91801C8D9A506,
+                                           0xB1348E487EC273AD,0x41B20D1E987B3A44,0x7460AB55A3CFBBE3,0x84E628034576F20A,
+                                           0x1B87D16D897A6173,0x0FE27DEFE45D5258,0x83CDE6B8CA3DBEB7,0x0C23647ED01D1119,
+                                           0x7A362A3EA0592384,0xB61F40F3F1893F10,0x75D457D1440471DC,0x4558DA34237035B8,
+                                           0xDCA6116587FC2043,0x8D9B67D3C9AB26D0,0x2B0B5C88EE0E2517,0x6FE77A382AB5DA90,
+                                           0x269CC472D9D8FE31,0x63C41E46FAA8CB89,0xB7ABBC771642F52F,0x7D1DE4852F126F39,
+                                           0xA8C6BA3024339BA0,0x600507D7CEE888C8,0x8FEE82C61A20AFAE,0x57A2448926D78011,
+                                           0xFCA5E72836A458F0,0x072BCEBB8F4B4CBD,0x497BBE4AF36D24A1,0x3CAFE99BB769557D,
+                                           0x12FA9EBD05A7B5A9,0xE8C04BAA5B836BDB,0x4273148FAC3B7905,0x908384812851C121,
+                                           0xE557D3506C55B0FD,0x72FF996ACB4F3D61,0x3EDA0C8E64E2DC03,0xF0868356E6B949E9,
+                                           0x04EAD72ABB0B0FFC,0x17A4B5135967706A,0xE3C8E16F04D5367F,0xF84F30028DAF570C,
+                                           0x1846C8FCBD3A2232,0x5B8120F7F6CA9108,0xD46FA231ECEA3EA6,0x334D947453340725,
+                                           0x58403966C28AD249,0xBED6F3A79A9F21F5,0x68CCB483A5FE962D,0xD085751B57E1315A,
+                                           0xFED0023DE52FD18E,0x4B0E5B5F20E6ADDF,0x1A332DE96EB1AB4C,0xA3CE10F57B65C604,
+                                           0x108F7BA8D62C3CD7,0xAB07A3A11073D8E1,0x6B0DAD1291BED56C,0xF2F366433532C097,
+                                           0x2E557726B2CEE0D4,0x0000000000000000,0xCB02A476DE9B5029,0xE4E32FD48B9E7AC2,
+                                           0x734B65EE2C84F75E,0x6E5386BCCD7E10AF,0x01B4FC84E7CBCA3F,0xCFE8735C65905FD5,
+                                           0x3613BFDA0FF4C2E6,0x113B872C31E7F6E8,0x2FE18BA255052AEB,0xE974B72EBC48A1E4,
+                                           0x0ABC5641B89D979B,0xB46AA5E62202B66E,0x44EC26B0C4BBFF87,0xA6903B5B27A503C7,
+                                           0x7F680190FC99E647,0x97A84A3AA71A8D9C,0xDD12EDE16037EA7C,0xC554251DDD0DC84E,
+                                           0x88C54C7D956BE313,0x4D91696048662B5D,0xB08072CC9909B992,0xB5DE5962C5C97C51,
+                                           0x81B803AD19B637C9,0xB2F597D94A8230EC,0x0B08AAC55F565DA4,0xF1327FD2017283D6,
+                                           0xAD98919E78F35E63,0x6AB9519676751F53,0x24E921670A53774F,0xB9FD3D1C15D46D48,
+                                           0x92F66194FBDA485F,0x5A35DC7311015B37,0xDED3F4705477A93D,0xC00A0EB381CD0D8D,
+                                           0xBB88D809C65FE436,0x16104997BEACBA55,0x21B70AC95693B28C,0x59F4C5E225411876,
+                                           0xD5DB5EB50B21F499,0x55D7A19CF55C096F,0xA97246B4C3F8519F,0x8552D487A2BD3835,
+                                           0x54635D181297C350,0x23C2EFDC85183BF2,0x9F61F96ECC0C9379,0x534893A39DDC8FED,
+                                           0x5EDF0B59AA0A54CB,0xAC2C6D1A9F38945C,0xD7AEBBA0D8AA7DE7,0x2ABFA00C09C5EF28,
+                                           0xD84CC64F3CF72FBF,0x2003F64DB15878B3,0xA724C7DFC06EC9F8,0x069F323F68808682,
+                                           0xCC296ACD51D01C94,0x055E2BAE5CC0C5C3,0x6270E2C21D6301B6,0x3B842720382219C0,
+                                           0xD2F0900E846AB824,0x52FC6F277A1745D2,0xC6953C8CE94D8B0F,0xE009F8FE3095753E,
+                                           0x655B2C7992284D0B,0x984A37D54347DFC4,0xEAB5AEBF8808E2A5,0x9A3FD2C090CC56BA,
+                                           0x9CA0E0FFF84CD038,0x4C2595E4AFADE162,0xDF6708F4B3BC6302,0xBF620F237D54EBCA,
+                                           0x93429D101C118260,0x097D4FD08CDDD4DA,0x8C2F9B572E60ECEF,0x708A7C7F18C4B41F,
+                                           0x3A30DBA4DFE9D3FF,0x4006F19A7FB0F07B,0x5F6BF7DD4DC19EF4,0x1F6D064732716E8F,
+                                           0xF9FBCC866A649D33,0x308C8DE567744464,0x8971B0F972A0292C,0xD61A47243F61B7D8,
+                                           0xEFEB8511D4C82766,0x961CB6BE40D147A3,0xAAB35F25F7B812DE,0x76154E407044329D,
+                                           0x513D76B64E570693,0xF3479AC7D2F90AA8,0x9B8B2E4477079C85,0x297EB99D3D85AC69
+                                   },{
+                                           0x7E37E62DFC7D40C3,0x776F25A4EE939E5B,0xE045C850DD8FB5AD,0x86ED5BA711FF1952,
+                                           0xE91D0BD9CF616B35,0x37E0AB256E408FFB,0x9607F6C031025A7A,0x0B02F5E116D23C9D,
+                                           0xF3D8486BFB50650C,0x621CFF27C40875F5,0x7D40CB71FA5FD34A,0x6DAA6616DAA29062,
+                                           0x9F5F354923EC84E2,0xEC847C3DC507C3B3,0x025A3668043CE205,0xA8BF9E6C4DAC0B19,
+                                           0xFA808BE2E9BEBB94,0xB5B99C5277C74FA3,0x78D9BC95F0397BCC,0xE332E50CDBAD2624,
+                                           0xC74FCE129332797E,0x1729ECEB2EA709AB,0xC2D6B9F69954D1F8,0x5D898CBFBAB8551A,
+                                           0x859A76FB17DD8ADB,0x1BE85886362F7FB5,0xF6413F8FF136CD8A,0xD3110FA5BBB7E35C,
+                                           0x0A2FEED514CC4D11,0xE83010EDCD7F1AB9,0xA1E75DE55F42D581,0xEEDE4A55C13B21B6,
+                                           0xF2F5535FF94E1480,0x0CC1B46D1888761E,0xBCE15FDB6529913B,0x2D25E8975A7181C2,
+                                           0x71817F1CE2D7A554,0x2E52C5CB5C53124B,0xF9F7A6BEEF9C281D,0x9E722E7D21F2F56E,
+                                           0xCE170D9B81DCA7E6,0x0E9B82051CB4941B,0x1E712F623C49D733,0x21E45CFA42F9F7DC,
+                                           0xCB8E7A7F8BBA0F60,0x8E98831A010FB646,0x474CCF0D8E895B23,0xA99285584FB27A95,
+                                           0x8CC2B57205335443,0x42D5B8E984EFF3A5,0x012D1B34021E718C,0x57A6626AAE74180B,
+                                           0xFF19FC06E3D81312,0x35BA9D4D6A7C6DFE,0xC9D44C178F86ED65,0x506523E6A02E5288,
+                                           0x03772D5C06229389,0x8B01F4FE0B691EC0,0xF8DABD8AED825991,0x4C4E3AEC985B67BE,
+                                           0xB10DF0827FBF96A9,0x6A69279AD4F8DAE1,0xE78689DCD3D5FF2E,0x812E1A2B1FA553D1,
+                                           0xFBAD90D6EBA0CA18,0x1AC543B234310E39,0x1604F7DF2CB97827,0xA6241C6951189F02,
+                                           0x753513CCEAAF7C5E,0x64F2A59FC84C4EFA,0x247D2B1E489F5F5A,0xDB64D718AB474C48,
+                                           0x79F4A7A1F2270A40,0x1573DA832A9BEBAE,0x3497867968621C72,0x514838D2A2302304,
+                                           0xF0AF6537FD72F685,0x1D06023E3A6B44BA,0x678588C3CE6EDD73,0x66A893F7CC70ACFF,
+                                           0xD4D24E29B5EDA9DF,0x3856321470EA6A6C,0x07C3418C0E5A4A83,0x2BCBB22F5635BACD,
+                                           0x04B46CD00878D90A,0x06EE5AB80C443B0F,0x3B211F4876C8F9E5,0x0958C38912EEDE98,
+                                           0xD14B39CDBF8B0159,0x397B292072F41BE0,0x87C0409313E168DE,0xAD26E98847CAA39F,
+                                           0x4E140C849C6785BB,0xD5FF551DB7F3D853,0xA0CA46D15D5CA40D,0xCD6020C787FE346F,
+                                           0x84B76DCF15C3FB57,0xDEFDA0FCA121E4CE,0x4B8D7B6096012D3D,0x9AC642AD298A2C64,
+                                           0x0875D8BD10F0AF14,0xB357C6EA7B8374AC,0x4D6321D89A451632,0xEDA96709C719B23F,
+                                           0xF76C24BBF328BC06,0xC662D526912C08F2,0x3CE25EC47892B366,0xB978283F6F4F39BD,
+                                           0xC08C8F9E9D6833FD,0x4F3917B09E79F437,0x593DE06FB2C08C10,0xD6887841B1D14BDA,
+                                           0x19B26EEE32139DB0,0xB494876675D93E2F,0x825937771987C058,0x90E9AC783D466175,
+                                           0xF1827E03FF6C8709,0x945DC0A8353EB87F,0x4516F9658AB5B926,0x3F9573987EB020EF,
+                                           0xB855330B6D514831,0x2AE6A91B542BCB41,0x6331E413C6160479,0x408F8E8180D311A0,
+                                           0xEFF35161C325503A,0xD06622F9BD9570D5,0x8876D9A20D4B8D49,0xA5533135573A0C8B,
+                                           0xE168D364DF91C421,0xF41B09E7F50A2F8F,0x12B09B0F24C1A12D,0xDA49CC2CA9593DC4,
+                                           0x1F5C34563E57A6BF,0x54D14F36A8568B82,0xAF7CDFE043F6419A,0xEA6A2685C943F8BC,
+                                           0xE5DCBFB4D7E91D2B,0xB27ADDDE799D0520,0x6B443CAED6E6AB6D,0x7BAE91C9F61BE845,
+                                           0x3EB868AC7CAE5163,0x11C7B65322E332A4,0xD23C1491B9A992D0,0x8FB5982E0311C7CA,
+                                           0x70AC6428E0C9D4D8,0x895BC2960F55FCC5,0x76423E90EC8DEFD7,0x6FF0507EDE9E7267,
+                                           0x3DCF45F07A8CC2EA,0x4AA06054941F5CB1,0x5810FB5BB0DEFD9C,0x5EFEA1E3BC9AC693,
+                                           0x6EDD4B4ADC8003EB,0x741808F8E8B10DD2,0x145EC1B728859A22,0x28BC9F7350172944,
+                                           0x270A06424EBDCCD3,0x972AEDF4331C2BF6,0x059977E40A66A886,0x2550302A4A812ED6,
+                                           0xDD8A8DA0A7037747,0xC515F87A970E9B7B,0x3023EAA9601AC578,0xB7E3AA3A73FBADA6,
+                                           0x0FB699311EAAE597,0x0000000000000000,0x310EF19D6204B4F4,0x229371A644DB6455,
+                                           0x0DECAF591A960792,0x5CA4978BB8A62496,0x1C2B190A38753536,0x41A295B582CD602C,
+                                           0x3279DCC16426277D,0xC1A194AA9F764271,0x139D803B26DFD0A1,0xAE51C4D441E83016,
+                                           0xD813FA44AD65DFC1,0xAC0BF2BC45D4D213,0x23BE6A9246C515D9,0x49D74D08923DCF38,
+                                           0x9D05032127D066E7,0x2F7FDEFF5E4D63C7,0xA47E2A0155247D07,0x99B16FF12FA8BFED,
+                                           0x4661D4398C972AAF,0xDFD0BBC8A33F9542,0xDCA79694A51D06CB,0xB020EBB67DA1E725,
+                                           0xBA0F0563696DAA34,0xE4F1A480D5F76CA7,0xC438E34E9510EAF7,0x939E81243B64F2FC,
+                                           0x8DEFAE46072D25CF,0x2C08F3A3586FF04E,0xD7A56375B3CF3A56,0x20C947CE40E78650,
+                                           0x43F8A3DD86F18229,0x568B795EAC6A6987,0x8003011F1DBB225D,0xF53612D3F7145E03,
+                                           0x189F75DA300DEC3C,0x9570DB9C3720C9F3,0xBB221E576B73DBB8,0x72F65240E4F536DD,
+                                           0x443BE25188ABC8AA,0xE21FFE38D9B357A8,0xFD43CA6EE7E4F117,0xCAA3614B89A47EEC,
+                                           0xFE34E732E1C6629E,0x83742C431B99B1D4,0xCF3A16AF83C2D66A,0xAAE5A8044990E91C,
+                                           0x26271D764CA3BD5F,0x91C4B74C3F5810F9,0x7C6DD045F841A2C6,0x7F1AFD19FE63314F,
+                                           0xC8F957238D989CE9,0xA709075D5306EE8E,0x55FC5402AA48FA0E,0x48FA563C9023BEB4,
+                                           0x65DFBEABCA523F76,0x6C877D22D8BCE1EE,0xCC4D3BF385E045E3,0xBEBB69B36115733E,
+                                           0x10EAAD6720FD4328,0xB6CEB10E71E5DC2A,0xBDCC44EF6737E0B7,0x523F158EA412B08D,
+                                           0x989C74C52DB6CE61,0x9BEB59992B945DE8,0x8A2CEFCA09776F4C,0xA3BD6B8D5B7E3784,
+                                           0xEB473DB1CB5D8930,0xC3FBA2C29B4AA074,0x9C28181525CE176B,0x683311F2D0C438E4,
+                                           0x5FD3BAD7BE84B71F,0xFC6ED15AE5FA809B,0x36CDB0116C5EFE77,0x29918447520958C8,
+                                           0xA29070B959604608,0x53120EBAA60CC101,0x3A0C047C74D68869,0x691E0AC6D2DA4968,
+                                           0x73DB4974E6EB4751,0x7A838AFDF40599C9,0x5A4ACD33B4E21F99,0x6046C94FC03497F0,
+                                           0xE6AB92E8D1CB8EA2,0x3354C7F5663856F1,0xD93EE170AF7BAE4D,0x616BD27BC22AE67C,
+                                           0x92B39A10397A8370,0xABC8B3304B8E9890,0xBF967287630B02B2,0x5B67D607B6FC6E15
+                                   },{
+                                           0xD031C397CE553FE6,0x16BA5B01B006B525,0xA89BADE6296E70C8,0x6A1F525D77D3435B,
+                                           0x6E103570573DFA0B,0x660EFB2A17FC95AB,0x76327A9E97634BF6,0x4BAD9D6462458BF5,
+                                           0xF1830CAEDBC3F748,0xC5C8F542669131FF,0x95044A1CDC48B0CB,0x892962DF3CF8B866,
+                                           0xB0B9E208E930C135,0xA14FB3F0611A767C,0x8D2605F21C160136,0xD6B71922FECC549E,
+                                           0x37089438A5907D8B,0x0B5DA38E5803D49C,0x5A5BCC9CEA6F3CBC,0xEDAE246D3B73FFE5,
+                                           0xD2B87E0FDE22EDCE,0x5E54ABB1CA8185EC,0x1DE7F88FE80561B9,0xAD5E1A870135A08C,
+                                           0x2F2ADBD665CECC76,0x5780B5A782F58358,0x3EDC8A2EEDE47B3F,0xC9D95C3506BEE70F,
+                                           0x83BE111D6C4E05EE,0xA603B90959367410,0x103C81B4809FDE5D,0x2C69B6027D0C774A,
+                                           0x399080D7D5C87953,0x09D41E16487406B4,0xCDD63B1826505E5F,0xF99DC2F49B0298E8,
+                                           0x9CD0540A943CB67F,0xBCA84B7F891F17C5,0x723D1DB3B78DF2A6,0x78AA6E71E73B4F2E,
+                                           0x1433E699A071670D,0x84F21BE454620782,0x98DF3327B4D20F2F,0xF049DCE2D3769E5C,
+                                           0xDB6C60199656EB7A,0x648746B2078B4783,0x32CD23598DCBADCF,0x1EA4955BF0C7DA85,
+                                           0xE9A143401B9D46B5,0xFD92A5D9BBEC21B8,0xC8138C790E0B8E1B,0x2EE00B9A6D7BA562,
+                                           0xF85712B893B7F1FC,0xEB28FED80BEA949D,0x564A65EB8A40EA4C,0x6C9988E8474A2823,
+                                           0x4535898B121D8F2D,0xABD8C03231ACCBF4,0xBA2E91CAB9867CBD,0x7960BE3DEF8E263A,
+                                           0x0C11A977602FD6F0,0xCB50E1AD16C93527,0xEAE22E94035FFD89,0x2866D12F5DE2CE1A,
+                                           0xFF1B1841AB9BF390,0x9F9339DE8CFE0D43,0x964727C8C48A0BF7,0x524502C6AAAE531C,
+                                           0x9B9C5EF3AC10B413,0x4FA2FA4942AB32A5,0x3F165A62E551122B,0xC74148DA76E6E3D7,
+                                           0x924840E5E464B2A7,0xD372AE43D69784DA,0x233B72A105E11A86,0xA48A04914941A638,
+                                           0xB4B68525C9DE7865,0xDDEABAACA6CF8002,0x0A9773C250B6BD88,0xC284FFBB5EBD3393,
+                                           0x8BA0DF472C8F6A4E,0x2AEF6CB74D951C32,0x427983722A318D41,0x73F7CDFFBF389BB2,
+                                           0x074C0AF9382C026C,0x8A6A0F0B243A035A,0x6FDAE53C5F88931F,0xC68B98967E538AC3,
+                                           0x44FF59C71AA8E639,0xE2FCE0CE439E9229,0xA20CDE2479D8CD40,0x19E89FA2C8EBD8E9,
+                                           0xF446BBCFF398270C,0x43B3533E2284E455,0xD82F0DCD8E945046,0x51066F12B26CE820,
+                                           0xE73957AF6BC5426D,0x081ECE5A40C16FA0,0x3B193D4FC5BFAB7B,0x7FE66488DF174D42,
+                                           0x0E9814EF705804D8,0x8137AC857C39D7C6,0xB1733244E185A821,0x695C3F896F11F867,
+                                           0xF6CF0657E3EFF524,0x1AABF276D02963D5,0x2DA3664E75B91E5E,0x0289BD981077D228,
+                                           0x90C1FD7DF413608F,0x3C5537B6FD93A917,0xAA12107E3919A2E0,0x0686DAB530996B78,
+                                           0xDAA6B0559EE3826E,0xC34E2FF756085A87,0x6D5358A44FFF4137,0xFC587595B35948AC,
+                                           0x7CA5095CC7D5F67E,0xFB147F6C8B754AC0,0xBFEB26AB91DDACF9,0x6896EFC567A49173,
+                                           0xCA9A31E11E7C5C33,0xBBE44186B13315A9,0x0DDB793B689ABFE4,0x70B4A02BA7FA208E,
+                                           0xE47A3A7B7307F951,0x8CECD5BE14A36822,0xEEED49B923B144D9,0x17708B4DB8B3DC31,
+                                           0x6088219F2765FED3,0xB3FA8FDCF1F27A09,0x910B2D31FCA6099B,0x0F52C4A378ED6DCC,
+                                           0x50CCBF5EBAD98134,0x6BD582117F662A4F,0x94CE9A50D4FDD9DF,0x2B25BCFB45207526,
+                                           0x67C42B661F49FCBF,0x492420FC723259DD,0x03436DD418C2BB3C,0x1F6E4517F872B391,
+                                           0xA08563BC69AF1F68,0xD43EA4BAEEBB86B6,0x01CAD04C08B56914,0xAC94CACB0980C998,
+                                           0x54C3D8739A373864,0x26FEC5C02DBACAC2,0xDEA9D778BE0D3B3E,0x040F672D20EEB950,
+                                           0xE5B0EA377BB29045,0xF30AB136CBB42560,0x62019C0737122CFB,0xE86B930C13282FA1,
+                                           0xCC1CEB542EE5374B,0x538FD28AA21B3A08,0x1B61223AD89C0AC1,0x36C24474AD25149F,
+                                           0x7A23D3E9F74C9D06,0xBE21F6E79968C5ED,0xCF5F868036278C77,0xF705D61BEB5A9C30,
+                                           0x4D2B47D152DCE08D,0x5F9E7BFDC234ECF8,0x247778583DCD18EA,0x867BA67C4415D5AA,
+                                           0x4CE1979D5A698999,0x0000000000000000,0xEC64F42133C696F1,0xB57C5569C16B1171,
+                                           0xC1C7926F467F88AF,0x654D96FE0F3E2E97,0x15F936D5A8C40E19,0xB8A72C52A9F1AE95,
+                                           0xA9517DAA21DB19DC,0x58D27104FA18EE94,0x5918A148F2AD8780,0x5CDD1629DAF657C4,
+                                           0x8274C15164FB6CFA,0xD1FB13DBC6E056F2,0x7D6FD910CF609F6A,0xB63F38BDD9A9AA4D,
+                                           0x3D9FE7FAF526C003,0x74BBC706871499DE,0xDF630734B6B8522A,0x3AD3ED03CD0AC26F,
+                                           0xFADEAF2083C023D4,0xC00D42234ECAE1BB,0x8538CBA85CD76E96,0xC402250E6E2458EB,
+                                           0x47BC3413026A5D05,0xAFD7A71F114272A4,0x978DF784CC3F62E3,0xB96DFC1EA144C781,
+                                           0x21B2CF391596C8AE,0x318E4E8D950916F3,0xCE9556CC3E92E563,0x385A509BDD7D1047,
+                                           0x358129A0B5E7AFA3,0xE6F387E363702B79,0xE0755D5653E94001,0x7BE903A5FFF9F412,
+                                           0x12B53C2C90E80C75,0x3307F315857EC4DB,0x8FAFB86A0C61D31E,0xD9E5DD8186213952,
+                                           0x77F8AAD29FD622E2,0x25BDA814357871FE,0x7571174A8FA1F0CA,0x137FEC60985D6561,
+                                           0x30449EC19DBC7FE7,0xA540D4DD41F4CF2C,0xDC206AE0AE7AE916,0x5B911CD0E2DA55A8,
+                                           0xB2305F90F947131D,0x344BF9ECBD52C6B7,0x5D17C665D2433ED0,0x18224FEEC05EB1FD,
+                                           0x9E59E992844B6457,0x9A568EBFA4A5DD07,0xA3C60E68716DA454,0x7E2CB4C4D7A22456,
+                                           0x87B176304CA0BCBE,0x413AEEA632F3367D,0x9915E36BBC67663B,0x40F03EEA3A465F69,
+                                           0x1C2D28C3E0B008AD,0x4E682A054A1E5BB1,0x05C5B761285BD044,0xE1BF8D1A5B5C2915,
+                                           0xF2C0617AC3014C74,0xB7F5E8F1D11CC359,0x63CB4C4B3FA745EF,0x9D1A84469C89DF6B,
+                                           0xE33630824B2BFB3D,0xD5F474F6E60EEFA2,0xF58C6B83FB2D4E18,0x4676E45F0ADF3411,
+                                           0x20781F751D23A1BA,0xBD629B3381AA7ED1,0xAE1D775319F71BB0,0xFED1C80DA32E9A84,
+                                           0x5509083F92825170,0x29AC01635557A70E,0xA7C9694551831D04,0x8E65682604D4BA0A,
+                                           0x11F651F8882AB749,0xD77DC96EF6793D8A,0xEF2799F52B042DCD,0x48EEF0B07A8730C9,
+                                           0x22F1A2ED0D547392,0x6142F1D32FD097C7,0x4A674D286AF0E2E1,0x80FD7CC9748CBED2,
+                                           0x717E7067AF4F499A,0x938290A9ECD1DBB3,0x88E3B293344DD172,0x2734158C250FA3D6
+                                   }};
+
+// Constant values for KeySchedule function
+const unsigned char C[12][64] = {{
+                                         0xB1,0x08,0x5B,0xDA,0x1E,0xCA,0xDA,0xE9,0xEB,0xCB,0x2F,0x81,0xC0,0x65,0x7C,0x1F,
+                                         0x2F,0x6A,0x76,0x43,0x2E,0x45,0xD0,0x16,0x71,0x4E,0xB8,0x8D,0x75,0x85,0xC4,0xFC,
+                                         0x4B,0x7C,0xE0,0x91,0x92,0x67,0x69,0x01,0xA2,0x42,0x2A,0x08,0xA4,0x60,0xD3,0x15,
+                                         0x05,0x76,0x74,0x36,0xCC,0x74,0x4D,0x23,0xDD,0x80,0x65,0x59,0xF2,0xA6,0x45,0x07
+                                 },{
+                                         0x6F,0xA3,0xB5,0x8A,0xA9,0x9D,0x2F,0x1A,0x4F,0xE3,0x9D,0x46,0x0F,0x70,0xB5,0xD7,
+                                         0xF3,0xFE,0xEA,0x72,0x0A,0x23,0x2B,0x98,0x61,0xD5,0x5E,0x0F,0x16,0xB5,0x01,0x31,
+                                         0x9A,0xB5,0x17,0x6B,0x12,0xD6,0x99,0x58,0x5C,0xB5,0x61,0xC2,0xDB,0x0A,0xA7,0xCA,
+                                         0x55,0xDD,0xA2,0x1B,0xD7,0xCB,0xCD,0x56,0xE6,0x79,0x04,0x70,0x21,0xB1,0x9B,0xB7
+                                 },{
+                                         0xF5,0x74,0xDC,0xAC,0x2B,0xCE,0x2F,0xC7,0x0A,0x39,0xFC,0x28,0x6A,0x3D,0x84,0x35,
+                                         0x06,0xF1,0x5E,0x5F,0x52,0x9C,0x1F,0x8B,0xF2,0xEA,0x75,0x14,0xB1,0x29,0x7B,0x7B,
+                                         0xD3,0xE2,0x0F,0xE4,0x90,0x35,0x9E,0xB1,0xC1,0xC9,0x3A,0x37,0x60,0x62,0xDB,0x09,
+                                         0xC2,0xB6,0xF4,0x43,0x86,0x7A,0xDB,0x31,0x99,0x1E,0x96,0xF5,0x0A,0xBA,0x0A,0xB2
+                                 },{
+                                         0xEF,0x1F,0xDF,0xB3,0xE8,0x15,0x66,0xD2,0xF9,0x48,0xE1,0xA0,0x5D,0x71,0xE4,0xDD,
+                                         0x48,0x8E,0x85,0x7E,0x33,0x5C,0x3C,0x7D,0x9D,0x72,0x1C,0xAD,0x68,0x5E,0x35,0x3F,
+                                         0xA9,0xD7,0x2C,0x82,0xED,0x03,0xD6,0x75,0xD8,0xB7,0x13,0x33,0x93,0x52,0x03,0xBE,
+                                         0x34,0x53,0xEA,0xA1,0x93,0xE8,0x37,0xF1,0x22,0x0C,0xBE,0xBC,0x84,0xE3,0xD1,0x2E
+                                 },{
+                                         0x4B,0xEA,0x6B,0xAC,0xAD,0x47,0x47,0x99,0x9A,0x3F,0x41,0x0C,0x6C,0xA9,0x23,0x63,
+                                         0x7F,0x15,0x1C,0x1F,0x16,0x86,0x10,0x4A,0x35,0x9E,0x35,0xD7,0x80,0x0F,0xFF,0xBD,
+                                         0xBF,0xCD,0x17,0x47,0x25,0x3A,0xF5,0xA3,0xDF,0xFF,0x00,0xB7,0x23,0x27,0x1A,0x16,
+                                         0x7A,0x56,0xA2,0x7E,0xA9,0xEA,0x63,0xF5,0x60,0x17,0x58,0xFD,0x7C,0x6C,0xFE,0x57
+                                 },{
+                                         0xAE,0x4F,0xAE,0xAE,0x1D,0x3A,0xD3,0xD9,0x6F,0xA4,0xC3,0x3B,0x7A,0x30,0x39,0xC0,
+                                         0x2D,0x66,0xC4,0xF9,0x51,0x42,0xA4,0x6C,0x18,0x7F,0x9A,0xB4,0x9A,0xF0,0x8E,0xC6,
+                                         0xCF,0xFA,0xA6,0xB7,0x1C,0x9A,0xB7,0xB4,0x0A,0xF2,0x1F,0x66,0xC2,0xBE,0xC6,0xB6,
+                                         0xBF,0x71,0xC5,0x72,0x36,0x90,0x4F,0x35,0xFA,0x68,0x40,0x7A,0x46,0x64,0x7D,0x6E
+                                 },{
+                                         0xF4,0xC7,0x0E,0x16,0xEE,0xAA,0xC5,0xEC,0x51,0xAC,0x86,0xFE,0xBF,0x24,0x09,0x54,
+                                         0x39,0x9E,0xC6,0xC7,0xE6,0xBF,0x87,0xC9,0xD3,0x47,0x3E,0x33,0x19,0x7A,0x93,0xC9,
+                                         0x09,0x92,0xAB,0xC5,0x2D,0x82,0x2C,0x37,0x06,0x47,0x69,0x83,0x28,0x4A,0x05,0x04,
+                                         0x35,0x17,0x45,0x4C,0xA2,0x3C,0x4A,0xF3,0x88,0x86,0x56,0x4D,0x3A,0x14,0xD4,0x93
+                                 },{
+                                         0x9B,0x1F,0x5B,0x42,0x4D,0x93,0xC9,0xA7,0x03,0xE7,0xAA,0x02,0x0C,0x6E,0x41,0x41,
+                                         0x4E,0xB7,0xF8,0x71,0x9C,0x36,0xDE,0x1E,0x89,0xB4,0x44,0x3B,0x4D,0xDB,0xC4,0x9A,
+                                         0xF4,0x89,0x2B,0xCB,0x92,0x9B,0x06,0x90,0x69,0xD1,0x8D,0x2B,0xD1,0xA5,0xC4,0x2F,
+                                         0x36,0xAC,0xC2,0x35,0x59,0x51,0xA8,0xD9,0xA4,0x7F,0x0D,0xD4,0xBF,0x02,0xE7,0x1E
+                                 },{
+                                         0x37,0x8F,0x5A,0x54,0x16,0x31,0x22,0x9B,0x94,0x4C,0x9A,0xD8,0xEC,0x16,0x5F,0xDE,
+                                         0x3A,0x7D,0x3A,0x1B,0x25,0x89,0x42,0x24,0x3C,0xD9,0x55,0xB7,0xE0,0x0D,0x09,0x84,
+                                         0x80,0x0A,0x44,0x0B,0xDB,0xB2,0xCE,0xB1,0x7B,0x2B,0x8A,0x9A,0xA6,0x07,0x9C,0x54,
+                                         0x0E,0x38,0xDC,0x92,0xCB,0x1F,0x2A,0x60,0x72,0x61,0x44,0x51,0x83,0x23,0x5A,0xDB
+                                 },{
+                                         0xAB,0xBE,0xDE,0xA6,0x80,0x05,0x6F,0x52,0x38,0x2A,0xE5,0x48,0xB2,0xE4,0xF3,0xF3,
+                                         0x89,0x41,0xE7,0x1C,0xFF,0x8A,0x78,0xDB,0x1F,0xFF,0xE1,0x8A,0x1B,0x33,0x61,0x03,
+                                         0x9F,0xE7,0x67,0x02,0xAF,0x69,0x33,0x4B,0x7A,0x1E,0x6C,0x30,0x3B,0x76,0x52,0xF4,
+                                         0x36,0x98,0xFA,0xD1,0x15,0x3B,0xB6,0xC3,0x74,0xB4,0xC7,0xFB,0x98,0x45,0x9C,0xED
+                                 },{
+                                         0x7B,0xCD,0x9E,0xD0,0xEF,0xC8,0x89,0xFB,0x30,0x02,0xC6,0xCD,0x63,0x5A,0xFE,0x94,
+                                         0xD8,0xFA,0x6B,0xBB,0xEB,0xAB,0x07,0x61,0x20,0x01,0x80,0x21,0x14,0x84,0x66,0x79,
+                                         0x8A,0x1D,0x71,0xEF,0xEA,0x48,0xB9,0xCA,0xEF,0xBA,0xCD,0x1D,0x7D,0x47,0x6E,0x98,
+                                         0xDE,0xA2,0x59,0x4A,0xC0,0x6F,0xD8,0x5D,0x6B,0xCA,0xA4,0xCD,0x81,0xF3,0x2D,0x1B
+                                 },{
+                                         0x37,0x8E,0xE7,0x67,0xF1,0x16,0x31,0xBA,0xD2,0x13,0x80,0xB0,0x04,0x49,0xB1,0x7A,
+                                         0xCD,0xA4,0x3C,0x32,0xBC,0xDF,0x1D,0x77,0xF8,0x20,0x12,0xD4,0x30,0x21,0x9F,0x9B,
+                                         0x5D,0x80,0xEF,0x9D,0x18,0x91,0xCC,0x86,0xE7,0x1D,0xA4,0xAA,0x88,0xE1,0x28,0x52,
+                                         0xFA,0xF4,0x17,0xD5,0xD9,0xB2,0x1B,0x99,0x48,0xBC,0x92,0x4A,0xF1,0x1B,0xD7,0x20
+                                 }};
+
+
+static void AddModulo512(const void *a,const void *b,void *c)
+{
+    const unsigned char *A=a, *B=b;
+    unsigned char *C=c;
+    int t = 0;
+#ifdef FULL_UNROLL
+    #define ADDBYTE_8(i) t = A[i] + B[i] + (t >> 8); C[i] = t & 0xFF;
+
+	ADDBYTE_8(63)
+	ADDBYTE_8(62)
+	ADDBYTE_8(61)
+	ADDBYTE_8(60)
+	ADDBYTE_8(59)
+	ADDBYTE_8(58)
+	ADDBYTE_8(57)
+	ADDBYTE_8(56)
+	ADDBYTE_8(55)
+	ADDBYTE_8(54)
+	ADDBYTE_8(53)
+	ADDBYTE_8(52)
+	ADDBYTE_8(51)
+	ADDBYTE_8(50)
+	ADDBYTE_8(49)
+	ADDBYTE_8(48)
+	ADDBYTE_8(47)
+	ADDBYTE_8(46)
+	ADDBYTE_8(45)
+	ADDBYTE_8(44)
+	ADDBYTE_8(43)
+	ADDBYTE_8(42)
+	ADDBYTE_8(41)
+	ADDBYTE_8(40)
+	ADDBYTE_8(39)
+	ADDBYTE_8(38)
+	ADDBYTE_8(37)
+	ADDBYTE_8(36)
+	ADDBYTE_8(35)
+	ADDBYTE_8(34)
+	ADDBYTE_8(33)
+	ADDBYTE_8(32)
+	ADDBYTE_8(31)
+	ADDBYTE_8(30)
+	ADDBYTE_8(29)
+	ADDBYTE_8(28)
+	ADDBYTE_8(27)
+	ADDBYTE_8(26)
+	ADDBYTE_8(25)
+	ADDBYTE_8(24)
+	ADDBYTE_8(23)
+	ADDBYTE_8(22)
+	ADDBYTE_8(21)
+	ADDBYTE_8(20)
+	ADDBYTE_8(19)
+	ADDBYTE_8(18)
+	ADDBYTE_8(17)
+	ADDBYTE_8(16)
+	ADDBYTE_8(15)
+	ADDBYTE_8(14)
+	ADDBYTE_8(13)
+	ADDBYTE_8(12)
+	ADDBYTE_8(11)
+	ADDBYTE_8(10)
+	ADDBYTE_8(9)
+	ADDBYTE_8(8)
+	ADDBYTE_8(7)
+	ADDBYTE_8(6)
+	ADDBYTE_8(5)
+	ADDBYTE_8(4)
+	ADDBYTE_8(3)
+	ADDBYTE_8(2)
+	ADDBYTE_8(1)
+	ADDBYTE_8(0)
+
+#else
+    int i = 0;
+
+    for(i=63;i>=0;i--)
+    {
+        t = A[i] + B[i] + (t >> 8);
+        C[i] = t & 0xFF;
+    }
+#endif
+}
+
+static void AddXor512(const void *a,const void *b,void *c)
+{
+    const unsigned long long *A=a, *B=b;
+    unsigned long long *C=c;
+#ifdef FULL_UNROLL
+    C[0] = A[0] ^ B[0];
+	C[1] = A[1] ^ B[1];
+	C[2] = A[2] ^ B[2];
+	C[3] = A[3] ^ B[3];
+	C[4] = A[4] ^ B[4];
+	C[5] = A[5] ^ B[5];
+	C[6] = A[6] ^ B[6];
+	C[7] = A[7] ^ B[7];
+#else
+    int i = 0;
+
+    for(i=0; i<8; i++) {
+        C[i] = A[i] ^ B[i];
+    }
+#endif
+}
+
+static void F(unsigned char *state)
+{
+    unsigned long long return_state[8];
+    register unsigned long long r = 0;
+    r ^= TG[0][state[56]];
+    r ^= TG[1][state[48]];
+    r ^= TG[2][state[40]];
+    r ^= TG[3][state[32]];
+    r ^= TG[4][state[24]];
+    r ^= TG[5][state[16]];
+    r ^= TG[6][state[8]];
+    r ^= TG[7][state[0]];
+    return_state[0] = r;
+    r = 0;
+
+    r ^= TG[0][state[57]];
+    r ^= TG[1][state[49]];
+    r ^= TG[2][state[41]];
+    r ^= TG[3][state[33]];
+    r ^= TG[4][state[25]];
+    r ^= TG[5][state[17]];
+    r ^= TG[6][state[9]];
+    r ^= TG[7][state[1]];
+    return_state[1] = r;
+    r = 0;
+
+    r ^= TG[0][state[58]];
+    r ^= TG[1][state[50]];
+    r ^= TG[2][state[42]];
+    r ^= TG[3][state[34]];
+    r ^= TG[4][state[26]];
+    r ^= TG[5][state[18]];
+    r ^= TG[6][state[10]];
+    r ^= TG[7][state[2]];
+    return_state[2] = r;
+    r = 0;
+
+    r ^= TG[0][state[59]];
+    r ^= TG[1][state[51]];
+    r ^= TG[2][state[43]];
+    r ^= TG[3][state[35]];
+    r ^= TG[4][state[27]];
+    r ^= TG[5][state[19]];
+    r ^= TG[6][state[11]];
+    r ^= TG[7][state[3]];
+    return_state[3] = r;
+    r = 0;
+
+    r ^= TG[0][state[60]];
+    r ^= TG[1][state[52]];
+    r ^= TG[2][state[44]];
+    r ^= TG[3][state[36]];
+    r ^= TG[4][state[28]];
+    r ^= TG[5][state[20]];
+    r ^= TG[6][state[12]];
+    r ^= TG[7][state[4]];
+    return_state[4] = r;
+    r = 0;
+
+    r ^= TG[0][state[61]];
+    r ^= TG[1][state[53]];
+    r ^= TG[2][state[45]];
+    r ^= TG[3][state[37]];
+    r ^= TG[4][state[29]];
+    r ^= TG[5][state[21]];
+    r ^= TG[6][state[13]];
+    r ^= TG[7][state[5]];
+    return_state[5] = r;
+    r = 0;
+
+    r ^= TG[0][state[62]];
+    r ^= TG[1][state[54]];
+    r ^= TG[2][state[46]];
+    r ^= TG[3][state[38]];
+    r ^= TG[4][state[30]];
+    r ^= TG[5][state[22]];
+    r ^= TG[6][state[14]];
+    r ^= TG[7][state[6]];
+    return_state[6] = r;
+    r = 0;
+
+    r ^= TG[0][state[63]];
+    r ^= TG[1][state[55]];
+    r ^= TG[2][state[47]];
+    r ^= TG[3][state[39]];
+    r ^= TG[4][state[31]];
+    r ^= TG[5][state[23]];
+    r ^= TG[6][state[15]];
+    r ^= TG[7][state[7]];
+    return_state[7] = r;
+
+    memcpy(state,(unsigned char*)return_state,64);
+}
+
+#define KeySchedule(K,i) AddXor512(K,C[i],K); F(K);
+
+static void E(unsigned char *K,const unsigned char *m, unsigned char *state)
+{
+#ifdef FULL_UNROLL
+    AddXor512(m,K,state);
+
+	F(state);
+	KeySchedule(K,0);
+	AddXor512(state,K,state);
+
+	F(state);
+	KeySchedule(K,1);
+	AddXor512(state,K,state);
+
+	F(state);
+	KeySchedule(K,2);
+	AddXor512(state,K,state);
+
+	F(state);
+	KeySchedule(K,3);
+	AddXor512(state,K,state);
+
+	F(state);
+	KeySchedule(K,4);
+	AddXor512(state,K,state);
+
+	F(state);
+	KeySchedule(K,5);
+	AddXor512(state,K,state);
+
+	F(state);
+	KeySchedule(K,6);
+	AddXor512(state,K,state);
+
+	F(state);
+	KeySchedule(K,7);
+	AddXor512(state,K,state);
+
+	F(state);
+	KeySchedule(K,8);
+	AddXor512(state,K,state);
+
+	F(state);
+	KeySchedule(K,9);
+	AddXor512(state,K,state);
+
+	F(state);
+	KeySchedule(K,10);
+	AddXor512(state,K,state);
+
+	F(state);
+	KeySchedule(K,11);
+	AddXor512(state,K,state);
+#else
+    int i = 0;
+
+    AddXor512(m,K,state);
+
+    for(i=0;i<12;i++) {
+        F(state);
+        KeySchedule(K,i);
+        AddXor512(state,K,state);
+    }
+#endif
+}
+
+static void g_N(const unsigned char *N,unsigned char *h,const unsigned char *m)
+{
+    unsigned char t[64], K[64];
+
+    AddXor512(N,h,K);
+
+    F(K);
+
+    E(K,m,t);
+
+    AddXor512(t,h,t);
+    AddXor512(t,m,h);
+}
+
+static void hash_X(unsigned char *IV,const unsigned char *message,unsigned long long length,unsigned char *out)
+{
+    unsigned char v512[64] = {
+            0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+            0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+            0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+            0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02,0x00
+    };
+    unsigned char v0[64] = {
+            0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+            0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+            0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+            0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+    };
+    unsigned char Sigma[64] = {
+            0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+            0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+            0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+            0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+    };
+    unsigned char N[64] = {
+            0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+            0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+            0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+            0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+    };
+    unsigned char m[64], *hash = IV;
+    unsigned long long len = length;
+
+    // Stage 2
+    while (len >= 512)
+    {
+        memcpy(m, message + len/8 - 63 - ( (len & 0x7) == 0 ), 64);
+
+        g_N(N,hash,m);
+        AddModulo512(N,v512,N);
+        AddModulo512(Sigma,m,Sigma);
+        len -= 512;
+    }
+
+    memset(m,0,64);
+    memcpy(m + 63 - len/8 + ( (len & 0x7) == 0 ), message, len/8 + 1 - ( (len & 0x7) == 0 ));
+
+    // Stage 3
+    m[ 63 - len/8 ] |= (1 << (len & 0x7));
+
+    g_N(N,hash,m);
+    v512[63] = len & 0xFF;
+    v512[62] = (unsigned char) (len >> 8);
+    AddModulo512(N,v512,N);
+
+    AddModulo512(Sigma,m,Sigma);
+
+    g_N(v0,hash,N);
+    g_N(v0,hash,Sigma);
+
+    memcpy(out, hash, 64);
+}
+
+static void hash_512(const unsigned char *message, unsigned long long length, unsigned char *out)
+{
+    unsigned char IV[64] = {
+            0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+            0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+            0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+            0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+    };
+
+    hash_X(IV,message,length,out);
+}
+
+static void hash_256(const unsigned char *message, unsigned long long length, unsigned char *out)
+{
+    unsigned char IV[64] = {
+            0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
+            0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
+            0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
+            0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01
+    };
+    unsigned char hash[64];
+
+    hash_X(IV,message,length,hash);
+
+    memcpy(out,hash,32);
+}
+
+
+/* exported functions, to rename (streebog) */
+
+
+void sph_gost256_init(void *cc)
+{
+}
+
+void sph_gost256(void *cc, const void *data, size_t len)
+{
+    hash_256(data, 8*len, cc);
+}
+
+void sph_gost256_close(void *cc, void *dst)
+{
+    memcpy(dst, cc, 32);
+}
+
+void sph_gost256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+}
+
+void sph_gost512_init(void *cc)
+{
+}
+
+void sph_gost512(void *cc, const void *data, size_t len)
+{
+    hash_512(data, 8*len, cc);
+}
+
+void sph_gost512_close(void *cc, void *dst)
+{
+    memcpy(dst, cc, 64);
+}
+
+void sph_gost512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+}
+
+
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/sha3/gost_streebog.h b/sha3/gost_streebog.h
new file mode 100644
index 0000000..801902c
--- /dev/null
+++ b/sha3/gost_streebog.h
@@ -0,0 +1,185 @@
+/* $Id: sph_gost.h 216 2010-06-08 09:46:57Z tp $ */
+/**
+ * GOST interface. This is the interface for GOST R 12 with the
+ * recommended parameters for SHA-3, with output lengths 256
+ * and 512 bits.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file     sph_gost.h
+ * @author   Mish <mish@btchouse.com>
+ */
+
+#ifndef SPH_GOST_H__
+#define SPH_GOST_H__
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#include <stddef.h>
+#include "sph_types.h"
+
+/**
+ * Output size (in bits) for GOST-256.
+ */
+#define SPH_SIZE_gost256   256
+
+/**
+ * Output size (in bits) for GOST-512.
+ */
+#define SPH_SIZE_gost512   512
+
+/**
+ * This structure is a context for Keccak computations: it contains the
+ * intermediate values and some data from the last entered block. Once a
+ * GOST computation has been performed, the context can be reused for
+ * another computation.
+ *
+ * The contents of this structure are private. A running GOST computation
+ * can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+
+/**
+ * This structure is a context for Gost-256 computations.
+ */
+
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+    unsigned char buf[32];    /* first field, for alignment */
+    size_t ptr;
+    sph_u32 V[3][8];
+#endif
+} sph_gost256_context;
+
+/**
+ * This structure is a context for Gost-512 computations.
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+    unsigned char buf[64];    /* first field, for alignment */
+    size_t ptr;
+    sph_u32 V[5][8];
+#endif
+} sph_gost512_context;
+
+
+/**
+ * Initialize a GOST-256 context. This process performs no memory allocation.
+ *
+ * @param cc   the GOST-256 context (pointer to a
+ *             <code>sph_gost256_context</code>)
+ */
+void sph_gost256_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Gost-256 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_gost256(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current GOST-256 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accomodate the result (32 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the GOST-256 context
+ * @param dst   the destination buffer
+ */
+void sph_gost256_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accomodate the result (32 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the GOST-256 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_gost256_addbits_and_close(
+        void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a Gost-512 context. This process performs no memory allocation.
+ *
+ * @param cc   the GOST-512 context (pointer to a
+ *             <code>sph_gost512_context</code>)
+ */
+void sph_gost512_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the GOST-512 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_gost512(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current GOST-512 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accomodate the result (64 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the GOST-512 context
+ * @param dst   the destination buffer
+ */
+void sph_gost512_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accomodate the result (64 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the GOST-512 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_gost512_addbits_and_close(
+        void *cc, unsigned ub, unsigned n, void *dst);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
\ No newline at end of file
diff --git a/sha3/sph_cubehash.c b/sha3/sph_cubehash.c
new file mode 100644
index 0000000..75eb3e9
--- /dev/null
+++ b/sha3/sph_cubehash.c
@@ -0,0 +1,723 @@
+/* $Id: cubehash.c 227 2010-06-16 17:28:38Z tp $ */
+/*
+ * CubeHash implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#include <stddef.h>
+#include <string.h>
+#include <limits.h>
+
+#include "sph_cubehash.h"
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_CUBEHASH
+#define SPH_SMALL_FOOTPRINT_CUBEHASH   1
+#endif
+
+/*
+ * Some tests were conducted on an Intel Core2 Q6600 (32-bit and 64-bit
+ * mode), a PowerPC G3, and a MIPS-compatible CPU (Broadcom BCM3302).
+ * It appears that the optimal settings are:
+ *  -- full unroll, no state copy on the "big" systems (x86, PowerPC)
+ *  -- unroll to 4 or 8, state copy on the "small" system (MIPS)
+ */
+
+#if SPH_SMALL_FOOTPRINT_CUBEHASH
+
+#if !defined SPH_CUBEHASH_UNROLL
+#define SPH_CUBEHASH_UNROLL   4
+#endif
+#if !defined SPH_CUBEHASH_NOCOPY
+#define SPH_CUBEHASH_NOCOPY   1
+#endif
+
+#else
+
+#if !defined SPH_CUBEHASH_UNROLL
+#define SPH_CUBEHASH_UNROLL   0
+#endif
+#if !defined SPH_CUBEHASH_NOCOPY
+#define SPH_CUBEHASH_NOCOPY   0
+#endif
+
+#endif
+
+#ifdef _MSC_VER
+#pragma warning (disable: 4146)
+#endif
+
+static const sph_u32 IV224[] = {
+        SPH_C32(0xB0FC8217), SPH_C32(0x1BEE1A90), SPH_C32(0x829E1A22),
+        SPH_C32(0x6362C342), SPH_C32(0x24D91C30), SPH_C32(0x03A7AA24),
+        SPH_C32(0xA63721C8), SPH_C32(0x85B0E2EF), SPH_C32(0xF35D13F3),
+        SPH_C32(0x41DA807D), SPH_C32(0x21A70CA6), SPH_C32(0x1F4E9774),
+        SPH_C32(0xB3E1C932), SPH_C32(0xEB0A79A8), SPH_C32(0xCDDAAA66),
+        SPH_C32(0xE2F6ECAA), SPH_C32(0x0A713362), SPH_C32(0xAA3080E0),
+        SPH_C32(0xD8F23A32), SPH_C32(0xCEF15E28), SPH_C32(0xDB086314),
+        SPH_C32(0x7F709DF7), SPH_C32(0xACD228A4), SPH_C32(0x704D6ECE),
+        SPH_C32(0xAA3EC95F), SPH_C32(0xE387C214), SPH_C32(0x3A6445FF),
+        SPH_C32(0x9CAB81C3), SPH_C32(0xC73D4B98), SPH_C32(0xD277AEBE),
+        SPH_C32(0xFD20151C), SPH_C32(0x00CB573E)
+};
+
+static const sph_u32 IV256[] = {
+        SPH_C32(0xEA2BD4B4), SPH_C32(0xCCD6F29F), SPH_C32(0x63117E71),
+        SPH_C32(0x35481EAE), SPH_C32(0x22512D5B), SPH_C32(0xE5D94E63),
+        SPH_C32(0x7E624131), SPH_C32(0xF4CC12BE), SPH_C32(0xC2D0B696),
+        SPH_C32(0x42AF2070), SPH_C32(0xD0720C35), SPH_C32(0x3361DA8C),
+        SPH_C32(0x28CCECA4), SPH_C32(0x8EF8AD83), SPH_C32(0x4680AC00),
+        SPH_C32(0x40E5FBAB), SPH_C32(0xD89041C3), SPH_C32(0x6107FBD5),
+        SPH_C32(0x6C859D41), SPH_C32(0xF0B26679), SPH_C32(0x09392549),
+        SPH_C32(0x5FA25603), SPH_C32(0x65C892FD), SPH_C32(0x93CB6285),
+        SPH_C32(0x2AF2B5AE), SPH_C32(0x9E4B4E60), SPH_C32(0x774ABFDD),
+        SPH_C32(0x85254725), SPH_C32(0x15815AEB), SPH_C32(0x4AB6AAD6),
+        SPH_C32(0x9CDAF8AF), SPH_C32(0xD6032C0A)
+};
+
+static const sph_u32 IV384[] = {
+        SPH_C32(0xE623087E), SPH_C32(0x04C00C87), SPH_C32(0x5EF46453),
+        SPH_C32(0x69524B13), SPH_C32(0x1A05C7A9), SPH_C32(0x3528DF88),
+        SPH_C32(0x6BDD01B5), SPH_C32(0x5057B792), SPH_C32(0x6AA7A922),
+        SPH_C32(0x649C7EEE), SPH_C32(0xF426309F), SPH_C32(0xCB629052),
+        SPH_C32(0xFC8E20ED), SPH_C32(0xB3482BAB), SPH_C32(0xF89E5E7E),
+        SPH_C32(0xD83D4DE4), SPH_C32(0x44BFC10D), SPH_C32(0x5FC1E63D),
+        SPH_C32(0x2104E6CB), SPH_C32(0x17958F7F), SPH_C32(0xDBEAEF70),
+        SPH_C32(0xB4B97E1E), SPH_C32(0x32C195F6), SPH_C32(0x6184A8E4),
+        SPH_C32(0x796C2543), SPH_C32(0x23DE176D), SPH_C32(0xD33BBAEC),
+        SPH_C32(0x0C12E5D2), SPH_C32(0x4EB95A7B), SPH_C32(0x2D18BA01),
+        SPH_C32(0x04EE475F), SPH_C32(0x1FC5F22E)
+};
+
+static const sph_u32 IV512[] = {
+        SPH_C32(0x2AEA2A61), SPH_C32(0x50F494D4), SPH_C32(0x2D538B8B),
+        SPH_C32(0x4167D83E), SPH_C32(0x3FEE2313), SPH_C32(0xC701CF8C),
+        SPH_C32(0xCC39968E), SPH_C32(0x50AC5695), SPH_C32(0x4D42C787),
+        SPH_C32(0xA647A8B3), SPH_C32(0x97CF0BEF), SPH_C32(0x825B4537),
+        SPH_C32(0xEEF864D2), SPH_C32(0xF22090C4), SPH_C32(0xD0E5CD33),
+        SPH_C32(0xA23911AE), SPH_C32(0xFCD398D9), SPH_C32(0x148FE485),
+        SPH_C32(0x1B017BEF), SPH_C32(0xB6444532), SPH_C32(0x6A536159),
+        SPH_C32(0x2FF5781C), SPH_C32(0x91FA7934), SPH_C32(0x0DBADEA9),
+        SPH_C32(0xD65C8A2B), SPH_C32(0xA5A70E75), SPH_C32(0xB1C62456),
+        SPH_C32(0xBC796576), SPH_C32(0x1921C8F7), SPH_C32(0xE7989AF1),
+        SPH_C32(0x7795D246), SPH_C32(0xD43E3B44)
+};
+
+#define T32      SPH_T32
+#define ROTL32   SPH_ROTL32
+
+#if SPH_CUBEHASH_NOCOPY
+
+#define DECL_STATE
+#define READ_STATE(cc)
+#define WRITE_STATE(cc)
+
+#define x0   ((sc)->state[ 0])
+#define x1   ((sc)->state[ 1])
+#define x2   ((sc)->state[ 2])
+#define x3   ((sc)->state[ 3])
+#define x4   ((sc)->state[ 4])
+#define x5   ((sc)->state[ 5])
+#define x6   ((sc)->state[ 6])
+#define x7   ((sc)->state[ 7])
+#define x8   ((sc)->state[ 8])
+#define x9   ((sc)->state[ 9])
+#define xa   ((sc)->state[10])
+#define xb   ((sc)->state[11])
+#define xc   ((sc)->state[12])
+#define xd   ((sc)->state[13])
+#define xe   ((sc)->state[14])
+#define xf   ((sc)->state[15])
+#define xg   ((sc)->state[16])
+#define xh   ((sc)->state[17])
+#define xi   ((sc)->state[18])
+#define xj   ((sc)->state[19])
+#define xk   ((sc)->state[20])
+#define xl   ((sc)->state[21])
+#define xm   ((sc)->state[22])
+#define xn   ((sc)->state[23])
+#define xo   ((sc)->state[24])
+#define xp   ((sc)->state[25])
+#define xq   ((sc)->state[26])
+#define xr   ((sc)->state[27])
+#define xs   ((sc)->state[28])
+#define xt   ((sc)->state[29])
+#define xu   ((sc)->state[30])
+#define xv   ((sc)->state[31])
+
+#else
+
+#define DECL_STATE \
+	sph_u32 x0, x1, x2, x3, x4, x5, x6, x7; \
+	sph_u32 x8, x9, xa, xb, xc, xd, xe, xf; \
+	sph_u32 xg, xh, xi, xj, xk, xl, xm, xn; \
+	sph_u32 xo, xp, xq, xr, xs, xt, xu, xv;
+
+#define READ_STATE(cc)   do { \
+		x0 = (cc)->state[ 0]; \
+		x1 = (cc)->state[ 1]; \
+		x2 = (cc)->state[ 2]; \
+		x3 = (cc)->state[ 3]; \
+		x4 = (cc)->state[ 4]; \
+		x5 = (cc)->state[ 5]; \
+		x6 = (cc)->state[ 6]; \
+		x7 = (cc)->state[ 7]; \
+		x8 = (cc)->state[ 8]; \
+		x9 = (cc)->state[ 9]; \
+		xa = (cc)->state[10]; \
+		xb = (cc)->state[11]; \
+		xc = (cc)->state[12]; \
+		xd = (cc)->state[13]; \
+		xe = (cc)->state[14]; \
+		xf = (cc)->state[15]; \
+		xg = (cc)->state[16]; \
+		xh = (cc)->state[17]; \
+		xi = (cc)->state[18]; \
+		xj = (cc)->state[19]; \
+		xk = (cc)->state[20]; \
+		xl = (cc)->state[21]; \
+		xm = (cc)->state[22]; \
+		xn = (cc)->state[23]; \
+		xo = (cc)->state[24]; \
+		xp = (cc)->state[25]; \
+		xq = (cc)->state[26]; \
+		xr = (cc)->state[27]; \
+		xs = (cc)->state[28]; \
+		xt = (cc)->state[29]; \
+		xu = (cc)->state[30]; \
+		xv = (cc)->state[31]; \
+	} while (0)
+
+#define WRITE_STATE(cc)   do { \
+		(cc)->state[ 0] = x0; \
+		(cc)->state[ 1] = x1; \
+		(cc)->state[ 2] = x2; \
+		(cc)->state[ 3] = x3; \
+		(cc)->state[ 4] = x4; \
+		(cc)->state[ 5] = x5; \
+		(cc)->state[ 6] = x6; \
+		(cc)->state[ 7] = x7; \
+		(cc)->state[ 8] = x8; \
+		(cc)->state[ 9] = x9; \
+		(cc)->state[10] = xa; \
+		(cc)->state[11] = xb; \
+		(cc)->state[12] = xc; \
+		(cc)->state[13] = xd; \
+		(cc)->state[14] = xe; \
+		(cc)->state[15] = xf; \
+		(cc)->state[16] = xg; \
+		(cc)->state[17] = xh; \
+		(cc)->state[18] = xi; \
+		(cc)->state[19] = xj; \
+		(cc)->state[20] = xk; \
+		(cc)->state[21] = xl; \
+		(cc)->state[22] = xm; \
+		(cc)->state[23] = xn; \
+		(cc)->state[24] = xo; \
+		(cc)->state[25] = xp; \
+		(cc)->state[26] = xq; \
+		(cc)->state[27] = xr; \
+		(cc)->state[28] = xs; \
+		(cc)->state[29] = xt; \
+		(cc)->state[30] = xu; \
+		(cc)->state[31] = xv; \
+	} while (0)
+
+#endif
+
+#define INPUT_BLOCK   do { \
+		x0 ^= sph_dec32le_aligned(buf +  0); \
+		x1 ^= sph_dec32le_aligned(buf +  4); \
+		x2 ^= sph_dec32le_aligned(buf +  8); \
+		x3 ^= sph_dec32le_aligned(buf + 12); \
+		x4 ^= sph_dec32le_aligned(buf + 16); \
+		x5 ^= sph_dec32le_aligned(buf + 20); \
+		x6 ^= sph_dec32le_aligned(buf + 24); \
+		x7 ^= sph_dec32le_aligned(buf + 28); \
+	} while (0)
+
+#define ROUND_EVEN   do { \
+		xg = T32(x0 + xg); \
+		x0 = ROTL32(x0, 7); \
+		xh = T32(x1 + xh); \
+		x1 = ROTL32(x1, 7); \
+		xi = T32(x2 + xi); \
+		x2 = ROTL32(x2, 7); \
+		xj = T32(x3 + xj); \
+		x3 = ROTL32(x3, 7); \
+		xk = T32(x4 + xk); \
+		x4 = ROTL32(x4, 7); \
+		xl = T32(x5 + xl); \
+		x5 = ROTL32(x5, 7); \
+		xm = T32(x6 + xm); \
+		x6 = ROTL32(x6, 7); \
+		xn = T32(x7 + xn); \
+		x7 = ROTL32(x7, 7); \
+		xo = T32(x8 + xo); \
+		x8 = ROTL32(x8, 7); \
+		xp = T32(x9 + xp); \
+		x9 = ROTL32(x9, 7); \
+		xq = T32(xa + xq); \
+		xa = ROTL32(xa, 7); \
+		xr = T32(xb + xr); \
+		xb = ROTL32(xb, 7); \
+		xs = T32(xc + xs); \
+		xc = ROTL32(xc, 7); \
+		xt = T32(xd + xt); \
+		xd = ROTL32(xd, 7); \
+		xu = T32(xe + xu); \
+		xe = ROTL32(xe, 7); \
+		xv = T32(xf + xv); \
+		xf = ROTL32(xf, 7); \
+		x8 ^= xg; \
+		x9 ^= xh; \
+		xa ^= xi; \
+		xb ^= xj; \
+		xc ^= xk; \
+		xd ^= xl; \
+		xe ^= xm; \
+		xf ^= xn; \
+		x0 ^= xo; \
+		x1 ^= xp; \
+		x2 ^= xq; \
+		x3 ^= xr; \
+		x4 ^= xs; \
+		x5 ^= xt; \
+		x6 ^= xu; \
+		x7 ^= xv; \
+		xi = T32(x8 + xi); \
+		x8 = ROTL32(x8, 11); \
+		xj = T32(x9 + xj); \
+		x9 = ROTL32(x9, 11); \
+		xg = T32(xa + xg); \
+		xa = ROTL32(xa, 11); \
+		xh = T32(xb + xh); \
+		xb = ROTL32(xb, 11); \
+		xm = T32(xc + xm); \
+		xc = ROTL32(xc, 11); \
+		xn = T32(xd + xn); \
+		xd = ROTL32(xd, 11); \
+		xk = T32(xe + xk); \
+		xe = ROTL32(xe, 11); \
+		xl = T32(xf + xl); \
+		xf = ROTL32(xf, 11); \
+		xq = T32(x0 + xq); \
+		x0 = ROTL32(x0, 11); \
+		xr = T32(x1 + xr); \
+		x1 = ROTL32(x1, 11); \
+		xo = T32(x2 + xo); \
+		x2 = ROTL32(x2, 11); \
+		xp = T32(x3 + xp); \
+		x3 = ROTL32(x3, 11); \
+		xu = T32(x4 + xu); \
+		x4 = ROTL32(x4, 11); \
+		xv = T32(x5 + xv); \
+		x5 = ROTL32(x5, 11); \
+		xs = T32(x6 + xs); \
+		x6 = ROTL32(x6, 11); \
+		xt = T32(x7 + xt); \
+		x7 = ROTL32(x7, 11); \
+		xc ^= xi; \
+		xd ^= xj; \
+		xe ^= xg; \
+		xf ^= xh; \
+		x8 ^= xm; \
+		x9 ^= xn; \
+		xa ^= xk; \
+		xb ^= xl; \
+		x4 ^= xq; \
+		x5 ^= xr; \
+		x6 ^= xo; \
+		x7 ^= xp; \
+		x0 ^= xu; \
+		x1 ^= xv; \
+		x2 ^= xs; \
+		x3 ^= xt; \
+	} while (0)
+
+#define ROUND_ODD   do { \
+		xj = T32(xc + xj); \
+		xc = ROTL32(xc, 7); \
+		xi = T32(xd + xi); \
+		xd = ROTL32(xd, 7); \
+		xh = T32(xe + xh); \
+		xe = ROTL32(xe, 7); \
+		xg = T32(xf + xg); \
+		xf = ROTL32(xf, 7); \
+		xn = T32(x8 + xn); \
+		x8 = ROTL32(x8, 7); \
+		xm = T32(x9 + xm); \
+		x9 = ROTL32(x9, 7); \
+		xl = T32(xa + xl); \
+		xa = ROTL32(xa, 7); \
+		xk = T32(xb + xk); \
+		xb = ROTL32(xb, 7); \
+		xr = T32(x4 + xr); \
+		x4 = ROTL32(x4, 7); \
+		xq = T32(x5 + xq); \
+		x5 = ROTL32(x5, 7); \
+		xp = T32(x6 + xp); \
+		x6 = ROTL32(x6, 7); \
+		xo = T32(x7 + xo); \
+		x7 = ROTL32(x7, 7); \
+		xv = T32(x0 + xv); \
+		x0 = ROTL32(x0, 7); \
+		xu = T32(x1 + xu); \
+		x1 = ROTL32(x1, 7); \
+		xt = T32(x2 + xt); \
+		x2 = ROTL32(x2, 7); \
+		xs = T32(x3 + xs); \
+		x3 = ROTL32(x3, 7); \
+		x4 ^= xj; \
+		x5 ^= xi; \
+		x6 ^= xh; \
+		x7 ^= xg; \
+		x0 ^= xn; \
+		x1 ^= xm; \
+		x2 ^= xl; \
+		x3 ^= xk; \
+		xc ^= xr; \
+		xd ^= xq; \
+		xe ^= xp; \
+		xf ^= xo; \
+		x8 ^= xv; \
+		x9 ^= xu; \
+		xa ^= xt; \
+		xb ^= xs; \
+		xh = T32(x4 + xh); \
+		x4 = ROTL32(x4, 11); \
+		xg = T32(x5 + xg); \
+		x5 = ROTL32(x5, 11); \
+		xj = T32(x6 + xj); \
+		x6 = ROTL32(x6, 11); \
+		xi = T32(x7 + xi); \
+		x7 = ROTL32(x7, 11); \
+		xl = T32(x0 + xl); \
+		x0 = ROTL32(x0, 11); \
+		xk = T32(x1 + xk); \
+		x1 = ROTL32(x1, 11); \
+		xn = T32(x2 + xn); \
+		x2 = ROTL32(x2, 11); \
+		xm = T32(x3 + xm); \
+		x3 = ROTL32(x3, 11); \
+		xp = T32(xc + xp); \
+		xc = ROTL32(xc, 11); \
+		xo = T32(xd + xo); \
+		xd = ROTL32(xd, 11); \
+		xr = T32(xe + xr); \
+		xe = ROTL32(xe, 11); \
+		xq = T32(xf + xq); \
+		xf = ROTL32(xf, 11); \
+		xt = T32(x8 + xt); \
+		x8 = ROTL32(x8, 11); \
+		xs = T32(x9 + xs); \
+		x9 = ROTL32(x9, 11); \
+		xv = T32(xa + xv); \
+		xa = ROTL32(xa, 11); \
+		xu = T32(xb + xu); \
+		xb = ROTL32(xb, 11); \
+		x0 ^= xh; \
+		x1 ^= xg; \
+		x2 ^= xj; \
+		x3 ^= xi; \
+		x4 ^= xl; \
+		x5 ^= xk; \
+		x6 ^= xn; \
+		x7 ^= xm; \
+		x8 ^= xp; \
+		x9 ^= xo; \
+		xa ^= xr; \
+		xb ^= xq; \
+		xc ^= xt; \
+		xd ^= xs; \
+		xe ^= xv; \
+		xf ^= xu; \
+	} while (0)
+
+/*
+ * There is no need to unroll all 16 rounds. The word-swapping permutation
+ * is an involution, so we need to unroll an even number of rounds. On
+ * "big" systems, unrolling 4 rounds yields about 97% of the speed
+ * achieved with full unrolling; and it keeps the code more compact
+ * for small architectures.
+ */
+
+#if SPH_CUBEHASH_UNROLL == 2
+
+#define SIXTEEN_ROUNDS   do { \
+		int j; \
+		for (j = 0; j < 8; j ++) { \
+			ROUND_EVEN; \
+			ROUND_ODD; \
+		} \
+	} while (0)
+
+#elif SPH_CUBEHASH_UNROLL == 4
+
+#define SIXTEEN_ROUNDS   do { \
+		int j; \
+		for (j = 0; j < 4; j ++) { \
+			ROUND_EVEN; \
+			ROUND_ODD; \
+			ROUND_EVEN; \
+			ROUND_ODD; \
+		} \
+	} while (0)
+
+#elif SPH_CUBEHASH_UNROLL == 8
+
+#define SIXTEEN_ROUNDS   do { \
+		int j; \
+		for (j = 0; j < 2; j ++) { \
+			ROUND_EVEN; \
+			ROUND_ODD; \
+			ROUND_EVEN; \
+			ROUND_ODD; \
+			ROUND_EVEN; \
+			ROUND_ODD; \
+			ROUND_EVEN; \
+			ROUND_ODD; \
+		} \
+	} while (0)
+
+#else
+
+#define SIXTEEN_ROUNDS   do { \
+		ROUND_EVEN; \
+		ROUND_ODD; \
+		ROUND_EVEN; \
+		ROUND_ODD; \
+		ROUND_EVEN; \
+		ROUND_ODD; \
+		ROUND_EVEN; \
+		ROUND_ODD; \
+		ROUND_EVEN; \
+		ROUND_ODD; \
+		ROUND_EVEN; \
+		ROUND_ODD; \
+		ROUND_EVEN; \
+		ROUND_ODD; \
+		ROUND_EVEN; \
+		ROUND_ODD; \
+	} while (0)
+
+#endif
+
+static void
+cubehash_init(sph_cubehash_context *sc, const sph_u32 *iv)
+{
+    memcpy(sc->state, iv, sizeof sc->state);
+    sc->ptr = 0;
+}
+
+static void
+cubehash_core(sph_cubehash_context *sc, const void *data, size_t len)
+{
+    unsigned char *buf;
+    size_t ptr;
+    DECL_STATE
+
+    buf = sc->buf;
+    ptr = sc->ptr;
+    if (len < (sizeof sc->buf) - ptr) {
+        memcpy(buf + ptr, data, len);
+        ptr += len;
+        sc->ptr = ptr;
+        return;
+    }
+
+    READ_STATE(sc);
+    while (len > 0) {
+        size_t clen;
+
+        clen = (sizeof sc->buf) - ptr;
+        if (clen > len)
+            clen = len;
+        memcpy(buf + ptr, data, clen);
+        ptr += clen;
+        data = (const unsigned char *)data + clen;
+        len -= clen;
+        if (ptr == sizeof sc->buf) {
+            INPUT_BLOCK;
+            SIXTEEN_ROUNDS;
+            ptr = 0;
+        }
+    }
+    WRITE_STATE(sc);
+    sc->ptr = ptr;
+}
+
+static void
+cubehash_close(sph_cubehash_context *sc, unsigned ub, unsigned n,
+               void *dst, size_t out_size_w32)
+{
+    unsigned char *buf, *out;
+    size_t ptr;
+    unsigned z;
+    int i;
+    DECL_STATE
+
+    buf = sc->buf;
+    ptr = sc->ptr;
+    z = 0x80 >> n;
+    buf[ptr ++] = ((ub & -z) | z) & 0xFF;
+    memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
+    READ_STATE(sc);
+    INPUT_BLOCK;
+    for (i = 0; i < 11; i ++) {
+        SIXTEEN_ROUNDS;
+        if (i == 0)
+            xv ^= SPH_C32(1);
+    }
+    WRITE_STATE(sc);
+    out = dst;
+    for (z = 0; z < out_size_w32; z ++)
+        sph_enc32le(out + (z << 2), sc->state[z]);
+}
+
+/* see sph_cubehash.h */
+void
+sph_cubehash224_init(void *cc)
+{
+    cubehash_init(cc, IV224);
+}
+
+/* see sph_cubehash.h */
+void
+sph_cubehash224(void *cc, const void *data, size_t len)
+{
+    cubehash_core(cc, data, len);
+}
+
+/* see sph_cubehash.h */
+void
+sph_cubehash224_close(void *cc, void *dst)
+{
+    sph_cubehash224_addbits_and_close(cc, 0, 0, dst);
+}
+
+/* see sph_cubehash.h */
+void
+sph_cubehash224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+    cubehash_close(cc, ub, n, dst, 7);
+    sph_cubehash224_init(cc);
+}
+
+/* see sph_cubehash.h */
+void
+sph_cubehash256_init(void *cc)
+{
+    cubehash_init(cc, IV256);
+}
+
+/* see sph_cubehash.h */
+void
+sph_cubehash256(void *cc, const void *data, size_t len)
+{
+    cubehash_core(cc, data, len);
+}
+
+/* see sph_cubehash.h */
+void
+sph_cubehash256_close(void *cc, void *dst)
+{
+    sph_cubehash256_addbits_and_close(cc, 0, 0, dst);
+}
+
+/* see sph_cubehash.h */
+void
+sph_cubehash256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+    cubehash_close(cc, ub, n, dst, 8);
+    sph_cubehash256_init(cc);
+}
+
+/* see sph_cubehash.h */
+void
+sph_cubehash384_init(void *cc)
+{
+    cubehash_init(cc, IV384);
+}
+
+/* see sph_cubehash.h */
+void
+sph_cubehash384(void *cc, const void *data, size_t len)
+{
+    cubehash_core(cc, data, len);
+}
+
+/* see sph_cubehash.h */
+void
+sph_cubehash384_close(void *cc, void *dst)
+{
+    sph_cubehash384_addbits_and_close(cc, 0, 0, dst);
+}
+
+/* see sph_cubehash.h */
+void
+sph_cubehash384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+    cubehash_close(cc, ub, n, dst, 12);
+    sph_cubehash384_init(cc);
+}
+
+/* see sph_cubehash.h */
+void
+sph_cubehash512_init(void *cc)
+{
+    cubehash_init(cc, IV512);
+}
+
+/* see sph_cubehash.h */
+void
+sph_cubehash512(void *cc, const void *data, size_t len)
+{
+    cubehash_core(cc, data, len);
+}
+
+/* see sph_cubehash.h */
+void
+sph_cubehash512_close(void *cc, void *dst)
+{
+    sph_cubehash512_addbits_and_close(cc, 0, 0, dst);
+}
+
+/* see sph_cubehash.h */
+void
+sph_cubehash512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+    cubehash_close(cc, ub, n, dst, 16);
+    sph_cubehash512_init(cc);
+}
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/sha3/sph_cubehash.h b/sha3/sph_cubehash.h
new file mode 100644
index 0000000..f247c55
--- /dev/null
+++ b/sha3/sph_cubehash.h
@@ -0,0 +1,292 @@
+/* $Id: sph_cubehash.h 180 2010-05-08 02:29:25Z tp $ */
+/**
+ * CubeHash interface. CubeHash is a family of functions which differ by
+ * their output size; this implementation defines CubeHash for output
+ * sizes 224, 256, 384 and 512 bits, with the "standard parameters"
+ * (CubeHash16/32 with the CubeHash specification notations).
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file     sph_cubehash.h
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#ifndef SPH_CUBEHASH_H__
+#define SPH_CUBEHASH_H__
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#include <stddef.h>
+#include "sph_types.h"
+
+/**
+ * Output size (in bits) for CubeHash-224.
+ */
+#define SPH_SIZE_cubehash224   224
+
+/**
+ * Output size (in bits) for CubeHash-256.
+ */
+#define SPH_SIZE_cubehash256   256
+
+/**
+ * Output size (in bits) for CubeHash-384.
+ */
+#define SPH_SIZE_cubehash384   384
+
+/**
+ * Output size (in bits) for CubeHash-512.
+ */
+#define SPH_SIZE_cubehash512   512
+
+/**
+ * This structure is a context for CubeHash computations: it contains the
+ * intermediate values and some data from the last entered block. Once
+ * a CubeHash computation has been performed, the context can be reused for
+ * another computation.
+ *
+ * The contents of this structure are private. A running CubeHash computation
+ * can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+    unsigned char buf[32];    /* first field, for alignment */
+    size_t ptr;
+    sph_u32 state[32];
+#endif
+} sph_cubehash_context;
+
+/**
+ * Type for a CubeHash-224 context (identical to the common context).
+ */
+typedef sph_cubehash_context sph_cubehash224_context;
+
+/**
+ * Type for a CubeHash-256 context (identical to the common context).
+ */
+typedef sph_cubehash_context sph_cubehash256_context;
+
+/**
+ * Type for a CubeHash-384 context (identical to the common context).
+ */
+typedef sph_cubehash_context sph_cubehash384_context;
+
+/**
+ * Type for a CubeHash-512 context (identical to the common context).
+ */
+typedef sph_cubehash_context sph_cubehash512_context;
+
+/**
+ * Initialize a CubeHash-224 context. This process performs no memory
+ * allocation.
+ *
+ * @param cc   the CubeHash-224 context (pointer to a
+ *             <code>sph_cubehash224_context</code>)
+ */
+void sph_cubehash224_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the CubeHash-224 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_cubehash224(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current CubeHash-224 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accomodate the result (28 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the CubeHash-224 context
+ * @param dst   the destination buffer
+ */
+void sph_cubehash224_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accomodate the result (28 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the CubeHash-224 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_cubehash224_addbits_and_close(
+        void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a CubeHash-256 context. This process performs no memory
+ * allocation.
+ *
+ * @param cc   the CubeHash-256 context (pointer to a
+ *             <code>sph_cubehash256_context</code>)
+ */
+void sph_cubehash256_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the CubeHash-256 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_cubehash256(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current CubeHash-256 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accomodate the result (32 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the CubeHash-256 context
+ * @param dst   the destination buffer
+ */
+void sph_cubehash256_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accomodate the result (32 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the CubeHash-256 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_cubehash256_addbits_and_close(
+        void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a CubeHash-384 context. This process performs no memory
+ * allocation.
+ *
+ * @param cc   the CubeHash-384 context (pointer to a
+ *             <code>sph_cubehash384_context</code>)
+ */
+void sph_cubehash384_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the CubeHash-384 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_cubehash384(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current CubeHash-384 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accomodate the result (48 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the CubeHash-384 context
+ * @param dst   the destination buffer
+ */
+void sph_cubehash384_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accomodate the result (48 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the CubeHash-384 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_cubehash384_addbits_and_close(
+        void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a CubeHash-512 context. This process performs no memory
+ * allocation.
+ *
+ * @param cc   the CubeHash-512 context (pointer to a
+ *             <code>sph_cubehash512_context</code>)
+ */
+void sph_cubehash512_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the CubeHash-512 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_cubehash512(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current CubeHash-512 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accomodate the result (64 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the CubeHash-512 context
+ * @param dst   the destination buffer
+ */
+void sph_cubehash512_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accomodate the result (64 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the CubeHash-512 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_cubehash512_addbits_and_close(
+        void *cc, unsigned ub, unsigned n, void *dst);
+#ifdef __cplusplus
+}
+#endif
+
+#endif
\ No newline at end of file
diff --git a/sha3/sph_echo.c b/sha3/sph_echo.c
new file mode 100644
index 0000000..42420aa
--- /dev/null
+++ b/sha3/sph_echo.c
@@ -0,0 +1,1031 @@
+/* $Id: echo.c 227 2010-06-16 17:28:38Z tp $ */
+/*
+ * ECHO implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#include <stddef.h>
+#include <string.h>
+#include <limits.h>
+
+#include "sph_echo.h"
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_ECHO
+#define SPH_SMALL_FOOTPRINT_ECHO   1
+#endif
+
+/*
+ * Some measures tend to show that the 64-bit implementation offers
+ * better performance only on a "64-bit architectures", those which have
+ * actual 64-bit registers.
+ */
+#if !defined SPH_ECHO_64 && SPH_64_TRUE
+#define SPH_ECHO_64   1
+#endif
+
+/*
+ * We can use a 64-bit implementation only if a 64-bit type is available.
+ */
+#if !SPH_64
+#undef SPH_ECHO_64
+#endif
+
+#ifdef _MSC_VER
+#pragma warning (disable: 4146)
+#endif
+
+#define T32   SPH_T32
+#define C32   SPH_C32
+#if SPH_64
+#define C64   SPH_C64
+#endif
+
+#define AES_BIG_ENDIAN   0
+#include "aes_helper.c"
+
+#if SPH_ECHO_64
+
+#define DECL_STATE_SMALL   \
+	sph_u64 W[16][2];
+
+#define DECL_STATE_BIG   \
+	sph_u64 W[16][2];
+
+#define INPUT_BLOCK_SMALL(sc)   do { \
+		unsigned u; \
+		memcpy(W, sc->u.Vb, 8 * sizeof(sph_u64)); \
+		for (u = 0; u < 12; u ++) { \
+			W[u + 4][0] = sph_dec64le_aligned( \
+				sc->buf + 16 * u); \
+			W[u + 4][1] = sph_dec64le_aligned( \
+				sc->buf + 16 * u + 8); \
+		} \
+	} while (0)
+
+#define INPUT_BLOCK_BIG(sc)   do { \
+		unsigned u; \
+		memcpy(W, sc->u.Vb, 16 * sizeof(sph_u64)); \
+		for (u = 0; u < 8; u ++) { \
+			W[u + 8][0] = sph_dec64le_aligned( \
+				sc->buf + 16 * u); \
+			W[u + 8][1] = sph_dec64le_aligned( \
+				sc->buf + 16 * u + 8); \
+		} \
+	} while (0)
+
+#if SPH_SMALL_FOOTPRINT_ECHO
+
+static void
+aes_2rounds_all(sph_u64 W[16][2],
+	sph_u32 *pK0, sph_u32 *pK1, sph_u32 *pK2, sph_u32 *pK3)
+{
+	int n;
+	sph_u32 K0 = *pK0;
+	sph_u32 K1 = *pK1;
+	sph_u32 K2 = *pK2;
+	sph_u32 K3 = *pK3;
+
+	for (n = 0; n < 16; n ++) {
+		sph_u64 Wl = W[n][0];
+		sph_u64 Wh = W[n][1];
+		sph_u32 X0 = (sph_u32)Wl;
+		sph_u32 X1 = (sph_u32)(Wl >> 32);
+		sph_u32 X2 = (sph_u32)Wh;
+		sph_u32 X3 = (sph_u32)(Wh >> 32);
+		sph_u32 Y0, Y1, Y2, Y3; \
+		AES_ROUND_LE(X0, X1, X2, X3, K0, K1, K2, K3, Y0, Y1, Y2, Y3);
+		AES_ROUND_NOKEY_LE(Y0, Y1, Y2, Y3, X0, X1, X2, X3);
+		W[n][0] = (sph_u64)X0 | ((sph_u64)X1 << 32);
+		W[n][1] = (sph_u64)X2 | ((sph_u64)X3 << 32);
+		if ((K0 = T32(K0 + 1)) == 0) {
+			if ((K1 = T32(K1 + 1)) == 0)
+				if ((K2 = T32(K2 + 1)) == 0)
+					K3 = T32(K3 + 1);
+		}
+	}
+	*pK0 = K0;
+	*pK1 = K1;
+	*pK2 = K2;
+	*pK3 = K3;
+}
+
+#define BIG_SUB_WORDS   do { \
+		aes_2rounds_all(W, &K0, &K1, &K2, &K3); \
+	} while (0)
+
+#else
+
+#define AES_2ROUNDS(X)   do { \
+		sph_u32 X0 = (sph_u32)(X[0]); \
+		sph_u32 X1 = (sph_u32)(X[0] >> 32); \
+		sph_u32 X2 = (sph_u32)(X[1]); \
+		sph_u32 X3 = (sph_u32)(X[1] >> 32); \
+		sph_u32 Y0, Y1, Y2, Y3; \
+		AES_ROUND_LE(X0, X1, X2, X3, K0, K1, K2, K3, Y0, Y1, Y2, Y3); \
+		AES_ROUND_NOKEY_LE(Y0, Y1, Y2, Y3, X0, X1, X2, X3); \
+		X[0] = (sph_u64)X0 | ((sph_u64)X1 << 32); \
+		X[1] = (sph_u64)X2 | ((sph_u64)X3 << 32); \
+		if ((K0 = T32(K0 + 1)) == 0) { \
+			if ((K1 = T32(K1 + 1)) == 0) \
+				if ((K2 = T32(K2 + 1)) == 0) \
+					K3 = T32(K3 + 1); \
+		} \
+	} while (0)
+
+#define BIG_SUB_WORDS   do { \
+		AES_2ROUNDS(W[ 0]); \
+		AES_2ROUNDS(W[ 1]); \
+		AES_2ROUNDS(W[ 2]); \
+		AES_2ROUNDS(W[ 3]); \
+		AES_2ROUNDS(W[ 4]); \
+		AES_2ROUNDS(W[ 5]); \
+		AES_2ROUNDS(W[ 6]); \
+		AES_2ROUNDS(W[ 7]); \
+		AES_2ROUNDS(W[ 8]); \
+		AES_2ROUNDS(W[ 9]); \
+		AES_2ROUNDS(W[10]); \
+		AES_2ROUNDS(W[11]); \
+		AES_2ROUNDS(W[12]); \
+		AES_2ROUNDS(W[13]); \
+		AES_2ROUNDS(W[14]); \
+		AES_2ROUNDS(W[15]); \
+	} while (0)
+
+#endif
+
+#define SHIFT_ROW1(a, b, c, d)   do { \
+		sph_u64 tmp; \
+		tmp = W[a][0]; \
+		W[a][0] = W[b][0]; \
+		W[b][0] = W[c][0]; \
+		W[c][0] = W[d][0]; \
+		W[d][0] = tmp; \
+		tmp = W[a][1]; \
+		W[a][1] = W[b][1]; \
+		W[b][1] = W[c][1]; \
+		W[c][1] = W[d][1]; \
+		W[d][1] = tmp; \
+	} while (0)
+
+#define SHIFT_ROW2(a, b, c, d)   do { \
+		sph_u64 tmp; \
+		tmp = W[a][0]; \
+		W[a][0] = W[c][0]; \
+		W[c][0] = tmp; \
+		tmp = W[b][0]; \
+		W[b][0] = W[d][0]; \
+		W[d][0] = tmp; \
+		tmp = W[a][1]; \
+		W[a][1] = W[c][1]; \
+		W[c][1] = tmp; \
+		tmp = W[b][1]; \
+		W[b][1] = W[d][1]; \
+		W[d][1] = tmp; \
+	} while (0)
+
+#define SHIFT_ROW3(a, b, c, d)   SHIFT_ROW1(d, c, b, a)
+
+#define BIG_SHIFT_ROWS   do { \
+		SHIFT_ROW1(1, 5, 9, 13); \
+		SHIFT_ROW2(2, 6, 10, 14); \
+		SHIFT_ROW3(3, 7, 11, 15); \
+	} while (0)
+
+#if SPH_SMALL_FOOTPRINT_ECHO
+
+static void
+mix_column(sph_u64 W[16][2], int ia, int ib, int ic, int id)
+{
+	int n;
+
+	for (n = 0; n < 2; n ++) {
+		sph_u64 a = W[ia][n];
+		sph_u64 b = W[ib][n];
+		sph_u64 c = W[ic][n];
+		sph_u64 d = W[id][n];
+		sph_u64 ab = a ^ b;
+		sph_u64 bc = b ^ c;
+		sph_u64 cd = c ^ d;
+		sph_u64 abx = ((ab & C64(0x8080808080808080)) >> 7) * 27U
+			^ ((ab & C64(0x7F7F7F7F7F7F7F7F)) << 1);
+		sph_u64 bcx = ((bc & C64(0x8080808080808080)) >> 7) * 27U
+			^ ((bc & C64(0x7F7F7F7F7F7F7F7F)) << 1);
+		sph_u64 cdx = ((cd & C64(0x8080808080808080)) >> 7) * 27U
+			^ ((cd & C64(0x7F7F7F7F7F7F7F7F)) << 1);
+		W[ia][n] = abx ^ bc ^ d;
+		W[ib][n] = bcx ^ a ^ cd;
+		W[ic][n] = cdx ^ ab ^ d;
+		W[id][n] = abx ^ bcx ^ cdx ^ ab ^ c;
+	}
+}
+
+#define MIX_COLUMN(a, b, c, d)   mix_column(W, a, b, c, d)
+
+#else
+
+#define MIX_COLUMN1(ia, ib, ic, id, n)   do { \
+		sph_u64 a = W[ia][n]; \
+		sph_u64 b = W[ib][n]; \
+		sph_u64 c = W[ic][n]; \
+		sph_u64 d = W[id][n]; \
+		sph_u64 ab = a ^ b; \
+		sph_u64 bc = b ^ c; \
+		sph_u64 cd = c ^ d; \
+		sph_u64 abx = ((ab & C64(0x8080808080808080)) >> 7) * 27U \
+			^ ((ab & C64(0x7F7F7F7F7F7F7F7F)) << 1); \
+		sph_u64 bcx = ((bc & C64(0x8080808080808080)) >> 7) * 27U \
+			^ ((bc & C64(0x7F7F7F7F7F7F7F7F)) << 1); \
+		sph_u64 cdx = ((cd & C64(0x8080808080808080)) >> 7) * 27U \
+			^ ((cd & C64(0x7F7F7F7F7F7F7F7F)) << 1); \
+		W[ia][n] = abx ^ bc ^ d; \
+		W[ib][n] = bcx ^ a ^ cd; \
+		W[ic][n] = cdx ^ ab ^ d; \
+		W[id][n] = abx ^ bcx ^ cdx ^ ab ^ c; \
+	} while (0)
+
+#define MIX_COLUMN(a, b, c, d)   do { \
+		MIX_COLUMN1(a, b, c, d, 0); \
+		MIX_COLUMN1(a, b, c, d, 1); \
+	} while (0)
+
+#endif
+
+#define BIG_MIX_COLUMNS   do { \
+		MIX_COLUMN(0, 1, 2, 3); \
+		MIX_COLUMN(4, 5, 6, 7); \
+		MIX_COLUMN(8, 9, 10, 11); \
+		MIX_COLUMN(12, 13, 14, 15); \
+	} while (0)
+
+#define BIG_ROUND   do { \
+		BIG_SUB_WORDS; \
+		BIG_SHIFT_ROWS; \
+		BIG_MIX_COLUMNS; \
+	} while (0)
+
+#define FINAL_SMALL   do { \
+		unsigned u; \
+		sph_u64 *VV = &sc->u.Vb[0][0]; \
+		sph_u64 *WW = &W[0][0]; \
+		for (u = 0; u < 8; u ++) { \
+			VV[u] ^= sph_dec64le_aligned(sc->buf + (u * 8)) \
+				^ sph_dec64le_aligned(sc->buf + (u * 8) + 64) \
+				^ sph_dec64le_aligned(sc->buf + (u * 8) + 128) \
+				^ WW[u] ^ WW[u + 8] \
+				^ WW[u + 16] ^ WW[u + 24]; \
+		} \
+	} while (0)
+
+#define FINAL_BIG   do { \
+		unsigned u; \
+		sph_u64 *VV = &sc->u.Vb[0][0]; \
+		sph_u64 *WW = &W[0][0]; \
+		for (u = 0; u < 16; u ++) { \
+			VV[u] ^= sph_dec64le_aligned(sc->buf + (u * 8)) \
+				^ WW[u] ^ WW[u + 16]; \
+		} \
+	} while (0)
+
+#define COMPRESS_SMALL(sc)   do { \
+		sph_u32 K0 = sc->C0; \
+		sph_u32 K1 = sc->C1; \
+		sph_u32 K2 = sc->C2; \
+		sph_u32 K3 = sc->C3; \
+		unsigned u; \
+		INPUT_BLOCK_SMALL(sc); \
+		for (u = 0; u < 8; u ++) { \
+			BIG_ROUND; \
+		} \
+		FINAL_SMALL; \
+	} while (0)
+
+#define COMPRESS_BIG(sc)   do { \
+		sph_u32 K0 = sc->C0; \
+		sph_u32 K1 = sc->C1; \
+		sph_u32 K2 = sc->C2; \
+		sph_u32 K3 = sc->C3; \
+		unsigned u; \
+		INPUT_BLOCK_BIG(sc); \
+		for (u = 0; u < 10; u ++) { \
+			BIG_ROUND; \
+		} \
+		FINAL_BIG; \
+	} while (0)
+
+#else
+
+#define DECL_STATE_SMALL   \
+	sph_u32 W[16][4];
+
+#define DECL_STATE_BIG   \
+	sph_u32 W[16][4];
+
+#define INPUT_BLOCK_SMALL(sc)   do { \
+		unsigned u; \
+		memcpy(W, sc->u.Vs, 16 * sizeof(sph_u32)); \
+		for (u = 0; u < 12; u ++) { \
+			W[u + 4][0] = sph_dec32le_aligned( \
+				sc->buf + 16 * u); \
+			W[u + 4][1] = sph_dec32le_aligned( \
+				sc->buf + 16 * u + 4); \
+			W[u + 4][2] = sph_dec32le_aligned( \
+				sc->buf + 16 * u + 8); \
+			W[u + 4][3] = sph_dec32le_aligned( \
+				sc->buf + 16 * u + 12); \
+		} \
+	} while (0)
+
+#define INPUT_BLOCK_BIG(sc)   do { \
+		unsigned u; \
+		memcpy(W, sc->u.Vs, 32 * sizeof(sph_u32)); \
+		for (u = 0; u < 8; u ++) { \
+			W[u + 8][0] = sph_dec32le_aligned( \
+				sc->buf + 16 * u); \
+			W[u + 8][1] = sph_dec32le_aligned( \
+				sc->buf + 16 * u + 4); \
+			W[u + 8][2] = sph_dec32le_aligned( \
+				sc->buf + 16 * u + 8); \
+			W[u + 8][3] = sph_dec32le_aligned( \
+				sc->buf + 16 * u + 12); \
+		} \
+	} while (0)
+
+#if SPH_SMALL_FOOTPRINT_ECHO
+
+static void
+aes_2rounds_all(sph_u32 W[16][4],
+	sph_u32 *pK0, sph_u32 *pK1, sph_u32 *pK2, sph_u32 *pK3)
+{
+	int n;
+	sph_u32 K0 = *pK0;
+	sph_u32 K1 = *pK1;
+	sph_u32 K2 = *pK2;
+	sph_u32 K3 = *pK3;
+
+	for (n = 0; n < 16; n ++) {
+		sph_u32 *X = W[n];
+		sph_u32 Y0, Y1, Y2, Y3;
+		AES_ROUND_LE(X[0], X[1], X[2], X[3],
+			K0, K1, K2, K3, Y0, Y1, Y2, Y3);
+		AES_ROUND_NOKEY_LE(Y0, Y1, Y2, Y3, X[0], X[1], X[2], X[3]);
+		if ((K0 = T32(K0 + 1)) == 0) {
+			if ((K1 = T32(K1 + 1)) == 0)
+				if ((K2 = T32(K2 + 1)) == 0)
+					K3 = T32(K3 + 1);
+		}
+	}
+	*pK0 = K0;
+	*pK1 = K1;
+	*pK2 = K2;
+	*pK3 = K3;
+}
+
+#define BIG_SUB_WORDS   do { \
+		aes_2rounds_all(W, &K0, &K1, &K2, &K3); \
+	} while (0)
+
+#else
+
+#define AES_2ROUNDS(X)   do { \
+		sph_u32 Y0, Y1, Y2, Y3; \
+		AES_ROUND_LE(X[0], X[1], X[2], X[3], \
+			K0, K1, K2, K3, Y0, Y1, Y2, Y3); \
+		AES_ROUND_NOKEY_LE(Y0, Y1, Y2, Y3, X[0], X[1], X[2], X[3]); \
+		if ((K0 = T32(K0 + 1)) == 0) { \
+			if ((K1 = T32(K1 + 1)) == 0) \
+				if ((K2 = T32(K2 + 1)) == 0) \
+					K3 = T32(K3 + 1); \
+		} \
+	} while (0)
+
+#define BIG_SUB_WORDS   do { \
+		AES_2ROUNDS(W[ 0]); \
+		AES_2ROUNDS(W[ 1]); \
+		AES_2ROUNDS(W[ 2]); \
+		AES_2ROUNDS(W[ 3]); \
+		AES_2ROUNDS(W[ 4]); \
+		AES_2ROUNDS(W[ 5]); \
+		AES_2ROUNDS(W[ 6]); \
+		AES_2ROUNDS(W[ 7]); \
+		AES_2ROUNDS(W[ 8]); \
+		AES_2ROUNDS(W[ 9]); \
+		AES_2ROUNDS(W[10]); \
+		AES_2ROUNDS(W[11]); \
+		AES_2ROUNDS(W[12]); \
+		AES_2ROUNDS(W[13]); \
+		AES_2ROUNDS(W[14]); \
+		AES_2ROUNDS(W[15]); \
+	} while (0)
+
+#endif
+
+#define SHIFT_ROW1(a, b, c, d)   do { \
+		sph_u32 tmp; \
+		tmp = W[a][0]; \
+		W[a][0] = W[b][0]; \
+		W[b][0] = W[c][0]; \
+		W[c][0] = W[d][0]; \
+		W[d][0] = tmp; \
+		tmp = W[a][1]; \
+		W[a][1] = W[b][1]; \
+		W[b][1] = W[c][1]; \
+		W[c][1] = W[d][1]; \
+		W[d][1] = tmp; \
+		tmp = W[a][2]; \
+		W[a][2] = W[b][2]; \
+		W[b][2] = W[c][2]; \
+		W[c][2] = W[d][2]; \
+		W[d][2] = tmp; \
+		tmp = W[a][3]; \
+		W[a][3] = W[b][3]; \
+		W[b][3] = W[c][3]; \
+		W[c][3] = W[d][3]; \
+		W[d][3] = tmp; \
+	} while (0)
+
+#define SHIFT_ROW2(a, b, c, d)   do { \
+		sph_u32 tmp; \
+		tmp = W[a][0]; \
+		W[a][0] = W[c][0]; \
+		W[c][0] = tmp; \
+		tmp = W[b][0]; \
+		W[b][0] = W[d][0]; \
+		W[d][0] = tmp; \
+		tmp = W[a][1]; \
+		W[a][1] = W[c][1]; \
+		W[c][1] = tmp; \
+		tmp = W[b][1]; \
+		W[b][1] = W[d][1]; \
+		W[d][1] = tmp; \
+		tmp = W[a][2]; \
+		W[a][2] = W[c][2]; \
+		W[c][2] = tmp; \
+		tmp = W[b][2]; \
+		W[b][2] = W[d][2]; \
+		W[d][2] = tmp; \
+		tmp = W[a][3]; \
+		W[a][3] = W[c][3]; \
+		W[c][3] = tmp; \
+		tmp = W[b][3]; \
+		W[b][3] = W[d][3]; \
+		W[d][3] = tmp; \
+	} while (0)
+
+#define SHIFT_ROW3(a, b, c, d)   SHIFT_ROW1(d, c, b, a)
+
+#define BIG_SHIFT_ROWS   do { \
+		SHIFT_ROW1(1, 5, 9, 13); \
+		SHIFT_ROW2(2, 6, 10, 14); \
+		SHIFT_ROW3(3, 7, 11, 15); \
+	} while (0)
+
+#if SPH_SMALL_FOOTPRINT_ECHO
+
+static void
+mix_column(sph_u32 W[16][4], int ia, int ib, int ic, int id)
+{
+	int n;
+
+	for (n = 0; n < 4; n ++) {
+		sph_u32 a = W[ia][n];
+		sph_u32 b = W[ib][n];
+		sph_u32 c = W[ic][n];
+		sph_u32 d = W[id][n];
+		sph_u32 ab = a ^ b;
+		sph_u32 bc = b ^ c;
+		sph_u32 cd = c ^ d;
+		sph_u32 abx = ((ab & C32(0x80808080)) >> 7) * 27U
+			^ ((ab & C32(0x7F7F7F7F)) << 1);
+		sph_u32 bcx = ((bc & C32(0x80808080)) >> 7) * 27U
+			^ ((bc & C32(0x7F7F7F7F)) << 1);
+		sph_u32 cdx = ((cd & C32(0x80808080)) >> 7) * 27U
+			^ ((cd & C32(0x7F7F7F7F)) << 1);
+		W[ia][n] = abx ^ bc ^ d;
+		W[ib][n] = bcx ^ a ^ cd;
+		W[ic][n] = cdx ^ ab ^ d;
+		W[id][n] = abx ^ bcx ^ cdx ^ ab ^ c;
+	}
+}
+
+#define MIX_COLUMN(a, b, c, d)   mix_column(W, a, b, c, d)
+
+#else
+
+#define MIX_COLUMN1(ia, ib, ic, id, n)   do { \
+		sph_u32 a = W[ia][n]; \
+		sph_u32 b = W[ib][n]; \
+		sph_u32 c = W[ic][n]; \
+		sph_u32 d = W[id][n]; \
+		sph_u32 ab = a ^ b; \
+		sph_u32 bc = b ^ c; \
+		sph_u32 cd = c ^ d; \
+		sph_u32 abx = ((ab & C32(0x80808080)) >> 7) * 27U \
+			^ ((ab & C32(0x7F7F7F7F)) << 1); \
+		sph_u32 bcx = ((bc & C32(0x80808080)) >> 7) * 27U \
+			^ ((bc & C32(0x7F7F7F7F)) << 1); \
+		sph_u32 cdx = ((cd & C32(0x80808080)) >> 7) * 27U \
+			^ ((cd & C32(0x7F7F7F7F)) << 1); \
+		W[ia][n] = abx ^ bc ^ d; \
+		W[ib][n] = bcx ^ a ^ cd; \
+		W[ic][n] = cdx ^ ab ^ d; \
+		W[id][n] = abx ^ bcx ^ cdx ^ ab ^ c; \
+	} while (0)
+
+#define MIX_COLUMN(a, b, c, d)   do { \
+		MIX_COLUMN1(a, b, c, d, 0); \
+		MIX_COLUMN1(a, b, c, d, 1); \
+		MIX_COLUMN1(a, b, c, d, 2); \
+		MIX_COLUMN1(a, b, c, d, 3); \
+	} while (0)
+
+#endif
+
+#define BIG_MIX_COLUMNS   do { \
+		MIX_COLUMN(0, 1, 2, 3); \
+		MIX_COLUMN(4, 5, 6, 7); \
+		MIX_COLUMN(8, 9, 10, 11); \
+		MIX_COLUMN(12, 13, 14, 15); \
+	} while (0)
+
+#define BIG_ROUND   do { \
+		BIG_SUB_WORDS; \
+		BIG_SHIFT_ROWS; \
+		BIG_MIX_COLUMNS; \
+	} while (0)
+
+#define FINAL_SMALL   do { \
+		unsigned u; \
+		sph_u32 *VV = &sc->u.Vs[0][0]; \
+		sph_u32 *WW = &W[0][0]; \
+		for (u = 0; u < 16; u ++) { \
+			VV[u] ^= sph_dec32le_aligned(sc->buf + (u * 4)) \
+				^ sph_dec32le_aligned(sc->buf + (u * 4) + 64) \
+				^ sph_dec32le_aligned(sc->buf + (u * 4) + 128) \
+				^ WW[u] ^ WW[u + 16] \
+				^ WW[u + 32] ^ WW[u + 48]; \
+		} \
+	} while (0)
+
+#define FINAL_BIG   do { \
+		unsigned u; \
+		sph_u32 *VV = &sc->u.Vs[0][0]; \
+		sph_u32 *WW = &W[0][0]; \
+		for (u = 0; u < 32; u ++) { \
+			VV[u] ^= sph_dec32le_aligned(sc->buf + (u * 4)) \
+				^ WW[u] ^ WW[u + 32]; \
+		} \
+	} while (0)
+
+#define COMPRESS_SMALL(sc)   do { \
+		sph_u32 K0 = sc->C0; \
+		sph_u32 K1 = sc->C1; \
+		sph_u32 K2 = sc->C2; \
+		sph_u32 K3 = sc->C3; \
+		unsigned u; \
+		INPUT_BLOCK_SMALL(sc); \
+		for (u = 0; u < 8; u ++) { \
+			BIG_ROUND; \
+		} \
+		FINAL_SMALL; \
+	} while (0)
+
+#define COMPRESS_BIG(sc)   do { \
+		sph_u32 K0 = sc->C0; \
+		sph_u32 K1 = sc->C1; \
+		sph_u32 K2 = sc->C2; \
+		sph_u32 K3 = sc->C3; \
+		unsigned u; \
+		INPUT_BLOCK_BIG(sc); \
+		for (u = 0; u < 10; u ++) { \
+			BIG_ROUND; \
+		} \
+		FINAL_BIG; \
+	} while (0)
+
+#endif
+
+#define INCR_COUNTER(sc, val)   do { \
+		sc->C0 = T32(sc->C0 + (sph_u32)(val)); \
+		if (sc->C0 < (sph_u32)(val)) { \
+			if ((sc->C1 = T32(sc->C1 + 1)) == 0) \
+				if ((sc->C2 = T32(sc->C2 + 1)) == 0) \
+					sc->C3 = T32(sc->C3 + 1); \
+		} \
+	} while (0)
+
+static void
+echo_small_init(sph_echo_small_context *sc, unsigned out_len)
+{
+#if SPH_ECHO_64
+    sc->u.Vb[0][0] = (sph_u64)out_len;
+	sc->u.Vb[0][1] = 0;
+	sc->u.Vb[1][0] = (sph_u64)out_len;
+	sc->u.Vb[1][1] = 0;
+	sc->u.Vb[2][0] = (sph_u64)out_len;
+	sc->u.Vb[2][1] = 0;
+	sc->u.Vb[3][0] = (sph_u64)out_len;
+	sc->u.Vb[3][1] = 0;
+#else
+    sc->u.Vs[0][0] = (sph_u32)out_len;
+    sc->u.Vs[0][1] = sc->u.Vs[0][2] = sc->u.Vs[0][3] = 0;
+    sc->u.Vs[1][0] = (sph_u32)out_len;
+    sc->u.Vs[1][1] = sc->u.Vs[1][2] = sc->u.Vs[1][3] = 0;
+    sc->u.Vs[2][0] = (sph_u32)out_len;
+    sc->u.Vs[2][1] = sc->u.Vs[2][2] = sc->u.Vs[2][3] = 0;
+    sc->u.Vs[3][0] = (sph_u32)out_len;
+    sc->u.Vs[3][1] = sc->u.Vs[3][2] = sc->u.Vs[3][3] = 0;
+#endif
+    sc->ptr = 0;
+    sc->C0 = sc->C1 = sc->C2 = sc->C3 = 0;
+}
+
+static void
+echo_big_init(sph_echo_big_context *sc, unsigned out_len)
+{
+#if SPH_ECHO_64
+    sc->u.Vb[0][0] = (sph_u64)out_len;
+	sc->u.Vb[0][1] = 0;
+	sc->u.Vb[1][0] = (sph_u64)out_len;
+	sc->u.Vb[1][1] = 0;
+	sc->u.Vb[2][0] = (sph_u64)out_len;
+	sc->u.Vb[2][1] = 0;
+	sc->u.Vb[3][0] = (sph_u64)out_len;
+	sc->u.Vb[3][1] = 0;
+	sc->u.Vb[4][0] = (sph_u64)out_len;
+	sc->u.Vb[4][1] = 0;
+	sc->u.Vb[5][0] = (sph_u64)out_len;
+	sc->u.Vb[5][1] = 0;
+	sc->u.Vb[6][0] = (sph_u64)out_len;
+	sc->u.Vb[6][1] = 0;
+	sc->u.Vb[7][0] = (sph_u64)out_len;
+	sc->u.Vb[7][1] = 0;
+#else
+    sc->u.Vs[0][0] = (sph_u32)out_len;
+    sc->u.Vs[0][1] = sc->u.Vs[0][2] = sc->u.Vs[0][3] = 0;
+    sc->u.Vs[1][0] = (sph_u32)out_len;
+    sc->u.Vs[1][1] = sc->u.Vs[1][2] = sc->u.Vs[1][3] = 0;
+    sc->u.Vs[2][0] = (sph_u32)out_len;
+    sc->u.Vs[2][1] = sc->u.Vs[2][2] = sc->u.Vs[2][3] = 0;
+    sc->u.Vs[3][0] = (sph_u32)out_len;
+    sc->u.Vs[3][1] = sc->u.Vs[3][2] = sc->u.Vs[3][3] = 0;
+    sc->u.Vs[4][0] = (sph_u32)out_len;
+    sc->u.Vs[4][1] = sc->u.Vs[4][2] = sc->u.Vs[4][3] = 0;
+    sc->u.Vs[5][0] = (sph_u32)out_len;
+    sc->u.Vs[5][1] = sc->u.Vs[5][2] = sc->u.Vs[5][3] = 0;
+    sc->u.Vs[6][0] = (sph_u32)out_len;
+    sc->u.Vs[6][1] = sc->u.Vs[6][2] = sc->u.Vs[6][3] = 0;
+    sc->u.Vs[7][0] = (sph_u32)out_len;
+    sc->u.Vs[7][1] = sc->u.Vs[7][2] = sc->u.Vs[7][3] = 0;
+#endif
+    sc->ptr = 0;
+    sc->C0 = sc->C1 = sc->C2 = sc->C3 = 0;
+}
+
+static void
+echo_small_compress(sph_echo_small_context *sc)
+{
+    DECL_STATE_SMALL
+
+    COMPRESS_SMALL(sc);
+}
+
+static void
+echo_big_compress(sph_echo_big_context *sc)
+{
+    DECL_STATE_BIG
+
+    COMPRESS_BIG(sc);
+}
+
+static void
+echo_small_core(sph_echo_small_context *sc,
+                const unsigned char *data, size_t len)
+{
+    unsigned char *buf;
+    size_t ptr;
+
+    buf = sc->buf;
+    ptr = sc->ptr;
+    if (len < (sizeof sc->buf) - ptr) {
+        memcpy(buf + ptr, data, len);
+        ptr += len;
+        sc->ptr = ptr;
+        return;
+    }
+
+    while (len > 0) {
+        size_t clen;
+
+        clen = (sizeof sc->buf) - ptr;
+        if (clen > len)
+            clen = len;
+        memcpy(buf + ptr, data, clen);
+        ptr += clen;
+        data += clen;
+        len -= clen;
+        if (ptr == sizeof sc->buf) {
+            INCR_COUNTER(sc, 1536);
+            echo_small_compress(sc);
+            ptr = 0;
+        }
+    }
+    sc->ptr = ptr;
+}
+
+static void
+echo_big_core(sph_echo_big_context *sc,
+              const unsigned char *data, size_t len)
+{
+    unsigned char *buf;
+    size_t ptr;
+
+    buf = sc->buf;
+    ptr = sc->ptr;
+    if (len < (sizeof sc->buf) - ptr) {
+        memcpy(buf + ptr, data, len);
+        ptr += len;
+        sc->ptr = ptr;
+        return;
+    }
+
+    while (len > 0) {
+        size_t clen;
+
+        clen = (sizeof sc->buf) - ptr;
+        if (clen > len)
+            clen = len;
+        memcpy(buf + ptr, data, clen);
+        ptr += clen;
+        data += clen;
+        len -= clen;
+        if (ptr == sizeof sc->buf) {
+            INCR_COUNTER(sc, 1024);
+            echo_big_compress(sc);
+            ptr = 0;
+        }
+    }
+    sc->ptr = ptr;
+}
+
+static void
+echo_small_close(sph_echo_small_context *sc, unsigned ub, unsigned n,
+                 void *dst, unsigned out_size_w32)
+{
+    unsigned char *buf;
+    size_t ptr;
+    unsigned z;
+    unsigned elen;
+    union {
+        unsigned char tmp[32];
+        sph_u32 dummy;
+#if SPH_ECHO_64
+        sph_u64 dummy2;
+#endif
+    } u;
+#if SPH_ECHO_64
+    sph_u64 *VV;
+#else
+    sph_u32 *VV;
+#endif
+    unsigned k;
+
+    buf = sc->buf;
+    ptr = sc->ptr;
+    elen = ((unsigned)ptr << 3) + n;
+    INCR_COUNTER(sc, elen);
+    sph_enc32le_aligned(u.tmp, sc->C0);
+    sph_enc32le_aligned(u.tmp + 4, sc->C1);
+    sph_enc32le_aligned(u.tmp + 8, sc->C2);
+    sph_enc32le_aligned(u.tmp + 12, sc->C3);
+    /*
+     * If elen is zero, then this block actually contains no message
+     * bit, only the first padding bit.
+     */
+    if (elen == 0) {
+        sc->C0 = sc->C1 = sc->C2 = sc->C3 = 0;
+    }
+    z = 0x80 >> n;
+    buf[ptr ++] = ((ub & -z) | z) & 0xFF;
+    memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
+    if (ptr > ((sizeof sc->buf) - 18)) {
+        echo_small_compress(sc);
+        sc->C0 = sc->C1 = sc->C2 = sc->C3 = 0;
+        memset(buf, 0, sizeof sc->buf);
+    }
+    sph_enc16le(buf + (sizeof sc->buf) - 18, out_size_w32 << 5);
+    memcpy(buf + (sizeof sc->buf) - 16, u.tmp, 16);
+    echo_small_compress(sc);
+#if SPH_ECHO_64
+    for (VV = &sc->u.Vb[0][0], k = 0; k < ((out_size_w32 + 1) >> 1); k ++)
+		sph_enc64le_aligned(u.tmp + (k << 3), VV[k]);
+#else
+    for (VV = &sc->u.Vs[0][0], k = 0; k < out_size_w32; k ++)
+        sph_enc32le_aligned(u.tmp + (k << 2), VV[k]);
+#endif
+    memcpy(dst, u.tmp, out_size_w32 << 2);
+    echo_small_init(sc, out_size_w32 << 5);
+}
+
+static void
+echo_big_close(sph_echo_big_context *sc, unsigned ub, unsigned n,
+               void *dst, unsigned out_size_w32)
+{
+    unsigned char *buf;
+    size_t ptr;
+    unsigned z;
+    unsigned elen;
+    union {
+        unsigned char tmp[64];
+        sph_u32 dummy;
+#if SPH_ECHO_64
+        sph_u64 dummy2;
+#endif
+    } u;
+#if SPH_ECHO_64
+    sph_u64 *VV;
+#else
+    sph_u32 *VV;
+#endif
+    unsigned k;
+
+    buf = sc->buf;
+    ptr = sc->ptr;
+    elen = ((unsigned)ptr << 3) + n;
+    INCR_COUNTER(sc, elen);
+    sph_enc32le_aligned(u.tmp, sc->C0);
+    sph_enc32le_aligned(u.tmp + 4, sc->C1);
+    sph_enc32le_aligned(u.tmp + 8, sc->C2);
+    sph_enc32le_aligned(u.tmp + 12, sc->C3);
+    /*
+     * If elen is zero, then this block actually contains no message
+     * bit, only the first padding bit.
+     */
+    if (elen == 0) {
+        sc->C0 = sc->C1 = sc->C2 = sc->C3 = 0;
+    }
+    z = 0x80 >> n;
+    buf[ptr ++] = ((ub & -z) | z) & 0xFF;
+    memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
+    if (ptr > ((sizeof sc->buf) - 18)) {
+        echo_big_compress(sc);
+        sc->C0 = sc->C1 = sc->C2 = sc->C3 = 0;
+        memset(buf, 0, sizeof sc->buf);
+    }
+    sph_enc16le(buf + (sizeof sc->buf) - 18, out_size_w32 << 5);
+    memcpy(buf + (sizeof sc->buf) - 16, u.tmp, 16);
+    echo_big_compress(sc);
+#if SPH_ECHO_64
+    for (VV = &sc->u.Vb[0][0], k = 0; k < ((out_size_w32 + 1) >> 1); k ++)
+		sph_enc64le_aligned(u.tmp + (k << 3), VV[k]);
+#else
+    for (VV = &sc->u.Vs[0][0], k = 0; k < out_size_w32; k ++)
+        sph_enc32le_aligned(u.tmp + (k << 2), VV[k]);
+#endif
+    memcpy(dst, u.tmp, out_size_w32 << 2);
+    echo_big_init(sc, out_size_w32 << 5);
+}
+
+/* see sph_echo.h */
+void
+sph_echo224_init(void *cc)
+{
+    echo_small_init(cc, 224);
+}
+
+/* see sph_echo.h */
+void
+sph_echo224(void *cc, const void *data, size_t len)
+{
+    echo_small_core(cc, data, len);
+}
+
+/* see sph_echo.h */
+void
+sph_echo224_close(void *cc, void *dst)
+{
+    echo_small_close(cc, 0, 0, dst, 7);
+}
+
+/* see sph_echo.h */
+void
+sph_echo224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+    echo_small_close(cc, ub, n, dst, 7);
+}
+
+/* see sph_echo.h */
+void
+sph_echo256_init(void *cc)
+{
+    echo_small_init(cc, 256);
+}
+
+/* see sph_echo.h */
+void
+sph_echo256(void *cc, const void *data, size_t len)
+{
+    echo_small_core(cc, data, len);
+}
+
+/* see sph_echo.h */
+void
+sph_echo256_close(void *cc, void *dst)
+{
+    echo_small_close(cc, 0, 0, dst, 8);
+}
+
+/* see sph_echo.h */
+void
+sph_echo256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+    echo_small_close(cc, ub, n, dst, 8);
+}
+
+/* see sph_echo.h */
+void
+sph_echo384_init(void *cc)
+{
+    echo_big_init(cc, 384);
+}
+
+/* see sph_echo.h */
+void
+sph_echo384(void *cc, const void *data, size_t len)
+{
+    echo_big_core(cc, data, len);
+}
+
+/* see sph_echo.h */
+void
+sph_echo384_close(void *cc, void *dst)
+{
+    echo_big_close(cc, 0, 0, dst, 12);
+}
+
+/* see sph_echo.h */
+void
+sph_echo384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+    echo_big_close(cc, ub, n, dst, 12);
+}
+
+/* see sph_echo.h */
+void
+sph_echo512_init(void *cc)
+{
+    echo_big_init(cc, 512);
+}
+
+/* see sph_echo.h */
+void
+sph_echo512(void *cc, const void *data, size_t len)
+{
+    echo_big_core(cc, data, len);
+}
+
+/* see sph_echo.h */
+void
+sph_echo512_close(void *cc, void *dst)
+{
+    echo_big_close(cc, 0, 0, dst, 16);
+}
+
+/* see sph_echo.h */
+void
+sph_echo512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+    echo_big_close(cc, ub, n, dst, 16);
+}
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/sha3/sph_echo.h b/sha3/sph_echo.h
new file mode 100644
index 0000000..20de5fb
--- /dev/null
+++ b/sha3/sph_echo.h
@@ -0,0 +1,320 @@
+/* $Id: sph_echo.h 216 2010-06-08 09:46:57Z tp $ */
+/**
+ * ECHO interface. ECHO is a family of functions which differ by
+ * their output size; this implementation defines ECHO for output
+ * sizes 224, 256, 384 and 512 bits.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file     sph_echo.h
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#ifndef SPH_ECHO_H__
+#define SPH_ECHO_H__
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#include <stddef.h>
+#include "sph_types.h"
+
+/**
+ * Output size (in bits) for ECHO-224.
+ */
+#define SPH_SIZE_echo224   224
+
+/**
+ * Output size (in bits) for ECHO-256.
+ */
+#define SPH_SIZE_echo256   256
+
+/**
+ * Output size (in bits) for ECHO-384.
+ */
+#define SPH_SIZE_echo384   384
+
+/**
+ * Output size (in bits) for ECHO-512.
+ */
+#define SPH_SIZE_echo512   512
+
+/**
+ * This structure is a context for ECHO computations: it contains the
+ * intermediate values and some data from the last entered block. Once
+ * an ECHO computation has been performed, the context can be reused for
+ * another computation. This specific structure is used for ECHO-224
+ * and ECHO-256.
+ *
+ * The contents of this structure are private. A running ECHO computation
+ * can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+    unsigned char buf[192];    /* first field, for alignment */
+    size_t ptr;
+    union {
+        sph_u32 Vs[4][4];
+#if SPH_64
+        sph_u64 Vb[4][2];
+#endif
+    } u;
+    sph_u32 C0, C1, C2, C3;
+#endif
+} sph_echo_small_context;
+
+/**
+ * This structure is a context for ECHO computations: it contains the
+ * intermediate values and some data from the last entered block. Once
+ * an ECHO computation has been performed, the context can be reused for
+ * another computation. This specific structure is used for ECHO-384
+ * and ECHO-512.
+ *
+ * The contents of this structure are private. A running ECHO computation
+ * can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+    unsigned char buf[128];    /* first field, for alignment */
+    size_t ptr;
+    union {
+        sph_u32 Vs[8][4];
+#if SPH_64
+        sph_u64 Vb[8][2];
+#endif
+    } u;
+    sph_u32 C0, C1, C2, C3;
+#endif
+} sph_echo_big_context;
+
+/**
+ * Type for a ECHO-224 context (identical to the common "small" context).
+ */
+typedef sph_echo_small_context sph_echo224_context;
+
+/**
+ * Type for a ECHO-256 context (identical to the common "small" context).
+ */
+typedef sph_echo_small_context sph_echo256_context;
+
+/**
+ * Type for a ECHO-384 context (identical to the common "big" context).
+ */
+typedef sph_echo_big_context sph_echo384_context;
+
+/**
+ * Type for a ECHO-512 context (identical to the common "big" context).
+ */
+typedef sph_echo_big_context sph_echo512_context;
+
+/**
+ * Initialize an ECHO-224 context. This process performs no memory allocation.
+ *
+ * @param cc   the ECHO-224 context (pointer to a
+ *             <code>sph_echo224_context</code>)
+ */
+void sph_echo224_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the ECHO-224 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_echo224(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current ECHO-224 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accomodate the result (28 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the ECHO-224 context
+ * @param dst   the destination buffer
+ */
+void sph_echo224_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accomodate the result (28 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the ECHO-224 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_echo224_addbits_and_close(
+        void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize an ECHO-256 context. This process performs no memory allocation.
+ *
+ * @param cc   the ECHO-256 context (pointer to a
+ *             <code>sph_echo256_context</code>)
+ */
+void sph_echo256_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the ECHO-256 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_echo256(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current ECHO-256 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accomodate the result (32 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the ECHO-256 context
+ * @param dst   the destination buffer
+ */
+void sph_echo256_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accomodate the result (32 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the ECHO-256 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_echo256_addbits_and_close(
+        void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize an ECHO-384 context. This process performs no memory allocation.
+ *
+ * @param cc   the ECHO-384 context (pointer to a
+ *             <code>sph_echo384_context</code>)
+ */
+void sph_echo384_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the ECHO-384 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_echo384(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current ECHO-384 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accomodate the result (48 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the ECHO-384 context
+ * @param dst   the destination buffer
+ */
+void sph_echo384_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accomodate the result (48 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the ECHO-384 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_echo384_addbits_and_close(
+        void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize an ECHO-512 context. This process performs no memory allocation.
+ *
+ * @param cc   the ECHO-512 context (pointer to a
+ *             <code>sph_echo512_context</code>)
+ */
+void sph_echo512_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the ECHO-512 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_echo512(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current ECHO-512 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accomodate the result (64 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the ECHO-512 context
+ * @param dst   the destination buffer
+ */
+void sph_echo512_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accomodate the result (64 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the ECHO-512 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_echo512_addbits_and_close(
+        void *cc, unsigned ub, unsigned n, void *dst);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
\ No newline at end of file
diff --git a/sha3/sph_fugue.c b/sha3/sph_fugue.c
new file mode 100644
index 0000000..390d2d1
--- /dev/null
+++ b/sha3/sph_fugue.c
@@ -0,0 +1,1208 @@
+#include <stddef.h>
+#include <string.h>
+
+#include "sph_fugue.h"
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#ifdef _MSC_VER
+#pragma warning (disable: 4146)
+#endif
+
+static const sph_u32 IV224[] = {
+	SPH_C32(0xf4c9120d), SPH_C32(0x6286f757), SPH_C32(0xee39e01c),
+	SPH_C32(0xe074e3cb), SPH_C32(0xa1127c62), SPH_C32(0x9a43d215),
+	SPH_C32(0xbd8d679a)
+};
+
+static const sph_u32 IV256[] = {
+	SPH_C32(0xe952bdde), SPH_C32(0x6671135f), SPH_C32(0xe0d4f668),
+	SPH_C32(0xd2b0b594), SPH_C32(0xf96c621d), SPH_C32(0xfbf929de),
+	SPH_C32(0x9149e899), SPH_C32(0x34f8c248)
+};
+
+static const sph_u32 IV384[] = {
+	SPH_C32(0xaa61ec0d), SPH_C32(0x31252e1f), SPH_C32(0xa01db4c7),
+	SPH_C32(0x00600985), SPH_C32(0x215ef44a), SPH_C32(0x741b5e9c),
+	SPH_C32(0xfa693e9a), SPH_C32(0x473eb040), SPH_C32(0xe502ae8a),
+	SPH_C32(0xa99c25e0), SPH_C32(0xbc95517c), SPH_C32(0x5c1095a1)
+};
+
+static const sph_u32 IV512[] = {
+	SPH_C32(0x8807a57e), SPH_C32(0xe616af75), SPH_C32(0xc5d3e4db),
+	SPH_C32(0xac9ab027), SPH_C32(0xd915f117), SPH_C32(0xb6eecc54),
+	SPH_C32(0x06e8020b), SPH_C32(0x4a92efd1), SPH_C32(0xaac6e2c9),
+	SPH_C32(0xddb21398), SPH_C32(0xcae65838), SPH_C32(0x437f203f),
+	SPH_C32(0x25ea78e7), SPH_C32(0x951fddd6), SPH_C32(0xda6ed11d),
+	SPH_C32(0xe13e3567)
+};
+
+static const sph_u32 mixtab0[] = {
+	SPH_C32(0x63633297), SPH_C32(0x7c7c6feb), SPH_C32(0x77775ec7),
+	SPH_C32(0x7b7b7af7), SPH_C32(0xf2f2e8e5), SPH_C32(0x6b6b0ab7),
+	SPH_C32(0x6f6f16a7), SPH_C32(0xc5c56d39), SPH_C32(0x303090c0),
+	SPH_C32(0x01010704), SPH_C32(0x67672e87), SPH_C32(0x2b2bd1ac),
+	SPH_C32(0xfefeccd5), SPH_C32(0xd7d71371), SPH_C32(0xabab7c9a),
+	SPH_C32(0x767659c3), SPH_C32(0xcaca4005), SPH_C32(0x8282a33e),
+	SPH_C32(0xc9c94909), SPH_C32(0x7d7d68ef), SPH_C32(0xfafad0c5),
+	SPH_C32(0x5959947f), SPH_C32(0x4747ce07), SPH_C32(0xf0f0e6ed),
+	SPH_C32(0xadad6e82), SPH_C32(0xd4d41a7d), SPH_C32(0xa2a243be),
+	SPH_C32(0xafaf608a), SPH_C32(0x9c9cf946), SPH_C32(0xa4a451a6),
+	SPH_C32(0x727245d3), SPH_C32(0xc0c0762d), SPH_C32(0xb7b728ea),
+	SPH_C32(0xfdfdc5d9), SPH_C32(0x9393d47a), SPH_C32(0x2626f298),
+	SPH_C32(0x363682d8), SPH_C32(0x3f3fbdfc), SPH_C32(0xf7f7f3f1),
+	SPH_C32(0xcccc521d), SPH_C32(0x34348cd0), SPH_C32(0xa5a556a2),
+	SPH_C32(0xe5e58db9), SPH_C32(0xf1f1e1e9), SPH_C32(0x71714cdf),
+	SPH_C32(0xd8d83e4d), SPH_C32(0x313197c4), SPH_C32(0x15156b54),
+	SPH_C32(0x04041c10), SPH_C32(0xc7c76331), SPH_C32(0x2323e98c),
+	SPH_C32(0xc3c37f21), SPH_C32(0x18184860), SPH_C32(0x9696cf6e),
+	SPH_C32(0x05051b14), SPH_C32(0x9a9aeb5e), SPH_C32(0x0707151c),
+	SPH_C32(0x12127e48), SPH_C32(0x8080ad36), SPH_C32(0xe2e298a5),
+	SPH_C32(0xebeba781), SPH_C32(0x2727f59c), SPH_C32(0xb2b233fe),
+	SPH_C32(0x757550cf), SPH_C32(0x09093f24), SPH_C32(0x8383a43a),
+	SPH_C32(0x2c2cc4b0), SPH_C32(0x1a1a4668), SPH_C32(0x1b1b416c),
+	SPH_C32(0x6e6e11a3), SPH_C32(0x5a5a9d73), SPH_C32(0xa0a04db6),
+	SPH_C32(0x5252a553), SPH_C32(0x3b3ba1ec), SPH_C32(0xd6d61475),
+	SPH_C32(0xb3b334fa), SPH_C32(0x2929dfa4), SPH_C32(0xe3e39fa1),
+	SPH_C32(0x2f2fcdbc), SPH_C32(0x8484b126), SPH_C32(0x5353a257),
+	SPH_C32(0xd1d10169), SPH_C32(0x00000000), SPH_C32(0xededb599),
+	SPH_C32(0x2020e080), SPH_C32(0xfcfcc2dd), SPH_C32(0xb1b13af2),
+	SPH_C32(0x5b5b9a77), SPH_C32(0x6a6a0db3), SPH_C32(0xcbcb4701),
+	SPH_C32(0xbebe17ce), SPH_C32(0x3939afe4), SPH_C32(0x4a4aed33),
+	SPH_C32(0x4c4cff2b), SPH_C32(0x5858937b), SPH_C32(0xcfcf5b11),
+	SPH_C32(0xd0d0066d), SPH_C32(0xefefbb91), SPH_C32(0xaaaa7b9e),
+	SPH_C32(0xfbfbd7c1), SPH_C32(0x4343d217), SPH_C32(0x4d4df82f),
+	SPH_C32(0x333399cc), SPH_C32(0x8585b622), SPH_C32(0x4545c00f),
+	SPH_C32(0xf9f9d9c9), SPH_C32(0x02020e08), SPH_C32(0x7f7f66e7),
+	SPH_C32(0x5050ab5b), SPH_C32(0x3c3cb4f0), SPH_C32(0x9f9ff04a),
+	SPH_C32(0xa8a87596), SPH_C32(0x5151ac5f), SPH_C32(0xa3a344ba),
+	SPH_C32(0x4040db1b), SPH_C32(0x8f8f800a), SPH_C32(0x9292d37e),
+	SPH_C32(0x9d9dfe42), SPH_C32(0x3838a8e0), SPH_C32(0xf5f5fdf9),
+	SPH_C32(0xbcbc19c6), SPH_C32(0xb6b62fee), SPH_C32(0xdada3045),
+	SPH_C32(0x2121e784), SPH_C32(0x10107040), SPH_C32(0xffffcbd1),
+	SPH_C32(0xf3f3efe1), SPH_C32(0xd2d20865), SPH_C32(0xcdcd5519),
+	SPH_C32(0x0c0c2430), SPH_C32(0x1313794c), SPH_C32(0xececb29d),
+	SPH_C32(0x5f5f8667), SPH_C32(0x9797c86a), SPH_C32(0x4444c70b),
+	SPH_C32(0x1717655c), SPH_C32(0xc4c46a3d), SPH_C32(0xa7a758aa),
+	SPH_C32(0x7e7e61e3), SPH_C32(0x3d3db3f4), SPH_C32(0x6464278b),
+	SPH_C32(0x5d5d886f), SPH_C32(0x19194f64), SPH_C32(0x737342d7),
+	SPH_C32(0x60603b9b), SPH_C32(0x8181aa32), SPH_C32(0x4f4ff627),
+	SPH_C32(0xdcdc225d), SPH_C32(0x2222ee88), SPH_C32(0x2a2ad6a8),
+	SPH_C32(0x9090dd76), SPH_C32(0x88889516), SPH_C32(0x4646c903),
+	SPH_C32(0xeeeebc95), SPH_C32(0xb8b805d6), SPH_C32(0x14146c50),
+	SPH_C32(0xdede2c55), SPH_C32(0x5e5e8163), SPH_C32(0x0b0b312c),
+	SPH_C32(0xdbdb3741), SPH_C32(0xe0e096ad), SPH_C32(0x32329ec8),
+	SPH_C32(0x3a3aa6e8), SPH_C32(0x0a0a3628), SPH_C32(0x4949e43f),
+	SPH_C32(0x06061218), SPH_C32(0x2424fc90), SPH_C32(0x5c5c8f6b),
+	SPH_C32(0xc2c27825), SPH_C32(0xd3d30f61), SPH_C32(0xacac6986),
+	SPH_C32(0x62623593), SPH_C32(0x9191da72), SPH_C32(0x9595c662),
+	SPH_C32(0xe4e48abd), SPH_C32(0x797974ff), SPH_C32(0xe7e783b1),
+	SPH_C32(0xc8c84e0d), SPH_C32(0x373785dc), SPH_C32(0x6d6d18af),
+	SPH_C32(0x8d8d8e02), SPH_C32(0xd5d51d79), SPH_C32(0x4e4ef123),
+	SPH_C32(0xa9a97292), SPH_C32(0x6c6c1fab), SPH_C32(0x5656b943),
+	SPH_C32(0xf4f4fafd), SPH_C32(0xeaeaa085), SPH_C32(0x6565208f),
+	SPH_C32(0x7a7a7df3), SPH_C32(0xaeae678e), SPH_C32(0x08083820),
+	SPH_C32(0xbaba0bde), SPH_C32(0x787873fb), SPH_C32(0x2525fb94),
+	SPH_C32(0x2e2ecab8), SPH_C32(0x1c1c5470), SPH_C32(0xa6a65fae),
+	SPH_C32(0xb4b421e6), SPH_C32(0xc6c66435), SPH_C32(0xe8e8ae8d),
+	SPH_C32(0xdddd2559), SPH_C32(0x747457cb), SPH_C32(0x1f1f5d7c),
+	SPH_C32(0x4b4bea37), SPH_C32(0xbdbd1ec2), SPH_C32(0x8b8b9c1a),
+	SPH_C32(0x8a8a9b1e), SPH_C32(0x70704bdb), SPH_C32(0x3e3ebaf8),
+	SPH_C32(0xb5b526e2), SPH_C32(0x66662983), SPH_C32(0x4848e33b),
+	SPH_C32(0x0303090c), SPH_C32(0xf6f6f4f5), SPH_C32(0x0e0e2a38),
+	SPH_C32(0x61613c9f), SPH_C32(0x35358bd4), SPH_C32(0x5757be47),
+	SPH_C32(0xb9b902d2), SPH_C32(0x8686bf2e), SPH_C32(0xc1c17129),
+	SPH_C32(0x1d1d5374), SPH_C32(0x9e9ef74e), SPH_C32(0xe1e191a9),
+	SPH_C32(0xf8f8decd), SPH_C32(0x9898e556), SPH_C32(0x11117744),
+	SPH_C32(0x696904bf), SPH_C32(0xd9d93949), SPH_C32(0x8e8e870e),
+	SPH_C32(0x9494c166), SPH_C32(0x9b9bec5a), SPH_C32(0x1e1e5a78),
+	SPH_C32(0x8787b82a), SPH_C32(0xe9e9a989), SPH_C32(0xcece5c15),
+	SPH_C32(0x5555b04f), SPH_C32(0x2828d8a0), SPH_C32(0xdfdf2b51),
+	SPH_C32(0x8c8c8906), SPH_C32(0xa1a14ab2), SPH_C32(0x89899212),
+	SPH_C32(0x0d0d2334), SPH_C32(0xbfbf10ca), SPH_C32(0xe6e684b5),
+	SPH_C32(0x4242d513), SPH_C32(0x686803bb), SPH_C32(0x4141dc1f),
+	SPH_C32(0x9999e252), SPH_C32(0x2d2dc3b4), SPH_C32(0x0f0f2d3c),
+	SPH_C32(0xb0b03df6), SPH_C32(0x5454b74b), SPH_C32(0xbbbb0cda),
+	SPH_C32(0x16166258)
+};
+
+static const sph_u32 mixtab1[] = {
+	SPH_C32(0x97636332), SPH_C32(0xeb7c7c6f), SPH_C32(0xc777775e),
+	SPH_C32(0xf77b7b7a), SPH_C32(0xe5f2f2e8), SPH_C32(0xb76b6b0a),
+	SPH_C32(0xa76f6f16), SPH_C32(0x39c5c56d), SPH_C32(0xc0303090),
+	SPH_C32(0x04010107), SPH_C32(0x8767672e), SPH_C32(0xac2b2bd1),
+	SPH_C32(0xd5fefecc), SPH_C32(0x71d7d713), SPH_C32(0x9aabab7c),
+	SPH_C32(0xc3767659), SPH_C32(0x05caca40), SPH_C32(0x3e8282a3),
+	SPH_C32(0x09c9c949), SPH_C32(0xef7d7d68), SPH_C32(0xc5fafad0),
+	SPH_C32(0x7f595994), SPH_C32(0x074747ce), SPH_C32(0xedf0f0e6),
+	SPH_C32(0x82adad6e), SPH_C32(0x7dd4d41a), SPH_C32(0xbea2a243),
+	SPH_C32(0x8aafaf60), SPH_C32(0x469c9cf9), SPH_C32(0xa6a4a451),
+	SPH_C32(0xd3727245), SPH_C32(0x2dc0c076), SPH_C32(0xeab7b728),
+	SPH_C32(0xd9fdfdc5), SPH_C32(0x7a9393d4), SPH_C32(0x982626f2),
+	SPH_C32(0xd8363682), SPH_C32(0xfc3f3fbd), SPH_C32(0xf1f7f7f3),
+	SPH_C32(0x1dcccc52), SPH_C32(0xd034348c), SPH_C32(0xa2a5a556),
+	SPH_C32(0xb9e5e58d), SPH_C32(0xe9f1f1e1), SPH_C32(0xdf71714c),
+	SPH_C32(0x4dd8d83e), SPH_C32(0xc4313197), SPH_C32(0x5415156b),
+	SPH_C32(0x1004041c), SPH_C32(0x31c7c763), SPH_C32(0x8c2323e9),
+	SPH_C32(0x21c3c37f), SPH_C32(0x60181848), SPH_C32(0x6e9696cf),
+	SPH_C32(0x1405051b), SPH_C32(0x5e9a9aeb), SPH_C32(0x1c070715),
+	SPH_C32(0x4812127e), SPH_C32(0x368080ad), SPH_C32(0xa5e2e298),
+	SPH_C32(0x81ebeba7), SPH_C32(0x9c2727f5), SPH_C32(0xfeb2b233),
+	SPH_C32(0xcf757550), SPH_C32(0x2409093f), SPH_C32(0x3a8383a4),
+	SPH_C32(0xb02c2cc4), SPH_C32(0x681a1a46), SPH_C32(0x6c1b1b41),
+	SPH_C32(0xa36e6e11), SPH_C32(0x735a5a9d), SPH_C32(0xb6a0a04d),
+	SPH_C32(0x535252a5), SPH_C32(0xec3b3ba1), SPH_C32(0x75d6d614),
+	SPH_C32(0xfab3b334), SPH_C32(0xa42929df), SPH_C32(0xa1e3e39f),
+	SPH_C32(0xbc2f2fcd), SPH_C32(0x268484b1), SPH_C32(0x575353a2),
+	SPH_C32(0x69d1d101), SPH_C32(0x00000000), SPH_C32(0x99ededb5),
+	SPH_C32(0x802020e0), SPH_C32(0xddfcfcc2), SPH_C32(0xf2b1b13a),
+	SPH_C32(0x775b5b9a), SPH_C32(0xb36a6a0d), SPH_C32(0x01cbcb47),
+	SPH_C32(0xcebebe17), SPH_C32(0xe43939af), SPH_C32(0x334a4aed),
+	SPH_C32(0x2b4c4cff), SPH_C32(0x7b585893), SPH_C32(0x11cfcf5b),
+	SPH_C32(0x6dd0d006), SPH_C32(0x91efefbb), SPH_C32(0x9eaaaa7b),
+	SPH_C32(0xc1fbfbd7), SPH_C32(0x174343d2), SPH_C32(0x2f4d4df8),
+	SPH_C32(0xcc333399), SPH_C32(0x228585b6), SPH_C32(0x0f4545c0),
+	SPH_C32(0xc9f9f9d9), SPH_C32(0x0802020e), SPH_C32(0xe77f7f66),
+	SPH_C32(0x5b5050ab), SPH_C32(0xf03c3cb4), SPH_C32(0x4a9f9ff0),
+	SPH_C32(0x96a8a875), SPH_C32(0x5f5151ac), SPH_C32(0xbaa3a344),
+	SPH_C32(0x1b4040db), SPH_C32(0x0a8f8f80), SPH_C32(0x7e9292d3),
+	SPH_C32(0x429d9dfe), SPH_C32(0xe03838a8), SPH_C32(0xf9f5f5fd),
+	SPH_C32(0xc6bcbc19), SPH_C32(0xeeb6b62f), SPH_C32(0x45dada30),
+	SPH_C32(0x842121e7), SPH_C32(0x40101070), SPH_C32(0xd1ffffcb),
+	SPH_C32(0xe1f3f3ef), SPH_C32(0x65d2d208), SPH_C32(0x19cdcd55),
+	SPH_C32(0x300c0c24), SPH_C32(0x4c131379), SPH_C32(0x9dececb2),
+	SPH_C32(0x675f5f86), SPH_C32(0x6a9797c8), SPH_C32(0x0b4444c7),
+	SPH_C32(0x5c171765), SPH_C32(0x3dc4c46a), SPH_C32(0xaaa7a758),
+	SPH_C32(0xe37e7e61), SPH_C32(0xf43d3db3), SPH_C32(0x8b646427),
+	SPH_C32(0x6f5d5d88), SPH_C32(0x6419194f), SPH_C32(0xd7737342),
+	SPH_C32(0x9b60603b), SPH_C32(0x328181aa), SPH_C32(0x274f4ff6),
+	SPH_C32(0x5ddcdc22), SPH_C32(0x882222ee), SPH_C32(0xa82a2ad6),
+	SPH_C32(0x769090dd), SPH_C32(0x16888895), SPH_C32(0x034646c9),
+	SPH_C32(0x95eeeebc), SPH_C32(0xd6b8b805), SPH_C32(0x5014146c),
+	SPH_C32(0x55dede2c), SPH_C32(0x635e5e81), SPH_C32(0x2c0b0b31),
+	SPH_C32(0x41dbdb37), SPH_C32(0xade0e096), SPH_C32(0xc832329e),
+	SPH_C32(0xe83a3aa6), SPH_C32(0x280a0a36), SPH_C32(0x3f4949e4),
+	SPH_C32(0x18060612), SPH_C32(0x902424fc), SPH_C32(0x6b5c5c8f),
+	SPH_C32(0x25c2c278), SPH_C32(0x61d3d30f), SPH_C32(0x86acac69),
+	SPH_C32(0x93626235), SPH_C32(0x729191da), SPH_C32(0x629595c6),
+	SPH_C32(0xbde4e48a), SPH_C32(0xff797974), SPH_C32(0xb1e7e783),
+	SPH_C32(0x0dc8c84e), SPH_C32(0xdc373785), SPH_C32(0xaf6d6d18),
+	SPH_C32(0x028d8d8e), SPH_C32(0x79d5d51d), SPH_C32(0x234e4ef1),
+	SPH_C32(0x92a9a972), SPH_C32(0xab6c6c1f), SPH_C32(0x435656b9),
+	SPH_C32(0xfdf4f4fa), SPH_C32(0x85eaeaa0), SPH_C32(0x8f656520),
+	SPH_C32(0xf37a7a7d), SPH_C32(0x8eaeae67), SPH_C32(0x20080838),
+	SPH_C32(0xdebaba0b), SPH_C32(0xfb787873), SPH_C32(0x942525fb),
+	SPH_C32(0xb82e2eca), SPH_C32(0x701c1c54), SPH_C32(0xaea6a65f),
+	SPH_C32(0xe6b4b421), SPH_C32(0x35c6c664), SPH_C32(0x8de8e8ae),
+	SPH_C32(0x59dddd25), SPH_C32(0xcb747457), SPH_C32(0x7c1f1f5d),
+	SPH_C32(0x374b4bea), SPH_C32(0xc2bdbd1e), SPH_C32(0x1a8b8b9c),
+	SPH_C32(0x1e8a8a9b), SPH_C32(0xdb70704b), SPH_C32(0xf83e3eba),
+	SPH_C32(0xe2b5b526), SPH_C32(0x83666629), SPH_C32(0x3b4848e3),
+	SPH_C32(0x0c030309), SPH_C32(0xf5f6f6f4), SPH_C32(0x380e0e2a),
+	SPH_C32(0x9f61613c), SPH_C32(0xd435358b), SPH_C32(0x475757be),
+	SPH_C32(0xd2b9b902), SPH_C32(0x2e8686bf), SPH_C32(0x29c1c171),
+	SPH_C32(0x741d1d53), SPH_C32(0x4e9e9ef7), SPH_C32(0xa9e1e191),
+	SPH_C32(0xcdf8f8de), SPH_C32(0x569898e5), SPH_C32(0x44111177),
+	SPH_C32(0xbf696904), SPH_C32(0x49d9d939), SPH_C32(0x0e8e8e87),
+	SPH_C32(0x669494c1), SPH_C32(0x5a9b9bec), SPH_C32(0x781e1e5a),
+	SPH_C32(0x2a8787b8), SPH_C32(0x89e9e9a9), SPH_C32(0x15cece5c),
+	SPH_C32(0x4f5555b0), SPH_C32(0xa02828d8), SPH_C32(0x51dfdf2b),
+	SPH_C32(0x068c8c89), SPH_C32(0xb2a1a14a), SPH_C32(0x12898992),
+	SPH_C32(0x340d0d23), SPH_C32(0xcabfbf10), SPH_C32(0xb5e6e684),
+	SPH_C32(0x134242d5), SPH_C32(0xbb686803), SPH_C32(0x1f4141dc),
+	SPH_C32(0x529999e2), SPH_C32(0xb42d2dc3), SPH_C32(0x3c0f0f2d),
+	SPH_C32(0xf6b0b03d), SPH_C32(0x4b5454b7), SPH_C32(0xdabbbb0c),
+	SPH_C32(0x58161662)
+};
+
+static const sph_u32 mixtab2[] = {
+	SPH_C32(0x32976363), SPH_C32(0x6feb7c7c), SPH_C32(0x5ec77777),
+	SPH_C32(0x7af77b7b), SPH_C32(0xe8e5f2f2), SPH_C32(0x0ab76b6b),
+	SPH_C32(0x16a76f6f), SPH_C32(0x6d39c5c5), SPH_C32(0x90c03030),
+	SPH_C32(0x07040101), SPH_C32(0x2e876767), SPH_C32(0xd1ac2b2b),
+	SPH_C32(0xccd5fefe), SPH_C32(0x1371d7d7), SPH_C32(0x7c9aabab),
+	SPH_C32(0x59c37676), SPH_C32(0x4005caca), SPH_C32(0xa33e8282),
+	SPH_C32(0x4909c9c9), SPH_C32(0x68ef7d7d), SPH_C32(0xd0c5fafa),
+	SPH_C32(0x947f5959), SPH_C32(0xce074747), SPH_C32(0xe6edf0f0),
+	SPH_C32(0x6e82adad), SPH_C32(0x1a7dd4d4), SPH_C32(0x43bea2a2),
+	SPH_C32(0x608aafaf), SPH_C32(0xf9469c9c), SPH_C32(0x51a6a4a4),
+	SPH_C32(0x45d37272), SPH_C32(0x762dc0c0), SPH_C32(0x28eab7b7),
+	SPH_C32(0xc5d9fdfd), SPH_C32(0xd47a9393), SPH_C32(0xf2982626),
+	SPH_C32(0x82d83636), SPH_C32(0xbdfc3f3f), SPH_C32(0xf3f1f7f7),
+	SPH_C32(0x521dcccc), SPH_C32(0x8cd03434), SPH_C32(0x56a2a5a5),
+	SPH_C32(0x8db9e5e5), SPH_C32(0xe1e9f1f1), SPH_C32(0x4cdf7171),
+	SPH_C32(0x3e4dd8d8), SPH_C32(0x97c43131), SPH_C32(0x6b541515),
+	SPH_C32(0x1c100404), SPH_C32(0x6331c7c7), SPH_C32(0xe98c2323),
+	SPH_C32(0x7f21c3c3), SPH_C32(0x48601818), SPH_C32(0xcf6e9696),
+	SPH_C32(0x1b140505), SPH_C32(0xeb5e9a9a), SPH_C32(0x151c0707),
+	SPH_C32(0x7e481212), SPH_C32(0xad368080), SPH_C32(0x98a5e2e2),
+	SPH_C32(0xa781ebeb), SPH_C32(0xf59c2727), SPH_C32(0x33feb2b2),
+	SPH_C32(0x50cf7575), SPH_C32(0x3f240909), SPH_C32(0xa43a8383),
+	SPH_C32(0xc4b02c2c), SPH_C32(0x46681a1a), SPH_C32(0x416c1b1b),
+	SPH_C32(0x11a36e6e), SPH_C32(0x9d735a5a), SPH_C32(0x4db6a0a0),
+	SPH_C32(0xa5535252), SPH_C32(0xa1ec3b3b), SPH_C32(0x1475d6d6),
+	SPH_C32(0x34fab3b3), SPH_C32(0xdfa42929), SPH_C32(0x9fa1e3e3),
+	SPH_C32(0xcdbc2f2f), SPH_C32(0xb1268484), SPH_C32(0xa2575353),
+	SPH_C32(0x0169d1d1), SPH_C32(0x00000000), SPH_C32(0xb599eded),
+	SPH_C32(0xe0802020), SPH_C32(0xc2ddfcfc), SPH_C32(0x3af2b1b1),
+	SPH_C32(0x9a775b5b), SPH_C32(0x0db36a6a), SPH_C32(0x4701cbcb),
+	SPH_C32(0x17cebebe), SPH_C32(0xafe43939), SPH_C32(0xed334a4a),
+	SPH_C32(0xff2b4c4c), SPH_C32(0x937b5858), SPH_C32(0x5b11cfcf),
+	SPH_C32(0x066dd0d0), SPH_C32(0xbb91efef), SPH_C32(0x7b9eaaaa),
+	SPH_C32(0xd7c1fbfb), SPH_C32(0xd2174343), SPH_C32(0xf82f4d4d),
+	SPH_C32(0x99cc3333), SPH_C32(0xb6228585), SPH_C32(0xc00f4545),
+	SPH_C32(0xd9c9f9f9), SPH_C32(0x0e080202), SPH_C32(0x66e77f7f),
+	SPH_C32(0xab5b5050), SPH_C32(0xb4f03c3c), SPH_C32(0xf04a9f9f),
+	SPH_C32(0x7596a8a8), SPH_C32(0xac5f5151), SPH_C32(0x44baa3a3),
+	SPH_C32(0xdb1b4040), SPH_C32(0x800a8f8f), SPH_C32(0xd37e9292),
+	SPH_C32(0xfe429d9d), SPH_C32(0xa8e03838), SPH_C32(0xfdf9f5f5),
+	SPH_C32(0x19c6bcbc), SPH_C32(0x2feeb6b6), SPH_C32(0x3045dada),
+	SPH_C32(0xe7842121), SPH_C32(0x70401010), SPH_C32(0xcbd1ffff),
+	SPH_C32(0xefe1f3f3), SPH_C32(0x0865d2d2), SPH_C32(0x5519cdcd),
+	SPH_C32(0x24300c0c), SPH_C32(0x794c1313), SPH_C32(0xb29decec),
+	SPH_C32(0x86675f5f), SPH_C32(0xc86a9797), SPH_C32(0xc70b4444),
+	SPH_C32(0x655c1717), SPH_C32(0x6a3dc4c4), SPH_C32(0x58aaa7a7),
+	SPH_C32(0x61e37e7e), SPH_C32(0xb3f43d3d), SPH_C32(0x278b6464),
+	SPH_C32(0x886f5d5d), SPH_C32(0x4f641919), SPH_C32(0x42d77373),
+	SPH_C32(0x3b9b6060), SPH_C32(0xaa328181), SPH_C32(0xf6274f4f),
+	SPH_C32(0x225ddcdc), SPH_C32(0xee882222), SPH_C32(0xd6a82a2a),
+	SPH_C32(0xdd769090), SPH_C32(0x95168888), SPH_C32(0xc9034646),
+	SPH_C32(0xbc95eeee), SPH_C32(0x05d6b8b8), SPH_C32(0x6c501414),
+	SPH_C32(0x2c55dede), SPH_C32(0x81635e5e), SPH_C32(0x312c0b0b),
+	SPH_C32(0x3741dbdb), SPH_C32(0x96ade0e0), SPH_C32(0x9ec83232),
+	SPH_C32(0xa6e83a3a), SPH_C32(0x36280a0a), SPH_C32(0xe43f4949),
+	SPH_C32(0x12180606), SPH_C32(0xfc902424), SPH_C32(0x8f6b5c5c),
+	SPH_C32(0x7825c2c2), SPH_C32(0x0f61d3d3), SPH_C32(0x6986acac),
+	SPH_C32(0x35936262), SPH_C32(0xda729191), SPH_C32(0xc6629595),
+	SPH_C32(0x8abde4e4), SPH_C32(0x74ff7979), SPH_C32(0x83b1e7e7),
+	SPH_C32(0x4e0dc8c8), SPH_C32(0x85dc3737), SPH_C32(0x18af6d6d),
+	SPH_C32(0x8e028d8d), SPH_C32(0x1d79d5d5), SPH_C32(0xf1234e4e),
+	SPH_C32(0x7292a9a9), SPH_C32(0x1fab6c6c), SPH_C32(0xb9435656),
+	SPH_C32(0xfafdf4f4), SPH_C32(0xa085eaea), SPH_C32(0x208f6565),
+	SPH_C32(0x7df37a7a), SPH_C32(0x678eaeae), SPH_C32(0x38200808),
+	SPH_C32(0x0bdebaba), SPH_C32(0x73fb7878), SPH_C32(0xfb942525),
+	SPH_C32(0xcab82e2e), SPH_C32(0x54701c1c), SPH_C32(0x5faea6a6),
+	SPH_C32(0x21e6b4b4), SPH_C32(0x6435c6c6), SPH_C32(0xae8de8e8),
+	SPH_C32(0x2559dddd), SPH_C32(0x57cb7474), SPH_C32(0x5d7c1f1f),
+	SPH_C32(0xea374b4b), SPH_C32(0x1ec2bdbd), SPH_C32(0x9c1a8b8b),
+	SPH_C32(0x9b1e8a8a), SPH_C32(0x4bdb7070), SPH_C32(0xbaf83e3e),
+	SPH_C32(0x26e2b5b5), SPH_C32(0x29836666), SPH_C32(0xe33b4848),
+	SPH_C32(0x090c0303), SPH_C32(0xf4f5f6f6), SPH_C32(0x2a380e0e),
+	SPH_C32(0x3c9f6161), SPH_C32(0x8bd43535), SPH_C32(0xbe475757),
+	SPH_C32(0x02d2b9b9), SPH_C32(0xbf2e8686), SPH_C32(0x7129c1c1),
+	SPH_C32(0x53741d1d), SPH_C32(0xf74e9e9e), SPH_C32(0x91a9e1e1),
+	SPH_C32(0xdecdf8f8), SPH_C32(0xe5569898), SPH_C32(0x77441111),
+	SPH_C32(0x04bf6969), SPH_C32(0x3949d9d9), SPH_C32(0x870e8e8e),
+	SPH_C32(0xc1669494), SPH_C32(0xec5a9b9b), SPH_C32(0x5a781e1e),
+	SPH_C32(0xb82a8787), SPH_C32(0xa989e9e9), SPH_C32(0x5c15cece),
+	SPH_C32(0xb04f5555), SPH_C32(0xd8a02828), SPH_C32(0x2b51dfdf),
+	SPH_C32(0x89068c8c), SPH_C32(0x4ab2a1a1), SPH_C32(0x92128989),
+	SPH_C32(0x23340d0d), SPH_C32(0x10cabfbf), SPH_C32(0x84b5e6e6),
+	SPH_C32(0xd5134242), SPH_C32(0x03bb6868), SPH_C32(0xdc1f4141),
+	SPH_C32(0xe2529999), SPH_C32(0xc3b42d2d), SPH_C32(0x2d3c0f0f),
+	SPH_C32(0x3df6b0b0), SPH_C32(0xb74b5454), SPH_C32(0x0cdabbbb),
+	SPH_C32(0x62581616)
+};
+
+static const sph_u32 mixtab3[] = {
+	SPH_C32(0x63329763), SPH_C32(0x7c6feb7c), SPH_C32(0x775ec777),
+	SPH_C32(0x7b7af77b), SPH_C32(0xf2e8e5f2), SPH_C32(0x6b0ab76b),
+	SPH_C32(0x6f16a76f), SPH_C32(0xc56d39c5), SPH_C32(0x3090c030),
+	SPH_C32(0x01070401), SPH_C32(0x672e8767), SPH_C32(0x2bd1ac2b),
+	SPH_C32(0xfeccd5fe), SPH_C32(0xd71371d7), SPH_C32(0xab7c9aab),
+	SPH_C32(0x7659c376), SPH_C32(0xca4005ca), SPH_C32(0x82a33e82),
+	SPH_C32(0xc94909c9), SPH_C32(0x7d68ef7d), SPH_C32(0xfad0c5fa),
+	SPH_C32(0x59947f59), SPH_C32(0x47ce0747), SPH_C32(0xf0e6edf0),
+	SPH_C32(0xad6e82ad), SPH_C32(0xd41a7dd4), SPH_C32(0xa243bea2),
+	SPH_C32(0xaf608aaf), SPH_C32(0x9cf9469c), SPH_C32(0xa451a6a4),
+	SPH_C32(0x7245d372), SPH_C32(0xc0762dc0), SPH_C32(0xb728eab7),
+	SPH_C32(0xfdc5d9fd), SPH_C32(0x93d47a93), SPH_C32(0x26f29826),
+	SPH_C32(0x3682d836), SPH_C32(0x3fbdfc3f), SPH_C32(0xf7f3f1f7),
+	SPH_C32(0xcc521dcc), SPH_C32(0x348cd034), SPH_C32(0xa556a2a5),
+	SPH_C32(0xe58db9e5), SPH_C32(0xf1e1e9f1), SPH_C32(0x714cdf71),
+	SPH_C32(0xd83e4dd8), SPH_C32(0x3197c431), SPH_C32(0x156b5415),
+	SPH_C32(0x041c1004), SPH_C32(0xc76331c7), SPH_C32(0x23e98c23),
+	SPH_C32(0xc37f21c3), SPH_C32(0x18486018), SPH_C32(0x96cf6e96),
+	SPH_C32(0x051b1405), SPH_C32(0x9aeb5e9a), SPH_C32(0x07151c07),
+	SPH_C32(0x127e4812), SPH_C32(0x80ad3680), SPH_C32(0xe298a5e2),
+	SPH_C32(0xeba781eb), SPH_C32(0x27f59c27), SPH_C32(0xb233feb2),
+	SPH_C32(0x7550cf75), SPH_C32(0x093f2409), SPH_C32(0x83a43a83),
+	SPH_C32(0x2cc4b02c), SPH_C32(0x1a46681a), SPH_C32(0x1b416c1b),
+	SPH_C32(0x6e11a36e), SPH_C32(0x5a9d735a), SPH_C32(0xa04db6a0),
+	SPH_C32(0x52a55352), SPH_C32(0x3ba1ec3b), SPH_C32(0xd61475d6),
+	SPH_C32(0xb334fab3), SPH_C32(0x29dfa429), SPH_C32(0xe39fa1e3),
+	SPH_C32(0x2fcdbc2f), SPH_C32(0x84b12684), SPH_C32(0x53a25753),
+	SPH_C32(0xd10169d1), SPH_C32(0x00000000), SPH_C32(0xedb599ed),
+	SPH_C32(0x20e08020), SPH_C32(0xfcc2ddfc), SPH_C32(0xb13af2b1),
+	SPH_C32(0x5b9a775b), SPH_C32(0x6a0db36a), SPH_C32(0xcb4701cb),
+	SPH_C32(0xbe17cebe), SPH_C32(0x39afe439), SPH_C32(0x4aed334a),
+	SPH_C32(0x4cff2b4c), SPH_C32(0x58937b58), SPH_C32(0xcf5b11cf),
+	SPH_C32(0xd0066dd0), SPH_C32(0xefbb91ef), SPH_C32(0xaa7b9eaa),
+	SPH_C32(0xfbd7c1fb), SPH_C32(0x43d21743), SPH_C32(0x4df82f4d),
+	SPH_C32(0x3399cc33), SPH_C32(0x85b62285), SPH_C32(0x45c00f45),
+	SPH_C32(0xf9d9c9f9), SPH_C32(0x020e0802), SPH_C32(0x7f66e77f),
+	SPH_C32(0x50ab5b50), SPH_C32(0x3cb4f03c), SPH_C32(0x9ff04a9f),
+	SPH_C32(0xa87596a8), SPH_C32(0x51ac5f51), SPH_C32(0xa344baa3),
+	SPH_C32(0x40db1b40), SPH_C32(0x8f800a8f), SPH_C32(0x92d37e92),
+	SPH_C32(0x9dfe429d), SPH_C32(0x38a8e038), SPH_C32(0xf5fdf9f5),
+	SPH_C32(0xbc19c6bc), SPH_C32(0xb62feeb6), SPH_C32(0xda3045da),
+	SPH_C32(0x21e78421), SPH_C32(0x10704010), SPH_C32(0xffcbd1ff),
+	SPH_C32(0xf3efe1f3), SPH_C32(0xd20865d2), SPH_C32(0xcd5519cd),
+	SPH_C32(0x0c24300c), SPH_C32(0x13794c13), SPH_C32(0xecb29dec),
+	SPH_C32(0x5f86675f), SPH_C32(0x97c86a97), SPH_C32(0x44c70b44),
+	SPH_C32(0x17655c17), SPH_C32(0xc46a3dc4), SPH_C32(0xa758aaa7),
+	SPH_C32(0x7e61e37e), SPH_C32(0x3db3f43d), SPH_C32(0x64278b64),
+	SPH_C32(0x5d886f5d), SPH_C32(0x194f6419), SPH_C32(0x7342d773),
+	SPH_C32(0x603b9b60), SPH_C32(0x81aa3281), SPH_C32(0x4ff6274f),
+	SPH_C32(0xdc225ddc), SPH_C32(0x22ee8822), SPH_C32(0x2ad6a82a),
+	SPH_C32(0x90dd7690), SPH_C32(0x88951688), SPH_C32(0x46c90346),
+	SPH_C32(0xeebc95ee), SPH_C32(0xb805d6b8), SPH_C32(0x146c5014),
+	SPH_C32(0xde2c55de), SPH_C32(0x5e81635e), SPH_C32(0x0b312c0b),
+	SPH_C32(0xdb3741db), SPH_C32(0xe096ade0), SPH_C32(0x329ec832),
+	SPH_C32(0x3aa6e83a), SPH_C32(0x0a36280a), SPH_C32(0x49e43f49),
+	SPH_C32(0x06121806), SPH_C32(0x24fc9024), SPH_C32(0x5c8f6b5c),
+	SPH_C32(0xc27825c2), SPH_C32(0xd30f61d3), SPH_C32(0xac6986ac),
+	SPH_C32(0x62359362), SPH_C32(0x91da7291), SPH_C32(0x95c66295),
+	SPH_C32(0xe48abde4), SPH_C32(0x7974ff79), SPH_C32(0xe783b1e7),
+	SPH_C32(0xc84e0dc8), SPH_C32(0x3785dc37), SPH_C32(0x6d18af6d),
+	SPH_C32(0x8d8e028d), SPH_C32(0xd51d79d5), SPH_C32(0x4ef1234e),
+	SPH_C32(0xa97292a9), SPH_C32(0x6c1fab6c), SPH_C32(0x56b94356),
+	SPH_C32(0xf4fafdf4), SPH_C32(0xeaa085ea), SPH_C32(0x65208f65),
+	SPH_C32(0x7a7df37a), SPH_C32(0xae678eae), SPH_C32(0x08382008),
+	SPH_C32(0xba0bdeba), SPH_C32(0x7873fb78), SPH_C32(0x25fb9425),
+	SPH_C32(0x2ecab82e), SPH_C32(0x1c54701c), SPH_C32(0xa65faea6),
+	SPH_C32(0xb421e6b4), SPH_C32(0xc66435c6), SPH_C32(0xe8ae8de8),
+	SPH_C32(0xdd2559dd), SPH_C32(0x7457cb74), SPH_C32(0x1f5d7c1f),
+	SPH_C32(0x4bea374b), SPH_C32(0xbd1ec2bd), SPH_C32(0x8b9c1a8b),
+	SPH_C32(0x8a9b1e8a), SPH_C32(0x704bdb70), SPH_C32(0x3ebaf83e),
+	SPH_C32(0xb526e2b5), SPH_C32(0x66298366), SPH_C32(0x48e33b48),
+	SPH_C32(0x03090c03), SPH_C32(0xf6f4f5f6), SPH_C32(0x0e2a380e),
+	SPH_C32(0x613c9f61), SPH_C32(0x358bd435), SPH_C32(0x57be4757),
+	SPH_C32(0xb902d2b9), SPH_C32(0x86bf2e86), SPH_C32(0xc17129c1),
+	SPH_C32(0x1d53741d), SPH_C32(0x9ef74e9e), SPH_C32(0xe191a9e1),
+	SPH_C32(0xf8decdf8), SPH_C32(0x98e55698), SPH_C32(0x11774411),
+	SPH_C32(0x6904bf69), SPH_C32(0xd93949d9), SPH_C32(0x8e870e8e),
+	SPH_C32(0x94c16694), SPH_C32(0x9bec5a9b), SPH_C32(0x1e5a781e),
+	SPH_C32(0x87b82a87), SPH_C32(0xe9a989e9), SPH_C32(0xce5c15ce),
+	SPH_C32(0x55b04f55), SPH_C32(0x28d8a028), SPH_C32(0xdf2b51df),
+	SPH_C32(0x8c89068c), SPH_C32(0xa14ab2a1), SPH_C32(0x89921289),
+	SPH_C32(0x0d23340d), SPH_C32(0xbf10cabf), SPH_C32(0xe684b5e6),
+	SPH_C32(0x42d51342), SPH_C32(0x6803bb68), SPH_C32(0x41dc1f41),
+	SPH_C32(0x99e25299), SPH_C32(0x2dc3b42d), SPH_C32(0x0f2d3c0f),
+	SPH_C32(0xb03df6b0), SPH_C32(0x54b74b54), SPH_C32(0xbb0cdabb),
+	SPH_C32(0x16625816)
+};
+
+#define TIX2(q, x00, x01, x08, x10, x24)   do { \
+		x10 ^= x00; \
+		x00 = (q); \
+		x08 ^= x00; \
+		x01 ^= x24; \
+	} while (0)
+
+#define TIX3(q, x00, x01, x04, x08, x16, x27, x30)   do { \
+		x16 ^= x00; \
+		x00 = (q); \
+		x08 ^= x00; \
+		x01 ^= x27; \
+		x04 ^= x30; \
+	} while (0)
+
+#define TIX4(q, x00, x01, x04, x07, x08, x22, x24, x27, x30)   do { \
+		x22 ^= x00; \
+		x00 = (q); \
+		x08 ^= x00; \
+		x01 ^= x24; \
+		x04 ^= x27; \
+		x07 ^= x30; \
+	} while (0)
+
+#define CMIX30(x00, x01, x02, x04, x05, x06, x15, x16, x17)   do { \
+		x00 ^= x04; \
+		x01 ^= x05; \
+		x02 ^= x06; \
+		x15 ^= x04; \
+		x16 ^= x05; \
+		x17 ^= x06; \
+	} while (0)
+
+#define CMIX36(x00, x01, x02, x04, x05, x06, x18, x19, x20)   do { \
+		x00 ^= x04; \
+		x01 ^= x05; \
+		x02 ^= x06; \
+		x18 ^= x04; \
+		x19 ^= x05; \
+		x20 ^= x06; \
+	} while (0)
+
+#define SMIX(x0, x1, x2, x3)   do { \
+		sph_u32 c0 = 0; \
+		sph_u32 c1 = 0; \
+		sph_u32 c2 = 0; \
+		sph_u32 c3 = 0; \
+		sph_u32 r0 = 0; \
+		sph_u32 r1 = 0; \
+		sph_u32 r2 = 0; \
+		sph_u32 r3 = 0; \
+		sph_u32 tmp; \
+		tmp = mixtab0[x0 >> 24]; \
+		c0 ^= tmp; \
+		tmp = mixtab1[(x0 >> 16) & 0xFF]; \
+		c0 ^= tmp; \
+		r1 ^= tmp; \
+		tmp = mixtab2[(x0 >>  8) & 0xFF]; \
+		c0 ^= tmp; \
+		r2 ^= tmp; \
+		tmp = mixtab3[x0 & 0xFF]; \
+		c0 ^= tmp; \
+		r3 ^= tmp; \
+		tmp = mixtab0[x1 >> 24]; \
+		c1 ^= tmp; \
+		r0 ^= tmp; \
+		tmp = mixtab1[(x1 >> 16) & 0xFF]; \
+		c1 ^= tmp; \
+		tmp = mixtab2[(x1 >>  8) & 0xFF]; \
+		c1 ^= tmp; \
+		r2 ^= tmp; \
+		tmp = mixtab3[x1 & 0xFF]; \
+		c1 ^= tmp; \
+		r3 ^= tmp; \
+		tmp = mixtab0[x2 >> 24]; \
+		c2 ^= tmp; \
+		r0 ^= tmp; \
+		tmp = mixtab1[(x2 >> 16) & 0xFF]; \
+		c2 ^= tmp; \
+		r1 ^= tmp; \
+		tmp = mixtab2[(x2 >>  8) & 0xFF]; \
+		c2 ^= tmp; \
+		tmp = mixtab3[x2 & 0xFF]; \
+		c2 ^= tmp; \
+		r3 ^= tmp; \
+		tmp = mixtab0[x3 >> 24]; \
+		c3 ^= tmp; \
+		r0 ^= tmp; \
+		tmp = mixtab1[(x3 >> 16) & 0xFF]; \
+		c3 ^= tmp; \
+		r1 ^= tmp; \
+		tmp = mixtab2[(x3 >>  8) & 0xFF]; \
+		c3 ^= tmp; \
+		r2 ^= tmp; \
+		tmp = mixtab3[x3 & 0xFF]; \
+		c3 ^= tmp; \
+		x0 = ((c0 ^ r0) & SPH_C32(0xFF000000)) \
+			| ((c1 ^ r1) & SPH_C32(0x00FF0000)) \
+			| ((c2 ^ r2) & SPH_C32(0x0000FF00)) \
+			| ((c3 ^ r3) & SPH_C32(0x000000FF)); \
+		x1 = ((c1 ^ (r0 << 8)) & SPH_C32(0xFF000000)) \
+			| ((c2 ^ (r1 << 8)) & SPH_C32(0x00FF0000)) \
+			| ((c3 ^ (r2 << 8)) & SPH_C32(0x0000FF00)) \
+			| ((c0 ^ (r3 >> 24)) & SPH_C32(0x000000FF)); \
+		x2 = ((c2 ^ (r0 << 16)) & SPH_C32(0xFF000000)) \
+			| ((c3 ^ (r1 << 16)) & SPH_C32(0x00FF0000)) \
+			| ((c0 ^ (r2 >> 16)) & SPH_C32(0x0000FF00)) \
+			| ((c1 ^ (r3 >> 16)) & SPH_C32(0x000000FF)); \
+		x3 = ((c3 ^ (r0 << 24)) & SPH_C32(0xFF000000)) \
+			| ((c0 ^ (r1 >> 8)) & SPH_C32(0x00FF0000)) \
+			| ((c1 ^ (r2 >> 8)) & SPH_C32(0x0000FF00)) \
+			| ((c2 ^ (r3 >> 8)) & SPH_C32(0x000000FF)); \
+		/* */ \
+	} while (0)
+
+#if SPH_FUGUE_NOCOPY
+
+#define DECL_STATE_SMALL
+#define READ_STATE_SMALL(state)
+#define WRITE_STATE_SMALL(state)
+#define DECL_STATE_BIG
+#define READ_STATE_BIG(state)
+#define WRITE_STATE_BIG(state)
+
+#define S00   ((sc)->S[ 0])
+#define S01   ((sc)->S[ 1])
+#define S02   ((sc)->S[ 2])
+#define S03   ((sc)->S[ 3])
+#define S04   ((sc)->S[ 4])
+#define S05   ((sc)->S[ 5])
+#define S06   ((sc)->S[ 6])
+#define S07   ((sc)->S[ 7])
+#define S08   ((sc)->S[ 8])
+#define S09   ((sc)->S[ 9])
+#define S10   ((sc)->S[10])
+#define S11   ((sc)->S[11])
+#define S12   ((sc)->S[12])
+#define S13   ((sc)->S[13])
+#define S14   ((sc)->S[14])
+#define S15   ((sc)->S[15])
+#define S16   ((sc)->S[16])
+#define S17   ((sc)->S[17])
+#define S18   ((sc)->S[18])
+#define S19   ((sc)->S[19])
+#define S20   ((sc)->S[20])
+#define S21   ((sc)->S[21])
+#define S22   ((sc)->S[22])
+#define S23   ((sc)->S[23])
+#define S24   ((sc)->S[24])
+#define S25   ((sc)->S[25])
+#define S26   ((sc)->S[26])
+#define S27   ((sc)->S[27])
+#define S28   ((sc)->S[28])
+#define S29   ((sc)->S[29])
+#define S30   ((sc)->S[30])
+#define S31   ((sc)->S[31])
+#define S32   ((sc)->S[32])
+#define S33   ((sc)->S[33])
+#define S34   ((sc)->S[34])
+#define S35   ((sc)->S[35])
+
+#else
+
+#define DECL_STATE_SMALL \
+	sph_u32 S00, S01, S02, S03, S04, S05, S06, S07, S08, S09; \
+	sph_u32 S10, S11, S12, S13, S14, S15, S16, S17, S18, S19; \
+	sph_u32 S20, S21, S22, S23, S24, S25, S26, S27, S28, S29;
+
+#define DECL_STATE_BIG \
+	DECL_STATE_SMALL \
+	sph_u32 S30, S31, S32, S33, S34, S35;
+
+#define READ_STATE_SMALL(state)   do { \
+		S00 = (state)->S[ 0]; \
+		S01 = (state)->S[ 1]; \
+		S02 = (state)->S[ 2]; \
+		S03 = (state)->S[ 3]; \
+		S04 = (state)->S[ 4]; \
+		S05 = (state)->S[ 5]; \
+		S06 = (state)->S[ 6]; \
+		S07 = (state)->S[ 7]; \
+		S08 = (state)->S[ 8]; \
+		S09 = (state)->S[ 9]; \
+		S10 = (state)->S[10]; \
+		S11 = (state)->S[11]; \
+		S12 = (state)->S[12]; \
+		S13 = (state)->S[13]; \
+		S14 = (state)->S[14]; \
+		S15 = (state)->S[15]; \
+		S16 = (state)->S[16]; \
+		S17 = (state)->S[17]; \
+		S18 = (state)->S[18]; \
+		S19 = (state)->S[19]; \
+		S20 = (state)->S[20]; \
+		S21 = (state)->S[21]; \
+		S22 = (state)->S[22]; \
+		S23 = (state)->S[23]; \
+		S24 = (state)->S[24]; \
+		S25 = (state)->S[25]; \
+		S26 = (state)->S[26]; \
+		S27 = (state)->S[27]; \
+		S28 = (state)->S[28]; \
+		S29 = (state)->S[29]; \
+	} while (0)
+
+#define READ_STATE_BIG(state)   do { \
+		READ_STATE_SMALL(state); \
+		S30 = (state)->S[30]; \
+		S31 = (state)->S[31]; \
+		S32 = (state)->S[32]; \
+		S33 = (state)->S[33]; \
+		S34 = (state)->S[34]; \
+		S35 = (state)->S[35]; \
+	} while (0)
+
+#define WRITE_STATE_SMALL(state)   do { \
+		(state)->S[ 0] = S00; \
+		(state)->S[ 1] = S01; \
+		(state)->S[ 2] = S02; \
+		(state)->S[ 3] = S03; \
+		(state)->S[ 4] = S04; \
+		(state)->S[ 5] = S05; \
+		(state)->S[ 6] = S06; \
+		(state)->S[ 7] = S07; \
+		(state)->S[ 8] = S08; \
+		(state)->S[ 9] = S09; \
+		(state)->S[10] = S10; \
+		(state)->S[11] = S11; \
+		(state)->S[12] = S12; \
+		(state)->S[13] = S13; \
+		(state)->S[14] = S14; \
+		(state)->S[15] = S15; \
+		(state)->S[16] = S16; \
+		(state)->S[17] = S17; \
+		(state)->S[18] = S18; \
+		(state)->S[19] = S19; \
+		(state)->S[20] = S20; \
+		(state)->S[21] = S21; \
+		(state)->S[22] = S22; \
+		(state)->S[23] = S23; \
+		(state)->S[24] = S24; \
+		(state)->S[25] = S25; \
+		(state)->S[26] = S26; \
+		(state)->S[27] = S27; \
+		(state)->S[28] = S28; \
+		(state)->S[29] = S29; \
+	} while (0)
+
+#define WRITE_STATE_BIG(state)   do { \
+		WRITE_STATE_SMALL(state); \
+		(state)->S[30] = S30; \
+		(state)->S[31] = S31; \
+		(state)->S[32] = S32; \
+		(state)->S[33] = S33; \
+		(state)->S[34] = S34; \
+		(state)->S[35] = S35; \
+	} while (0)
+
+#endif
+
+static void
+fugue_init(sph_fugue_context *sc, size_t z_len,
+	const sph_u32 *iv, size_t iv_len)
+{
+	size_t u;
+
+	for (u = 0; u < z_len; u ++)
+		sc->S[u] = 0;
+	memcpy(&sc->S[z_len], iv, iv_len * sizeof *iv);
+	sc->partial = 0;
+	sc->partial_len = 0;
+	sc->round_shift = 0;
+#if SPH_64
+	sc->bit_count = 0;
+#else
+	sc->bit_count_high = 0;
+	sc->bit_count_low = 0;
+#endif
+}
+
+#if SPH_64
+
+#define INCR_COUNTER   do { \
+		sc->bit_count += (sph_u64)len << 3; \
+	} while (0)
+
+#else
+
+#define INCR_COUNTER   do { \
+		sph_u32 tmp = SPH_T32((sph_u32)len << 3); \
+		sc->bit_count_low = SPH_T32(sc->bit_count_low + tmp); \
+		if (sc->bit_count_low < tmp) \
+			sc->bit_count_high ++; \
+		sc->bit_count_high = SPH_T32(sc->bit_count_high \
+			+ ((sph_u32)len >> 29)); \
+	} while (0)
+
+#endif
+
+#define CORE_ENTRY \
+	sph_u32 p; \
+	unsigned plen, rshift; \
+	INCR_COUNTER; \
+	p = sc->partial; \
+	plen = sc->partial_len; \
+	if (plen < 4) { \
+		unsigned count = 4 - plen; \
+		if (len < count) \
+			count = len; \
+		plen += count; \
+		while (count -- > 0) { \
+			p = (p << 8) | *(const unsigned char *)data; \
+			data = (const unsigned char *)data + 1; \
+			len --; \
+		} \
+		if (len == 0) { \
+			sc->partial = p; \
+			sc->partial_len = plen; \
+			return; \
+		} \
+	}
+
+#define CORE_EXIT \
+	p = 0; \
+	sc->partial_len = (unsigned)len; \
+	while (len -- > 0) { \
+		p = (p << 8) | *(const unsigned char *)data; \
+		data = (const unsigned char *)data + 1; \
+	} \
+	sc->partial = p; \
+	sc->round_shift = rshift;
+
+/*
+ * Not in a do..while: the 'break' must exit the outer loop.
+ */
+#define NEXT(rc) \
+	if (len <= 4) { \
+		rshift = (rc); \
+		break; \
+	} \
+	p = sph_dec32be(data); \
+	data = (const unsigned char *)data + 4; \
+	len -= 4
+
+static void
+fugue2_core(sph_fugue_context *sc, const void *data, size_t len)
+{
+	DECL_STATE_SMALL
+	CORE_ENTRY
+	READ_STATE_SMALL(sc);
+	rshift = sc->round_shift;
+	switch (rshift) {
+		for (;;) {
+			sph_u32 q;
+
+		case 0:
+			q = p;
+			TIX2(q, S00, S01, S08, S10, S24);
+			CMIX30(S27, S28, S29, S01, S02, S03, S12, S13, S14);
+			SMIX(S27, S28, S29, S00);
+			CMIX30(S24, S25, S26, S28, S29, S00, S09, S10, S11);
+			SMIX(S24, S25, S26, S27);
+			NEXT(1);
+			/* fall through */
+		case 1:
+			q = p;
+			TIX2(q, S24, S25, S02, S04, S18);
+			CMIX30(S21, S22, S23, S25, S26, S27, S06, S07, S08);
+			SMIX(S21, S22, S23, S24);
+			CMIX30(S18, S19, S20, S22, S23, S24, S03, S04, S05);
+			SMIX(S18, S19, S20, S21);
+			NEXT(2);
+			/* fall through */
+		case 2:
+			q = p;
+			TIX2(q, S18, S19, S26, S28, S12);
+			CMIX30(S15, S16, S17, S19, S20, S21, S00, S01, S02);
+			SMIX(S15, S16, S17, S18);
+			CMIX30(S12, S13, S14, S16, S17, S18, S27, S28, S29);
+			SMIX(S12, S13, S14, S15);
+			NEXT(3);
+			/* fall through */
+		case 3:
+			q = p;
+			TIX2(q, S12, S13, S20, S22, S06);
+			CMIX30(S09, S10, S11, S13, S14, S15, S24, S25, S26);
+			SMIX(S09, S10, S11, S12);
+			CMIX30(S06, S07, S08, S10, S11, S12, S21, S22, S23);
+			SMIX(S06, S07, S08, S09);
+			NEXT(4);
+			/* fall through */
+		case 4:
+			q = p;
+			TIX2(q, S06, S07, S14, S16, S00);
+			CMIX30(S03, S04, S05, S07, S08, S09, S18, S19, S20);
+			SMIX(S03, S04, S05, S06);
+			CMIX30(S00, S01, S02, S04, S05, S06, S15, S16, S17);
+			SMIX(S00, S01, S02, S03);
+			NEXT(0);
+		}
+	}
+	CORE_EXIT
+	WRITE_STATE_SMALL(sc);
+}
+
+static void
+fugue3_core(sph_fugue_context *sc, const void *data, size_t len)
+{
+	DECL_STATE_BIG
+	CORE_ENTRY
+	READ_STATE_BIG(sc);
+	rshift = sc->round_shift;
+	switch (rshift) {
+		for (;;) {
+			sph_u32 q;
+
+		case 0:
+			q = p;
+			TIX3(q, S00, S01, S04, S08, S16, S27, S30);
+			CMIX36(S33, S34, S35, S01, S02, S03, S15, S16, S17);
+			SMIX(S33, S34, S35, S00);
+			CMIX36(S30, S31, S32, S34, S35, S00, S12, S13, S14);
+			SMIX(S30, S31, S32, S33);
+			CMIX36(S27, S28, S29, S31, S32, S33, S09, S10, S11);
+			SMIX(S27, S28, S29, S30);
+			NEXT(1);
+			/* fall through */
+		case 1:
+			q = p;
+			TIX3(q, S27, S28, S31, S35, S07, S18, S21);
+			CMIX36(S24, S25, S26, S28, S29, S30, S06, S07, S08);
+			SMIX(S24, S25, S26, S27);
+			CMIX36(S21, S22, S23, S25, S26, S27, S03, S04, S05);
+			SMIX(S21, S22, S23, S24);
+			CMIX36(S18, S19, S20, S22, S23, S24, S00, S01, S02);
+			SMIX(S18, S19, S20, S21);
+			NEXT(2);
+			/* fall through */
+		case 2:
+			q = p;
+			TIX3(q, S18, S19, S22, S26, S34, S09, S12);
+			CMIX36(S15, S16, S17, S19, S20, S21, S33, S34, S35);
+			SMIX(S15, S16, S17, S18);
+			CMIX36(S12, S13, S14, S16, S17, S18, S30, S31, S32);
+			SMIX(S12, S13, S14, S15);
+			CMIX36(S09, S10, S11, S13, S14, S15, S27, S28, S29);
+			SMIX(S09, S10, S11, S12);
+			NEXT(3);
+			/* fall through */
+		case 3:
+			q = p;
+			TIX3(q, S09, S10, S13, S17, S25, S00, S03);
+			CMIX36(S06, S07, S08, S10, S11, S12, S24, S25, S26);
+			SMIX(S06, S07, S08, S09);
+			CMIX36(S03, S04, S05, S07, S08, S09, S21, S22, S23);
+			SMIX(S03, S04, S05, S06);
+			CMIX36(S00, S01, S02, S04, S05, S06, S18, S19, S20);
+			SMIX(S00, S01, S02, S03);
+			NEXT(0);
+		}
+	}
+	CORE_EXIT
+	WRITE_STATE_BIG(sc);
+}
+
+static void
+fugue4_core(sph_fugue_context *sc, const void *data, size_t len)
+{
+	DECL_STATE_BIG
+	CORE_ENTRY
+	READ_STATE_BIG(sc);
+	rshift = sc->round_shift;
+	switch (rshift) {
+		for (;;) {
+			sph_u32 q;
+
+		case 0:
+			q = p;
+			TIX4(q, S00, S01, S04, S07, S08, S22, S24, S27, S30);
+			CMIX36(S33, S34, S35, S01, S02, S03, S15, S16, S17);
+			SMIX(S33, S34, S35, S00);
+			CMIX36(S30, S31, S32, S34, S35, S00, S12, S13, S14);
+			SMIX(S30, S31, S32, S33);
+			CMIX36(S27, S28, S29, S31, S32, S33, S09, S10, S11);
+			SMIX(S27, S28, S29, S30);
+			CMIX36(S24, S25, S26, S28, S29, S30, S06, S07, S08);
+			SMIX(S24, S25, S26, S27);
+			NEXT(1);
+			/* fall through */
+		case 1:
+			q = p;
+			TIX4(q, S24, S25, S28, S31, S32, S10, S12, S15, S18);
+			CMIX36(S21, S22, S23, S25, S26, S27, S03, S04, S05);
+			SMIX(S21, S22, S23, S24);
+			CMIX36(S18, S19, S20, S22, S23, S24, S00, S01, S02);
+			SMIX(S18, S19, S20, S21);
+			CMIX36(S15, S16, S17, S19, S20, S21, S33, S34, S35);
+			SMIX(S15, S16, S17, S18);
+			CMIX36(S12, S13, S14, S16, S17, S18, S30, S31, S32);
+			SMIX(S12, S13, S14, S15);
+			NEXT(2);
+			/* fall through */
+		case 2:
+			q = p;
+			TIX4(q, S12, S13, S16, S19, S20, S34, S00, S03, S06);
+			CMIX36(S09, S10, S11, S13, S14, S15, S27, S28, S29);
+			SMIX(S09, S10, S11, S12);
+			CMIX36(S06, S07, S08, S10, S11, S12, S24, S25, S26);
+			SMIX(S06, S07, S08, S09);
+			CMIX36(S03, S04, S05, S07, S08, S09, S21, S22, S23);
+			SMIX(S03, S04, S05, S06);
+			CMIX36(S00, S01, S02, S04, S05, S06, S18, S19, S20);
+			SMIX(S00, S01, S02, S03);
+			NEXT(0);
+		}
+	}
+	CORE_EXIT
+	WRITE_STATE_BIG(sc);
+}
+
+#if SPH_64
+
+#define WRITE_COUNTER   do { \
+		sph_enc64be(buf + 4, sc->bit_count + n); \
+	} while (0)
+
+#else
+
+#define WRITE_COUNTER   do { \
+		sph_enc32be(buf + 4, sc->bit_count_high); \
+		sph_enc32be(buf + 8, sc->bit_count_low + n); \
+	} while (0)
+
+#endif
+
+#define CLOSE_ENTRY(s, rcm, core) \
+	unsigned char buf[16]; \
+	unsigned plen, rms; \
+	unsigned char *out; \
+	sph_u32 S[s]; \
+	plen = sc->partial_len; \
+	WRITE_COUNTER; \
+	if (plen == 0 && n == 0) { \
+		plen = 4; \
+	} else if (plen < 4 || n != 0) { \
+		unsigned u; \
+ \
+		if (plen == 4) \
+			plen = 0; \
+		buf[plen] = ub & ~(0xFFU >> n); \
+		for (u = plen + 1; u < 4; u ++) \
+			buf[u] = 0; \
+	} \
+	core(sc, buf + plen, (sizeof buf) - plen); \
+	rms = sc->round_shift * (rcm); \
+	memcpy(S, sc->S + (s) - rms, rms * sizeof(sph_u32)); \
+	memcpy(S + rms, sc->S, ((s) - rms) * sizeof(sph_u32));
+
+#define ROR(n, s)   do { \
+		sph_u32 tmp[n]; \
+		memcpy(tmp, S + ((s) - (n)), (n) * sizeof(sph_u32)); \
+		memmove(S + (n), S, ((s) - (n)) * sizeof(sph_u32)); \
+		memcpy(S, tmp, (n) * sizeof(sph_u32)); \
+	} while (0)
+
+static void
+fugue2_close(sph_fugue_context *sc, unsigned ub, unsigned n,
+	void *dst, size_t out_size_w32)
+{
+	int i;
+
+	CLOSE_ENTRY(30, 6, fugue2_core)
+	for (i = 0; i < 10; i ++) {
+		ROR(3, 30);
+		CMIX30(S[0], S[1], S[2], S[4], S[5], S[6], S[15], S[16], S[17]);
+		SMIX(S[0], S[1], S[2], S[3]);
+	}
+	for (i = 0; i < 13; i ++) {
+		S[4] ^= S[0];
+		S[15] ^= S[0];
+		ROR(15, 30);
+		SMIX(S[0], S[1], S[2], S[3]);
+		S[4] ^= S[0];
+		S[16] ^= S[0];
+		ROR(14, 30);
+		SMIX(S[0], S[1], S[2], S[3]);
+	}
+	S[4] ^= S[0];
+	S[15] ^= S[0];
+	out = dst;
+	sph_enc32be(out +  0, S[ 1]);
+	sph_enc32be(out +  4, S[ 2]);
+	sph_enc32be(out +  8, S[ 3]);
+	sph_enc32be(out + 12, S[ 4]);
+	sph_enc32be(out + 16, S[15]);
+	sph_enc32be(out + 20, S[16]);
+	sph_enc32be(out + 24, S[17]);
+	if (out_size_w32 == 8) {
+		sph_enc32be(out + 28, S[18]);
+		sph_fugue256_init(sc);
+	} else {
+		sph_fugue224_init(sc);
+	}
+}
+
+static void
+fugue3_close(sph_fugue_context *sc, unsigned ub, unsigned n, void *dst)
+{
+	int i;
+
+	CLOSE_ENTRY(36, 9, fugue3_core)
+	for (i = 0; i < 18; i ++) {
+		ROR(3, 36);
+		CMIX36(S[0], S[1], S[2], S[4], S[5], S[6], S[18], S[19], S[20]);
+		SMIX(S[0], S[1], S[2], S[3]);
+	}
+	for (i = 0; i < 13; i ++) {
+		S[4] ^= S[0];
+		S[12] ^= S[0];
+		S[24] ^= S[0];
+		ROR(12, 36);
+		SMIX(S[0], S[1], S[2], S[3]);
+		S[4] ^= S[0];
+		S[13] ^= S[0];
+		S[24] ^= S[0];
+		ROR(12, 36);
+		SMIX(S[0], S[1], S[2], S[3]);
+		S[4] ^= S[0];
+		S[13] ^= S[0];
+		S[25] ^= S[0];
+		ROR(11, 36);
+		SMIX(S[0], S[1], S[2], S[3]);
+	}
+	S[4] ^= S[0];
+	S[12] ^= S[0];
+	S[24] ^= S[0];
+	out = dst;
+	sph_enc32be(out +  0, S[ 1]);
+	sph_enc32be(out +  4, S[ 2]);
+	sph_enc32be(out +  8, S[ 3]);
+	sph_enc32be(out + 12, S[ 4]);
+	sph_enc32be(out + 16, S[12]);
+	sph_enc32be(out + 20, S[13]);
+	sph_enc32be(out + 24, S[14]);
+	sph_enc32be(out + 28, S[15]);
+	sph_enc32be(out + 32, S[24]);
+	sph_enc32be(out + 36, S[25]);
+	sph_enc32be(out + 40, S[26]);
+	sph_enc32be(out + 44, S[27]);
+	sph_fugue384_init(sc);
+}
+
+static void
+fugue4_close(sph_fugue_context *sc, unsigned ub, unsigned n, void *dst)
+{
+	int i;
+
+	CLOSE_ENTRY(36, 12, fugue4_core)
+	for (i = 0; i < 32; i ++) {
+		ROR(3, 36);
+		CMIX36(S[0], S[1], S[2], S[4], S[5], S[6], S[18], S[19], S[20]);
+		SMIX(S[0], S[1], S[2], S[3]);
+	}
+	for (i = 0; i < 13; i ++) {
+		S[4] ^= S[0];
+		S[9] ^= S[0];
+		S[18] ^= S[0];
+		S[27] ^= S[0];
+		ROR(9, 36);
+		SMIX(S[0], S[1], S[2], S[3]);
+		S[4] ^= S[0];
+		S[10] ^= S[0];
+		S[18] ^= S[0];
+		S[27] ^= S[0];
+		ROR(9, 36);
+		SMIX(S[0], S[1], S[2], S[3]);
+		S[4] ^= S[0];
+		S[10] ^= S[0];
+		S[19] ^= S[0];
+		S[27] ^= S[0];
+		ROR(9, 36);
+		SMIX(S[0], S[1], S[2], S[3]);
+		S[4] ^= S[0];
+		S[10] ^= S[0];
+		S[19] ^= S[0];
+		S[28] ^= S[0];
+		ROR(8, 36);
+		SMIX(S[0], S[1], S[2], S[3]);
+	}
+	S[4] ^= S[0];
+	S[9] ^= S[0];
+	S[18] ^= S[0];
+	S[27] ^= S[0];
+	out = dst;
+	sph_enc32be(out +  0, S[ 1]);
+	sph_enc32be(out +  4, S[ 2]);
+	sph_enc32be(out +  8, S[ 3]);
+	sph_enc32be(out + 12, S[ 4]);
+	sph_enc32be(out + 16, S[ 9]);
+	sph_enc32be(out + 20, S[10]);
+	sph_enc32be(out + 24, S[11]);
+	sph_enc32be(out + 28, S[12]);
+	sph_enc32be(out + 32, S[18]);
+	sph_enc32be(out + 36, S[19]);
+	sph_enc32be(out + 40, S[20]);
+	sph_enc32be(out + 44, S[21]);
+	sph_enc32be(out + 48, S[27]);
+	sph_enc32be(out + 52, S[28]);
+	sph_enc32be(out + 56, S[29]);
+	sph_enc32be(out + 60, S[30]);
+	sph_fugue512_init(sc);
+}
+
+void
+sph_fugue224_init(void *cc)
+{
+	fugue_init(cc, 23, IV224, 7);
+}
+
+void
+sph_fugue224(void *cc, const void *data, size_t len)
+{
+	fugue2_core(cc, data, len);
+}
+
+void
+sph_fugue224_close(void *cc, void *dst)
+{
+	fugue2_close(cc, 0, 0, dst, 7);
+}
+
+void
+sph_fugue224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	fugue2_close(cc, ub, n, dst, 7);
+}
+
+void
+sph_fugue256_init(void *cc)
+{
+	fugue_init(cc, 22, IV256, 8);
+}
+
+void
+sph_fugue256(void *cc, const void *data, size_t len)
+{
+	fugue2_core(cc, data, len);
+}
+
+void
+sph_fugue256_close(void *cc, void *dst)
+{
+	fugue2_close(cc, 0, 0, dst, 8);
+}
+
+void
+sph_fugue256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	fugue2_close(cc, ub, n, dst, 8);
+}
+
+void
+sph_fugue384_init(void *cc)
+{
+	fugue_init(cc, 24, IV384, 12);
+}
+
+void
+sph_fugue384(void *cc, const void *data, size_t len)
+{
+	fugue3_core(cc, data, len);
+}
+
+void
+sph_fugue384_close(void *cc, void *dst)
+{
+	fugue3_close(cc, 0, 0, dst);
+}
+
+void
+sph_fugue384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	fugue3_close(cc, ub, n, dst);
+}
+
+void
+sph_fugue512_init(void *cc)
+{
+	fugue_init(cc, 20, IV512, 16);
+}
+
+void
+sph_fugue512(void *cc, const void *data, size_t len)
+{
+	fugue4_core(cc, data, len);
+}
+
+void
+sph_fugue512_close(void *cc, void *dst)
+{
+	fugue4_close(cc, 0, 0, dst);
+}
+
+void
+sph_fugue512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	fugue4_close(cc, ub, n, dst);
+}
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/sha3/sph_fugue.h b/sha3/sph_fugue.h
new file mode 100644
index 0000000..6939668
--- /dev/null
+++ b/sha3/sph_fugue.h
@@ -0,0 +1,81 @@
+#ifndef SPH_FUGUE_H__
+#define SPH_FUGUE_H__
+
+#include <stddef.h>
+#include "sph_types.h"
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#define SPH_SIZE_fugue224   224
+
+#define SPH_SIZE_fugue256   256
+
+#define SPH_SIZE_fugue384   384
+
+#define SPH_SIZE_fugue512   512
+
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	sph_u32 partial;
+	unsigned partial_len;
+	unsigned round_shift;
+	sph_u32 S[36];
+#if SPH_64
+	sph_u64 bit_count;
+#else
+	sph_u32 bit_count_high, bit_count_low;
+#endif
+#endif
+} sph_fugue_context;
+
+typedef sph_fugue_context sph_fugue224_context;
+
+typedef sph_fugue_context sph_fugue256_context;
+
+typedef sph_fugue_context sph_fugue384_context;
+
+typedef sph_fugue_context sph_fugue512_context;
+
+void sph_fugue224_init(void *cc);
+
+void sph_fugue224(void *cc, const void *data, size_t len);
+
+void sph_fugue224_close(void *cc, void *dst);
+
+void sph_fugue224_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+void sph_fugue256_init(void *cc);
+
+void sph_fugue256(void *cc, const void *data, size_t len);
+
+void sph_fugue256_close(void *cc, void *dst);
+
+void sph_fugue256_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+void sph_fugue384_init(void *cc);
+
+void sph_fugue384(void *cc, const void *data, size_t len);
+
+void sph_fugue384_close(void *cc, void *dst);
+
+void sph_fugue384_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+void sph_fugue512_init(void *cc);
+
+void sph_fugue512(void *cc, const void *data, size_t len);
+
+void sph_fugue512_close(void *cc, void *dst);
+
+void sph_fugue512_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
\ No newline at end of file
diff --git a/sha3/sph_jh.c b/sha3/sph_jh.c
new file mode 100644
index 0000000..9f624f7
--- /dev/null
+++ b/sha3/sph_jh.c
@@ -0,0 +1,1116 @@
+/* $Id: jh.c 255 2011-06-07 19:50:20Z tp $ */
+/*
+ * JH implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#include <stddef.h>
+#include <string.h>
+
+#include "sph_jh.h"
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+
+#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_JH
+#define SPH_SMALL_FOOTPRINT_JH   1
+#endif
+
+#if !defined SPH_JH_64 && SPH_64_TRUE
+#define SPH_JH_64   1
+#endif
+
+#if !SPH_64
+#undef SPH_JH_64
+#endif
+
+#ifdef _MSC_VER
+#pragma warning (disable: 4146)
+#endif
+
+/*
+ * The internal bitslice representation may use either big-endian or
+ * little-endian (true bitslice operations do not care about the bit
+ * ordering, and the bit-swapping linear operations in JH happen to
+ * be invariant through endianness-swapping). The constants must be
+ * defined according to the chosen endianness; we use some
+ * byte-swapping macros for that.
+ */
+
+#if SPH_LITTLE_ENDIAN
+
+#define C32e(x)     ((SPH_C32(x) >> 24) \
+                    | ((SPH_C32(x) >>  8) & SPH_C32(0x0000FF00)) \
+                    | ((SPH_C32(x) <<  8) & SPH_C32(0x00FF0000)) \
+                    | ((SPH_C32(x) << 24) & SPH_C32(0xFF000000)))
+#define dec32e_aligned   sph_dec32le_aligned
+#define enc32e           sph_enc32le
+
+#if SPH_64
+#define C64e(x)     ((SPH_C64(x) >> 56) \
+                    | ((SPH_C64(x) >> 40) & SPH_C64(0x000000000000FF00)) \
+                    | ((SPH_C64(x) >> 24) & SPH_C64(0x0000000000FF0000)) \
+                    | ((SPH_C64(x) >>  8) & SPH_C64(0x00000000FF000000)) \
+                    | ((SPH_C64(x) <<  8) & SPH_C64(0x000000FF00000000)) \
+                    | ((SPH_C64(x) << 24) & SPH_C64(0x0000FF0000000000)) \
+                    | ((SPH_C64(x) << 40) & SPH_C64(0x00FF000000000000)) \
+                    | ((SPH_C64(x) << 56) & SPH_C64(0xFF00000000000000)))
+#define dec64e_aligned   sph_dec64le_aligned
+#define enc64e           sph_enc64le
+#endif
+
+#else
+
+#define C32e(x)     SPH_C32(x)
+#define dec32e_aligned   sph_dec32be_aligned
+#define enc32e           sph_enc32be
+#if SPH_64
+#define C64e(x)     SPH_C64(x)
+#define dec64e_aligned   sph_dec64be_aligned
+#define enc64e           sph_enc64be
+#endif
+
+#endif
+
+#define Sb(x0, x1, x2, x3, c)   do { \
+		x3 = ~x3; \
+		x0 ^= (c) & ~x2; \
+		tmp = (c) ^ (x0 & x1); \
+		x0 ^= x2 & x3; \
+		x3 ^= ~x1 & x2; \
+		x1 ^= x0 & x2; \
+		x2 ^= x0 & ~x3; \
+		x0 ^= x1 | x3; \
+		x3 ^= x1 & x2; \
+		x1 ^= tmp & x0; \
+		x2 ^= tmp; \
+	} while (0)
+
+#define Lb(x0, x1, x2, x3, x4, x5, x6, x7)   do { \
+		x4 ^= x1; \
+		x5 ^= x2; \
+		x6 ^= x3 ^ x0; \
+		x7 ^= x0; \
+		x0 ^= x5; \
+		x1 ^= x6; \
+		x2 ^= x7 ^ x4; \
+		x3 ^= x4; \
+	} while (0)
+
+#if SPH_JH_64
+
+static const sph_u64 C[] = {
+	C64e(0x72d5dea2df15f867), C64e(0x7b84150ab7231557),
+	C64e(0x81abd6904d5a87f6), C64e(0x4e9f4fc5c3d12b40),
+	C64e(0xea983ae05c45fa9c), C64e(0x03c5d29966b2999a),
+	C64e(0x660296b4f2bb538a), C64e(0xb556141a88dba231),
+	C64e(0x03a35a5c9a190edb), C64e(0x403fb20a87c14410),
+	C64e(0x1c051980849e951d), C64e(0x6f33ebad5ee7cddc),
+	C64e(0x10ba139202bf6b41), C64e(0xdc786515f7bb27d0),
+	C64e(0x0a2c813937aa7850), C64e(0x3f1abfd2410091d3),
+	C64e(0x422d5a0df6cc7e90), C64e(0xdd629f9c92c097ce),
+	C64e(0x185ca70bc72b44ac), C64e(0xd1df65d663c6fc23),
+	C64e(0x976e6c039ee0b81a), C64e(0x2105457e446ceca8),
+	C64e(0xeef103bb5d8e61fa), C64e(0xfd9697b294838197),
+	C64e(0x4a8e8537db03302f), C64e(0x2a678d2dfb9f6a95),
+	C64e(0x8afe7381f8b8696c), C64e(0x8ac77246c07f4214),
+	C64e(0xc5f4158fbdc75ec4), C64e(0x75446fa78f11bb80),
+	C64e(0x52de75b7aee488bc), C64e(0x82b8001e98a6a3f4),
+	C64e(0x8ef48f33a9a36315), C64e(0xaa5f5624d5b7f989),
+	C64e(0xb6f1ed207c5ae0fd), C64e(0x36cae95a06422c36),
+	C64e(0xce2935434efe983d), C64e(0x533af974739a4ba7),
+	C64e(0xd0f51f596f4e8186), C64e(0x0e9dad81afd85a9f),
+	C64e(0xa7050667ee34626a), C64e(0x8b0b28be6eb91727),
+	C64e(0x47740726c680103f), C64e(0xe0a07e6fc67e487b),
+	C64e(0x0d550aa54af8a4c0), C64e(0x91e3e79f978ef19e),
+	C64e(0x8676728150608dd4), C64e(0x7e9e5a41f3e5b062),
+	C64e(0xfc9f1fec4054207a), C64e(0xe3e41a00cef4c984),
+	C64e(0x4fd794f59dfa95d8), C64e(0x552e7e1124c354a5),
+	C64e(0x5bdf7228bdfe6e28), C64e(0x78f57fe20fa5c4b2),
+	C64e(0x05897cefee49d32e), C64e(0x447e9385eb28597f),
+	C64e(0x705f6937b324314a), C64e(0x5e8628f11dd6e465),
+	C64e(0xc71b770451b920e7), C64e(0x74fe43e823d4878a),
+	C64e(0x7d29e8a3927694f2), C64e(0xddcb7a099b30d9c1),
+	C64e(0x1d1b30fb5bdc1be0), C64e(0xda24494ff29c82bf),
+	C64e(0xa4e7ba31b470bfff), C64e(0x0d324405def8bc48),
+	C64e(0x3baefc3253bbd339), C64e(0x459fc3c1e0298ba0),
+	C64e(0xe5c905fdf7ae090f), C64e(0x947034124290f134),
+	C64e(0xa271b701e344ed95), C64e(0xe93b8e364f2f984a),
+	C64e(0x88401d63a06cf615), C64e(0x47c1444b8752afff),
+	C64e(0x7ebb4af1e20ac630), C64e(0x4670b6c5cc6e8ce6),
+	C64e(0xa4d5a456bd4fca00), C64e(0xda9d844bc83e18ae),
+	C64e(0x7357ce453064d1ad), C64e(0xe8a6ce68145c2567),
+	C64e(0xa3da8cf2cb0ee116), C64e(0x33e906589a94999a),
+	C64e(0x1f60b220c26f847b), C64e(0xd1ceac7fa0d18518),
+	C64e(0x32595ba18ddd19d3), C64e(0x509a1cc0aaa5b446),
+	C64e(0x9f3d6367e4046bba), C64e(0xf6ca19ab0b56ee7e),
+	C64e(0x1fb179eaa9282174), C64e(0xe9bdf7353b3651ee),
+	C64e(0x1d57ac5a7550d376), C64e(0x3a46c2fea37d7001),
+	C64e(0xf735c1af98a4d842), C64e(0x78edec209e6b6779),
+	C64e(0x41836315ea3adba8), C64e(0xfac33b4d32832c83),
+	C64e(0xa7403b1f1c2747f3), C64e(0x5940f034b72d769a),
+	C64e(0xe73e4e6cd2214ffd), C64e(0xb8fd8d39dc5759ef),
+	C64e(0x8d9b0c492b49ebda), C64e(0x5ba2d74968f3700d),
+	C64e(0x7d3baed07a8d5584), C64e(0xf5a5e9f0e4f88e65),
+	C64e(0xa0b8a2f436103b53), C64e(0x0ca8079e753eec5a),
+	C64e(0x9168949256e8884f), C64e(0x5bb05c55f8babc4c),
+	C64e(0xe3bb3b99f387947b), C64e(0x75daf4d6726b1c5d),
+	C64e(0x64aeac28dc34b36d), C64e(0x6c34a550b828db71),
+	C64e(0xf861e2f2108d512a), C64e(0xe3db643359dd75fc),
+	C64e(0x1cacbcf143ce3fa2), C64e(0x67bbd13c02e843b0),
+	C64e(0x330a5bca8829a175), C64e(0x7f34194db416535c),
+	C64e(0x923b94c30e794d1e), C64e(0x797475d7b6eeaf3f),
+	C64e(0xeaa8d4f7be1a3921), C64e(0x5cf47e094c232751),
+	C64e(0x26a32453ba323cd2), C64e(0x44a3174a6da6d5ad),
+	C64e(0xb51d3ea6aff2c908), C64e(0x83593d98916b3c56),
+	C64e(0x4cf87ca17286604d), C64e(0x46e23ecc086ec7f6),
+	C64e(0x2f9833b3b1bc765e), C64e(0x2bd666a5efc4e62a),
+	C64e(0x06f4b6e8bec1d436), C64e(0x74ee8215bcef2163),
+	C64e(0xfdc14e0df453c969), C64e(0xa77d5ac406585826),
+	C64e(0x7ec1141606e0fa16), C64e(0x7e90af3d28639d3f),
+	C64e(0xd2c9f2e3009bd20c), C64e(0x5faace30b7d40c30),
+	C64e(0x742a5116f2e03298), C64e(0x0deb30d8e3cef89a),
+	C64e(0x4bc59e7bb5f17992), C64e(0xff51e66e048668d3),
+	C64e(0x9b234d57e6966731), C64e(0xcce6a6f3170a7505),
+	C64e(0xb17681d913326cce), C64e(0x3c175284f805a262),
+	C64e(0xf42bcbb378471547), C64e(0xff46548223936a48),
+	C64e(0x38df58074e5e6565), C64e(0xf2fc7c89fc86508e),
+	C64e(0x31702e44d00bca86), C64e(0xf04009a23078474e),
+	C64e(0x65a0ee39d1f73883), C64e(0xf75ee937e42c3abd),
+	C64e(0x2197b2260113f86f), C64e(0xa344edd1ef9fdee7),
+	C64e(0x8ba0df15762592d9), C64e(0x3c85f7f612dc42be),
+	C64e(0xd8a7ec7cab27b07e), C64e(0x538d7ddaaa3ea8de),
+	C64e(0xaa25ce93bd0269d8), C64e(0x5af643fd1a7308f9),
+	C64e(0xc05fefda174a19a5), C64e(0x974d66334cfd216a),
+	C64e(0x35b49831db411570), C64e(0xea1e0fbbedcd549b),
+	C64e(0x9ad063a151974072), C64e(0xf6759dbf91476fe2)
+};
+
+#define Ceven_hi(r)   (C[((r) << 2) + 0])
+#define Ceven_lo(r)   (C[((r) << 2) + 1])
+#define Codd_hi(r)    (C[((r) << 2) + 2])
+#define Codd_lo(r)    (C[((r) << 2) + 3])
+
+#define S(x0, x1, x2, x3, cb, r)   do { \
+		Sb(x0 ## h, x1 ## h, x2 ## h, x3 ## h, cb ## hi(r)); \
+		Sb(x0 ## l, x1 ## l, x2 ## l, x3 ## l, cb ## lo(r)); \
+	} while (0)
+
+#define L(x0, x1, x2, x3, x4, x5, x6, x7)   do { \
+		Lb(x0 ## h, x1 ## h, x2 ## h, x3 ## h, \
+			x4 ## h, x5 ## h, x6 ## h, x7 ## h); \
+		Lb(x0 ## l, x1 ## l, x2 ## l, x3 ## l, \
+			x4 ## l, x5 ## l, x6 ## l, x7 ## l); \
+	} while (0)
+
+#define Wz(x, c, n)   do { \
+		sph_u64 t = (x ## h & (c)) << (n); \
+		x ## h = ((x ## h >> (n)) & (c)) | t; \
+		t = (x ## l & (c)) << (n); \
+		x ## l = ((x ## l >> (n)) & (c)) | t; \
+	} while (0)
+
+#define W0(x)   Wz(x, SPH_C64(0x5555555555555555),  1)
+#define W1(x)   Wz(x, SPH_C64(0x3333333333333333),  2)
+#define W2(x)   Wz(x, SPH_C64(0x0F0F0F0F0F0F0F0F),  4)
+#define W3(x)   Wz(x, SPH_C64(0x00FF00FF00FF00FF),  8)
+#define W4(x)   Wz(x, SPH_C64(0x0000FFFF0000FFFF), 16)
+#define W5(x)   Wz(x, SPH_C64(0x00000000FFFFFFFF), 32)
+#define W6(x)   do { \
+		sph_u64 t = x ## h; \
+		x ## h = x ## l; \
+		x ## l = t; \
+	} while (0)
+
+#define DECL_STATE \
+	sph_u64 h0h, h1h, h2h, h3h, h4h, h5h, h6h, h7h; \
+	sph_u64 h0l, h1l, h2l, h3l, h4l, h5l, h6l, h7l; \
+	sph_u64 tmp;
+
+#define READ_STATE(state)   do { \
+		h0h = (state)->H.wide[ 0]; \
+		h0l = (state)->H.wide[ 1]; \
+		h1h = (state)->H.wide[ 2]; \
+		h1l = (state)->H.wide[ 3]; \
+		h2h = (state)->H.wide[ 4]; \
+		h2l = (state)->H.wide[ 5]; \
+		h3h = (state)->H.wide[ 6]; \
+		h3l = (state)->H.wide[ 7]; \
+		h4h = (state)->H.wide[ 8]; \
+		h4l = (state)->H.wide[ 9]; \
+		h5h = (state)->H.wide[10]; \
+		h5l = (state)->H.wide[11]; \
+		h6h = (state)->H.wide[12]; \
+		h6l = (state)->H.wide[13]; \
+		h7h = (state)->H.wide[14]; \
+		h7l = (state)->H.wide[15]; \
+	} while (0)
+
+#define WRITE_STATE(state)   do { \
+		(state)->H.wide[ 0] = h0h; \
+		(state)->H.wide[ 1] = h0l; \
+		(state)->H.wide[ 2] = h1h; \
+		(state)->H.wide[ 3] = h1l; \
+		(state)->H.wide[ 4] = h2h; \
+		(state)->H.wide[ 5] = h2l; \
+		(state)->H.wide[ 6] = h3h; \
+		(state)->H.wide[ 7] = h3l; \
+		(state)->H.wide[ 8] = h4h; \
+		(state)->H.wide[ 9] = h4l; \
+		(state)->H.wide[10] = h5h; \
+		(state)->H.wide[11] = h5l; \
+		(state)->H.wide[12] = h6h; \
+		(state)->H.wide[13] = h6l; \
+		(state)->H.wide[14] = h7h; \
+		(state)->H.wide[15] = h7l; \
+	} while (0)
+
+#define INPUT_BUF1 \
+	sph_u64 m0h = dec64e_aligned(buf +  0); \
+	sph_u64 m0l = dec64e_aligned(buf +  8); \
+	sph_u64 m1h = dec64e_aligned(buf + 16); \
+	sph_u64 m1l = dec64e_aligned(buf + 24); \
+	sph_u64 m2h = dec64e_aligned(buf + 32); \
+	sph_u64 m2l = dec64e_aligned(buf + 40); \
+	sph_u64 m3h = dec64e_aligned(buf + 48); \
+	sph_u64 m3l = dec64e_aligned(buf + 56); \
+	h0h ^= m0h; \
+	h0l ^= m0l; \
+	h1h ^= m1h; \
+	h1l ^= m1l; \
+	h2h ^= m2h; \
+	h2l ^= m2l; \
+	h3h ^= m3h; \
+	h3l ^= m3l;
+
+#define INPUT_BUF2 \
+	h4h ^= m0h; \
+	h4l ^= m0l; \
+	h5h ^= m1h; \
+	h5l ^= m1l; \
+	h6h ^= m2h; \
+	h6l ^= m2l; \
+	h7h ^= m3h; \
+	h7l ^= m3l;
+
+static const sph_u64 IV224[] = {
+	C64e(0x2dfedd62f99a98ac), C64e(0xae7cacd619d634e7),
+	C64e(0xa4831005bc301216), C64e(0xb86038c6c9661494),
+	C64e(0x66d9899f2580706f), C64e(0xce9ea31b1d9b1adc),
+	C64e(0x11e8325f7b366e10), C64e(0xf994857f02fa06c1),
+	C64e(0x1b4f1b5cd8c840b3), C64e(0x97f6a17f6e738099),
+	C64e(0xdcdf93a5adeaa3d3), C64e(0xa431e8dec9539a68),
+	C64e(0x22b4a98aec86a1e4), C64e(0xd574ac959ce56cf0),
+	C64e(0x15960deab5ab2bbf), C64e(0x9611dcf0dd64ea6e)
+};
+
+static const sph_u64 IV256[] = {
+	C64e(0xeb98a3412c20d3eb), C64e(0x92cdbe7b9cb245c1),
+	C64e(0x1c93519160d4c7fa), C64e(0x260082d67e508a03),
+	C64e(0xa4239e267726b945), C64e(0xe0fb1a48d41a9477),
+	C64e(0xcdb5ab26026b177a), C64e(0x56f024420fff2fa8),
+	C64e(0x71a396897f2e4d75), C64e(0x1d144908f77de262),
+	C64e(0x277695f776248f94), C64e(0x87d5b6574780296c),
+	C64e(0x5c5e272dac8e0d6c), C64e(0x518450c657057a0f),
+	C64e(0x7be4d367702412ea), C64e(0x89e3ab13d31cd769)
+};
+
+static const sph_u64 IV384[] = {
+	C64e(0x481e3bc6d813398a), C64e(0x6d3b5e894ade879b),
+	C64e(0x63faea68d480ad2e), C64e(0x332ccb21480f8267),
+	C64e(0x98aec84d9082b928), C64e(0xd455ea3041114249),
+	C64e(0x36f555b2924847ec), C64e(0xc7250a93baf43ce1),
+	C64e(0x569b7f8a27db454c), C64e(0x9efcbd496397af0e),
+	C64e(0x589fc27d26aa80cd), C64e(0x80c08b8c9deb2eda),
+	C64e(0x8a7981e8f8d5373a), C64e(0xf43967adddd17a71),
+	C64e(0xa9b4d3bda475d394), C64e(0x976c3fba9842737f)
+};
+
+static const sph_u64 IV512[] = {
+	C64e(0x6fd14b963e00aa17), C64e(0x636a2e057a15d543),
+	C64e(0x8a225e8d0c97ef0b), C64e(0xe9341259f2b3c361),
+	C64e(0x891da0c1536f801e), C64e(0x2aa9056bea2b6d80),
+	C64e(0x588eccdb2075baa6), C64e(0xa90f3a76baf83bf7),
+	C64e(0x0169e60541e34a69), C64e(0x46b58a8e2e6fe65a),
+	C64e(0x1047a7d0c1843c24), C64e(0x3b6e71b12d5ac199),
+	C64e(0xcf57f6ec9db1f856), C64e(0xa706887c5716b156),
+	C64e(0xe3c2fcdfe68517fb), C64e(0x545a4678cc8cdd4b)
+};
+
+#else
+
+static const sph_u32 C[] = {
+        C32e(0x72d5dea2), C32e(0xdf15f867), C32e(0x7b84150a),
+        C32e(0xb7231557), C32e(0x81abd690), C32e(0x4d5a87f6),
+        C32e(0x4e9f4fc5), C32e(0xc3d12b40), C32e(0xea983ae0),
+        C32e(0x5c45fa9c), C32e(0x03c5d299), C32e(0x66b2999a),
+        C32e(0x660296b4), C32e(0xf2bb538a), C32e(0xb556141a),
+        C32e(0x88dba231), C32e(0x03a35a5c), C32e(0x9a190edb),
+        C32e(0x403fb20a), C32e(0x87c14410), C32e(0x1c051980),
+        C32e(0x849e951d), C32e(0x6f33ebad), C32e(0x5ee7cddc),
+        C32e(0x10ba1392), C32e(0x02bf6b41), C32e(0xdc786515),
+        C32e(0xf7bb27d0), C32e(0x0a2c8139), C32e(0x37aa7850),
+        C32e(0x3f1abfd2), C32e(0x410091d3), C32e(0x422d5a0d),
+        C32e(0xf6cc7e90), C32e(0xdd629f9c), C32e(0x92c097ce),
+        C32e(0x185ca70b), C32e(0xc72b44ac), C32e(0xd1df65d6),
+        C32e(0x63c6fc23), C32e(0x976e6c03), C32e(0x9ee0b81a),
+        C32e(0x2105457e), C32e(0x446ceca8), C32e(0xeef103bb),
+        C32e(0x5d8e61fa), C32e(0xfd9697b2), C32e(0x94838197),
+        C32e(0x4a8e8537), C32e(0xdb03302f), C32e(0x2a678d2d),
+        C32e(0xfb9f6a95), C32e(0x8afe7381), C32e(0xf8b8696c),
+        C32e(0x8ac77246), C32e(0xc07f4214), C32e(0xc5f4158f),
+        C32e(0xbdc75ec4), C32e(0x75446fa7), C32e(0x8f11bb80),
+        C32e(0x52de75b7), C32e(0xaee488bc), C32e(0x82b8001e),
+        C32e(0x98a6a3f4), C32e(0x8ef48f33), C32e(0xa9a36315),
+        C32e(0xaa5f5624), C32e(0xd5b7f989), C32e(0xb6f1ed20),
+        C32e(0x7c5ae0fd), C32e(0x36cae95a), C32e(0x06422c36),
+        C32e(0xce293543), C32e(0x4efe983d), C32e(0x533af974),
+        C32e(0x739a4ba7), C32e(0xd0f51f59), C32e(0x6f4e8186),
+        C32e(0x0e9dad81), C32e(0xafd85a9f), C32e(0xa7050667),
+        C32e(0xee34626a), C32e(0x8b0b28be), C32e(0x6eb91727),
+        C32e(0x47740726), C32e(0xc680103f), C32e(0xe0a07e6f),
+        C32e(0xc67e487b), C32e(0x0d550aa5), C32e(0x4af8a4c0),
+        C32e(0x91e3e79f), C32e(0x978ef19e), C32e(0x86767281),
+        C32e(0x50608dd4), C32e(0x7e9e5a41), C32e(0xf3e5b062),
+        C32e(0xfc9f1fec), C32e(0x4054207a), C32e(0xe3e41a00),
+        C32e(0xcef4c984), C32e(0x4fd794f5), C32e(0x9dfa95d8),
+        C32e(0x552e7e11), C32e(0x24c354a5), C32e(0x5bdf7228),
+        C32e(0xbdfe6e28), C32e(0x78f57fe2), C32e(0x0fa5c4b2),
+        C32e(0x05897cef), C32e(0xee49d32e), C32e(0x447e9385),
+        C32e(0xeb28597f), C32e(0x705f6937), C32e(0xb324314a),
+        C32e(0x5e8628f1), C32e(0x1dd6e465), C32e(0xc71b7704),
+        C32e(0x51b920e7), C32e(0x74fe43e8), C32e(0x23d4878a),
+        C32e(0x7d29e8a3), C32e(0x927694f2), C32e(0xddcb7a09),
+        C32e(0x9b30d9c1), C32e(0x1d1b30fb), C32e(0x5bdc1be0),
+        C32e(0xda24494f), C32e(0xf29c82bf), C32e(0xa4e7ba31),
+        C32e(0xb470bfff), C32e(0x0d324405), C32e(0xdef8bc48),
+        C32e(0x3baefc32), C32e(0x53bbd339), C32e(0x459fc3c1),
+        C32e(0xe0298ba0), C32e(0xe5c905fd), C32e(0xf7ae090f),
+        C32e(0x94703412), C32e(0x4290f134), C32e(0xa271b701),
+        C32e(0xe344ed95), C32e(0xe93b8e36), C32e(0x4f2f984a),
+        C32e(0x88401d63), C32e(0xa06cf615), C32e(0x47c1444b),
+        C32e(0x8752afff), C32e(0x7ebb4af1), C32e(0xe20ac630),
+        C32e(0x4670b6c5), C32e(0xcc6e8ce6), C32e(0xa4d5a456),
+        C32e(0xbd4fca00), C32e(0xda9d844b), C32e(0xc83e18ae),
+        C32e(0x7357ce45), C32e(0x3064d1ad), C32e(0xe8a6ce68),
+        C32e(0x145c2567), C32e(0xa3da8cf2), C32e(0xcb0ee116),
+        C32e(0x33e90658), C32e(0x9a94999a), C32e(0x1f60b220),
+        C32e(0xc26f847b), C32e(0xd1ceac7f), C32e(0xa0d18518),
+        C32e(0x32595ba1), C32e(0x8ddd19d3), C32e(0x509a1cc0),
+        C32e(0xaaa5b446), C32e(0x9f3d6367), C32e(0xe4046bba),
+        C32e(0xf6ca19ab), C32e(0x0b56ee7e), C32e(0x1fb179ea),
+        C32e(0xa9282174), C32e(0xe9bdf735), C32e(0x3b3651ee),
+        C32e(0x1d57ac5a), C32e(0x7550d376), C32e(0x3a46c2fe),
+        C32e(0xa37d7001), C32e(0xf735c1af), C32e(0x98a4d842),
+        C32e(0x78edec20), C32e(0x9e6b6779), C32e(0x41836315),
+        C32e(0xea3adba8), C32e(0xfac33b4d), C32e(0x32832c83),
+        C32e(0xa7403b1f), C32e(0x1c2747f3), C32e(0x5940f034),
+        C32e(0xb72d769a), C32e(0xe73e4e6c), C32e(0xd2214ffd),
+        C32e(0xb8fd8d39), C32e(0xdc5759ef), C32e(0x8d9b0c49),
+        C32e(0x2b49ebda), C32e(0x5ba2d749), C32e(0x68f3700d),
+        C32e(0x7d3baed0), C32e(0x7a8d5584), C32e(0xf5a5e9f0),
+        C32e(0xe4f88e65), C32e(0xa0b8a2f4), C32e(0x36103b53),
+        C32e(0x0ca8079e), C32e(0x753eec5a), C32e(0x91689492),
+        C32e(0x56e8884f), C32e(0x5bb05c55), C32e(0xf8babc4c),
+        C32e(0xe3bb3b99), C32e(0xf387947b), C32e(0x75daf4d6),
+        C32e(0x726b1c5d), C32e(0x64aeac28), C32e(0xdc34b36d),
+        C32e(0x6c34a550), C32e(0xb828db71), C32e(0xf861e2f2),
+        C32e(0x108d512a), C32e(0xe3db6433), C32e(0x59dd75fc),
+        C32e(0x1cacbcf1), C32e(0x43ce3fa2), C32e(0x67bbd13c),
+        C32e(0x02e843b0), C32e(0x330a5bca), C32e(0x8829a175),
+        C32e(0x7f34194d), C32e(0xb416535c), C32e(0x923b94c3),
+        C32e(0x0e794d1e), C32e(0x797475d7), C32e(0xb6eeaf3f),
+        C32e(0xeaa8d4f7), C32e(0xbe1a3921), C32e(0x5cf47e09),
+        C32e(0x4c232751), C32e(0x26a32453), C32e(0xba323cd2),
+        C32e(0x44a3174a), C32e(0x6da6d5ad), C32e(0xb51d3ea6),
+        C32e(0xaff2c908), C32e(0x83593d98), C32e(0x916b3c56),
+        C32e(0x4cf87ca1), C32e(0x7286604d), C32e(0x46e23ecc),
+        C32e(0x086ec7f6), C32e(0x2f9833b3), C32e(0xb1bc765e),
+        C32e(0x2bd666a5), C32e(0xefc4e62a), C32e(0x06f4b6e8),
+        C32e(0xbec1d436), C32e(0x74ee8215), C32e(0xbcef2163),
+        C32e(0xfdc14e0d), C32e(0xf453c969), C32e(0xa77d5ac4),
+        C32e(0x06585826), C32e(0x7ec11416), C32e(0x06e0fa16),
+        C32e(0x7e90af3d), C32e(0x28639d3f), C32e(0xd2c9f2e3),
+        C32e(0x009bd20c), C32e(0x5faace30), C32e(0xb7d40c30),
+        C32e(0x742a5116), C32e(0xf2e03298), C32e(0x0deb30d8),
+        C32e(0xe3cef89a), C32e(0x4bc59e7b), C32e(0xb5f17992),
+        C32e(0xff51e66e), C32e(0x048668d3), C32e(0x9b234d57),
+        C32e(0xe6966731), C32e(0xcce6a6f3), C32e(0x170a7505),
+        C32e(0xb17681d9), C32e(0x13326cce), C32e(0x3c175284),
+        C32e(0xf805a262), C32e(0xf42bcbb3), C32e(0x78471547),
+        C32e(0xff465482), C32e(0x23936a48), C32e(0x38df5807),
+        C32e(0x4e5e6565), C32e(0xf2fc7c89), C32e(0xfc86508e),
+        C32e(0x31702e44), C32e(0xd00bca86), C32e(0xf04009a2),
+        C32e(0x3078474e), C32e(0x65a0ee39), C32e(0xd1f73883),
+        C32e(0xf75ee937), C32e(0xe42c3abd), C32e(0x2197b226),
+        C32e(0x0113f86f), C32e(0xa344edd1), C32e(0xef9fdee7),
+        C32e(0x8ba0df15), C32e(0x762592d9), C32e(0x3c85f7f6),
+        C32e(0x12dc42be), C32e(0xd8a7ec7c), C32e(0xab27b07e),
+        C32e(0x538d7dda), C32e(0xaa3ea8de), C32e(0xaa25ce93),
+        C32e(0xbd0269d8), C32e(0x5af643fd), C32e(0x1a7308f9),
+        C32e(0xc05fefda), C32e(0x174a19a5), C32e(0x974d6633),
+        C32e(0x4cfd216a), C32e(0x35b49831), C32e(0xdb411570),
+        C32e(0xea1e0fbb), C32e(0xedcd549b), C32e(0x9ad063a1),
+        C32e(0x51974072), C32e(0xf6759dbf), C32e(0x91476fe2)
+};
+
+#define Ceven_w3(r)   (C[((r) << 3) + 0])
+#define Ceven_w2(r)   (C[((r) << 3) + 1])
+#define Ceven_w1(r)   (C[((r) << 3) + 2])
+#define Ceven_w0(r)   (C[((r) << 3) + 3])
+#define Codd_w3(r)    (C[((r) << 3) + 4])
+#define Codd_w2(r)    (C[((r) << 3) + 5])
+#define Codd_w1(r)    (C[((r) << 3) + 6])
+#define Codd_w0(r)    (C[((r) << 3) + 7])
+
+#define S(x0, x1, x2, x3, cb, r)   do { \
+		Sb(x0 ## 3, x1 ## 3, x2 ## 3, x3 ## 3, cb ## w3(r)); \
+		Sb(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, cb ## w2(r)); \
+		Sb(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, cb ## w1(r)); \
+		Sb(x0 ## 0, x1 ## 0, x2 ## 0, x3 ## 0, cb ## w0(r)); \
+	} while (0)
+
+#define L(x0, x1, x2, x3, x4, x5, x6, x7)   do { \
+		Lb(x0 ## 3, x1 ## 3, x2 ## 3, x3 ## 3, \
+			x4 ## 3, x5 ## 3, x6 ## 3, x7 ## 3); \
+		Lb(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, \
+			x4 ## 2, x5 ## 2, x6 ## 2, x7 ## 2); \
+		Lb(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, \
+			x4 ## 1, x5 ## 1, x6 ## 1, x7 ## 1); \
+		Lb(x0 ## 0, x1 ## 0, x2 ## 0, x3 ## 0, \
+			x4 ## 0, x5 ## 0, x6 ## 0, x7 ## 0); \
+	} while (0)
+
+#define Wz(x, c, n)   do { \
+		sph_u32 t = (x ## 3 & (c)) << (n); \
+		x ## 3 = ((x ## 3 >> (n)) & (c)) | t; \
+		t = (x ## 2 & (c)) << (n); \
+		x ## 2 = ((x ## 2 >> (n)) & (c)) | t; \
+		t = (x ## 1 & (c)) << (n); \
+		x ## 1 = ((x ## 1 >> (n)) & (c)) | t; \
+		t = (x ## 0 & (c)) << (n); \
+		x ## 0 = ((x ## 0 >> (n)) & (c)) | t; \
+	} while (0)
+
+#define W0(x)   Wz(x, SPH_C32(0x55555555),  1)
+#define W1(x)   Wz(x, SPH_C32(0x33333333),  2)
+#define W2(x)   Wz(x, SPH_C32(0x0F0F0F0F),  4)
+#define W3(x)   Wz(x, SPH_C32(0x00FF00FF),  8)
+#define W4(x)   Wz(x, SPH_C32(0x0000FFFF), 16)
+#define W5(x)   do { \
+		sph_u32 t = x ## 3; \
+		x ## 3 = x ## 2; \
+		x ## 2 = t; \
+		t = x ## 1; \
+		x ## 1 = x ## 0; \
+		x ## 0 = t; \
+	} while (0)
+#define W6(x)   do { \
+		sph_u32 t = x ## 3; \
+		x ## 3 = x ## 1; \
+		x ## 1 = t; \
+		t = x ## 2; \
+		x ## 2 = x ## 0; \
+		x ## 0 = t; \
+	} while (0)
+
+#define DECL_STATE \
+	sph_u32 h03, h02, h01, h00, h13, h12, h11, h10; \
+	sph_u32 h23, h22, h21, h20, h33, h32, h31, h30; \
+	sph_u32 h43, h42, h41, h40, h53, h52, h51, h50; \
+	sph_u32 h63, h62, h61, h60, h73, h72, h71, h70; \
+	sph_u32 tmp;
+
+#define READ_STATE(state)   do { \
+		h03 = (state)->H.narrow[ 0]; \
+		h02 = (state)->H.narrow[ 1]; \
+		h01 = (state)->H.narrow[ 2]; \
+		h00 = (state)->H.narrow[ 3]; \
+		h13 = (state)->H.narrow[ 4]; \
+		h12 = (state)->H.narrow[ 5]; \
+		h11 = (state)->H.narrow[ 6]; \
+		h10 = (state)->H.narrow[ 7]; \
+		h23 = (state)->H.narrow[ 8]; \
+		h22 = (state)->H.narrow[ 9]; \
+		h21 = (state)->H.narrow[10]; \
+		h20 = (state)->H.narrow[11]; \
+		h33 = (state)->H.narrow[12]; \
+		h32 = (state)->H.narrow[13]; \
+		h31 = (state)->H.narrow[14]; \
+		h30 = (state)->H.narrow[15]; \
+		h43 = (state)->H.narrow[16]; \
+		h42 = (state)->H.narrow[17]; \
+		h41 = (state)->H.narrow[18]; \
+		h40 = (state)->H.narrow[19]; \
+		h53 = (state)->H.narrow[20]; \
+		h52 = (state)->H.narrow[21]; \
+		h51 = (state)->H.narrow[22]; \
+		h50 = (state)->H.narrow[23]; \
+		h63 = (state)->H.narrow[24]; \
+		h62 = (state)->H.narrow[25]; \
+		h61 = (state)->H.narrow[26]; \
+		h60 = (state)->H.narrow[27]; \
+		h73 = (state)->H.narrow[28]; \
+		h72 = (state)->H.narrow[29]; \
+		h71 = (state)->H.narrow[30]; \
+		h70 = (state)->H.narrow[31]; \
+	} while (0)
+
+#define WRITE_STATE(state)   do { \
+		(state)->H.narrow[ 0] = h03; \
+		(state)->H.narrow[ 1] = h02; \
+		(state)->H.narrow[ 2] = h01; \
+		(state)->H.narrow[ 3] = h00; \
+		(state)->H.narrow[ 4] = h13; \
+		(state)->H.narrow[ 5] = h12; \
+		(state)->H.narrow[ 6] = h11; \
+		(state)->H.narrow[ 7] = h10; \
+		(state)->H.narrow[ 8] = h23; \
+		(state)->H.narrow[ 9] = h22; \
+		(state)->H.narrow[10] = h21; \
+		(state)->H.narrow[11] = h20; \
+		(state)->H.narrow[12] = h33; \
+		(state)->H.narrow[13] = h32; \
+		(state)->H.narrow[14] = h31; \
+		(state)->H.narrow[15] = h30; \
+		(state)->H.narrow[16] = h43; \
+		(state)->H.narrow[17] = h42; \
+		(state)->H.narrow[18] = h41; \
+		(state)->H.narrow[19] = h40; \
+		(state)->H.narrow[20] = h53; \
+		(state)->H.narrow[21] = h52; \
+		(state)->H.narrow[22] = h51; \
+		(state)->H.narrow[23] = h50; \
+		(state)->H.narrow[24] = h63; \
+		(state)->H.narrow[25] = h62; \
+		(state)->H.narrow[26] = h61; \
+		(state)->H.narrow[27] = h60; \
+		(state)->H.narrow[28] = h73; \
+		(state)->H.narrow[29] = h72; \
+		(state)->H.narrow[30] = h71; \
+		(state)->H.narrow[31] = h70; \
+	} while (0)
+
+#define INPUT_BUF1 \
+	sph_u32 m03 = dec32e_aligned(buf +  0); \
+	sph_u32 m02 = dec32e_aligned(buf +  4); \
+	sph_u32 m01 = dec32e_aligned(buf +  8); \
+	sph_u32 m00 = dec32e_aligned(buf + 12); \
+	sph_u32 m13 = dec32e_aligned(buf + 16); \
+	sph_u32 m12 = dec32e_aligned(buf + 20); \
+	sph_u32 m11 = dec32e_aligned(buf + 24); \
+	sph_u32 m10 = dec32e_aligned(buf + 28); \
+	sph_u32 m23 = dec32e_aligned(buf + 32); \
+	sph_u32 m22 = dec32e_aligned(buf + 36); \
+	sph_u32 m21 = dec32e_aligned(buf + 40); \
+	sph_u32 m20 = dec32e_aligned(buf + 44); \
+	sph_u32 m33 = dec32e_aligned(buf + 48); \
+	sph_u32 m32 = dec32e_aligned(buf + 52); \
+	sph_u32 m31 = dec32e_aligned(buf + 56); \
+	sph_u32 m30 = dec32e_aligned(buf + 60); \
+	h03 ^= m03; \
+	h02 ^= m02; \
+	h01 ^= m01; \
+	h00 ^= m00; \
+	h13 ^= m13; \
+	h12 ^= m12; \
+	h11 ^= m11; \
+	h10 ^= m10; \
+	h23 ^= m23; \
+	h22 ^= m22; \
+	h21 ^= m21; \
+	h20 ^= m20; \
+	h33 ^= m33; \
+	h32 ^= m32; \
+	h31 ^= m31; \
+	h30 ^= m30;
+
+#define INPUT_BUF2 \
+	h43 ^= m03; \
+	h42 ^= m02; \
+	h41 ^= m01; \
+	h40 ^= m00; \
+	h53 ^= m13; \
+	h52 ^= m12; \
+	h51 ^= m11; \
+	h50 ^= m10; \
+	h63 ^= m23; \
+	h62 ^= m22; \
+	h61 ^= m21; \
+	h60 ^= m20; \
+	h73 ^= m33; \
+	h72 ^= m32; \
+	h71 ^= m31; \
+	h70 ^= m30;
+
+static const sph_u32 IV224[] = {
+        C32e(0x2dfedd62), C32e(0xf99a98ac), C32e(0xae7cacd6), C32e(0x19d634e7),
+        C32e(0xa4831005), C32e(0xbc301216), C32e(0xb86038c6), C32e(0xc9661494),
+        C32e(0x66d9899f), C32e(0x2580706f), C32e(0xce9ea31b), C32e(0x1d9b1adc),
+        C32e(0x11e8325f), C32e(0x7b366e10), C32e(0xf994857f), C32e(0x02fa06c1),
+        C32e(0x1b4f1b5c), C32e(0xd8c840b3), C32e(0x97f6a17f), C32e(0x6e738099),
+        C32e(0xdcdf93a5), C32e(0xadeaa3d3), C32e(0xa431e8de), C32e(0xc9539a68),
+        C32e(0x22b4a98a), C32e(0xec86a1e4), C32e(0xd574ac95), C32e(0x9ce56cf0),
+        C32e(0x15960dea), C32e(0xb5ab2bbf), C32e(0x9611dcf0), C32e(0xdd64ea6e)
+};
+
+static const sph_u32 IV256[] = {
+        C32e(0xeb98a341), C32e(0x2c20d3eb), C32e(0x92cdbe7b), C32e(0x9cb245c1),
+        C32e(0x1c935191), C32e(0x60d4c7fa), C32e(0x260082d6), C32e(0x7e508a03),
+        C32e(0xa4239e26), C32e(0x7726b945), C32e(0xe0fb1a48), C32e(0xd41a9477),
+        C32e(0xcdb5ab26), C32e(0x026b177a), C32e(0x56f02442), C32e(0x0fff2fa8),
+        C32e(0x71a39689), C32e(0x7f2e4d75), C32e(0x1d144908), C32e(0xf77de262),
+        C32e(0x277695f7), C32e(0x76248f94), C32e(0x87d5b657), C32e(0x4780296c),
+        C32e(0x5c5e272d), C32e(0xac8e0d6c), C32e(0x518450c6), C32e(0x57057a0f),
+        C32e(0x7be4d367), C32e(0x702412ea), C32e(0x89e3ab13), C32e(0xd31cd769)
+};
+
+static const sph_u32 IV384[] = {
+        C32e(0x481e3bc6), C32e(0xd813398a), C32e(0x6d3b5e89), C32e(0x4ade879b),
+        C32e(0x63faea68), C32e(0xd480ad2e), C32e(0x332ccb21), C32e(0x480f8267),
+        C32e(0x98aec84d), C32e(0x9082b928), C32e(0xd455ea30), C32e(0x41114249),
+        C32e(0x36f555b2), C32e(0x924847ec), C32e(0xc7250a93), C32e(0xbaf43ce1),
+        C32e(0x569b7f8a), C32e(0x27db454c), C32e(0x9efcbd49), C32e(0x6397af0e),
+        C32e(0x589fc27d), C32e(0x26aa80cd), C32e(0x80c08b8c), C32e(0x9deb2eda),
+        C32e(0x8a7981e8), C32e(0xf8d5373a), C32e(0xf43967ad), C32e(0xddd17a71),
+        C32e(0xa9b4d3bd), C32e(0xa475d394), C32e(0x976c3fba), C32e(0x9842737f)
+};
+
+static const sph_u32 IV512[] = {
+        C32e(0x6fd14b96), C32e(0x3e00aa17), C32e(0x636a2e05), C32e(0x7a15d543),
+        C32e(0x8a225e8d), C32e(0x0c97ef0b), C32e(0xe9341259), C32e(0xf2b3c361),
+        C32e(0x891da0c1), C32e(0x536f801e), C32e(0x2aa9056b), C32e(0xea2b6d80),
+        C32e(0x588eccdb), C32e(0x2075baa6), C32e(0xa90f3a76), C32e(0xbaf83bf7),
+        C32e(0x0169e605), C32e(0x41e34a69), C32e(0x46b58a8e), C32e(0x2e6fe65a),
+        C32e(0x1047a7d0), C32e(0xc1843c24), C32e(0x3b6e71b1), C32e(0x2d5ac199),
+        C32e(0xcf57f6ec), C32e(0x9db1f856), C32e(0xa706887c), C32e(0x5716b156),
+        C32e(0xe3c2fcdf), C32e(0xe68517fb), C32e(0x545a4678), C32e(0xcc8cdd4b)
+};
+
+#endif
+
+#define SL(ro)   SLu(r + ro, ro)
+
+#define SLu(r, ro)   do { \
+		S(h0, h2, h4, h6, Ceven_, r); \
+		S(h1, h3, h5, h7, Codd_, r); \
+		L(h0, h2, h4, h6, h1, h3, h5, h7); \
+		W ## ro(h1); \
+		W ## ro(h3); \
+		W ## ro(h5); \
+		W ## ro(h7); \
+	} while (0)
+
+#if SPH_SMALL_FOOTPRINT_JH
+
+#if SPH_JH_64
+
+/*
+ * The "small footprint" 64-bit version just uses a partially unrolled
+ * loop.
+ */
+
+#define E8   do { \
+		unsigned r; \
+		for (r = 0; r < 42; r += 7) { \
+			SL(0); \
+			SL(1); \
+			SL(2); \
+			SL(3); \
+			SL(4); \
+			SL(5); \
+			SL(6); \
+		} \
+	} while (0)
+
+#else
+
+#define E8   do { \
+		unsigned r, g; \
+		for (r = g = 0; r < 42; r ++) { \
+			S(h0, h2, h4, h6, Ceven_, r); \
+			S(h1, h3, h5, h7, Codd_, r); \
+			L(h0, h2, h4, h6, h1, h3, h5, h7); \
+			switch (g) { \
+			case 0: \
+				W0(h1); \
+				W0(h3); \
+				W0(h5); \
+				W0(h7); \
+				break; \
+			case 1: \
+				W1(h1); \
+				W1(h3); \
+				W1(h5); \
+				W1(h7); \
+				break; \
+			case 2: \
+				W2(h1); \
+				W2(h3); \
+				W2(h5); \
+				W2(h7); \
+				break; \
+			case 3: \
+				W3(h1); \
+				W3(h3); \
+				W3(h5); \
+				W3(h7); \
+				break; \
+			case 4: \
+				W4(h1); \
+				W4(h3); \
+				W4(h5); \
+				W4(h7); \
+				break; \
+			case 5: \
+				W5(h1); \
+				W5(h3); \
+				W5(h5); \
+				W5(h7); \
+				break; \
+			case 6: \
+				W6(h1); \
+				W6(h3); \
+				W6(h5); \
+				W6(h7); \
+				break; \
+			} \
+			if (++ g == 7) \
+				g = 0; \
+		} \
+	} while (0)
+
+#endif
+
+#else
+
+#if SPH_JH_64
+
+/*
+ * On a "true 64-bit" architecture, we can unroll at will.
+ */
+
+#define E8   do { \
+		SLu( 0, 0); \
+		SLu( 1, 1); \
+		SLu( 2, 2); \
+		SLu( 3, 3); \
+		SLu( 4, 4); \
+		SLu( 5, 5); \
+		SLu( 6, 6); \
+		SLu( 7, 0); \
+		SLu( 8, 1); \
+		SLu( 9, 2); \
+		SLu(10, 3); \
+		SLu(11, 4); \
+		SLu(12, 5); \
+		SLu(13, 6); \
+		SLu(14, 0); \
+		SLu(15, 1); \
+		SLu(16, 2); \
+		SLu(17, 3); \
+		SLu(18, 4); \
+		SLu(19, 5); \
+		SLu(20, 6); \
+		SLu(21, 0); \
+		SLu(22, 1); \
+		SLu(23, 2); \
+		SLu(24, 3); \
+		SLu(25, 4); \
+		SLu(26, 5); \
+		SLu(27, 6); \
+		SLu(28, 0); \
+		SLu(29, 1); \
+		SLu(30, 2); \
+		SLu(31, 3); \
+		SLu(32, 4); \
+		SLu(33, 5); \
+		SLu(34, 6); \
+		SLu(35, 0); \
+		SLu(36, 1); \
+		SLu(37, 2); \
+		SLu(38, 3); \
+		SLu(39, 4); \
+		SLu(40, 5); \
+		SLu(41, 6); \
+	} while (0)
+
+#else
+
+/*
+ * We are not aiming at a small footprint, but we are still using a
+ * 32-bit implementation. Full loop unrolling would smash the L1
+ * cache on some "big" architectures (32 kB L1 cache).
+ */
+
+#define E8   do { \
+		unsigned r; \
+		for (r = 0; r < 42; r += 7) { \
+			SL(0); \
+			SL(1); \
+			SL(2); \
+			SL(3); \
+			SL(4); \
+			SL(5); \
+			SL(6); \
+		} \
+	} while (0)
+
+#endif
+
+#endif
+
+static void
+jh_init(sph_jh_context *sc, const void *iv)
+{
+    sc->ptr = 0;
+#if SPH_JH_64
+    memcpy(sc->H.wide, iv, sizeof sc->H.wide);
+#else
+    memcpy(sc->H.narrow, iv, sizeof sc->H.narrow);
+#endif
+#if SPH_64
+    sc->block_count = 0;
+#else
+    sc->block_count_high = 0;
+    sc->block_count_low = 0;
+#endif
+}
+
+static void
+jh_core(sph_jh_context *sc, const void *data, size_t len)
+{
+    unsigned char *buf;
+    size_t ptr;
+    DECL_STATE
+
+    buf = sc->buf;
+    ptr = sc->ptr;
+    if (len < (sizeof sc->buf) - ptr) {
+        memcpy(buf + ptr, data, len);
+        ptr += len;
+        sc->ptr = ptr;
+        return;
+    }
+
+    READ_STATE(sc);
+    while (len > 0) {
+        size_t clen;
+
+        clen = (sizeof sc->buf) - ptr;
+        if (clen > len)
+            clen = len;
+        memcpy(buf + ptr, data, clen);
+        ptr += clen;
+        data = (const unsigned char *)data + clen;
+        len -= clen;
+        if (ptr == sizeof sc->buf) {
+            INPUT_BUF1;
+            E8;
+            INPUT_BUF2;
+#if SPH_64
+            sc->block_count ++;
+#else
+            if ((sc->block_count_low = SPH_T32(
+                    sc->block_count_low + 1)) == 0)
+                sc->block_count_high ++;
+#endif
+            ptr = 0;
+        }
+    }
+    WRITE_STATE(sc);
+    sc->ptr = ptr;
+}
+
+static void
+jh_close(sph_jh_context *sc, unsigned ub, unsigned n,
+         void *dst, size_t out_size_w32, const void *iv)
+{
+    unsigned z;
+    unsigned char buf[128];
+    size_t numz, u;
+#if SPH_64
+    sph_u64 l0, l1;
+#else
+    sph_u32 l0, l1, l2, l3;
+#endif
+
+    z = 0x80 >> n;
+    buf[0] = ((ub & -z) | z) & 0xFF;
+    if (sc->ptr == 0 && n == 0) {
+        numz = 47;
+    } else {
+        numz = 111 - sc->ptr;
+    }
+    memset(buf + 1, 0, numz);
+#if SPH_64
+    l0 = SPH_T64(sc->block_count << 9) + (sc->ptr << 3) + n;
+	l1 = SPH_T64(sc->block_count >> 55);
+	sph_enc64be(buf + numz + 1, l1);
+	sph_enc64be(buf + numz + 9, l0);
+#else
+    l0 = SPH_T32(sc->block_count_low << 9) + (sc->ptr << 3) + n;
+    l1 = SPH_T32(sc->block_count_low >> 23)
+         + SPH_T32(sc->block_count_high << 9);
+    l2 = SPH_T32(sc->block_count_high >> 23);
+    l3 = 0;
+    sph_enc32be(buf + numz +  1, l3);
+    sph_enc32be(buf + numz +  5, l2);
+    sph_enc32be(buf + numz +  9, l1);
+    sph_enc32be(buf + numz + 13, l0);
+#endif
+    jh_core(sc, buf, numz + 17);
+#if SPH_JH_64
+    for (u = 0; u < 8; u ++)
+		enc64e(buf + (u << 3), sc->H.wide[u + 8]);
+#else
+    for (u = 0; u < 16; u ++)
+        enc32e(buf + (u << 2), sc->H.narrow[u + 16]);
+#endif
+    memcpy(dst, buf + ((16 - out_size_w32) << 2), out_size_w32 << 2);
+    jh_init(sc, iv);
+}
+
+/* see sph_jh.h */
+void
+sph_jh224_init(void *cc)
+{
+    jh_init(cc, IV224);
+}
+
+/* see sph_jh.h */
+void
+sph_jh224(void *cc, const void *data, size_t len)
+{
+    jh_core(cc, data, len);
+}
+
+/* see sph_jh.h */
+void
+sph_jh224_close(void *cc, void *dst)
+{
+    jh_close(cc, 0, 0, dst, 7, IV224);
+}
+
+/* see sph_jh.h */
+void
+sph_jh224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+    jh_close(cc, ub, n, dst, 7, IV224);
+}
+
+/* see sph_jh.h */
+void
+sph_jh256_init(void *cc)
+{
+    jh_init(cc, IV256);
+}
+
+/* see sph_jh.h */
+void
+sph_jh256(void *cc, const void *data, size_t len)
+{
+    jh_core(cc, data, len);
+}
+
+/* see sph_jh.h */
+void
+sph_jh256_close(void *cc, void *dst)
+{
+    jh_close(cc, 0, 0, dst, 8, IV256);
+}
+
+/* see sph_jh.h */
+void
+sph_jh256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+    jh_close(cc, ub, n, dst, 8, IV256);
+}
+
+/* see sph_jh.h */
+void
+sph_jh384_init(void *cc)
+{
+    jh_init(cc, IV384);
+}
+
+/* see sph_jh.h */
+void
+sph_jh384(void *cc, const void *data, size_t len)
+{
+    jh_core(cc, data, len);
+}
+
+/* see sph_jh.h */
+void
+sph_jh384_close(void *cc, void *dst)
+{
+    jh_close(cc, 0, 0, dst, 12, IV384);
+}
+
+/* see sph_jh.h */
+void
+sph_jh384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+    jh_close(cc, ub, n, dst, 12, IV384);
+}
+
+/* see sph_jh.h */
+void
+sph_jh512_init(void *cc)
+{
+    jh_init(cc, IV512);
+}
+
+/* see sph_jh.h */
+void
+sph_jh512(void *cc, const void *data, size_t len)
+{
+    jh_core(cc, data, len);
+}
+
+/* see sph_jh.h */
+void
+sph_jh512_close(void *cc, void *dst)
+{
+    jh_close(cc, 0, 0, dst, 16, IV512);
+}
+
+/* see sph_jh.h */
+void
+sph_jh512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+    jh_close(cc, ub, n, dst, 16, IV512);
+}
+
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/sha3/sph_jh.h b/sha3/sph_jh.h
new file mode 100644
index 0000000..08a9797
--- /dev/null
+++ b/sha3/sph_jh.h
@@ -0,0 +1,298 @@
+/* $Id: sph_jh.h 216 2010-06-08 09:46:57Z tp $ */
+/**
+ * JH interface. JH is a family of functions which differ by
+ * their output size; this implementation defines JH for output
+ * sizes 224, 256, 384 and 512 bits.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file     sph_jh.h
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#ifndef SPH_JH_H__
+#define SPH_JH_H__
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#include <stddef.h>
+#include "sph_types.h"
+
+/**
+ * Output size (in bits) for JH-224.
+ */
+#define SPH_SIZE_jh224   224
+
+/**
+ * Output size (in bits) for JH-256.
+ */
+#define SPH_SIZE_jh256   256
+
+/**
+ * Output size (in bits) for JH-384.
+ */
+#define SPH_SIZE_jh384   384
+
+/**
+ * Output size (in bits) for JH-512.
+ */
+#define SPH_SIZE_jh512   512
+
+/**
+ * This structure is a context for JH computations: it contains the
+ * intermediate values and some data from the last entered block. Once
+ * a JH computation has been performed, the context can be reused for
+ * another computation.
+ *
+ * The contents of this structure are private. A running JH computation
+ * can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+    unsigned char buf[64];    /* first field, for alignment */
+    size_t ptr;
+    union {
+#if SPH_64
+        sph_u64 wide[16];
+#endif
+        sph_u32 narrow[32];
+    } H;
+#if SPH_64
+    sph_u64 block_count;
+#else
+    sph_u32 block_count_high, block_count_low;
+#endif
+#endif
+} sph_jh_context;
+
+/**
+ * Type for a JH-224 context (identical to the common context).
+ */
+typedef sph_jh_context sph_jh224_context;
+
+/**
+ * Type for a JH-256 context (identical to the common context).
+ */
+typedef sph_jh_context sph_jh256_context;
+
+/**
+ * Type for a JH-384 context (identical to the common context).
+ */
+typedef sph_jh_context sph_jh384_context;
+
+/**
+ * Type for a JH-512 context (identical to the common context).
+ */
+typedef sph_jh_context sph_jh512_context;
+
+/**
+ * Initialize a JH-224 context. This process performs no memory allocation.
+ *
+ * @param cc   the JH-224 context (pointer to a
+ *             <code>sph_jh224_context</code>)
+ */
+void sph_jh224_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the JH-224 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_jh224(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current JH-224 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accomodate the result (28 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the JH-224 context
+ * @param dst   the destination buffer
+ */
+void sph_jh224_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accomodate the result (28 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the JH-224 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_jh224_addbits_and_close(
+        void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a JH-256 context. This process performs no memory allocation.
+ *
+ * @param cc   the JH-256 context (pointer to a
+ *             <code>sph_jh256_context</code>)
+ */
+void sph_jh256_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the JH-256 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_jh256(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current JH-256 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accomodate the result (32 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the JH-256 context
+ * @param dst   the destination buffer
+ */
+void sph_jh256_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accomodate the result (32 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the JH-256 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_jh256_addbits_and_close(
+        void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a JH-384 context. This process performs no memory allocation.
+ *
+ * @param cc   the JH-384 context (pointer to a
+ *             <code>sph_jh384_context</code>)
+ */
+void sph_jh384_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the JH-384 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_jh384(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current JH-384 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accomodate the result (48 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the JH-384 context
+ * @param dst   the destination buffer
+ */
+void sph_jh384_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accomodate the result (48 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the JH-384 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_jh384_addbits_and_close(
+        void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a JH-512 context. This process performs no memory allocation.
+ *
+ * @param cc   the JH-512 context (pointer to a
+ *             <code>sph_jh512_context</code>)
+ */
+void sph_jh512_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the JH-512 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_jh512(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current JH-512 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accomodate the result (64 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the JH-512 context
+ * @param dst   the destination buffer
+ */
+void sph_jh512_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accomodate the result (64 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the JH-512 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_jh512_addbits_and_close(
+        void *cc, unsigned ub, unsigned n, void *dst);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
\ No newline at end of file
diff --git a/sha3/sph_skein.c b/sha3/sph_skein.c
new file mode 100644
index 0000000..63f8798
--- /dev/null
+++ b/sha3/sph_skein.c
@@ -0,0 +1,1254 @@
+/* $Id: skein.c 254 2011-06-07 19:38:58Z tp $ */
+/*
+ * Skein implementation.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#include <stddef.h>
+#include <string.h>
+
+#include "sph_skein.h"
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+
+#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_SKEIN
+#define SPH_SMALL_FOOTPRINT_SKEIN   1
+#endif
+
+#ifdef _MSC_VER
+#pragma warning (disable: 4146)
+#endif
+
+#if SPH_64
+
+#if 0
+/* obsolete */
+/*
+ * M5_ ## s ## _ ## i  evaluates to s+i mod 5 (0 <= s <= 18, 0 <= i <= 3).
+ */
+
+#define M5_0_0    0
+#define M5_0_1    1
+#define M5_0_2    2
+#define M5_0_3    3
+
+#define M5_1_0    1
+#define M5_1_1    2
+#define M5_1_2    3
+#define M5_1_3    4
+
+#define M5_2_0    2
+#define M5_2_1    3
+#define M5_2_2    4
+#define M5_2_3    0
+
+#define M5_3_0    3
+#define M5_3_1    4
+#define M5_3_2    0
+#define M5_3_3    1
+
+#define M5_4_0    4
+#define M5_4_1    0
+#define M5_4_2    1
+#define M5_4_3    2
+
+#define M5_5_0    0
+#define M5_5_1    1
+#define M5_5_2    2
+#define M5_5_3    3
+
+#define M5_6_0    1
+#define M5_6_1    2
+#define M5_6_2    3
+#define M5_6_3    4
+
+#define M5_7_0    2
+#define M5_7_1    3
+#define M5_7_2    4
+#define M5_7_3    0
+
+#define M5_8_0    3
+#define M5_8_1    4
+#define M5_8_2    0
+#define M5_8_3    1
+
+#define M5_9_0    4
+#define M5_9_1    0
+#define M5_9_2    1
+#define M5_9_3    2
+
+#define M5_10_0   0
+#define M5_10_1   1
+#define M5_10_2   2
+#define M5_10_3   3
+
+#define M5_11_0   1
+#define M5_11_1   2
+#define M5_11_2   3
+#define M5_11_3   4
+
+#define M5_12_0   2
+#define M5_12_1   3
+#define M5_12_2   4
+#define M5_12_3   0
+
+#define M5_13_0   3
+#define M5_13_1   4
+#define M5_13_2   0
+#define M5_13_3   1
+
+#define M5_14_0   4
+#define M5_14_1   0
+#define M5_14_2   1
+#define M5_14_3   2
+
+#define M5_15_0   0
+#define M5_15_1   1
+#define M5_15_2   2
+#define M5_15_3   3
+
+#define M5_16_0   1
+#define M5_16_1   2
+#define M5_16_2   3
+#define M5_16_3   4
+
+#define M5_17_0   2
+#define M5_17_1   3
+#define M5_17_2   4
+#define M5_17_3   0
+
+#define M5_18_0   3
+#define M5_18_1   4
+#define M5_18_2   0
+#define M5_18_3   1
+#endif
+
+/*
+ * M9_ ## s ## _ ## i  evaluates to s+i mod 9 (0 <= s <= 18, 0 <= i <= 7).
+ */
+
+#define M9_0_0    0
+#define M9_0_1    1
+#define M9_0_2    2
+#define M9_0_3    3
+#define M9_0_4    4
+#define M9_0_5    5
+#define M9_0_6    6
+#define M9_0_7    7
+
+#define M9_1_0    1
+#define M9_1_1    2
+#define M9_1_2    3
+#define M9_1_3    4
+#define M9_1_4    5
+#define M9_1_5    6
+#define M9_1_6    7
+#define M9_1_7    8
+
+#define M9_2_0    2
+#define M9_2_1    3
+#define M9_2_2    4
+#define M9_2_3    5
+#define M9_2_4    6
+#define M9_2_5    7
+#define M9_2_6    8
+#define M9_2_7    0
+
+#define M9_3_0    3
+#define M9_3_1    4
+#define M9_3_2    5
+#define M9_3_3    6
+#define M9_3_4    7
+#define M9_3_5    8
+#define M9_3_6    0
+#define M9_3_7    1
+
+#define M9_4_0    4
+#define M9_4_1    5
+#define M9_4_2    6
+#define M9_4_3    7
+#define M9_4_4    8
+#define M9_4_5    0
+#define M9_4_6    1
+#define M9_4_7    2
+
+#define M9_5_0    5
+#define M9_5_1    6
+#define M9_5_2    7
+#define M9_5_3    8
+#define M9_5_4    0
+#define M9_5_5    1
+#define M9_5_6    2
+#define M9_5_7    3
+
+#define M9_6_0    6
+#define M9_6_1    7
+#define M9_6_2    8
+#define M9_6_3    0
+#define M9_6_4    1
+#define M9_6_5    2
+#define M9_6_6    3
+#define M9_6_7    4
+
+#define M9_7_0    7
+#define M9_7_1    8
+#define M9_7_2    0
+#define M9_7_3    1
+#define M9_7_4    2
+#define M9_7_5    3
+#define M9_7_6    4
+#define M9_7_7    5
+
+#define M9_8_0    8
+#define M9_8_1    0
+#define M9_8_2    1
+#define M9_8_3    2
+#define M9_8_4    3
+#define M9_8_5    4
+#define M9_8_6    5
+#define M9_8_7    6
+
+#define M9_9_0    0
+#define M9_9_1    1
+#define M9_9_2    2
+#define M9_9_3    3
+#define M9_9_4    4
+#define M9_9_5    5
+#define M9_9_6    6
+#define M9_9_7    7
+
+#define M9_10_0   1
+#define M9_10_1   2
+#define M9_10_2   3
+#define M9_10_3   4
+#define M9_10_4   5
+#define M9_10_5   6
+#define M9_10_6   7
+#define M9_10_7   8
+
+#define M9_11_0   2
+#define M9_11_1   3
+#define M9_11_2   4
+#define M9_11_3   5
+#define M9_11_4   6
+#define M9_11_5   7
+#define M9_11_6   8
+#define M9_11_7   0
+
+#define M9_12_0   3
+#define M9_12_1   4
+#define M9_12_2   5
+#define M9_12_3   6
+#define M9_12_4   7
+#define M9_12_5   8
+#define M9_12_6   0
+#define M9_12_7   1
+
+#define M9_13_0   4
+#define M9_13_1   5
+#define M9_13_2   6
+#define M9_13_3   7
+#define M9_13_4   8
+#define M9_13_5   0
+#define M9_13_6   1
+#define M9_13_7   2
+
+#define M9_14_0   5
+#define M9_14_1   6
+#define M9_14_2   7
+#define M9_14_3   8
+#define M9_14_4   0
+#define M9_14_5   1
+#define M9_14_6   2
+#define M9_14_7   3
+
+#define M9_15_0   6
+#define M9_15_1   7
+#define M9_15_2   8
+#define M9_15_3   0
+#define M9_15_4   1
+#define M9_15_5   2
+#define M9_15_6   3
+#define M9_15_7   4
+
+#define M9_16_0   7
+#define M9_16_1   8
+#define M9_16_2   0
+#define M9_16_3   1
+#define M9_16_4   2
+#define M9_16_5   3
+#define M9_16_6   4
+#define M9_16_7   5
+
+#define M9_17_0   8
+#define M9_17_1   0
+#define M9_17_2   1
+#define M9_17_3   2
+#define M9_17_4   3
+#define M9_17_5   4
+#define M9_17_6   5
+#define M9_17_7   6
+
+#define M9_18_0   0
+#define M9_18_1   1
+#define M9_18_2   2
+#define M9_18_3   3
+#define M9_18_4   4
+#define M9_18_5   5
+#define M9_18_6   6
+#define M9_18_7   7
+
+/*
+ * M3_ ## s ## _ ## i  evaluates to s+i mod 3 (0 <= s <= 18, 0 <= i <= 1).
+ */
+
+#define M3_0_0    0
+#define M3_0_1    1
+#define M3_1_0    1
+#define M3_1_1    2
+#define M3_2_0    2
+#define M3_2_1    0
+#define M3_3_0    0
+#define M3_3_1    1
+#define M3_4_0    1
+#define M3_4_1    2
+#define M3_5_0    2
+#define M3_5_1    0
+#define M3_6_0    0
+#define M3_6_1    1
+#define M3_7_0    1
+#define M3_7_1    2
+#define M3_8_0    2
+#define M3_8_1    0
+#define M3_9_0    0
+#define M3_9_1    1
+#define M3_10_0   1
+#define M3_10_1   2
+#define M3_11_0   2
+#define M3_11_1   0
+#define M3_12_0   0
+#define M3_12_1   1
+#define M3_13_0   1
+#define M3_13_1   2
+#define M3_14_0   2
+#define M3_14_1   0
+#define M3_15_0   0
+#define M3_15_1   1
+#define M3_16_0   1
+#define M3_16_1   2
+#define M3_17_0   2
+#define M3_17_1   0
+#define M3_18_0   0
+#define M3_18_1   1
+
+#define XCAT(x, y)     XCAT_(x, y)
+#define XCAT_(x, y)    x ## y
+
+#if 0
+/* obsolete */
+#define SKSI(k, s, i)   XCAT(k, XCAT(XCAT(XCAT(M5_, s), _), i))
+#define SKST(t, s, v)   XCAT(t, XCAT(XCAT(XCAT(M3_, s), _), v))
+#endif
+
+#define SKBI(k, s, i)   XCAT(k, XCAT(XCAT(XCAT(M9_, s), _), i))
+#define SKBT(t, s, v)   XCAT(t, XCAT(XCAT(XCAT(M3_, s), _), v))
+
+#if 0
+/* obsolete */
+#define TFSMALL_KINIT(k0, k1, k2, k3, k4, t0, t1, t2)   do { \
+		k4 = (k0 ^ k1) ^ (k2 ^ k3) ^ SPH_C64(0x1BD11BDAA9FC1A22); \
+		t2 = t0 ^ t1; \
+	} while (0)
+#endif
+
+#define TFBIG_KINIT(k0, k1, k2, k3, k4, k5, k6, k7, k8, t0, t1, t2)   do { \
+		k8 = ((k0 ^ k1) ^ (k2 ^ k3)) ^ ((k4 ^ k5) ^ (k6 ^ k7)) \
+			^ SPH_C64(0x1BD11BDAA9FC1A22); \
+		t2 = t0 ^ t1; \
+	} while (0)
+
+#if 0
+/* obsolete */
+#define TFSMALL_ADDKEY(w0, w1, w2, w3, k, t, s)   do { \
+		w0 = SPH_T64(w0 + SKSI(k, s, 0)); \
+		w1 = SPH_T64(w1 + SKSI(k, s, 1) + SKST(t, s, 0)); \
+		w2 = SPH_T64(w2 + SKSI(k, s, 2) + SKST(t, s, 1)); \
+		w3 = SPH_T64(w3 + SKSI(k, s, 3) + (sph_u64)s); \
+	} while (0)
+#endif
+
+#if SPH_SMALL_FOOTPRINT_SKEIN
+
+#define TFBIG_ADDKEY(s, tt0, tt1)   do { \
+		p0 = SPH_T64(p0 + h[s + 0]); \
+		p1 = SPH_T64(p1 + h[s + 1]); \
+		p2 = SPH_T64(p2 + h[s + 2]); \
+		p3 = SPH_T64(p3 + h[s + 3]); \
+		p4 = SPH_T64(p4 + h[s + 4]); \
+		p5 = SPH_T64(p5 + h[s + 5] + tt0); \
+		p6 = SPH_T64(p6 + h[s + 6] + tt1); \
+		p7 = SPH_T64(p7 + h[s + 7] + (sph_u64)s); \
+	} while (0)
+
+#else
+
+#define TFBIG_ADDKEY(w0, w1, w2, w3, w4, w5, w6, w7, k, t, s)   do { \
+		w0 = SPH_T64(w0 + SKBI(k, s, 0)); \
+		w1 = SPH_T64(w1 + SKBI(k, s, 1)); \
+		w2 = SPH_T64(w2 + SKBI(k, s, 2)); \
+		w3 = SPH_T64(w3 + SKBI(k, s, 3)); \
+		w4 = SPH_T64(w4 + SKBI(k, s, 4)); \
+		w5 = SPH_T64(w5 + SKBI(k, s, 5) + SKBT(t, s, 0)); \
+		w6 = SPH_T64(w6 + SKBI(k, s, 6) + SKBT(t, s, 1)); \
+		w7 = SPH_T64(w7 + SKBI(k, s, 7) + (sph_u64)s); \
+	} while (0)
+
+#endif
+
+#if 0
+/* obsolete */
+#define TFSMALL_MIX(x0, x1, rc)   do { \
+		x0 = SPH_T64(x0 + x1); \
+		x1 = SPH_ROTL64(x1, rc) ^ x0; \
+	} while (0)
+#endif
+
+#define TFBIG_MIX(x0, x1, rc)   do { \
+		x0 = SPH_T64(x0 + x1); \
+		x1 = SPH_ROTL64(x1, rc) ^ x0; \
+	} while (0)
+
+#if 0
+/* obsolete */
+#define TFSMALL_MIX4(w0, w1, w2, w3, rc0, rc1)  do { \
+		TFSMALL_MIX(w0, w1, rc0); \
+		TFSMALL_MIX(w2, w3, rc1); \
+	} while (0)
+#endif
+
+#define TFBIG_MIX8(w0, w1, w2, w3, w4, w5, w6, w7, rc0, rc1, rc2, rc3)  do { \
+		TFBIG_MIX(w0, w1, rc0); \
+		TFBIG_MIX(w2, w3, rc1); \
+		TFBIG_MIX(w4, w5, rc2); \
+		TFBIG_MIX(w6, w7, rc3); \
+	} while (0)
+
+#if 0
+/* obsolete */
+#define TFSMALL_4e(s)   do { \
+		TFSMALL_ADDKEY(p0, p1, p2, p3, h, t, s); \
+		TFSMALL_MIX4(p0, p1, p2, p3, 14, 16); \
+		TFSMALL_MIX4(p0, p3, p2, p1, 52, 57); \
+		TFSMALL_MIX4(p0, p1, p2, p3, 23, 40); \
+		TFSMALL_MIX4(p0, p3, p2, p1,  5, 37); \
+	} while (0)
+
+#define TFSMALL_4o(s)   do { \
+		TFSMALL_ADDKEY(p0, p1, p2, p3, h, t, s); \
+		TFSMALL_MIX4(p0, p1, p2, p3, 25, 33); \
+		TFSMALL_MIX4(p0, p3, p2, p1, 46, 12); \
+		TFSMALL_MIX4(p0, p1, p2, p3, 58, 22); \
+		TFSMALL_MIX4(p0, p3, p2, p1, 32, 32); \
+	} while (0)
+#endif
+
+#if SPH_SMALL_FOOTPRINT_SKEIN
+
+#define TFBIG_4e(s)   do { \
+		TFBIG_ADDKEY(s, t0, t1); \
+		TFBIG_MIX8(p0, p1, p2, p3, p4, p5, p6, p7, 46, 36, 19, 37); \
+		TFBIG_MIX8(p2, p1, p4, p7, p6, p5, p0, p3, 33, 27, 14, 42); \
+		TFBIG_MIX8(p4, p1, p6, p3, p0, p5, p2, p7, 17, 49, 36, 39); \
+		TFBIG_MIX8(p6, p1, p0, p7, p2, p5, p4, p3, 44,  9, 54, 56); \
+	} while (0)
+
+#define TFBIG_4o(s)   do { \
+		TFBIG_ADDKEY(s, t1, t2); \
+		TFBIG_MIX8(p0, p1, p2, p3, p4, p5, p6, p7, 39, 30, 34, 24); \
+		TFBIG_MIX8(p2, p1, p4, p7, p6, p5, p0, p3, 13, 50, 10, 17); \
+		TFBIG_MIX8(p4, p1, p6, p3, p0, p5, p2, p7, 25, 29, 39, 43); \
+		TFBIG_MIX8(p6, p1, p0, p7, p2, p5, p4, p3,  8, 35, 56, 22); \
+	} while (0)
+
+#else
+
+#define TFBIG_4e(s)   do { \
+		TFBIG_ADDKEY(p0, p1, p2, p3, p4, p5, p6, p7, h, t, s); \
+		TFBIG_MIX8(p0, p1, p2, p3, p4, p5, p6, p7, 46, 36, 19, 37); \
+		TFBIG_MIX8(p2, p1, p4, p7, p6, p5, p0, p3, 33, 27, 14, 42); \
+		TFBIG_MIX8(p4, p1, p6, p3, p0, p5, p2, p7, 17, 49, 36, 39); \
+		TFBIG_MIX8(p6, p1, p0, p7, p2, p5, p4, p3, 44,  9, 54, 56); \
+	} while (0)
+
+#define TFBIG_4o(s)   do { \
+		TFBIG_ADDKEY(p0, p1, p2, p3, p4, p5, p6, p7, h, t, s); \
+		TFBIG_MIX8(p0, p1, p2, p3, p4, p5, p6, p7, 39, 30, 34, 24); \
+		TFBIG_MIX8(p2, p1, p4, p7, p6, p5, p0, p3, 13, 50, 10, 17); \
+		TFBIG_MIX8(p4, p1, p6, p3, p0, p5, p2, p7, 25, 29, 39, 43); \
+		TFBIG_MIX8(p6, p1, p0, p7, p2, p5, p4, p3,  8, 35, 56, 22); \
+	} while (0)
+
+#endif
+
+#if 0
+/* obsolete */
+#define UBI_SMALL(etype, extra)  do { \
+		sph_u64 h4, t0, t1, t2; \
+		sph_u64 m0 = sph_dec64le(buf +  0); \
+		sph_u64 m1 = sph_dec64le(buf +  8); \
+		sph_u64 m2 = sph_dec64le(buf + 16); \
+		sph_u64 m3 = sph_dec64le(buf + 24); \
+		sph_u64 p0 = m0; \
+		sph_u64 p1 = m1; \
+		sph_u64 p2 = m2; \
+		sph_u64 p3 = m3; \
+		t0 = SPH_T64(bcount << 5) + (sph_u64)(extra); \
+		t1 = (bcount >> 59) + ((sph_u64)(etype) << 55); \
+		TFSMALL_KINIT(h0, h1, h2, h3, h4, t0, t1, t2); \
+		TFSMALL_4e(0); \
+		TFSMALL_4o(1); \
+		TFSMALL_4e(2); \
+		TFSMALL_4o(3); \
+		TFSMALL_4e(4); \
+		TFSMALL_4o(5); \
+		TFSMALL_4e(6); \
+		TFSMALL_4o(7); \
+		TFSMALL_4e(8); \
+		TFSMALL_4o(9); \
+		TFSMALL_4e(10); \
+		TFSMALL_4o(11); \
+		TFSMALL_4e(12); \
+		TFSMALL_4o(13); \
+		TFSMALL_4e(14); \
+		TFSMALL_4o(15); \
+		TFSMALL_4e(16); \
+		TFSMALL_4o(17); \
+		TFSMALL_ADDKEY(p0, p1, p2, p3, h, t, 18); \
+		h0 = m0 ^ p0; \
+		h1 = m1 ^ p1; \
+		h2 = m2 ^ p2; \
+		h3 = m3 ^ p3; \
+	} while (0)
+#endif
+
+#if SPH_SMALL_FOOTPRINT_SKEIN
+
+#define UBI_BIG(etype, extra)  do { \
+		sph_u64 t0, t1, t2; \
+		unsigned u; \
+		sph_u64 m0 = sph_dec64le_aligned(buf +  0); \
+		sph_u64 m1 = sph_dec64le_aligned(buf +  8); \
+		sph_u64 m2 = sph_dec64le_aligned(buf + 16); \
+		sph_u64 m3 = sph_dec64le_aligned(buf + 24); \
+		sph_u64 m4 = sph_dec64le_aligned(buf + 32); \
+		sph_u64 m5 = sph_dec64le_aligned(buf + 40); \
+		sph_u64 m6 = sph_dec64le_aligned(buf + 48); \
+		sph_u64 m7 = sph_dec64le_aligned(buf + 56); \
+		sph_u64 p0 = m0; \
+		sph_u64 p1 = m1; \
+		sph_u64 p2 = m2; \
+		sph_u64 p3 = m3; \
+		sph_u64 p4 = m4; \
+		sph_u64 p5 = m5; \
+		sph_u64 p6 = m6; \
+		sph_u64 p7 = m7; \
+		t0 = SPH_T64(bcount << 6) + (sph_u64)(extra); \
+		t1 = (bcount >> 58) + ((sph_u64)(etype) << 55); \
+		TFBIG_KINIT(h[0], h[1], h[2], h[3], h[4], h[5], \
+			h[6], h[7], h[8], t0, t1, t2); \
+		for (u = 0; u <= 15; u += 3) { \
+			h[u +  9] = h[u + 0]; \
+			h[u + 10] = h[u + 1]; \
+			h[u + 11] = h[u + 2]; \
+		} \
+		for (u = 0; u < 9; u ++) { \
+			sph_u64 s = u << 1; \
+			sph_u64 tmp; \
+			TFBIG_4e(s); \
+			TFBIG_4o(s + 1); \
+			tmp = t2; \
+			t2 = t1; \
+			t1 = t0; \
+			t0 = tmp; \
+		} \
+		TFBIG_ADDKEY(18, t0, t1); \
+		h[0] = m0 ^ p0; \
+		h[1] = m1 ^ p1; \
+		h[2] = m2 ^ p2; \
+		h[3] = m3 ^ p3; \
+		h[4] = m4 ^ p4; \
+		h[5] = m5 ^ p5; \
+		h[6] = m6 ^ p6; \
+		h[7] = m7 ^ p7; \
+	} while (0)
+
+#else
+
+#define UBI_BIG(etype, extra)  do { \
+		sph_u64 h8, t0, t1, t2; \
+		sph_u64 m0 = sph_dec64le_aligned(buf +  0); \
+		sph_u64 m1 = sph_dec64le_aligned(buf +  8); \
+		sph_u64 m2 = sph_dec64le_aligned(buf + 16); \
+		sph_u64 m3 = sph_dec64le_aligned(buf + 24); \
+		sph_u64 m4 = sph_dec64le_aligned(buf + 32); \
+		sph_u64 m5 = sph_dec64le_aligned(buf + 40); \
+		sph_u64 m6 = sph_dec64le_aligned(buf + 48); \
+		sph_u64 m7 = sph_dec64le_aligned(buf + 56); \
+		sph_u64 p0 = m0; \
+		sph_u64 p1 = m1; \
+		sph_u64 p2 = m2; \
+		sph_u64 p3 = m3; \
+		sph_u64 p4 = m4; \
+		sph_u64 p5 = m5; \
+		sph_u64 p6 = m6; \
+		sph_u64 p7 = m7; \
+		t0 = SPH_T64(bcount << 6) + (sph_u64)(extra); \
+		t1 = (bcount >> 58) + ((sph_u64)(etype) << 55); \
+		TFBIG_KINIT(h0, h1, h2, h3, h4, h5, h6, h7, h8, t0, t1, t2); \
+		TFBIG_4e(0); \
+		TFBIG_4o(1); \
+		TFBIG_4e(2); \
+		TFBIG_4o(3); \
+		TFBIG_4e(4); \
+		TFBIG_4o(5); \
+		TFBIG_4e(6); \
+		TFBIG_4o(7); \
+		TFBIG_4e(8); \
+		TFBIG_4o(9); \
+		TFBIG_4e(10); \
+		TFBIG_4o(11); \
+		TFBIG_4e(12); \
+		TFBIG_4o(13); \
+		TFBIG_4e(14); \
+		TFBIG_4o(15); \
+		TFBIG_4e(16); \
+		TFBIG_4o(17); \
+		TFBIG_ADDKEY(p0, p1, p2, p3, p4, p5, p6, p7, h, t, 18); \
+		h0 = m0 ^ p0; \
+		h1 = m1 ^ p1; \
+		h2 = m2 ^ p2; \
+		h3 = m3 ^ p3; \
+		h4 = m4 ^ p4; \
+		h5 = m5 ^ p5; \
+		h6 = m6 ^ p6; \
+		h7 = m7 ^ p7; \
+	} while (0)
+
+#endif
+
+#if 0
+/* obsolete */
+#define DECL_STATE_SMALL \
+	sph_u64 h0, h1, h2, h3; \
+	sph_u64 bcount;
+
+#define READ_STATE_SMALL(sc)   do { \
+		h0 = (sc)->h0; \
+		h1 = (sc)->h1; \
+		h2 = (sc)->h2; \
+		h3 = (sc)->h3; \
+		bcount = sc->bcount; \
+	} while (0)
+
+#define WRITE_STATE_SMALL(sc)   do { \
+		(sc)->h0 = h0; \
+		(sc)->h1 = h1; \
+		(sc)->h2 = h2; \
+		(sc)->h3 = h3; \
+		sc->bcount = bcount; \
+	} while (0)
+#endif
+
+#if SPH_SMALL_FOOTPRINT_SKEIN
+
+#define DECL_STATE_BIG \
+	sph_u64 h[27]; \
+	sph_u64 bcount;
+
+#define READ_STATE_BIG(sc)   do { \
+		h[0] = (sc)->h0; \
+		h[1] = (sc)->h1; \
+		h[2] = (sc)->h2; \
+		h[3] = (sc)->h3; \
+		h[4] = (sc)->h4; \
+		h[5] = (sc)->h5; \
+		h[6] = (sc)->h6; \
+		h[7] = (sc)->h7; \
+		bcount = sc->bcount; \
+	} while (0)
+
+#define WRITE_STATE_BIG(sc)   do { \
+		(sc)->h0 = h[0]; \
+		(sc)->h1 = h[1]; \
+		(sc)->h2 = h[2]; \
+		(sc)->h3 = h[3]; \
+		(sc)->h4 = h[4]; \
+		(sc)->h5 = h[5]; \
+		(sc)->h6 = h[6]; \
+		(sc)->h7 = h[7]; \
+		sc->bcount = bcount; \
+	} while (0)
+
+#else
+
+#define DECL_STATE_BIG \
+	sph_u64 h0, h1, h2, h3, h4, h5, h6, h7; \
+	sph_u64 bcount;
+
+#define READ_STATE_BIG(sc)   do { \
+		h0 = (sc)->h0; \
+		h1 = (sc)->h1; \
+		h2 = (sc)->h2; \
+		h3 = (sc)->h3; \
+		h4 = (sc)->h4; \
+		h5 = (sc)->h5; \
+		h6 = (sc)->h6; \
+		h7 = (sc)->h7; \
+		bcount = sc->bcount; \
+	} while (0)
+
+#define WRITE_STATE_BIG(sc)   do { \
+		(sc)->h0 = h0; \
+		(sc)->h1 = h1; \
+		(sc)->h2 = h2; \
+		(sc)->h3 = h3; \
+		(sc)->h4 = h4; \
+		(sc)->h5 = h5; \
+		(sc)->h6 = h6; \
+		(sc)->h7 = h7; \
+		sc->bcount = bcount; \
+	} while (0)
+
+#endif
+
+#if 0
+/* obsolete */
+static void
+skein_small_init(sph_skein_small_context *sc, const sph_u64 *iv)
+{
+	sc->h0 = iv[0];
+	sc->h1 = iv[1];
+	sc->h2 = iv[2];
+	sc->h3 = iv[3];
+	sc->bcount = 0;
+	sc->ptr = 0;
+}
+#endif
+
+static void
+skein_big_init(sph_skein_big_context *sc, const sph_u64 *iv)
+{
+	sc->h0 = iv[0];
+	sc->h1 = iv[1];
+	sc->h2 = iv[2];
+	sc->h3 = iv[3];
+	sc->h4 = iv[4];
+	sc->h5 = iv[5];
+	sc->h6 = iv[6];
+	sc->h7 = iv[7];
+	sc->bcount = 0;
+	sc->ptr = 0;
+}
+
+#if 0
+/* obsolete */
+static void
+skein_small_core(sph_skein_small_context *sc, const void *data, size_t len)
+{
+	unsigned char *buf;
+	size_t ptr, clen;
+	unsigned first;
+	DECL_STATE_SMALL
+
+	buf = sc->buf;
+	ptr = sc->ptr;
+	clen = (sizeof sc->buf) - ptr;
+	if (len <= clen) {
+		memcpy(buf + ptr, data, len);
+		sc->ptr = ptr + len;
+		return;
+	}
+	if (clen != 0) {
+		memcpy(buf + ptr, data, clen);
+		data = (const unsigned char *)data + clen;
+		len -= clen;
+	}
+
+#if SPH_SMALL_FOOTPRINT_SKEIN
+
+	READ_STATE_SMALL(sc);
+	first = (bcount == 0) << 7;
+	for (;;) {
+		bcount ++;
+		UBI_SMALL(96 + first, 0);
+		if (len <= sizeof sc->buf)
+			break;
+		first = 0;
+		memcpy(buf, data, sizeof sc->buf);
+		data = (const unsigned char *)data + sizeof sc->buf;
+		len -= sizeof sc->buf;
+	}
+	WRITE_STATE_SMALL(sc);
+	sc->ptr = len;
+	memcpy(buf, data, len);
+
+#else
+
+	/*
+	 * Unrolling the loop yields a slight performance boost, while
+	 * keeping the code size aorund 24 kB on 32-bit x86.
+	 */
+	READ_STATE_SMALL(sc);
+	first = (bcount == 0) << 7;
+	for (;;) {
+		bcount ++;
+		UBI_SMALL(96 + first, 0);
+		if (len <= sizeof sc->buf)
+			break;
+		buf = (unsigned char *)data;
+		bcount ++;
+		UBI_SMALL(96, 0);
+		if (len <= 2 * sizeof sc->buf) {
+			data = buf + sizeof sc->buf;
+			len -= sizeof sc->buf;
+			break;
+		}
+		buf += sizeof sc->buf;
+		data = buf + sizeof sc->buf;
+		first = 0;
+		len -= 2 * sizeof sc->buf;
+	}
+	WRITE_STATE_SMALL(sc);
+	sc->ptr = len;
+	memcpy(sc->buf, data, len);
+
+#endif
+}
+#endif
+
+static void
+skein_big_core(sph_skein_big_context *sc, const void *data, size_t len)
+{
+	/*
+	 * The Skein "final bit" in the tweak is troublesome here,
+	 * because if the input has a length which is a multiple of the
+	 * block size (512 bits) then that bit must be set for the
+	 * final block, which is full of message bits (padding in
+	 * Skein can be reduced to no extra bit at all). However, this
+	 * function cannot know whether it processes the last chunks of
+	 * the message or not. Hence we may keep a full block of buffered
+	 * data (64 bytes).
+	 */
+	unsigned char *buf;
+	size_t ptr;
+	unsigned first;
+	DECL_STATE_BIG
+
+	buf = sc->buf;
+	ptr = sc->ptr;
+	if (len <= (sizeof sc->buf) - ptr) {
+		memcpy(buf + ptr, data, len);
+		ptr += len;
+		sc->ptr = ptr;
+		return;
+	}
+
+	READ_STATE_BIG(sc);
+	first = (bcount == 0) << 7;
+	do {
+		size_t clen;
+
+		if (ptr == sizeof sc->buf) {
+			bcount ++;
+			UBI_BIG(96 + first, 0);
+			first = 0;
+			ptr = 0;
+		}
+		clen = (sizeof sc->buf) - ptr;
+		if (clen > len)
+			clen = len;
+		memcpy(buf + ptr, data, clen);
+		ptr += clen;
+		data = (const unsigned char *)data + clen;
+		len -= clen;
+	} while (len > 0);
+	WRITE_STATE_BIG(sc);
+	sc->ptr = ptr;
+}
+
+#if 0
+/* obsolete */
+static void
+skein_small_close(sph_skein_small_context *sc, unsigned ub, unsigned n,
+	void *dst, size_t out_len)
+{
+	unsigned char *buf;
+	size_t ptr;
+	unsigned et;
+	int i;
+	DECL_STATE_SMALL
+
+	if (n != 0) {
+		unsigned z;
+		unsigned char x;
+
+		z = 0x80 >> n;
+		x = ((ub & -z) | z) & 0xFF;
+		skein_small_core(sc, &x, 1);
+	}
+
+	buf = sc->buf;
+	ptr = sc->ptr;
+	READ_STATE_SMALL(sc);
+	memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
+	et = 352 + ((bcount == 0) << 7) + (n != 0);
+	for (i = 0; i < 2; i ++) {
+		UBI_SMALL(et, ptr);
+		if (i == 0) {
+			memset(buf, 0, sizeof sc->buf);
+			bcount = 0;
+			et = 510;
+			ptr = 8;
+		}
+	}
+
+	sph_enc64le_aligned(buf +  0, h0);
+	sph_enc64le_aligned(buf +  8, h1);
+	sph_enc64le_aligned(buf + 16, h2);
+	sph_enc64le_aligned(buf + 24, h3);
+	memcpy(dst, buf, out_len);
+}
+#endif
+
+static void
+skein_big_close(sph_skein_big_context *sc, unsigned ub, unsigned n,
+	void *dst, size_t out_len)
+{
+	unsigned char *buf;
+	size_t ptr;
+	unsigned et;
+	int i;
+#if SPH_SMALL_FOOTPRINT_SKEIN
+	size_t u;
+#endif
+	DECL_STATE_BIG
+
+	/*
+	 * Add bit padding if necessary.
+	 */
+	if (n != 0) {
+		unsigned z;
+		unsigned char x;
+
+		z = 0x80 >> n;
+		x = ((ub & -z) | z) & 0xFF;
+		skein_big_core(sc, &x, 1);
+	}
+
+	buf = sc->buf;
+	ptr = sc->ptr;
+
+	/*
+	 * At that point, if ptr == 0, then the message was empty;
+	 * otherwise, there is between 1 and 64 bytes (inclusive) which
+	 * are yet to be processed. Either way, we complete the buffer
+	 * to a full block with zeros (the Skein specification mandates
+	 * that an empty message is padded so that there is at least
+	 * one block to process).
+	 *
+	 * Once this block has been processed, we do it again, with
+	 * a block full of zeros, for the output (that block contains
+	 * the encoding of "0", over 8 bytes, then padded with zeros).
+	 */
+	READ_STATE_BIG(sc);
+	memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
+	et = 352 + ((bcount == 0) << 7) + (n != 0);
+	for (i = 0; i < 2; i ++) {
+		UBI_BIG(et, ptr);
+		if (i == 0) {
+			memset(buf, 0, sizeof sc->buf);
+			bcount = 0;
+			et = 510;
+			ptr = 8;
+		}
+	}
+
+#if SPH_SMALL_FOOTPRINT_SKEIN
+
+	/*
+	 * We use a temporary buffer because we must support the case
+	 * where output size is not a multiple of 64 (namely, a 224-bit
+	 * output).
+	 */
+	for (u = 0; u < out_len; u += 8)
+		sph_enc64le_aligned(buf + u, h[u >> 3]);
+	memcpy(dst, buf, out_len);
+
+#else
+
+	sph_enc64le_aligned(buf +  0, h0);
+	sph_enc64le_aligned(buf +  8, h1);
+	sph_enc64le_aligned(buf + 16, h2);
+	sph_enc64le_aligned(buf + 24, h3);
+	sph_enc64le_aligned(buf + 32, h4);
+	sph_enc64le_aligned(buf + 40, h5);
+	sph_enc64le_aligned(buf + 48, h6);
+	sph_enc64le_aligned(buf + 56, h7);
+	memcpy(dst, buf, out_len);
+
+#endif
+}
+
+#if 0
+/* obsolete */
+static const sph_u64 IV224[] = {
+	SPH_C64(0xC6098A8C9AE5EA0B), SPH_C64(0x876D568608C5191C),
+	SPH_C64(0x99CB88D7D7F53884), SPH_C64(0x384BDDB1AEDDB5DE)
+};
+
+static const sph_u64 IV256[] = {
+	SPH_C64(0xFC9DA860D048B449), SPH_C64(0x2FCA66479FA7D833),
+	SPH_C64(0xB33BC3896656840F), SPH_C64(0x6A54E920FDE8DA69)
+};
+#endif
+
+static const sph_u64 IV224[] = {
+	SPH_C64(0xCCD0616248677224), SPH_C64(0xCBA65CF3A92339EF),
+	SPH_C64(0x8CCD69D652FF4B64), SPH_C64(0x398AED7B3AB890B4),
+	SPH_C64(0x0F59D1B1457D2BD0), SPH_C64(0x6776FE6575D4EB3D),
+	SPH_C64(0x99FBC70E997413E9), SPH_C64(0x9E2CFCCFE1C41EF7)
+};
+
+static const sph_u64 IV256[] = {
+	SPH_C64(0xCCD044A12FDB3E13), SPH_C64(0xE83590301A79A9EB),
+	SPH_C64(0x55AEA0614F816E6F), SPH_C64(0x2A2767A4AE9B94DB),
+	SPH_C64(0xEC06025E74DD7683), SPH_C64(0xE7A436CDC4746251),
+	SPH_C64(0xC36FBAF9393AD185), SPH_C64(0x3EEDBA1833EDFC13)
+};
+
+static const sph_u64 IV384[] = {
+	SPH_C64(0xA3F6C6BF3A75EF5F), SPH_C64(0xB0FEF9CCFD84FAA4),
+	SPH_C64(0x9D77DD663D770CFE), SPH_C64(0xD798CBF3B468FDDA),
+	SPH_C64(0x1BC4A6668A0E4465), SPH_C64(0x7ED7D434E5807407),
+	SPH_C64(0x548FC1ACD4EC44D6), SPH_C64(0x266E17546AA18FF8)
+};
+
+static const sph_u64 IV512[] = {
+	SPH_C64(0x4903ADFF749C51CE), SPH_C64(0x0D95DE399746DF03),
+	SPH_C64(0x8FD1934127C79BCE), SPH_C64(0x9A255629FF352CB1),
+	SPH_C64(0x5DB62599DF6CA7B0), SPH_C64(0xEABE394CA9D5C3F4),
+	SPH_C64(0x991112C71A75B523), SPH_C64(0xAE18A40B660FCC33)
+};
+
+#if 0
+/* obsolete */
+/* see sph_skein.h */
+void
+sph_skein224_init(void *cc)
+{
+	skein_small_init(cc, IV224);
+}
+
+/* see sph_skein.h */
+void
+sph_skein224(void *cc, const void *data, size_t len)
+{
+	skein_small_core(cc, data, len);
+}
+
+/* see sph_skein.h */
+void
+sph_skein224_close(void *cc, void *dst)
+{
+	sph_skein224_addbits_and_close(cc, 0, 0, dst);
+}
+
+/* see sph_skein.h */
+void
+sph_skein224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	skein_small_close(cc, ub, n, dst, 28);
+	sph_skein224_init(cc);
+}
+
+/* see sph_skein.h */
+void
+sph_skein256_init(void *cc)
+{
+	skein_small_init(cc, IV256);
+}
+
+/* see sph_skein.h */
+void
+sph_skein256(void *cc, const void *data, size_t len)
+{
+	skein_small_core(cc, data, len);
+}
+
+/* see sph_skein.h */
+void
+sph_skein256_close(void *cc, void *dst)
+{
+	sph_skein256_addbits_and_close(cc, 0, 0, dst);
+}
+
+/* see sph_skein.h */
+void
+sph_skein256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	skein_small_close(cc, ub, n, dst, 32);
+	sph_skein256_init(cc);
+}
+#endif
+
+/* see sph_skein.h */
+void
+sph_skein224_init(void *cc)
+{
+	skein_big_init(cc, IV224);
+}
+
+/* see sph_skein.h */
+void
+sph_skein224(void *cc, const void *data, size_t len)
+{
+	skein_big_core(cc, data, len);
+}
+
+/* see sph_skein.h */
+void
+sph_skein224_close(void *cc, void *dst)
+{
+	sph_skein224_addbits_and_close(cc, 0, 0, dst);
+}
+
+/* see sph_skein.h */
+void
+sph_skein224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	skein_big_close(cc, ub, n, dst, 28);
+	sph_skein224_init(cc);
+}
+
+/* see sph_skein.h */
+void
+sph_skein256_init(void *cc)
+{
+	skein_big_init(cc, IV256);
+}
+
+/* see sph_skein.h */
+void
+sph_skein256(void *cc, const void *data, size_t len)
+{
+	skein_big_core(cc, data, len);
+}
+
+/* see sph_skein.h */
+void
+sph_skein256_close(void *cc, void *dst)
+{
+	sph_skein256_addbits_and_close(cc, 0, 0, dst);
+}
+
+/* see sph_skein.h */
+void
+sph_skein256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	skein_big_close(cc, ub, n, dst, 32);
+	sph_skein256_init(cc);
+}
+
+/* see sph_skein.h */
+void
+sph_skein384_init(void *cc)
+{
+	skein_big_init(cc, IV384);
+}
+
+/* see sph_skein.h */
+void
+sph_skein384(void *cc, const void *data, size_t len)
+{
+	skein_big_core(cc, data, len);
+}
+
+/* see sph_skein.h */
+void
+sph_skein384_close(void *cc, void *dst)
+{
+	sph_skein384_addbits_and_close(cc, 0, 0, dst);
+}
+
+/* see sph_skein.h */
+void
+sph_skein384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	skein_big_close(cc, ub, n, dst, 48);
+	sph_skein384_init(cc);
+}
+
+/* see sph_skein.h */
+void
+sph_skein512_init(void *cc)
+{
+	skein_big_init(cc, IV512);
+}
+
+/* see sph_skein.h */
+void
+sph_skein512(void *cc, const void *data, size_t len)
+{
+	skein_big_core(cc, data, len);
+}
+
+/* see sph_skein.h */
+void
+sph_skein512_close(void *cc, void *dst)
+{
+	sph_skein512_addbits_and_close(cc, 0, 0, dst);
+}
+
+/* see sph_skein.h */
+void
+sph_skein512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
+{
+	skein_big_close(cc, ub, n, dst, 64);
+	sph_skein512_init(cc);
+}
+
+#endif
+
+
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/sha3/sph_skein.h b/sha3/sph_skein.h
new file mode 100644
index 0000000..181e6fd
--- /dev/null
+++ b/sha3/sph_skein.h
@@ -0,0 +1,298 @@
+/* $Id: sph_skein.h 253 2011-06-07 18:33:10Z tp $ */
+/**
+ * Skein interface. The Skein specification defines three main
+ * functions, called Skein-256, Skein-512 and Skein-1024, which can be
+ * further parameterized with an output length. For the SHA-3
+ * competition, Skein-512 is used for output sizes of 224, 256, 384 and
+ * 512 bits; this is what this code implements. Thus, we hereafter call
+ * Skein-224, Skein-256, Skein-384 and Skein-512 what the Skein
+ * specification defines as Skein-512-224, Skein-512-256, Skein-512-384
+ * and Skein-512-512, respectively.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file     sph_skein.h
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#ifndef SPH_SKEIN_H__
+#define SPH_SKEIN_H__
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#include <stddef.h>
+#include "sph_types.h"
+
+#if SPH_64
+
+/**
+ * Output size (in bits) for Skein-224.
+ */
+#define SPH_SIZE_skein224   224
+
+/**
+ * Output size (in bits) for Skein-256.
+ */
+#define SPH_SIZE_skein256   256
+
+/**
+ * Output size (in bits) for Skein-384.
+ */
+#define SPH_SIZE_skein384   384
+
+/**
+ * Output size (in bits) for Skein-512.
+ */
+#define SPH_SIZE_skein512   512
+
+/**
+ * This structure is a context for Skein computations (with a 384- or
+ * 512-bit output): it contains the intermediate values and some data
+ * from the last entered block. Once a Skein computation has been
+ * performed, the context can be reused for another computation.
+ *
+ * The contents of this structure are private. A running Skein computation
+ * can be cloned by copying the context (e.g. with a simple
+ * <code>memcpy()</code>).
+ */
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+	unsigned char buf[64];    /* first field, for alignment */
+	size_t ptr;
+	sph_u64 h0, h1, h2, h3, h4, h5, h6, h7;
+	sph_u64 bcount;
+#endif
+} sph_skein_big_context;
+
+/**
+ * Type for a Skein-224 context (identical to the common "big" context).
+ */
+typedef sph_skein_big_context sph_skein224_context;
+
+/**
+ * Type for a Skein-256 context (identical to the common "big" context).
+ */
+typedef sph_skein_big_context sph_skein256_context;
+
+/**
+ * Type for a Skein-384 context (identical to the common "big" context).
+ */
+typedef sph_skein_big_context sph_skein384_context;
+
+/**
+ * Type for a Skein-512 context (identical to the common "big" context).
+ */
+typedef sph_skein_big_context sph_skein512_context;
+
+/**
+ * Initialize a Skein-224 context. This process performs no memory allocation.
+ *
+ * @param cc   the Skein-224 context (pointer to a
+ *             <code>sph_skein224_context</code>)
+ */
+void sph_skein224_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Skein-224 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_skein224(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Skein-224 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accomodate the result (28 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Skein-224 context
+ * @param dst   the destination buffer
+ */
+void sph_skein224_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accomodate the result (28 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Skein-224 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_skein224_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a Skein-256 context. This process performs no memory allocation.
+ *
+ * @param cc   the Skein-256 context (pointer to a
+ *             <code>sph_skein256_context</code>)
+ */
+void sph_skein256_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Skein-256 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_skein256(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Skein-256 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accomodate the result (32 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Skein-256 context
+ * @param dst   the destination buffer
+ */
+void sph_skein256_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accomodate the result (32 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Skein-256 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_skein256_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a Skein-384 context. This process performs no memory allocation.
+ *
+ * @param cc   the Skein-384 context (pointer to a
+ *             <code>sph_skein384_context</code>)
+ */
+void sph_skein384_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Skein-384 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_skein384(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Skein-384 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accomodate the result (48 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Skein-384 context
+ * @param dst   the destination buffer
+ */
+void sph_skein384_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accomodate the result (48 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Skein-384 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_skein384_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+/**
+ * Initialize a Skein-512 context. This process performs no memory allocation.
+ *
+ * @param cc   the Skein-512 context (pointer to a
+ *             <code>sph_skein512_context</code>)
+ */
+void sph_skein512_init(void *cc);
+
+/**
+ * Process some data bytes. It is acceptable that <code>len</code> is zero
+ * (in which case this function does nothing).
+ *
+ * @param cc     the Skein-512 context
+ * @param data   the input data
+ * @param len    the input data length (in bytes)
+ */
+void sph_skein512(void *cc, const void *data, size_t len);
+
+/**
+ * Terminate the current Skein-512 computation and output the result into
+ * the provided buffer. The destination buffer must be wide enough to
+ * accomodate the result (64 bytes). The context is automatically
+ * reinitialized.
+ *
+ * @param cc    the Skein-512 context
+ * @param dst   the destination buffer
+ */
+void sph_skein512_close(void *cc, void *dst);
+
+/**
+ * Add a few additional bits (0 to 7) to the current computation, then
+ * terminate it and output the result in the provided buffer, which must
+ * be wide enough to accomodate the result (64 bytes). If bit number i
+ * in <code>ub</code> has value 2^i, then the extra bits are those
+ * numbered 7 downto 8-n (this is the big-endian convention at the byte
+ * level). The context is automatically reinitialized.
+ *
+ * @param cc    the Skein-512 context
+ * @param ub    the extra bits
+ * @param n     the number of extra bits (0 to 7)
+ * @param dst   the destination buffer
+ */
+void sph_skein512_addbits_and_close(
+	void *cc, unsigned ub, unsigned n, void *dst);
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
\ No newline at end of file
diff --git a/sha3/sph_types.h b/sha3/sph_types.h
new file mode 100644
index 0000000..ee0e474
--- /dev/null
+++ b/sha3/sph_types.h
@@ -0,0 +1,1986 @@
+/* $Id: sph_types.h 260 2011-07-21 01:02:38Z tp $ */
+/**
+ * Basic type definitions.
+ *
+ * This header file defines the generic integer types that will be used
+ * for the implementation of hash functions; it also contains helper
+ * functions which encode and decode multi-byte integer values, using
+ * either little-endian or big-endian conventions.
+ *
+ * This file contains a compile-time test on the size of a byte
+ * (the <code>unsigned char</code> C type). If bytes are not octets,
+ * i.e. if they do not have a size of exactly 8 bits, then compilation
+ * is aborted. Architectures where bytes are not octets are relatively
+ * rare, even in the embedded devices market. We forbid non-octet bytes
+ * because there is no clear convention on how octet streams are encoded
+ * on such systems.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @file     sph_types.h
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#ifndef SPH_TYPES_H__
+#define SPH_TYPES_H__
+
+#include <limits.h>
+
+/*
+ * All our I/O functions are defined over octet streams. We do not know
+ * how to handle input data if bytes are not octets.
+ */
+#if CHAR_BIT != 8
+#error This code requires 8-bit bytes
+#endif
+
+/* ============= BEGIN documentation block for Doxygen ============ */
+
+#ifdef DOXYGEN_IGNORE
+
+/** @mainpage sphlib C code documentation
+ *
+ * @section overview Overview
+ *
+ * <code>sphlib</code> is a library which contains implementations of
+ * various cryptographic hash functions. These pages have been generated
+ * with <a href="http://www.doxygen.org/index.html">doxygen</a> and
+ * document the API for the C implementations.
+ *
+ * The API is described in appropriate header files, which are available
+ * in the "Files" section. Each hash function family has its own header,
+ * whose name begins with <code>"sph_"</code> and contains the family
+ * name. For instance, the API for the RIPEMD hash functions is available
+ * in the header file <code>sph_ripemd.h</code>.
+ *
+ * @section principles API structure and conventions
+ *
+ * @subsection io Input/output conventions
+ *
+ * In all generality, hash functions operate over strings of bits.
+ * Individual bits are rarely encountered in C programming or actual
+ * communication protocols; most protocols converge on the ubiquitous
+ * "octet" which is a group of eight bits. Data is thus expressed as a
+ * stream of octets. The C programming language contains the notion of a
+ * "byte", which is a data unit managed under the type <code>"unsigned
+ * char"</code>. The C standard prescribes that a byte should hold at
+ * least eight bits, but possibly more. Most modern architectures, even
+ * in the embedded world, feature eight-bit bytes, i.e. map bytes to
+ * octets.
+ *
+ * Nevertheless, for some of the implemented hash functions, an extra
+ * API has been added, which allows the input of arbitrary sequences of
+ * bits: when the computation is about to be closed, 1 to 7 extra bits
+ * can be added. The functions for which this API is implemented include
+ * the SHA-2 functions and all SHA-3 candidates.
+ *
+ * <code>sphlib</code> defines hash function which may hash octet streams,
+ * i.e. streams of bits where the number of bits is a multiple of eight.
+ * The data input functions in the <code>sphlib</code> API expect data
+ * as anonymous pointers (<code>"const void *"</code>) with a length
+ * (of type <code>"size_t"</code>) which gives the input data chunk length
+ * in bytes. A byte is assumed to be an octet; the <code>sph_types.h</code>
+ * header contains a compile-time test which prevents compilation on
+ * architectures where this property is not met.
+ *
+ * The hash function output is also converted into bytes. All currently
+ * implemented hash functions have an output width which is a multiple of
+ * eight, and this is likely to remain true for new designs.
+ *
+ * Most hash functions internally convert input data into 32-bit of 64-bit
+ * words, using either little-endian or big-endian conversion. The hash
+ * output also often consists of such words, which are encoded into output
+ * bytes with a similar endianness convention. Some hash functions have
+ * been only loosely specified on that subject; when necessary,
+ * <code>sphlib</code> has been tested against published "reference"
+ * implementations in order to use the same conventions.
+ *
+ * @subsection shortname Function short name
+ *
+ * Each implemented hash function has a "short name" which is used
+ * internally to derive the identifiers for the functions and context
+ * structures which the function uses. For instance, MD5 has the short
+ * name <code>"md5"</code>. Short names are listed in the next section,
+ * for the implemented hash functions. In subsequent sections, the
+ * short name will be assumed to be <code>"XXX"</code>: replace with the
+ * actual hash function name to get the C identifier.
+ *
+ * Note: some functions within the same family share the same core
+ * elements, such as update function or context structure. Correspondingly,
+ * some of the defined types or functions may actually be macros which
+ * transparently evaluate to another type or function name.
+ *
+ * @subsection context Context structure
+ *
+ * Each implemented hash fonction has its own context structure, available
+ * under the type name <code>"sph_XXX_context"</code> for the hash function
+ * with short name <code>"XXX"</code>. This structure holds all needed
+ * state for a running hash computation.
+ *
+ * The contents of these structures are meant to be opaque, and private
+ * to the implementation. However, these contents are specified in the
+ * header files so that application code which uses <code>sphlib</code>
+ * may access the size of those structures.
+ *
+ * The caller is responsible for allocating the context structure,
+ * whether by dynamic allocation (<code>malloc()</code> or equivalent),
+ * static allocation (a global permanent variable), as an automatic
+ * variable ("on the stack"), or by any other mean which ensures proper
+ * structure alignment. <code>sphlib</code> code performs no dynamic
+ * allocation by itself.
+ *
+ * The context must be initialized before use, using the
+ * <code>sph_XXX_init()</code> function. This function sets the context
+ * state to proper initial values for hashing.
+ *
+ * Since all state data is contained within the context structure,
+ * <code>sphlib</code> is thread-safe and reentrant: several hash
+ * computations may be performed in parallel, provided that they do not
+ * operate on the same context. Moreover, a running computation can be
+ * cloned by copying the context (with a simple <code>memcpy()</code>):
+ * the context and its clone are then independant and may be updated
+ * with new data and/or closed without interfering with each other.
+ * Similarly, a context structure can be moved in memory at will:
+ * context structures contain no pointer, in particular no pointer to
+ * themselves.
+ *
+ * @subsection dataio Data input
+ *
+ * Hashed data is input with the <code>sph_XXX()</code> fonction, which
+ * takes as parameters a pointer to the context, a pointer to the data
+ * to hash, and the number of data bytes to hash. The context is updated
+ * with the new data.
+ *
+ * Data can be input in one or several calls, with arbitrary input lengths.
+ * However, it is best, performance wise, to input data by relatively big
+ * chunks (say a few kilobytes), because this allows <code>sphlib</code> to
+ * optimize things and avoid internal copying.
+ *
+ * When all data has been input, the context can be closed with
+ * <code>sph_XXX_close()</code>. The hash output is computed and written
+ * into the provided buffer. The caller must take care to provide a
+ * buffer of appropriate length; e.g., when using SHA-1, the output is
+ * a 20-byte word, therefore the output buffer must be at least 20-byte
+ * long.
+ *
+ * For some hash functions, the <code>sph_XXX_addbits_and_close()</code>
+ * function can be used instead of <code>sph_XXX_close()</code>. This
+ * function can take a few extra <strong>bits</strong> to be added at
+ * the end of the input message. This allows hashing messages with a
+ * bit length which is not a multiple of 8. The extra bits are provided
+ * as an unsigned integer value, and a bit count. The bit count must be
+ * between 0 and 7, inclusive. The extra bits are provided as bits 7 to
+ * 0 (bits of numerical value 128, 64, 32... downto 0), in that order.
+ * For instance, to add three bits of value 1, 1 and 0, the unsigned
+ * integer will have value 192 (1*128 + 1*64 + 0*32) and the bit count
+ * will be 3.
+ *
+ * The <code>SPH_SIZE_XXX</code> macro is defined for each hash function;
+ * it evaluates to the function output size, expressed in bits. For instance,
+ * <code>SPH_SIZE_sha1</code> evaluates to <code>160</code>.
+ *
+ * When closed, the context is automatically reinitialized and can be
+ * immediately used for another computation. It is not necessary to call
+ * <code>sph_XXX_init()</code> after a close. Note that
+ * <code>sph_XXX_init()</code> can still be called to "reset" a context,
+ * i.e. forget previously input data, and get back to the initial state.
+ *
+ * @subsection alignment Data alignment
+ *
+ * "Alignment" is a property of data, which is said to be "properly
+ * aligned" when its emplacement in memory is such that the data can
+ * be optimally read by full words. This depends on the type of access;
+ * basically, some hash functions will read data by 32-bit or 64-bit
+ * words. <code>sphlib</code> does not mandate such alignment for input
+ * data, but using aligned data can substantially improve performance.
+ *
+ * As a rule, it is best to input data by chunks whose length (in bytes)
+ * is a multiple of eight, and which begins at "generally aligned"
+ * addresses, such as the base address returned by a call to
+ * <code>malloc()</code>.
+ *
+ * @section functions Implemented functions
+ *
+ * We give here the list of implemented functions. They are grouped by
+ * family; to each family corresponds a specific header file. Each
+ * individual function has its associated "short name". Please refer to
+ * the documentation for that header file to get details on the hash
+ * function denomination and provenance.
+ *
+ * Note: the functions marked with a '(64)' in the list below are
+ * available only if the C compiler provides an integer type of length
+ * 64 bits or more. Such a type is mandatory in the latest C standard
+ * (ISO 9899:1999, aka "C99") and is present in several older compilers
+ * as well, so chances are that such a type is available.
+ *
+ * - HAVAL family: file <code>sph_haval.h</code>
+ *   - HAVAL-128/3 (128-bit, 3 passes): short name: <code>haval128_3</code>
+ *   - HAVAL-128/4 (128-bit, 4 passes): short name: <code>haval128_4</code>
+ *   - HAVAL-128/5 (128-bit, 5 passes): short name: <code>haval128_5</code>
+ *   - HAVAL-160/3 (160-bit, 3 passes): short name: <code>haval160_3</code>
+ *   - HAVAL-160/4 (160-bit, 4 passes): short name: <code>haval160_4</code>
+ *   - HAVAL-160/5 (160-bit, 5 passes): short name: <code>haval160_5</code>
+ *   - HAVAL-192/3 (192-bit, 3 passes): short name: <code>haval192_3</code>
+ *   - HAVAL-192/4 (192-bit, 4 passes): short name: <code>haval192_4</code>
+ *   - HAVAL-192/5 (192-bit, 5 passes): short name: <code>haval192_5</code>
+ *   - HAVAL-224/3 (224-bit, 3 passes): short name: <code>haval224_3</code>
+ *   - HAVAL-224/4 (224-bit, 4 passes): short name: <code>haval224_4</code>
+ *   - HAVAL-224/5 (224-bit, 5 passes): short name: <code>haval224_5</code>
+ *   - HAVAL-256/3 (256-bit, 3 passes): short name: <code>haval256_3</code>
+ *   - HAVAL-256/4 (256-bit, 4 passes): short name: <code>haval256_4</code>
+ *   - HAVAL-256/5 (256-bit, 5 passes): short name: <code>haval256_5</code>
+ * - MD2: file <code>sph_md2.h</code>, short name: <code>md2</code>
+ * - MD4: file <code>sph_md4.h</code>, short name: <code>md4</code>
+ * - MD5: file <code>sph_md5.h</code>, short name: <code>md5</code>
+ * - PANAMA: file <code>sph_panama.h</code>, short name: <code>panama</code>
+ * - RadioGatun family: file <code>sph_radiogatun.h</code>
+ *   - RadioGatun[32]: short name: <code>radiogatun32</code>
+ *   - RadioGatun[64]: short name: <code>radiogatun64</code> (64)
+ * - RIPEMD family: file <code>sph_ripemd.h</code>
+ *   - RIPEMD: short name: <code>ripemd</code>
+ *   - RIPEMD-128: short name: <code>ripemd128</code>
+ *   - RIPEMD-160: short name: <code>ripemd160</code>
+ * - SHA-0: file <code>sph_sha0.h</code>, short name: <code>sha0</code>
+ * - SHA-1: file <code>sph_sha1.h</code>, short name: <code>sha1</code>
+ * - SHA-2 family, 32-bit hashes: file <code>sph_sha2.h</code>
+ *   - SHA-224: short name: <code>sha224</code>
+ *   - SHA-256: short name: <code>sha256</code>
+ *   - SHA-384: short name: <code>sha384</code> (64)
+ *   - SHA-512: short name: <code>sha512</code> (64)
+ * - Tiger family: file <code>sph_tiger.h</code>
+ *   - Tiger: short name: <code>tiger</code> (64)
+ *   - Tiger2: short name: <code>tiger2</code> (64)
+ * - WHIRLPOOL family: file <code>sph_whirlpool.h</code>
+ *   - WHIRLPOOL-0: short name: <code>whirlpool0</code> (64)
+ *   - WHIRLPOOL-1: short name: <code>whirlpool1</code> (64)
+ *   - WHIRLPOOL: short name: <code>whirlpool</code> (64)
+ *
+ * The fourteen second-round SHA-3 candidates are also implemented;
+ * when applicable, the implementations follow the "final" specifications
+ * as published for the third round of the SHA-3 competition (BLAKE,
+ * Groestl, JH, Keccak and Skein have been tweaked for third round).
+ *
+ * - BLAKE family: file <code>sph_blake.h</code>
+ *   - BLAKE-224: short name: <code>blake224</code>
+ *   - BLAKE-256: short name: <code>blake256</code>
+ *   - BLAKE-384: short name: <code>blake384</code>
+ *   - BLAKE-512: short name: <code>blake512</code>
+ * - BMW (Blue Midnight Wish) family: file <code>sph_bmw.h</code>
+ *   - BMW-224: short name: <code>bmw224</code>
+ *   - BMW-256: short name: <code>bmw256</code>
+ *   - BMW-384: short name: <code>bmw384</code> (64)
+ *   - BMW-512: short name: <code>bmw512</code> (64)
+ * - CubeHash family: file <code>sph_cubehash.h</code> (specified as
+ *   CubeHash16/32 in the CubeHash specification)
+ *   - CubeHash-224: short name: <code>cubehash224</code>
+ *   - CubeHash-256: short name: <code>cubehash256</code>
+ *   - CubeHash-384: short name: <code>cubehash384</code>
+ *   - CubeHash-512: short name: <code>cubehash512</code>
+ * - ECHO family: file <code>sph_echo.h</code>
+ *   - ECHO-224: short name: <code>echo224</code>
+ *   - ECHO-256: short name: <code>echo256</code>
+ *   - ECHO-384: short name: <code>echo384</code>
+ *   - ECHO-512: short name: <code>echo512</code>
+ * - Fugue family: file <code>sph_fugue.h</code>
+ *   - Fugue-224: short name: <code>fugue224</code>
+ *   - Fugue-256: short name: <code>fugue256</code>
+ *   - Fugue-384: short name: <code>fugue384</code>
+ *   - Fugue-512: short name: <code>fugue512</code>
+ * - Groestl family: file <code>sph_groestl.h</code>
+ *   - Groestl-224: short name: <code>groestl224</code>
+ *   - Groestl-256: short name: <code>groestl256</code>
+ *   - Groestl-384: short name: <code>groestl384</code>
+ *   - Groestl-512: short name: <code>groestl512</code>
+ * - Hamsi family: file <code>sph_hamsi.h</code>
+ *   - Hamsi-224: short name: <code>hamsi224</code>
+ *   - Hamsi-256: short name: <code>hamsi256</code>
+ *   - Hamsi-384: short name: <code>hamsi384</code>
+ *   - Hamsi-512: short name: <code>hamsi512</code>
+ * - JH family: file <code>sph_jh.h</code>
+ *   - JH-224: short name: <code>jh224</code>
+ *   - JH-256: short name: <code>jh256</code>
+ *   - JH-384: short name: <code>jh384</code>
+ *   - JH-512: short name: <code>jh512</code>
+ * - Keccak family: file <code>sph_keccak.h</code>
+ *   - Keccak-224: short name: <code>keccak224</code>
+ *   - Keccak-256: short name: <code>keccak256</code>
+ *   - Keccak-384: short name: <code>keccak384</code>
+ *   - Keccak-512: short name: <code>keccak512</code>
+ * - Luffa family: file <code>sph_luffa.h</code>
+ *   - Luffa-224: short name: <code>luffa224</code>
+ *   - Luffa-256: short name: <code>luffa256</code>
+ *   - Luffa-384: short name: <code>luffa384</code>
+ *   - Luffa-512: short name: <code>luffa512</code>
+ * - Shabal family: file <code>sph_shabal.h</code>
+ *   - Shabal-192: short name: <code>shabal192</code>
+ *   - Shabal-224: short name: <code>shabal224</code>
+ *   - Shabal-256: short name: <code>shabal256</code>
+ *   - Shabal-384: short name: <code>shabal384</code>
+ *   - Shabal-512: short name: <code>shabal512</code>
+ * - SHAvite-3 family: file <code>sph_shavite.h</code>
+ *   - SHAvite-224 (nominally "SHAvite-3 with 224-bit output"):
+ *     short name: <code>shabal224</code>
+ *   - SHAvite-256 (nominally "SHAvite-3 with 256-bit output"):
+ *     short name: <code>shabal256</code>
+ *   - SHAvite-384 (nominally "SHAvite-3 with 384-bit output"):
+ *     short name: <code>shabal384</code>
+ *   - SHAvite-512 (nominally "SHAvite-3 with 512-bit output"):
+ *     short name: <code>shabal512</code>
+ * - SIMD family: file <code>sph_simd.h</code>
+ *   - SIMD-224: short name: <code>simd224</code>
+ *   - SIMD-256: short name: <code>simd256</code>
+ *   - SIMD-384: short name: <code>simd384</code>
+ *   - SIMD-512: short name: <code>simd512</code>
+ * - Skein family: file <code>sph_skein.h</code>
+ *   - Skein-224 (nominally specified as Skein-512-224): short name:
+ *     <code>skein224</code> (64)
+ *   - Skein-256 (nominally specified as Skein-512-256): short name:
+ *     <code>skein256</code> (64)
+ *   - Skein-384 (nominally specified as Skein-512-384): short name:
+ *     <code>skein384</code> (64)
+ *   - Skein-512 (nominally specified as Skein-512-512): short name:
+ *     <code>skein512</code> (64)
+ *
+ * For the second-round SHA-3 candidates, the functions are as specified
+ * for round 2, i.e. with the "tweaks" that some candidates added
+ * between round 1 and round 2. Also, some of the submitted packages for
+ * round 2 contained errors, in the specification, reference code, or
+ * both. <code>sphlib</code> implements the corrected versions.
+ */
+
+/** @hideinitializer
+ * Unsigned integer type whose length is at least 32 bits; on most
+ * architectures, it will have a width of exactly 32 bits. Unsigned C
+ * types implement arithmetics modulo a power of 2; use the
+ * <code>SPH_T32()</code> macro to ensure that the value is truncated
+ * to exactly 32 bits. Unless otherwise specified, all macros and
+ * functions which accept <code>sph_u32</code> values assume that these
+ * values fit on 32 bits, i.e. do not exceed 2^32-1, even on architectures
+ * where <code>sph_u32</code> is larger than that.
+ */
+typedef __arch_dependant__ sph_u32;
+
+/** @hideinitializer
+ * Signed integer type corresponding to <code>sph_u32</code>; it has
+ * width 32 bits or more.
+ */
+typedef __arch_dependant__ sph_s32;
+
+/** @hideinitializer
+ * Unsigned integer type whose length is at least 64 bits; on most
+ * architectures which feature such a type, it will have a width of
+ * exactly 64 bits. C99-compliant platform will have this type; it
+ * is also defined when the GNU compiler (gcc) is used, and on
+ * platforms where <code>unsigned long</code> is large enough. If this
+ * type is not available, then some hash functions which depends on
+ * a 64-bit type will not be available (most notably SHA-384, SHA-512,
+ * Tiger and WHIRLPOOL).
+ */
+typedef __arch_dependant__ sph_u64;
+
+/** @hideinitializer
+ * Signed integer type corresponding to <code>sph_u64</code>; it has
+ * width 64 bits or more.
+ */
+typedef __arch_dependant__ sph_s64;
+
+/**
+ * This macro expands the token <code>x</code> into a suitable
+ * constant expression of type <code>sph_u32</code>. Depending on
+ * how this type is defined, a suffix such as <code>UL</code> may
+ * be appended to the argument.
+ *
+ * @param x   the token to expand into a suitable constant expression
+ */
+#define SPH_C32(x)
+
+/**
+ * Truncate a 32-bit value to exactly 32 bits. On most systems, this is
+ * a no-op, recognized as such by the compiler.
+ *
+ * @param x   the value to truncate (of type <code>sph_u32</code>)
+ */
+#define SPH_T32(x)
+
+/**
+ * Rotate a 32-bit value by a number of bits to the left. The rotate
+ * count must reside between 1 and 31. This macro assumes that its
+ * first argument fits in 32 bits (no extra bit allowed on machines where
+ * <code>sph_u32</code> is wider); both arguments may be evaluated
+ * several times.
+ *
+ * @param x   the value to rotate (of type <code>sph_u32</code>)
+ * @param n   the rotation count (between 1 and 31, inclusive)
+ */
+#define SPH_ROTL32(x, n)
+
+/**
+ * Rotate a 32-bit value by a number of bits to the left. The rotate
+ * count must reside between 1 and 31. This macro assumes that its
+ * first argument fits in 32 bits (no extra bit allowed on machines where
+ * <code>sph_u32</code> is wider); both arguments may be evaluated
+ * several times.
+ *
+ * @param x   the value to rotate (of type <code>sph_u32</code>)
+ * @param n   the rotation count (between 1 and 31, inclusive)
+ */
+#define SPH_ROTR32(x, n)
+
+/**
+ * This macro is defined on systems for which a 64-bit type has been
+ * detected, and is used for <code>sph_u64</code>.
+ */
+#define SPH_64
+
+/**
+ * This macro is defined on systems for the "native" integer size is
+ * 64 bits (64-bit values fit in one register).
+ */
+#define SPH_64_TRUE
+
+/**
+ * This macro expands the token <code>x</code> into a suitable
+ * constant expression of type <code>sph_u64</code>. Depending on
+ * how this type is defined, a suffix such as <code>ULL</code> may
+ * be appended to the argument. This macro is defined only if a
+ * 64-bit type was detected and used for <code>sph_u64</code>.
+ *
+ * @param x   the token to expand into a suitable constant expression
+ */
+#define SPH_C64(x)
+
+/**
+ * Truncate a 64-bit value to exactly 64 bits. On most systems, this is
+ * a no-op, recognized as such by the compiler. This macro is defined only
+ * if a 64-bit type was detected and used for <code>sph_u64</code>.
+ *
+ * @param x   the value to truncate (of type <code>sph_u64</code>)
+ */
+#define SPH_T64(x)
+
+/**
+ * Rotate a 64-bit value by a number of bits to the left. The rotate
+ * count must reside between 1 and 63. This macro assumes that its
+ * first argument fits in 64 bits (no extra bit allowed on machines where
+ * <code>sph_u64</code> is wider); both arguments may be evaluated
+ * several times. This macro is defined only if a 64-bit type was detected
+ * and used for <code>sph_u64</code>.
+ *
+ * @param x   the value to rotate (of type <code>sph_u64</code>)
+ * @param n   the rotation count (between 1 and 63, inclusive)
+ */
+#define SPH_ROTL64(x, n)
+
+/**
+ * Rotate a 64-bit value by a number of bits to the left. The rotate
+ * count must reside between 1 and 63. This macro assumes that its
+ * first argument fits in 64 bits (no extra bit allowed on machines where
+ * <code>sph_u64</code> is wider); both arguments may be evaluated
+ * several times. This macro is defined only if a 64-bit type was detected
+ * and used for <code>sph_u64</code>.
+ *
+ * @param x   the value to rotate (of type <code>sph_u64</code>)
+ * @param n   the rotation count (between 1 and 63, inclusive)
+ */
+#define SPH_ROTR64(x, n)
+
+/**
+ * This macro evaluates to <code>inline</code> or an equivalent construction,
+ * if available on the compilation platform, or to nothing otherwise. This
+ * is used to declare inline functions, for which the compiler should
+ * endeavour to include the code directly in the caller. Inline functions
+ * are typically defined in header files as replacement for macros.
+ */
+#define SPH_INLINE
+
+/**
+ * This macro is defined if the platform has been detected as using
+ * little-endian convention. This implies that the <code>sph_u32</code>
+ * type (and the <code>sph_u64</code> type also, if it is defined) has
+ * an exact width (i.e. exactly 32-bit, respectively 64-bit).
+ */
+#define SPH_LITTLE_ENDIAN
+
+/**
+ * This macro is defined if the platform has been detected as using
+ * big-endian convention. This implies that the <code>sph_u32</code>
+ * type (and the <code>sph_u64</code> type also, if it is defined) has
+ * an exact width (i.e. exactly 32-bit, respectively 64-bit).
+ */
+#define SPH_BIG_ENDIAN
+
+/**
+ * This macro is defined if 32-bit words (and 64-bit words, if defined)
+ * can be read from and written to memory efficiently in little-endian
+ * convention. This is the case for little-endian platforms, and also
+ * for the big-endian platforms which have special little-endian access
+ * opcodes (e.g. Ultrasparc).
+ */
+#define SPH_LITTLE_FAST
+
+/**
+ * This macro is defined if 32-bit words (and 64-bit words, if defined)
+ * can be read from and written to memory efficiently in big-endian
+ * convention. This is the case for little-endian platforms, and also
+ * for the little-endian platforms which have special big-endian access
+ * opcodes.
+ */
+#define SPH_BIG_FAST
+
+/**
+ * On some platforms, this macro is defined to an unsigned integer type
+ * into which pointer values may be cast. The resulting value can then
+ * be tested for being a multiple of 2, 4 or 8, indicating an aligned
+ * pointer for, respectively, 16-bit, 32-bit or 64-bit memory accesses.
+ */
+#define SPH_UPTR
+
+/**
+ * When defined, this macro indicates that unaligned memory accesses
+ * are possible with only a minor penalty, and thus should be prefered
+ * over strategies which first copy data to an aligned buffer.
+ */
+#define SPH_UNALIGNED
+
+/**
+ * Byte-swap a 32-bit word (i.e. <code>0x12345678</code> becomes
+ * <code>0x78563412</code>). This is an inline function which resorts
+ * to inline assembly on some platforms, for better performance.
+ *
+ * @param x   the 32-bit value to byte-swap
+ * @return  the byte-swapped value
+ */
+static inline sph_u32 sph_bswap32(sph_u32 x);
+
+/**
+ * Byte-swap a 64-bit word. This is an inline function which resorts
+ * to inline assembly on some platforms, for better performance. This
+ * function is defined only if a suitable 64-bit type was found for
+ * <code>sph_u64</code>
+ *
+ * @param x   the 64-bit value to byte-swap
+ * @return  the byte-swapped value
+ */
+static inline sph_u64 sph_bswap64(sph_u64 x);
+
+/**
+ * Decode a 16-bit unsigned value from memory, in little-endian convention
+ * (least significant byte comes first).
+ *
+ * @param src   the source address
+ * @return  the decoded value
+ */
+static inline unsigned sph_dec16le(const void *src);
+
+/**
+ * Encode a 16-bit unsigned value into memory, in little-endian convention
+ * (least significant byte comes first).
+ *
+ * @param dst   the destination buffer
+ * @param val   the value to encode
+ */
+static inline void sph_enc16le(void *dst, unsigned val);
+
+/**
+ * Decode a 16-bit unsigned value from memory, in big-endian convention
+ * (most significant byte comes first).
+ *
+ * @param src   the source address
+ * @return  the decoded value
+ */
+static inline unsigned sph_dec16be(const void *src);
+
+/**
+ * Encode a 16-bit unsigned value into memory, in big-endian convention
+ * (most significant byte comes first).
+ *
+ * @param dst   the destination buffer
+ * @param val   the value to encode
+ */
+static inline void sph_enc16be(void *dst, unsigned val);
+
+/**
+ * Decode a 32-bit unsigned value from memory, in little-endian convention
+ * (least significant byte comes first).
+ *
+ * @param src   the source address
+ * @return  the decoded value
+ */
+static inline sph_u32 sph_dec32le(const void *src);
+
+/**
+ * Decode a 32-bit unsigned value from memory, in little-endian convention
+ * (least significant byte comes first). This function assumes that the
+ * source address is suitably aligned for a direct access, if the platform
+ * supports such things; it can thus be marginally faster than the generic
+ * <code>sph_dec32le()</code> function.
+ *
+ * @param src   the source address
+ * @return  the decoded value
+ */
+static inline sph_u32 sph_dec32le_aligned(const void *src);
+
+/**
+ * Encode a 32-bit unsigned value into memory, in little-endian convention
+ * (least significant byte comes first).
+ *
+ * @param dst   the destination buffer
+ * @param val   the value to encode
+ */
+static inline void sph_enc32le(void *dst, sph_u32 val);
+
+/**
+ * Encode a 32-bit unsigned value into memory, in little-endian convention
+ * (least significant byte comes first). This function assumes that the
+ * destination address is suitably aligned for a direct access, if the
+ * platform supports such things; it can thus be marginally faster than
+ * the generic <code>sph_enc32le()</code> function.
+ *
+ * @param dst   the destination buffer
+ * @param val   the value to encode
+ */
+static inline void sph_enc32le_aligned(void *dst, sph_u32 val);
+
+/**
+ * Decode a 32-bit unsigned value from memory, in big-endian convention
+ * (most significant byte comes first).
+ *
+ * @param src   the source address
+ * @return  the decoded value
+ */
+static inline sph_u32 sph_dec32be(const void *src);
+
+/**
+ * Decode a 32-bit unsigned value from memory, in big-endian convention
+ * (most significant byte comes first). This function assumes that the
+ * source address is suitably aligned for a direct access, if the platform
+ * supports such things; it can thus be marginally faster than the generic
+ * <code>sph_dec32be()</code> function.
+ *
+ * @param src   the source address
+ * @return  the decoded value
+ */
+static inline sph_u32 sph_dec32be_aligned(const void *src);
+
+/**
+ * Encode a 32-bit unsigned value into memory, in big-endian convention
+ * (most significant byte comes first).
+ *
+ * @param dst   the destination buffer
+ * @param val   the value to encode
+ */
+static inline void sph_enc32be(void *dst, sph_u32 val);
+
+/**
+ * Encode a 32-bit unsigned value into memory, in big-endian convention
+ * (most significant byte comes first). This function assumes that the
+ * destination address is suitably aligned for a direct access, if the
+ * platform supports such things; it can thus be marginally faster than
+ * the generic <code>sph_enc32be()</code> function.
+ *
+ * @param dst   the destination buffer
+ * @param val   the value to encode
+ */
+static inline void sph_enc32be_aligned(void *dst, sph_u32 val);
+
+/**
+ * Decode a 64-bit unsigned value from memory, in little-endian convention
+ * (least significant byte comes first). This function is defined only
+ * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
+ *
+ * @param src   the source address
+ * @return  the decoded value
+ */
+static inline sph_u64 sph_dec64le(const void *src);
+
+/**
+ * Decode a 64-bit unsigned value from memory, in little-endian convention
+ * (least significant byte comes first). This function assumes that the
+ * source address is suitably aligned for a direct access, if the platform
+ * supports such things; it can thus be marginally faster than the generic
+ * <code>sph_dec64le()</code> function. This function is defined only
+ * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
+ *
+ * @param src   the source address
+ * @return  the decoded value
+ */
+static inline sph_u64 sph_dec64le_aligned(const void *src);
+
+/**
+ * Encode a 64-bit unsigned value into memory, in little-endian convention
+ * (least significant byte comes first). This function is defined only
+ * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
+ *
+ * @param dst   the destination buffer
+ * @param val   the value to encode
+ */
+static inline void sph_enc64le(void *dst, sph_u64 val);
+
+/**
+ * Encode a 64-bit unsigned value into memory, in little-endian convention
+ * (least significant byte comes first). This function assumes that the
+ * destination address is suitably aligned for a direct access, if the
+ * platform supports such things; it can thus be marginally faster than
+ * the generic <code>sph_enc64le()</code> function. This function is defined
+ * only if a suitable 64-bit type was detected and used for
+ * <code>sph_u64</code>.
+ *
+ * @param dst   the destination buffer
+ * @param val   the value to encode
+ */
+static inline void sph_enc64le_aligned(void *dst, sph_u64 val);
+
+/**
+ * Decode a 64-bit unsigned value from memory, in big-endian convention
+ * (most significant byte comes first). This function is defined only
+ * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
+ *
+ * @param src   the source address
+ * @return  the decoded value
+ */
+static inline sph_u64 sph_dec64be(const void *src);
+
+/**
+ * Decode a 64-bit unsigned value from memory, in big-endian convention
+ * (most significant byte comes first). This function assumes that the
+ * source address is suitably aligned for a direct access, if the platform
+ * supports such things; it can thus be marginally faster than the generic
+ * <code>sph_dec64be()</code> function. This function is defined only
+ * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
+ *
+ * @param src   the source address
+ * @return  the decoded value
+ */
+static inline sph_u64 sph_dec64be_aligned(const void *src);
+
+/**
+ * Encode a 64-bit unsigned value into memory, in big-endian convention
+ * (most significant byte comes first). This function is defined only
+ * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
+ *
+ * @param dst   the destination buffer
+ * @param val   the value to encode
+ */
+static inline void sph_enc64be(void *dst, sph_u64 val);
+
+/**
+ * Encode a 64-bit unsigned value into memory, in big-endian convention
+ * (most significant byte comes first). This function assumes that the
+ * destination address is suitably aligned for a direct access, if the
+ * platform supports such things; it can thus be marginally faster than
+ * the generic <code>sph_enc64be()</code> function. This function is defined
+ * only if a suitable 64-bit type was detected and used for
+ * <code>sph_u64</code>.
+ *
+ * @param dst   the destination buffer
+ * @param val   the value to encode
+ */
+static inline void sph_enc64be_aligned(void *dst, sph_u64 val);
+
+#endif
+
+/* ============== END documentation block for Doxygen ============= */
+
+#ifndef DOXYGEN_IGNORE
+
+/*
+ * We want to define the types "sph_u32" and "sph_u64" which hold
+ * unsigned values of at least, respectively, 32 and 64 bits. These
+ * tests should select appropriate types for most platforms. The
+ * macro "SPH_64" is defined if the 64-bit is supported.
+ */
+
+#undef SPH_64
+#undef SPH_64_TRUE
+
+#if defined __STDC__ && __STDC_VERSION__ >= 199901L
+
+/*
+ * On C99 implementations, we can use <stdint.h> to get an exact 64-bit
+ * type, if any, or otherwise use a wider type (which must exist, for
+ * C99 conformance).
+ */
+
+#include <stdint.h>
+
+#ifdef UINT32_MAX
+typedef uint32_t sph_u32;
+typedef int32_t sph_s32;
+#else
+typedef uint_fast32_t sph_u32;
+typedef int_fast32_t sph_s32;
+#endif
+#if !SPH_NO_64
+#ifdef UINT64_MAX
+typedef uint64_t sph_u64;
+typedef int64_t sph_s64;
+#else
+typedef uint_fast64_t sph_u64;
+typedef int_fast64_t sph_s64;
+#endif
+#endif
+
+#define SPH_C32(x)    ((sph_u32)(x))
+#if !SPH_NO_64
+#define SPH_C64(x)    ((sph_u64)(x))
+#define SPH_64  1
+#endif
+
+#else
+
+/*
+ * On non-C99 systems, we use "unsigned int" if it is wide enough,
+ * "unsigned long" otherwise. This supports all "reasonable" architectures.
+ * We have to be cautious: pre-C99 preprocessors handle constants
+ * differently in '#if' expressions. Hence the shifts to test UINT_MAX.
+ */
+
+#if ((UINT_MAX >> 11) >> 11) >= 0x3FF
+
+typedef unsigned int sph_u32;
+typedef int sph_s32;
+
+#define SPH_C32(x)    ((sph_u32)(x ## U))
+
+#else
+
+typedef unsigned long sph_u32;
+typedef long sph_s32;
+
+#define SPH_C32(x)    ((sph_u32)(x ## UL))
+
+#endif
+
+#if !SPH_NO_64
+
+/*
+ * We want a 64-bit type. We use "unsigned long" if it is wide enough (as
+ * is common on 64-bit architectures such as AMD64, Alpha or Sparcv9),
+ * "unsigned long long" otherwise, if available. We use ULLONG_MAX to
+ * test whether "unsigned long long" is available; we also know that
+ * gcc features this type, even if the libc header do not know it.
+ */
+
+#if ((ULONG_MAX >> 31) >> 31) >= 3
+
+typedef unsigned long sph_u64;
+typedef long sph_s64;
+
+#define SPH_C64(x)    ((sph_u64)(x ## UL))
+
+#define SPH_64  1
+
+#elif ((ULLONG_MAX >> 31) >> 31) >= 3 || defined __GNUC__
+
+typedef unsigned long long sph_u64;
+typedef long long sph_s64;
+
+#define SPH_C64(x)    ((sph_u64)(x ## ULL))
+
+#define SPH_64  1
+
+#else
+
+/*
+ * No 64-bit type...
+ */
+
+#endif
+
+#endif
+
+#endif
+
+/*
+ * If the "unsigned long" type has length 64 bits or more, then this is
+ * a "true" 64-bit architectures. This is also true with Visual C on
+ * amd64, even though the "long" type is limited to 32 bits.
+ */
+#if SPH_64 && (((ULONG_MAX >> 31) >> 31) >= 3 || defined _M_X64)
+#define SPH_64_TRUE   1
+#endif
+
+/*
+ * Implementation note: some processors have specific opcodes to perform
+ * a rotation. Recent versions of gcc recognize the expression above and
+ * use the relevant opcodes, when appropriate.
+ */
+
+#define SPH_T32(x)    ((x) & SPH_C32(0xFFFFFFFF))
+#ifdef _MSC_VER
+#define SPH_ROTL32(x, n)   _rotl(x, n)
+#define SPH_ROTR32(x, n)   _rotr(x, n)
+#else
+#define SPH_ROTL32(x, n)   SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
+#define SPH_ROTR32(x, n)   SPH_ROTL32(x, (32 - (n)))
+#endif
+
+#if SPH_64
+
+#define SPH_T64(x)    ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))
+#ifdef _MSC_VER
+#define SPH_ROTL64(x, n)   _rotl64(x, n)
+#define SPH_ROTR64(x, n)   _rotr64(x, n)
+#else
+#define SPH_ROTL64(x, n)   SPH_T64(((x) << (n)) | ((x) >> (64 - (n))))
+#define SPH_ROTR64(x, n)   SPH_ROTL64(x, (64 - (n)))
+#endif
+
+#endif
+
+#ifndef DOXYGEN_IGNORE
+/*
+ * Define SPH_INLINE to be an "inline" qualifier, if available. We define
+ * some small macro-like functions which benefit greatly from being inlined.
+ */
+#if (defined __STDC__ && __STDC_VERSION__ >= 199901L) || defined __GNUC__
+#define SPH_INLINE inline
+#elif defined _MSC_VER
+#define SPH_INLINE __inline
+#else
+#define SPH_INLINE
+#endif
+#endif
+
+/*
+ * We define some macros which qualify the architecture. These macros
+ * may be explicit set externally (e.g. as compiler parameters). The
+ * code below sets those macros if they are not already defined.
+ *
+ * Most macros are boolean, thus evaluate to either zero or non-zero.
+ * The SPH_UPTR macro is special, in that it evaluates to a C type,
+ * or is not defined.
+ *
+ * SPH_UPTR             if defined: unsigned type to cast pointers into
+ *
+ * SPH_UNALIGNED        non-zero if unaligned accesses are efficient
+ * SPH_LITTLE_ENDIAN    non-zero if architecture is known to be little-endian
+ * SPH_BIG_ENDIAN       non-zero if architecture is known to be big-endian
+ * SPH_LITTLE_FAST      non-zero if little-endian decoding is fast
+ * SPH_BIG_FAST         non-zero if big-endian decoding is fast
+ *
+ * If SPH_UPTR is defined, then encoding and decoding of 32-bit and 64-bit
+ * values will try to be "smart". Either SPH_LITTLE_ENDIAN or SPH_BIG_ENDIAN
+ * _must_ be non-zero in those situations. The 32-bit and 64-bit types
+ * _must_ also have an exact width.
+ *
+ * SPH_SPARCV9_GCC_32   UltraSPARC-compatible with gcc, 32-bit mode
+ * SPH_SPARCV9_GCC_64   UltraSPARC-compatible with gcc, 64-bit mode
+ * SPH_SPARCV9_GCC      UltraSPARC-compatible with gcc
+ * SPH_I386_GCC         x86-compatible (32-bit) with gcc
+ * SPH_I386_MSVC        x86-compatible (32-bit) with Microsoft Visual C
+ * SPH_AMD64_GCC        x86-compatible (64-bit) with gcc
+ * SPH_AMD64_MSVC       x86-compatible (64-bit) with Microsoft Visual C
+ * SPH_PPC32_GCC        PowerPC, 32-bit, with gcc
+ * SPH_PPC64_GCC        PowerPC, 64-bit, with gcc
+ *
+ * TODO: enhance automatic detection, for more architectures and compilers.
+ * Endianness is the most important. SPH_UNALIGNED and SPH_UPTR help with
+ * some very fast functions (e.g. MD4) when using unaligned input data.
+ * The CPU-specific-with-GCC macros are useful only for inline assembly,
+ * normally restrained to this header file.
+ */
+
+/*
+ * 32-bit x86, aka "i386 compatible".
+ */
+#if defined __i386__ || defined _M_IX86
+
+#define SPH_DETECT_UNALIGNED         1
+#define SPH_DETECT_LITTLE_ENDIAN     1
+#define SPH_DETECT_UPTR              sph_u32
+#ifdef __GNUC__
+#define SPH_DETECT_I386_GCC          1
+#endif
+#ifdef _MSC_VER
+#define SPH_DETECT_I386_MSVC         1
+#endif
+
+/*
+ * 64-bit x86, hereafter known as "amd64".
+ */
+#elif defined __x86_64 || defined _M_X64
+
+#define SPH_DETECT_UNALIGNED         1
+#define SPH_DETECT_LITTLE_ENDIAN     1
+#define SPH_DETECT_UPTR              sph_u64
+#ifdef __GNUC__
+#define SPH_DETECT_AMD64_GCC         1
+#endif
+#ifdef _MSC_VER
+#define SPH_DETECT_AMD64_MSVC        1
+#endif
+
+/*
+ * 64-bit Sparc architecture (implies v9).
+ */
+#elif ((defined __sparc__ || defined __sparc) && defined __arch64__) \
+	|| defined __sparcv9
+
+#define SPH_DETECT_BIG_ENDIAN        1
+#define SPH_DETECT_UPTR              sph_u64
+#ifdef __GNUC__
+#define SPH_DETECT_SPARCV9_GCC_64    1
+#define SPH_DETECT_LITTLE_FAST       1
+#endif
+
+/*
+ * 32-bit Sparc.
+ */
+#elif (defined __sparc__ || defined __sparc) \
+	&& !(defined __sparcv9 || defined __arch64__)
+
+#define SPH_DETECT_BIG_ENDIAN        1
+#define SPH_DETECT_UPTR              sph_u32
+#if defined __GNUC__ && defined __sparc_v9__
+#define SPH_DETECT_SPARCV9_GCC_32    1
+#define SPH_DETECT_LITTLE_FAST       1
+#endif
+
+/*
+ * ARM, little-endian.
+ */
+#elif defined __arm__ && __ARMEL__
+
+#define SPH_DETECT_LITTLE_ENDIAN     1
+
+/*
+ * MIPS, little-endian.
+ */
+#elif MIPSEL || _MIPSEL || __MIPSEL || __MIPSEL__
+
+#define SPH_DETECT_LITTLE_ENDIAN     1
+
+/*
+ * MIPS, big-endian.
+ */
+#elif MIPSEB || _MIPSEB || __MIPSEB || __MIPSEB__
+
+#define SPH_DETECT_BIG_ENDIAN        1
+
+/*
+ * PowerPC.
+ */
+#elif defined __powerpc__ || defined __POWERPC__ || defined __ppc__ \
+	|| defined _ARCH_PPC
+
+/*
+ * Note: we do not declare cross-endian access to be "fast": even if
+ * using inline assembly, implementation should still assume that
+ * keeping the decoded word in a temporary is faster than decoding
+ * it again.
+ */
+#if defined __GNUC__
+#if SPH_64_TRUE
+#define SPH_DETECT_PPC64_GCC         1
+#else
+#define SPH_DETECT_PPC32_GCC         1
+#endif
+#endif
+
+#if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN
+#define SPH_DETECT_BIG_ENDIAN        1
+#elif defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN
+#define SPH_DETECT_LITTLE_ENDIAN     1
+#endif
+
+/*
+ * Itanium, 64-bit.
+ */
+#elif defined __ia64 || defined __ia64__ \
+	|| defined __itanium__ || defined _M_IA64
+
+#if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN
+#define SPH_DETECT_BIG_ENDIAN        1
+#else
+#define SPH_DETECT_LITTLE_ENDIAN     1
+#endif
+#if defined __LP64__ || defined _LP64
+#define SPH_DETECT_UPTR              sph_u64
+#else
+#define SPH_DETECT_UPTR              sph_u32
+#endif
+
+#endif
+
+#if defined SPH_DETECT_SPARCV9_GCC_32 || defined SPH_DETECT_SPARCV9_GCC_64
+#define SPH_DETECT_SPARCV9_GCC       1
+#endif
+
+#if defined SPH_DETECT_UNALIGNED && !defined SPH_UNALIGNED
+#define SPH_UNALIGNED         SPH_DETECT_UNALIGNED
+#endif
+#if defined SPH_DETECT_UPTR && !defined SPH_UPTR
+#define SPH_UPTR              SPH_DETECT_UPTR
+#endif
+#if defined SPH_DETECT_LITTLE_ENDIAN && !defined SPH_LITTLE_ENDIAN
+#define SPH_LITTLE_ENDIAN     SPH_DETECT_LITTLE_ENDIAN
+#endif
+#if defined SPH_DETECT_BIG_ENDIAN && !defined SPH_BIG_ENDIAN
+#define SPH_BIG_ENDIAN        SPH_DETECT_BIG_ENDIAN
+#endif
+#if defined SPH_DETECT_LITTLE_FAST && !defined SPH_LITTLE_FAST
+#define SPH_LITTLE_FAST       SPH_DETECT_LITTLE_FAST
+#endif
+#if defined SPH_DETECT_BIG_FAST && !defined SPH_BIG_FAST
+#define SPH_BIG_FAST    SPH_DETECT_BIG_FAST
+#endif
+#if defined SPH_DETECT_SPARCV9_GCC_32 && !defined SPH_SPARCV9_GCC_32
+#define SPH_SPARCV9_GCC_32    SPH_DETECT_SPARCV9_GCC_32
+#endif
+#if defined SPH_DETECT_SPARCV9_GCC_64 && !defined SPH_SPARCV9_GCC_64
+#define SPH_SPARCV9_GCC_64    SPH_DETECT_SPARCV9_GCC_64
+#endif
+#if defined SPH_DETECT_SPARCV9_GCC && !defined SPH_SPARCV9_GCC
+#define SPH_SPARCV9_GCC       SPH_DETECT_SPARCV9_GCC
+#endif
+#if defined SPH_DETECT_I386_GCC && !defined SPH_I386_GCC
+#define SPH_I386_GCC          SPH_DETECT_I386_GCC
+#endif
+#if defined SPH_DETECT_I386_MSVC && !defined SPH_I386_MSVC
+#define SPH_I386_MSVC         SPH_DETECT_I386_MSVC
+#endif
+#if defined SPH_DETECT_AMD64_GCC && !defined SPH_AMD64_GCC
+#define SPH_AMD64_GCC         SPH_DETECT_AMD64_GCC
+#endif
+#if defined SPH_DETECT_AMD64_MSVC && !defined SPH_AMD64_MSVC
+#define SPH_AMD64_MSVC        SPH_DETECT_AMD64_MSVC
+#endif
+#if defined SPH_DETECT_PPC32_GCC && !defined SPH_PPC32_GCC
+#define SPH_PPC32_GCC         SPH_DETECT_PPC32_GCC
+#endif
+#if defined SPH_DETECT_PPC64_GCC && !defined SPH_PPC64_GCC
+#define SPH_PPC64_GCC         SPH_DETECT_PPC64_GCC
+#endif
+
+#if SPH_LITTLE_ENDIAN && !defined SPH_LITTLE_FAST
+#define SPH_LITTLE_FAST              1
+#endif
+#if SPH_BIG_ENDIAN && !defined SPH_BIG_FAST
+#define SPH_BIG_FAST                 1
+#endif
+
+#if defined SPH_UPTR && !(SPH_LITTLE_ENDIAN || SPH_BIG_ENDIAN)
+#error SPH_UPTR defined, but endianness is not known.
+#endif
+
+#if SPH_I386_GCC && !SPH_NO_ASM
+
+/*
+ * On x86 32-bit, with gcc, we use the bswapl opcode to byte-swap 32-bit
+ * values.
+ */
+
+static SPH_INLINE sph_u32
+sph_bswap32(sph_u32 x)
+{
+	__asm__ __volatile__ ("bswapl %0" : "=r" (x) : "0" (x));
+	return x;
+}
+
+#if SPH_64
+
+static SPH_INLINE sph_u64
+sph_bswap64(sph_u64 x)
+{
+	return ((sph_u64)sph_bswap32((sph_u32)x) << 32)
+		| (sph_u64)sph_bswap32((sph_u32)(x >> 32));
+}
+
+#endif
+
+#elif SPH_AMD64_GCC && !SPH_NO_ASM
+
+/*
+ * On x86 64-bit, with gcc, we use the bswapl opcode to byte-swap 32-bit
+ * and 64-bit values.
+ */
+
+static SPH_INLINE sph_u32
+sph_bswap32(sph_u32 x)
+{
+	__asm__ __volatile__ ("bswapl %0" : "=r" (x) : "0" (x));
+	return x;
+}
+
+#if SPH_64
+
+static SPH_INLINE sph_u64
+sph_bswap64(sph_u64 x)
+{
+	__asm__ __volatile__ ("bswapq %0" : "=r" (x) : "0" (x));
+	return x;
+}
+
+#endif
+
+/*
+ * Disabled code. Apparently, Microsoft Visual C 2005 is smart enough
+ * to generate proper opcodes for endianness swapping with the pure C
+ * implementation below.
+ *
+
+#elif SPH_I386_MSVC && !SPH_NO_ASM
+
+static __inline sph_u32 __declspec(naked) __fastcall
+sph_bswap32(sph_u32 x)
+{
+	__asm {
+		bswap  ecx
+		mov    eax,ecx
+		ret
+	}
+}
+
+#if SPH_64
+
+static SPH_INLINE sph_u64
+sph_bswap64(sph_u64 x)
+{
+	return ((sph_u64)sph_bswap32((sph_u32)x) << 32)
+		| (sph_u64)sph_bswap32((sph_u32)(x >> 32));
+}
+
+#endif
+
+ *
+ * [end of disabled code]
+ */
+
+#else
+
+static SPH_INLINE sph_u32
+sph_bswap32(sph_u32 x)
+{
+    x = SPH_T32((x << 16) | (x >> 16));
+    x = ((x & SPH_C32(0xFF00FF00)) >> 8)
+        | ((x & SPH_C32(0x00FF00FF)) << 8);
+    return x;
+}
+
+#if SPH_64
+
+/**
+ * Byte-swap a 64-bit value.
+ *
+ * @param x   the input value
+ * @return  the byte-swapped value
+ */
+static SPH_INLINE sph_u64
+sph_bswap64(sph_u64 x)
+{
+	x = SPH_T64((x << 32) | (x >> 32));
+	x = ((x & SPH_C64(0xFFFF0000FFFF0000)) >> 16)
+		| ((x & SPH_C64(0x0000FFFF0000FFFF)) << 16);
+	x = ((x & SPH_C64(0xFF00FF00FF00FF00)) >> 8)
+		| ((x & SPH_C64(0x00FF00FF00FF00FF)) << 8);
+	return x;
+}
+
+#endif
+
+#endif
+
+#if SPH_SPARCV9_GCC && !SPH_NO_ASM
+
+/*
+ * On UltraSPARC systems, native ordering is big-endian, but it is
+ * possible to perform little-endian read accesses by specifying the
+ * address space 0x88 (ASI_PRIMARY_LITTLE). Basically, either we use
+ * the opcode "lda [%reg]0x88,%dst", where %reg is the register which
+ * contains the source address and %dst is the destination register,
+ * or we use "lda [%reg+imm]%asi,%dst", which uses the %asi register
+ * to get the address space name. The latter format is better since it
+ * combines an addition and the actual access in a single opcode; but
+ * it requires the setting (and subsequent resetting) of %asi, which is
+ * slow. Some operations (i.e. MD5 compression function) combine many
+ * successive little-endian read accesses, which may share the same
+ * %asi setting. The macros below contain the appropriate inline
+ * assembly.
+ */
+
+#define SPH_SPARCV9_SET_ASI   \
+	sph_u32 sph_sparcv9_asi; \
+	__asm__ __volatile__ ( \
+		"rd %%asi,%0\n\twr %%g0,0x88,%%asi" : "=r" (sph_sparcv9_asi));
+
+#define SPH_SPARCV9_RESET_ASI  \
+	__asm__ __volatile__ ("wr %%g0,%0,%%asi" : : "r" (sph_sparcv9_asi));
+
+#define SPH_SPARCV9_DEC32LE(base, idx)   ({ \
+		sph_u32 sph_sparcv9_tmp; \
+		__asm__ __volatile__ ("lda [%1+" #idx "*4]%%asi,%0" \
+			: "=r" (sph_sparcv9_tmp) : "r" (base)); \
+		sph_sparcv9_tmp; \
+	})
+
+#endif
+
+static SPH_INLINE void
+sph_enc16be(void *dst, unsigned val)
+{
+    ((unsigned char *)dst)[0] = (val >> 8);
+    ((unsigned char *)dst)[1] = val;
+}
+
+static SPH_INLINE unsigned
+sph_dec16be(const void *src)
+{
+    return ((unsigned)(((const unsigned char *)src)[0]) << 8)
+           | (unsigned)(((const unsigned char *)src)[1]);
+}
+
+static SPH_INLINE void
+sph_enc16le(void *dst, unsigned val)
+{
+    ((unsigned char *)dst)[0] = val;
+    ((unsigned char *)dst)[1] = val >> 8;
+}
+
+static SPH_INLINE unsigned
+sph_dec16le(const void *src)
+{
+    return (unsigned)(((const unsigned char *)src)[0])
+           | ((unsigned)(((const unsigned char *)src)[1]) << 8);
+}
+
+/**
+ * Encode a 32-bit value into the provided buffer (big endian convention).
+ *
+ * @param dst   the destination buffer
+ * @param val   the 32-bit value to encode
+ */
+static SPH_INLINE void
+sph_enc32be(void *dst, sph_u32 val)
+{
+#if defined SPH_UPTR
+    #if SPH_UNALIGNED
+#if SPH_LITTLE_ENDIAN
+	val = sph_bswap32(val);
+#endif
+	*(sph_u32 *)dst = val;
+#else
+	if (((SPH_UPTR)dst & 3) == 0) {
+#if SPH_LITTLE_ENDIAN
+		val = sph_bswap32(val);
+#endif
+		*(sph_u32 *)dst = val;
+	} else {
+		((unsigned char *)dst)[0] = (val >> 24);
+		((unsigned char *)dst)[1] = (val >> 16);
+		((unsigned char *)dst)[2] = (val >> 8);
+		((unsigned char *)dst)[3] = val;
+	}
+#endif
+#else
+    ((unsigned char *)dst)[0] = (val >> 24);
+    ((unsigned char *)dst)[1] = (val >> 16);
+    ((unsigned char *)dst)[2] = (val >> 8);
+    ((unsigned char *)dst)[3] = val;
+#endif
+}
+
+/**
+ * Encode a 32-bit value into the provided buffer (big endian convention).
+ * The destination buffer must be properly aligned.
+ *
+ * @param dst   the destination buffer (32-bit aligned)
+ * @param val   the value to encode
+ */
+static SPH_INLINE void
+sph_enc32be_aligned(void *dst, sph_u32 val)
+{
+#if SPH_LITTLE_ENDIAN
+    *(sph_u32 *)dst = sph_bswap32(val);
+#elif SPH_BIG_ENDIAN
+    *(sph_u32 *)dst = val;
+#else
+    ((unsigned char *)dst)[0] = (val >> 24);
+    ((unsigned char *)dst)[1] = (val >> 16);
+    ((unsigned char *)dst)[2] = (val >> 8);
+    ((unsigned char *)dst)[3] = val;
+#endif
+}
+
+/**
+ * Decode a 32-bit value from the provided buffer (big endian convention).
+ *
+ * @param src   the source buffer
+ * @return  the decoded value
+ */
+static SPH_INLINE sph_u32
+sph_dec32be(const void *src)
+{
+#if defined SPH_UPTR
+    #if SPH_UNALIGNED
+#if SPH_LITTLE_ENDIAN
+	return sph_bswap32(*(const sph_u32 *)src);
+#else
+	return *(const sph_u32 *)src;
+#endif
+#else
+	if (((SPH_UPTR)src & 3) == 0) {
+#if SPH_LITTLE_ENDIAN
+		return sph_bswap32(*(const sph_u32 *)src);
+#else
+		return *(const sph_u32 *)src;
+#endif
+	} else {
+		return ((sph_u32)(((const unsigned char *)src)[0]) << 24)
+			| ((sph_u32)(((const unsigned char *)src)[1]) << 16)
+			| ((sph_u32)(((const unsigned char *)src)[2]) << 8)
+			| (sph_u32)(((const unsigned char *)src)[3]);
+	}
+#endif
+#else
+    return ((sph_u32)(((const unsigned char *)src)[0]) << 24)
+           | ((sph_u32)(((const unsigned char *)src)[1]) << 16)
+           | ((sph_u32)(((const unsigned char *)src)[2]) << 8)
+           | (sph_u32)(((const unsigned char *)src)[3]);
+#endif
+}
+
+/**
+ * Decode a 32-bit value from the provided buffer (big endian convention).
+ * The source buffer must be properly aligned.
+ *
+ * @param src   the source buffer (32-bit aligned)
+ * @return  the decoded value
+ */
+static SPH_INLINE sph_u32
+sph_dec32be_aligned(const void *src)
+{
+#if SPH_LITTLE_ENDIAN
+    return sph_bswap32(*(const sph_u32 *)src);
+#elif SPH_BIG_ENDIAN
+    return *(const sph_u32 *)src;
+#else
+    return ((sph_u32)(((const unsigned char *)src)[0]) << 24)
+           | ((sph_u32)(((const unsigned char *)src)[1]) << 16)
+           | ((sph_u32)(((const unsigned char *)src)[2]) << 8)
+           | (sph_u32)(((const unsigned char *)src)[3]);
+#endif
+}
+
+/**
+ * Encode a 32-bit value into the provided buffer (little endian convention).
+ *
+ * @param dst   the destination buffer
+ * @param val   the 32-bit value to encode
+ */
+static SPH_INLINE void
+sph_enc32le(void *dst, sph_u32 val)
+{
+#if defined SPH_UPTR
+    #if SPH_UNALIGNED
+#if SPH_BIG_ENDIAN
+	val = sph_bswap32(val);
+#endif
+	*(sph_u32 *)dst = val;
+#else
+	if (((SPH_UPTR)dst & 3) == 0) {
+#if SPH_BIG_ENDIAN
+		val = sph_bswap32(val);
+#endif
+		*(sph_u32 *)dst = val;
+	} else {
+		((unsigned char *)dst)[0] = val;
+		((unsigned char *)dst)[1] = (val >> 8);
+		((unsigned char *)dst)[2] = (val >> 16);
+		((unsigned char *)dst)[3] = (val >> 24);
+	}
+#endif
+#else
+    ((unsigned char *)dst)[0] = val;
+    ((unsigned char *)dst)[1] = (val >> 8);
+    ((unsigned char *)dst)[2] = (val >> 16);
+    ((unsigned char *)dst)[3] = (val >> 24);
+#endif
+}
+
+/**
+ * Encode a 32-bit value into the provided buffer (little endian convention).
+ * The destination buffer must be properly aligned.
+ *
+ * @param dst   the destination buffer (32-bit aligned)
+ * @param val   the value to encode
+ */
+static SPH_INLINE void
+sph_enc32le_aligned(void *dst, sph_u32 val)
+{
+#if SPH_LITTLE_ENDIAN
+    *(sph_u32 *)dst = val;
+#elif SPH_BIG_ENDIAN
+    *(sph_u32 *)dst = sph_bswap32(val);
+#else
+    ((unsigned char *)dst)[0] = val;
+    ((unsigned char *)dst)[1] = (val >> 8);
+    ((unsigned char *)dst)[2] = (val >> 16);
+    ((unsigned char *)dst)[3] = (val >> 24);
+#endif
+}
+
+/**
+ * Decode a 32-bit value from the provided buffer (little endian convention).
+ *
+ * @param src   the source buffer
+ * @return  the decoded value
+ */
+static SPH_INLINE sph_u32
+sph_dec32le(const void *src)
+{
+#if defined SPH_UPTR
+    #if SPH_UNALIGNED
+#if SPH_BIG_ENDIAN
+	return sph_bswap32(*(const sph_u32 *)src);
+#else
+	return *(const sph_u32 *)src;
+#endif
+#else
+	if (((SPH_UPTR)src & 3) == 0) {
+#if SPH_BIG_ENDIAN
+#if SPH_SPARCV9_GCC && !SPH_NO_ASM
+		sph_u32 tmp;
+
+		/*
+		 * "__volatile__" is needed here because without it,
+		 * gcc-3.4.3 miscompiles the code and performs the
+		 * access before the test on the address, thus triggering
+		 * a bus error...
+		 */
+		__asm__ __volatile__ (
+			"lda [%1]0x88,%0" : "=r" (tmp) : "r" (src));
+		return tmp;
+/*
+ * On PowerPC, this turns out not to be worth the effort: the inline
+ * assembly makes GCC optimizer uncomfortable, which tends to nullify
+ * the decoding gains.
+ *
+ * For most hash functions, using this inline assembly trick changes
+ * hashing speed by less than 5% and often _reduces_ it. The biggest
+ * gains are for MD4 (+11%) and CubeHash (+30%). For all others, it is
+ * less then 10%. The speed gain on CubeHash is probably due to the
+ * chronic shortage of registers that CubeHash endures; for the other
+ * functions, the generic code appears to be efficient enough already.
+ *
+#elif (SPH_PPC32_GCC || SPH_PPC64_GCC) && !SPH_NO_ASM
+		sph_u32 tmp;
+
+		__asm__ __volatile__ (
+			"lwbrx %0,0,%1" : "=r" (tmp) : "r" (src));
+		return tmp;
+ */
+#else
+		return sph_bswap32(*(const sph_u32 *)src);
+#endif
+#else
+		return *(const sph_u32 *)src;
+#endif
+	} else {
+		return (sph_u32)(((const unsigned char *)src)[0])
+			| ((sph_u32)(((const unsigned char *)src)[1]) << 8)
+			| ((sph_u32)(((const unsigned char *)src)[2]) << 16)
+			| ((sph_u32)(((const unsigned char *)src)[3]) << 24);
+	}
+#endif
+#else
+    return (sph_u32)(((const unsigned char *)src)[0])
+           | ((sph_u32)(((const unsigned char *)src)[1]) << 8)
+           | ((sph_u32)(((const unsigned char *)src)[2]) << 16)
+           | ((sph_u32)(((const unsigned char *)src)[3]) << 24);
+#endif
+}
+
+/**
+ * Decode a 32-bit value from the provided buffer (little endian convention).
+ * The source buffer must be properly aligned.
+ *
+ * @param src   the source buffer (32-bit aligned)
+ * @return  the decoded value
+ */
+static SPH_INLINE sph_u32
+sph_dec32le_aligned(const void *src)
+{
+#if SPH_LITTLE_ENDIAN
+    return *(const sph_u32 *)src;
+#elif SPH_BIG_ENDIAN
+    #if SPH_SPARCV9_GCC && !SPH_NO_ASM
+	sph_u32 tmp;
+
+	__asm__ __volatile__ ("lda [%1]0x88,%0" : "=r" (tmp) : "r" (src));
+	return tmp;
+/*
+ * Not worth it generally.
+ *
+#elif (SPH_PPC32_GCC || SPH_PPC64_GCC) && !SPH_NO_ASM
+	sph_u32 tmp;
+
+	__asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (tmp) : "r" (src));
+	return tmp;
+ */
+#else
+	return sph_bswap32(*(const sph_u32 *)src);
+#endif
+#else
+    return (sph_u32)(((const unsigned char *)src)[0])
+           | ((sph_u32)(((const unsigned char *)src)[1]) << 8)
+           | ((sph_u32)(((const unsigned char *)src)[2]) << 16)
+           | ((sph_u32)(((const unsigned char *)src)[3]) << 24);
+#endif
+}
+
+#if SPH_64
+
+/**
+ * Encode a 64-bit value into the provided buffer (big endian convention).
+ *
+ * @param dst   the destination buffer
+ * @param val   the 64-bit value to encode
+ */
+static SPH_INLINE void
+sph_enc64be(void *dst, sph_u64 val)
+{
+#if defined SPH_UPTR
+#if SPH_UNALIGNED
+#if SPH_LITTLE_ENDIAN
+	val = sph_bswap64(val);
+#endif
+	*(sph_u64 *)dst = val;
+#else
+	if (((SPH_UPTR)dst & 7) == 0) {
+#if SPH_LITTLE_ENDIAN
+		val = sph_bswap64(val);
+#endif
+		*(sph_u64 *)dst = val;
+	} else {
+		((unsigned char *)dst)[0] = (val >> 56);
+		((unsigned char *)dst)[1] = (val >> 48);
+		((unsigned char *)dst)[2] = (val >> 40);
+		((unsigned char *)dst)[3] = (val >> 32);
+		((unsigned char *)dst)[4] = (val >> 24);
+		((unsigned char *)dst)[5] = (val >> 16);
+		((unsigned char *)dst)[6] = (val >> 8);
+		((unsigned char *)dst)[7] = val;
+	}
+#endif
+#else
+	((unsigned char *)dst)[0] = (val >> 56);
+	((unsigned char *)dst)[1] = (val >> 48);
+	((unsigned char *)dst)[2] = (val >> 40);
+	((unsigned char *)dst)[3] = (val >> 32);
+	((unsigned char *)dst)[4] = (val >> 24);
+	((unsigned char *)dst)[5] = (val >> 16);
+	((unsigned char *)dst)[6] = (val >> 8);
+	((unsigned char *)dst)[7] = val;
+#endif
+}
+
+/**
+ * Encode a 64-bit value into the provided buffer (big endian convention).
+ * The destination buffer must be properly aligned.
+ *
+ * @param dst   the destination buffer (64-bit aligned)
+ * @param val   the value to encode
+ */
+static SPH_INLINE void
+sph_enc64be_aligned(void *dst, sph_u64 val)
+{
+#if SPH_LITTLE_ENDIAN
+	*(sph_u64 *)dst = sph_bswap64(val);
+#elif SPH_BIG_ENDIAN
+	*(sph_u64 *)dst = val;
+#else
+	((unsigned char *)dst)[0] = (val >> 56);
+	((unsigned char *)dst)[1] = (val >> 48);
+	((unsigned char *)dst)[2] = (val >> 40);
+	((unsigned char *)dst)[3] = (val >> 32);
+	((unsigned char *)dst)[4] = (val >> 24);
+	((unsigned char *)dst)[5] = (val >> 16);
+	((unsigned char *)dst)[6] = (val >> 8);
+	((unsigned char *)dst)[7] = val;
+#endif
+}
+
+/**
+ * Decode a 64-bit value from the provided buffer (big endian convention).
+ *
+ * @param src   the source buffer
+ * @return  the decoded value
+ */
+static SPH_INLINE sph_u64
+sph_dec64be(const void *src)
+{
+#if defined SPH_UPTR
+#if SPH_UNALIGNED
+#if SPH_LITTLE_ENDIAN
+	return sph_bswap64(*(const sph_u64 *)src);
+#else
+	return *(const sph_u64 *)src;
+#endif
+#else
+	if (((SPH_UPTR)src & 7) == 0) {
+#if SPH_LITTLE_ENDIAN
+		return sph_bswap64(*(const sph_u64 *)src);
+#else
+		return *(const sph_u64 *)src;
+#endif
+	} else {
+		return ((sph_u64)(((const unsigned char *)src)[0]) << 56)
+			| ((sph_u64)(((const unsigned char *)src)[1]) << 48)
+			| ((sph_u64)(((const unsigned char *)src)[2]) << 40)
+			| ((sph_u64)(((const unsigned char *)src)[3]) << 32)
+			| ((sph_u64)(((const unsigned char *)src)[4]) << 24)
+			| ((sph_u64)(((const unsigned char *)src)[5]) << 16)
+			| ((sph_u64)(((const unsigned char *)src)[6]) << 8)
+			| (sph_u64)(((const unsigned char *)src)[7]);
+	}
+#endif
+#else
+	return ((sph_u64)(((const unsigned char *)src)[0]) << 56)
+		| ((sph_u64)(((const unsigned char *)src)[1]) << 48)
+		| ((sph_u64)(((const unsigned char *)src)[2]) << 40)
+		| ((sph_u64)(((const unsigned char *)src)[3]) << 32)
+		| ((sph_u64)(((const unsigned char *)src)[4]) << 24)
+		| ((sph_u64)(((const unsigned char *)src)[5]) << 16)
+		| ((sph_u64)(((const unsigned char *)src)[6]) << 8)
+		| (sph_u64)(((const unsigned char *)src)[7]);
+#endif
+}
+
+/**
+ * Decode a 64-bit value from the provided buffer (big endian convention).
+ * The source buffer must be properly aligned.
+ *
+ * @param src   the source buffer (64-bit aligned)
+ * @return  the decoded value
+ */
+static SPH_INLINE sph_u64
+sph_dec64be_aligned(const void *src)
+{
+#if SPH_LITTLE_ENDIAN
+	return sph_bswap64(*(const sph_u64 *)src);
+#elif SPH_BIG_ENDIAN
+	return *(const sph_u64 *)src;
+#else
+	return ((sph_u64)(((const unsigned char *)src)[0]) << 56)
+		| ((sph_u64)(((const unsigned char *)src)[1]) << 48)
+		| ((sph_u64)(((const unsigned char *)src)[2]) << 40)
+		| ((sph_u64)(((const unsigned char *)src)[3]) << 32)
+		| ((sph_u64)(((const unsigned char *)src)[4]) << 24)
+		| ((sph_u64)(((const unsigned char *)src)[5]) << 16)
+		| ((sph_u64)(((const unsigned char *)src)[6]) << 8)
+		| (sph_u64)(((const unsigned char *)src)[7]);
+#endif
+}
+
+/**
+ * Encode a 64-bit value into the provided buffer (little endian convention).
+ *
+ * @param dst   the destination buffer
+ * @param val   the 64-bit value to encode
+ */
+static SPH_INLINE void
+sph_enc64le(void *dst, sph_u64 val)
+{
+#if defined SPH_UPTR
+#if SPH_UNALIGNED
+#if SPH_BIG_ENDIAN
+	val = sph_bswap64(val);
+#endif
+	*(sph_u64 *)dst = val;
+#else
+	if (((SPH_UPTR)dst & 7) == 0) {
+#if SPH_BIG_ENDIAN
+		val = sph_bswap64(val);
+#endif
+		*(sph_u64 *)dst = val;
+	} else {
+		((unsigned char *)dst)[0] = val;
+		((unsigned char *)dst)[1] = (val >> 8);
+		((unsigned char *)dst)[2] = (val >> 16);
+		((unsigned char *)dst)[3] = (val >> 24);
+		((unsigned char *)dst)[4] = (val >> 32);
+		((unsigned char *)dst)[5] = (val >> 40);
+		((unsigned char *)dst)[6] = (val >> 48);
+		((unsigned char *)dst)[7] = (val >> 56);
+	}
+#endif
+#else
+	((unsigned char *)dst)[0] = val;
+	((unsigned char *)dst)[1] = (val >> 8);
+	((unsigned char *)dst)[2] = (val >> 16);
+	((unsigned char *)dst)[3] = (val >> 24);
+	((unsigned char *)dst)[4] = (val >> 32);
+	((unsigned char *)dst)[5] = (val >> 40);
+	((unsigned char *)dst)[6] = (val >> 48);
+	((unsigned char *)dst)[7] = (val >> 56);
+#endif
+}
+
+/**
+ * Encode a 64-bit value into the provided buffer (little endian convention).
+ * The destination buffer must be properly aligned.
+ *
+ * @param dst   the destination buffer (64-bit aligned)
+ * @param val   the value to encode
+ */
+static SPH_INLINE void
+sph_enc64le_aligned(void *dst, sph_u64 val)
+{
+#if SPH_LITTLE_ENDIAN
+	*(sph_u64 *)dst = val;
+#elif SPH_BIG_ENDIAN
+	*(sph_u64 *)dst = sph_bswap64(val);
+#else
+	((unsigned char *)dst)[0] = val;
+	((unsigned char *)dst)[1] = (val >> 8);
+	((unsigned char *)dst)[2] = (val >> 16);
+	((unsigned char *)dst)[3] = (val >> 24);
+	((unsigned char *)dst)[4] = (val >> 32);
+	((unsigned char *)dst)[5] = (val >> 40);
+	((unsigned char *)dst)[6] = (val >> 48);
+	((unsigned char *)dst)[7] = (val >> 56);
+#endif
+}
+
+/**
+ * Decode a 64-bit value from the provided buffer (little endian convention).
+ *
+ * @param src   the source buffer
+ * @return  the decoded value
+ */
+static SPH_INLINE sph_u64
+sph_dec64le(const void *src)
+{
+#if defined SPH_UPTR
+#if SPH_UNALIGNED
+#if SPH_BIG_ENDIAN
+	return sph_bswap64(*(const sph_u64 *)src);
+#else
+	return *(const sph_u64 *)src;
+#endif
+#else
+	if (((SPH_UPTR)src & 7) == 0) {
+#if SPH_BIG_ENDIAN
+#if SPH_SPARCV9_GCC_64 && !SPH_NO_ASM
+		sph_u64 tmp;
+
+		__asm__ __volatile__ (
+			"ldxa [%1]0x88,%0" : "=r" (tmp) : "r" (src));
+		return tmp;
+/*
+ * Not worth it generally.
+ *
+#elif SPH_PPC32_GCC && !SPH_NO_ASM
+		return (sph_u64)sph_dec32le_aligned(src)
+			| ((sph_u64)sph_dec32le_aligned(
+				(const char *)src + 4) << 32);
+#elif SPH_PPC64_GCC && !SPH_NO_ASM
+		sph_u64 tmp;
+
+		__asm__ __volatile__ (
+			"ldbrx %0,0,%1" : "=r" (tmp) : "r" (src));
+		return tmp;
+ */
+#else
+		return sph_bswap64(*(const sph_u64 *)src);
+#endif
+#else
+		return *(const sph_u64 *)src;
+#endif
+	} else {
+		return (sph_u64)(((const unsigned char *)src)[0])
+			| ((sph_u64)(((const unsigned char *)src)[1]) << 8)
+			| ((sph_u64)(((const unsigned char *)src)[2]) << 16)
+			| ((sph_u64)(((const unsigned char *)src)[3]) << 24)
+			| ((sph_u64)(((const unsigned char *)src)[4]) << 32)
+			| ((sph_u64)(((const unsigned char *)src)[5]) << 40)
+			| ((sph_u64)(((const unsigned char *)src)[6]) << 48)
+			| ((sph_u64)(((const unsigned char *)src)[7]) << 56);
+	}
+#endif
+#else
+	return (sph_u64)(((const unsigned char *)src)[0])
+		| ((sph_u64)(((const unsigned char *)src)[1]) << 8)
+		| ((sph_u64)(((const unsigned char *)src)[2]) << 16)
+		| ((sph_u64)(((const unsigned char *)src)[3]) << 24)
+		| ((sph_u64)(((const unsigned char *)src)[4]) << 32)
+		| ((sph_u64)(((const unsigned char *)src)[5]) << 40)
+		| ((sph_u64)(((const unsigned char *)src)[6]) << 48)
+		| ((sph_u64)(((const unsigned char *)src)[7]) << 56);
+#endif
+}
+
+/**
+ * Decode a 64-bit value from the provided buffer (little endian convention).
+ * The source buffer must be properly aligned.
+ *
+ * @param src   the source buffer (64-bit aligned)
+ * @return  the decoded value
+ */
+static SPH_INLINE sph_u64
+sph_dec64le_aligned(const void *src)
+{
+#if SPH_LITTLE_ENDIAN
+	return *(const sph_u64 *)src;
+#elif SPH_BIG_ENDIAN
+#if SPH_SPARCV9_GCC_64 && !SPH_NO_ASM
+	sph_u64 tmp;
+
+	__asm__ __volatile__ ("ldxa [%1]0x88,%0" : "=r" (tmp) : "r" (src));
+	return tmp;
+/*
+ * Not worth it generally.
+ *
+#elif SPH_PPC32_GCC && !SPH_NO_ASM
+	return (sph_u64)sph_dec32le_aligned(src)
+		| ((sph_u64)sph_dec32le_aligned((const char *)src + 4) << 32);
+#elif SPH_PPC64_GCC && !SPH_NO_ASM
+	sph_u64 tmp;
+
+	__asm__ __volatile__ ("ldbrx %0,0,%1" : "=r" (tmp) : "r" (src));
+	return tmp;
+ */
+#else
+	return sph_bswap64(*(const sph_u64 *)src);
+#endif
+#else
+	return (sph_u64)(((const unsigned char *)src)[0])
+		| ((sph_u64)(((const unsigned char *)src)[1]) << 8)
+		| ((sph_u64)(((const unsigned char *)src)[2]) << 16)
+		| ((sph_u64)(((const unsigned char *)src)[3]) << 24)
+		| ((sph_u64)(((const unsigned char *)src)[4]) << 32)
+		| ((sph_u64)(((const unsigned char *)src)[5]) << 40)
+		| ((sph_u64)(((const unsigned char *)src)[6]) << 48)
+		| ((sph_u64)(((const unsigned char *)src)[7]) << 56);
+#endif
+}
+
+#endif
+
+#endif /* Doxygen excluded block */
+
+#endif
\ No newline at end of file
diff --git a/test.js b/test.js
new file mode 100644
index 0000000..4a106f3
--- /dev/null
+++ b/test.js
@@ -0,0 +1,20 @@
+'use strict';
+
+const phi1 = require('./index.js');
+const INPUT = 'ab783e17a48c060f9899b7287bf66bc1e9c13eef4e61745db03d0534a3ecb58074c6890ff98cf6295fc8b2deb4172e0f4490e4e3f3d720f0fd22491340f35913';
+const EXPECTED_HASH = 'cab83ca8c94af98db37f62234dca35efdbfe025ca420e47856f3efcf1b806b41';
+
+process.title = 'phi1-test';
+
+const input = Buffer.from(INPUT, 'hex');
+
+console.log(`Expected:    ${EXPECTED_HASH}`)
+
+const hash = phi1.phi1612(input).toString('hex');
+
+console.log(`Hash result: ${hash}`);
+
+if (hash !== EXPECTED_HASH)
+    throw new Error(`Invalid hash result.`);
+
+console.log('Test complete.');
\ No newline at end of file